Repository: Eelis/cxxdraft-htmlgen
Branch: master
Commit: 63bc5929dc4f
Files: 28
Total size: 213.0 KB

Directory structure:
gitextract_qdd768we/

├── .gitignore
├── 14882.css
├── CxxParser.hs
├── Document.hs
├── LICENSE
├── LaTeXBase.hs
├── LaTeXParser.hs
├── Load14882.hs
├── MathJax.hs
├── Pages.hs
├── README
├── RawDocument.hs
├── Render.hs
├── SectionPages.hs
├── Sentences.hs
├── Setup.hs
├── Toc.hs
├── Util.hs
├── colored.css
├── cxxdraft-htmlgen.cabal
├── expanded.css
├── fulltoc.css
├── genhtml.hs
├── macros.tex
├── mathjax-batch
├── normative-only.css
├── stack.yaml
└── toc.css

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
14882
dist
tags
node_modules
.stack-work/


================================================
FILE: 14882.css
================================================
body {
	font-family: 'Noto Serif';
	hyphens: auto;
	line-height: 1.5;
	margin-left: 20mm;
	margin-right: 16mm;
	margin-top: 12mm;
	margin-bottom: 12mm;
	font-size: 10pt;
}

div {
	background: inherit;
}

div.wrapper {
	max-width: 20cm;
	margin: auto;
}

div.texpara {
	text-align: justify;
	margin-top: 3pt;
	margin-bottom: 3pt;
}

table div.texpara {
	margin-top: 0;
	margin-bottom: 0;
}

table.enumerate div.texpara {
	margin-top: 3pt;
	margin-bottom: 3pt;
}

ul {
	list-style-type: none;
	padding-left: 9mm;
	margin-top: 0;
	margin-bottom: 0;
}

ol {
	margin-top: 0;
	margin-bottom: 0;
}

a { text-decoration: none; }

a.hidden_link {
	text-decoration: none;
	color: inherit;
}

li {
	margin-top: 3pt;
	margin-bottom: 3pt;
}

h1 {
	line-height: 1;
	font-size: 20pt;
	margin-top: 10pt;
	margin-bottom: 10pt;
}

h2 {
	line-height: 1;
	font-size: 16pt;
	margin-top: 10pt;
	margin-bottom: 10pt;
}

h2::after {
    content: "";
    clear: both;
    display: table;
}

h3 {
	line-height: 1;
	font-size: 12pt;
	margin-top: 10pt;
	margin-bottom: 10pt;
}

h3::after {
    content: "";
    clear: both;
    display: table;
}

h4 {
	line-height: 1;
	font-size: 11pt;
	margin-top: 10pt;
	margin-bottom: 10pt;
}

h4::after {
    content: "";
    clear: both;
    display: table;
}

ul > li:before {
	content: "\2014";
	position: absolute;
	margin-left: -1.5em; 
}

.shy:before {
	content: "\00ad";
	/* This is U+00AD SOFT HYPHEN, same as &shy, but we put it in :before
	to stop it from being included when the text is copied to the clipboard
	with Firefox, which is especially annoying when copying to a terminal,
	where the hyphen characters will show up. */
}

:target { background-color: #C9FBC9; }
:target .codeblock { background-color: #C9FBC9; }
:target ul { background-color: #C9FBC9; }

.abbr_ref { float: right; }

.folded_abbr_ref { float: right; }
:target .folded_abbr_ref { display: none; }

:target .unfolded_abbr_ref { float: right; display: inherit; }
.unfolded_abbr_ref { display: none; }

.secnum { display: inline-block; min-width: 35pt; }
.annexnum { display: block; }

div.sourceLinkParent {
	float: right;
}

a.sourceLink {
	position: absolute;
	opacity: 0;
	margin-left: 10pt;
}

a.sourceLink:hover {
	opacity: 1;
}

a.itemDeclLink {
	position: absolute;
	font-size: 75%;
	text-align: right;
	width: 5em;
	opacity: 0;
}
a.itemDeclLink:hover { opacity: 1; }

div.marginalizedparent {
	position: relative;
	text-align: left;
	left: -18mm;
}

a.marginalized {
	width: 15mm;
	position: absolute;
	top: 0.4mm;
	font-size: 7pt;
	text-align: right;
}

a.enumerated_item_num {
	display: block;
	margin-top: 3pt;
	margin-bottom: 3pt;
	margin-right: 6pt;
}

div.para {
	position: relative;
	left: -14mm;
	padding-left: 14mm;
	width: 100%;
	margin-bottom: 6pt;
	margin-top: 6pt;
	text-align: justify;
	min-height: 1.2em;
}

div.section { text-align: justify; }
div.sentence { display: inline; }

a.index {
	position: relative;
	float: right;
	right: -1em;
	display: none;
}

a.index:before {
	position: absolute;
	content: "⟵";
	background-color: #C9FBC9;
}

a.index:target {
	display: inline;
}

.indexitems {
	margin-left: 2em;
	text-indent: -2em;
}

div.itemdescr {
	margin-left: 12mm;
}

.bnf {
	font-family: 'Noto Sans';
	font-size: 10pt;
	font-style: italic;
	margin-left: 25pt;
	margin-right: -15mm;
	margin-top: 0.5em;
	margin-bottom: 0.5em;
	text-indent: -3em;
	padding-left: 3em;
	line-height: 1.5;
}

div.bnf span.texttt { font-family: 'Noto Sans Mono'; font-style: normal; }

.rebnf {
	font-family: 'Noto Serif';
	font-style: italic;
	margin-top: 0.5em;
	margin-bottom: 0.5em;
	margin-left: 30pt;
	text-indent: -3em;
	padding-left: 3em;
	line-height: 1.5;
}

.simplebnf {
	font-family: 'Noto Serif';
	font-style: italic;
	font-size: 10pt;
	margin-top: 0.5em;
	margin-bottom: 0.5em;
	margin-left: 30pt;
	line-height: 1.5;
}

span.textnormal {
	font-style: normal;
	font-family: 'Noto Serif';
	font-size: 10pt;
	white-space: normal;
}

.bnf span.textnormal {
	font-style: normal;
	font-family: 'Noto Serif';
	font-size: 10pt;
	white-space: normal;
}

p {
	margin-top: 4pt;
	margin-bottom: 4pt;
}

span.rlap {
    display: inline-block;
    width: 0px;
    text-indent: 0;
}

span.terminal {
	font-family: 'Noto Sans Mono';
	font-style: normal;
	font-size: 9pt;
	white-space: pre-wrap;
}

span.noncxxterminal {
	font-family: 'Noto Sans Mono';
	font-style: normal;
	font-size: 9pt;
}

span.term { font-style: italic; }
span.tcode { font-family: 'Noto Sans Mono'; font-style: normal; }
span.textbf { font-weight: bold; }
span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
div.footnote span.textsf { font-family: 'Noto Sans'; font-size: 8pt; }
.bnf span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
.simplebnf span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
.example span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
span.textsc { font-variant: small-caps; }
span.nontermdef { font-style: italic; font-family: 'Noto Sans'; font-size: 10pt; }
span.emph { font-style: italic; }
span.techterm { font-style: italic; }
span.mathit { font-style: italic; }
span.mathsf { font-family: 'Noto Sans'; }
span.mathrm { font-family: 'Noto Serif'; font-style: normal; }
span.textrm { font-family: 'Noto Serif'; font-size: 10pt; }
span.textsl { font-style: italic; }
span.mathtt { font-family: 'Noto Sans Mono'; font-style: normal; }
span.mbox { font-family: 'Noto Serif'; font-style: normal; }
span.ungap { display: inline-block; width: 2pt; }
span.texttt { font-family: 'Noto Sans Mono'; }
span.textit { font-style: italic; }
div.footnote span.texttt { font-family: 'Noto Sans Mono'; }
span.tcode_in_codeblock { font-family: 'Noto Sans Mono'; font-style: normal; font-size: 9pt; }

span.phantom { color: white; }
	/* Unfortunately, this way the text is still selectable. Another
	option is display:none, but then we lose the nice layout.
	Todo: find proper solution. */

span.math {
	font-style: normal;
	font-family: 'Noto Serif';
	font-size: 10pt;
}

span.mathblock {
	display: block;
	margin-left: auto;
	margin-right: auto;
	margin-top: 1.2em;
	margin-bottom: 1.2em;
	text-align: center;
}

span.mathalpha {
	font-style: italic;
}

span.synopsis {
	font-weight: bold;
	margin-top: 0.5em;
	display: block;
}

span.definition {
	font-weight: bold;
	display: block;
}

.codeblock {
	font-family: 'Noto Sans Mono';
	margin-left: 1.2em;
	line-height: 1.5;
	font-size: 9pt;
	white-space: pre;
	display: block;
	margin-top: 3pt;
	margin-bottom: 3pt;
	overflow: auto;
	margin-right: -15mm;
}

table .codeblock { margin-right: 0; }

.outputblock {
	margin-left: 1.2em;
	line-height: 1.5;
	font-family: 'Noto Sans Mono';
	font-size: 9pt;
}

code {
	font-family: 'Noto Sans Mono';
	font-style: normal;
}

div.itemdecl {
	margin-top: 2ex;
}

code.itemdeclcode {
	white-space: pre;
	font-family: 'Noto Sans Mono';
	font-size: 9pt;
	display: block;
	overflow: auto;
	margin-right: -15mm;
}

.comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 10pt; }
.footnote .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 8pt; }
.example .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 9pt; }
.note .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 9pt; }

span.keyword { color: #00607c; font-style: normal; }
span.parenthesis { color: #af1915; }
span.curlybracket { color: #af1915; }
span.squarebracket { color: #af1915; }
span.literal { color: #9F6807; }
span.literalterminal { color: #9F6807; font-family: 'Noto Sans Mono'; font-style: normal; }
span.operator { color: #570057; }
span.anglebracket { color: #570057; }
span.preprocessordirective { color: #6F4E37; }

span.textsuperscript {
	vertical-align: super;
	font-size: smaller;
	line-height: 0;
}

.footnoteref {
	vertical-align: super;
	font-size: smaller;
	line-height: 0;
}

.footnote {
	font-size: 8pt;
}

.footnote .math {
	font-size: 8pt;
}

.footnotenum {
	display: inline-block;
	text-align: right;
	margin-right: 1mm;
	width: 4ch;
}

.footnoteBacklink {
	display: none;
}

:target .footnoteBacklink {
	display: inline-block;
	text-align: right;
	margin-right: 1mm;
	width: 4ch;
}

:target .footnotenum {
	display: none;
}

.footnoteSeparator {
	background: black;
	margin-top: 5mm;
	height: 1px;
	width: 6cm;
}

div.minipage {
	display: inline-block;
	margin-right: 3em;
}

div.numberedTable {
	text-align: center;
	margin-left: 1em;
	margin-right: 1em;
	margin-bottom: 12pt;
	margin-top: 8pt;
}

div.figure {
	text-align: center;
	margin-left: 2em;
	margin-right: 2em;
	margin-bottom: 12pt;
	margin-top: 3pt;
}

table {
	border: 1px solid black;
	border-collapse: collapse;
	margin-left: auto;
	margin-right: auto;
	margin-top: 7pt;
	text-align: left;
}

td, th {
	padding-left: 8pt;
	padding-right: 8pt;
	vertical-align: top;
}

td.empty {
    padding: 0px;
    padding-left: 1px;
}

td.left {
	text-align: left;
}

td.hidden {
	padding: 0;
	width: 0;
}

td.right {
	text-align: right;
}

td.center {
	text-align: center;
}

td.justify {
	text-align: justify;
}

td.border {
	border-left: 1px solid black;
}

tr.rowsep, td.cline {
	border-top: 1px solid black;
}

tr.capsep {
	border-top: 3px solid black;
	border-top-style: double;
}

th {
	border-bottom: 1px solid black;
}

span.centry {
	font-weight: bold;
}

div.table {
	display: block;
	margin-left: auto;
	margin-right: auto;
	text-align: center;
	width: 90%;
}

span.indented {
	background: inherit;
	display: block;
	margin-left: 2em;
	margin-bottom: 1em;
	margin-top: 1em;
}

span.uppercase {
	text-transform: uppercase;
}

span.ucode {
	font-variant: small-caps;
	text-transform: uppercase;
	font-size: 90%;
}

span.uname {
	font-variant: small-caps;
	text-transform: uppercase;
	font-size: 90%;
}

table.enumerate {
	border: 0;
	margin: 0;
}

table.enumerate td {
	padding: 0;
}

table.enumerate td:first-child {
	width: 1cm;
	text-align: right;
}

@media (prefers-color-scheme: dark) {
    body {
        background-color: #171717;
        color: #e0e0e0;
    }

    span.mjx-mstyle { color: #e0e0e0 !important }

    a:link { color: #74bdff; }
    a:visited { color: #c38aff; }

    a.hidden_link {
        text-decoration: none;
        color: inherit;
    }

    span.phantom { color: #171717; }

    a.index:before { color: #e0e0e0; background-color: #4b6353; }

    .comment { color: #40f040; }
    .footnote .comment { color: #60ff60; }
    .example .comment { color: #60ff60; }
    .note .comment { color: #60ff60; }

    span.keyword { color: #32eade; }
    span.parenthesis { color: #ff7070; }
    span.curlybracket { color: #ff7070; }
    span.squarebracket { color: #ff7070; }
    span.literal { color: #ffd867; }
    span.literalterminal { color: #ffd867; }
    span.operator { color: #dac6d9; }
    span.anglebracket { color: #dac6d9; }
    span.preprocessordirective { color: #c28c68; }

    table { border-color: #e0e0e0; }
    td.border { border-color: #e0e0e0; }
    td.border { border-left-color: #e0e0e0; }
    tr.rowsep, td.cline { border-top-color: #e0e0e0; }
    tr.capsep { border-top-color: #e0e0e0; }
    th { border-bottom-color: #e0e0e0; }

    .footnoteSeparator { background-color: #e0e0e0; }

    text { fill: #e0e0e0; }
    path { stroke: #e0e0e0; }
    polygon { stroke: #e0e0e0; fill: #e0e0e0; }
    ellipse { stroke: #e0e0e0; }

    :target { background-color: #4b6345; color: #ffffff; }
    :target .codeblock { background-color: #4b6345; }
    :target ul { background-color: #4b6345; }
    :target a:link { color: #9fcdff; }
    :target a:visited { color: #d39aff; }
    :target a.hidden_link { text-decoration: none; color: inherit; }
    :target span.keyword { color: #52faee; }
    :target span.parenthesis { color: #ff4060; font-weight: bold; }
    :target span.curlybracket { color: #ff4060; font-weight: bold; }
    :target span.squarebracket { color: #ff4060; font-weight: bold; }
    :target span.literal { color: #ffe070; }
    :target span.literalterminal { color: #ffe070; }
    :target span.operator { color: #ffffff; }
    :target span.anglebracket { color: #ffffff; }
    :target span.preprocessordirective { color: #e0968f; }
    :target .comment { color: #75ff20; }
    :target .footnote .comment { color: #75ff20; }
    :target .example .comment { color: #75ff20; }
    :target .note .comment { color: #75ff20; }
}


================================================
FILE: CxxParser.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE
	OverloadedStrings,
	RecordWildCards,
	TupleSections,
	ViewPatterns,
	LambdaCase,
	TypeSynonymInstances,
	FlexibleInstances #-}

module CxxParser (parseLiteral, parseComment, parseCppDirective) where

import LaTeXBase (LaTeX, LaTeXUnit(..), ArgKind(..), concatRaws,
    texStripPrefix, texStripAnyPrefix, texStripInfix, texSpan, unconsRaw)
import qualified Data.Text as Text
import Data.Char (isAlpha, isSpace, isAlphaNum, isDigit)
import Control.Arrow (first)
import Prelude hiding ((.), (++))
import Util ((.), (++), Text)

texStripHash :: LaTeX -> Maybe LaTeX
texStripHash x
    | Just x' <- texStripPrefix "#" x = Just x'
    | TeXComm "#" _ [] : x' <- x = Just x'
    | otherwise = Nothing

cppDirectives :: [Text]
cppDirectives = Text.words "include define elifndef elifdef ifndef endif ifdef pragma error undef line elif warning else if embed"

spanLiteralChars :: String -> (String, String {- rest without the closing ' -})
spanLiteralChars [] = ([], [])
spanLiteralChars ('\\' : '\'' : rest) = first ("\\'"++) (spanLiteralChars rest)
spanLiteralChars ('\\' : '\\' : rest) = first ("\\\\"++) (spanLiteralChars rest)
spanLiteralChars ('\'' : x) = ([], x)
spanLiteralChars (c : rest) = first (c :) (spanLiteralChars rest)

parseLiteralChars :: LaTeX -> (LaTeX, LaTeX)
parseLiteralChars [] = ([], [])
parseLiteralChars (TeXRaw s : rest) = case spanLiteralChars (Text.unpack s) of
    (x, []) -> first (TeXRaw (Text.pack x) :) (parseLiteralChars rest)
    (x, more) -> ([TeXRaw (Text.pack x)], TeXRaw (Text.pack more) : rest)
parseLiteralChars (x : rest) = first (x :) (parseLiteralChars rest)

parseCharLiteral :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
parseCharLiteral x
    | Just (pre, x') <- texStripAnyPrefix ["'", "u'", "L'", "U'", "u8'"] x
    , (before, x'') <- parseLiteralChars x'
    , (suffix, x''') <- texSpan (\c -> isAlphaNum c || c == '_') x''
        = Just ([TeXRaw pre] ++ before ++ [TeXRaw $ "'" ++ suffix], x''')
    | otherwise = Nothing

parseCppDirective :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
parseCppDirective x
    | Just x'' <- texStripHash x
    , (spaces, x''') <- texSpan isSpace x''
    , Just (directive, x'''') <- texStripAnyPrefix cppDirectives x'''
        = Just ([TeXRaw ("#" ++ spaces ++ directive)], x'''')
    | otherwise = Nothing

parseSingleLineComment :: LaTeX -> Maybe (LaTeX {- comment -}, LaTeX {- subsequent lines -})
parseSingleLineComment x
    | Just x' <- texStripPrefix "//" x = Just $ case texStripInfix "\n" x' of
        Just (commentLine, moreLines) -> (TeXRaw "//" : commentLine, TeXRaw "\n" : moreLines)
        Nothing -> (x, [])
    | rlap@(TeXComm "rlap" _ [(FixArg, [TeXComm "textnormal" _ [(FixArg,[TeXComm "textit" _ [(FixArg,[TeXRaw "//"])]])]])]) : more <- x
    , Just (commentLine, moreLines) <- texStripInfix "\n" more
        = Just ([rlap, TeXComm "tcode" "" [(FixArg, commentLine)]], TeXRaw "\n" : moreLines)
    | TeXComm "comment" _ [(FixArg, c)] : x' <- x = Just (c, x')
    | otherwise = Nothing

fromTeXRaw :: LaTeXUnit -> Text
fromTeXRaw (TeXRaw x) = x
fromTeXRaw x = error $ "fromTeXRaw (" ++ show x ++ ")"

parseStringLiteral :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
parseStringLiteral x
    -- raw:
    | Just (pre, x') <- texStripAnyPrefix ["R\"", "u8R\"", "uR\"", "UR\"", "LR\""] x
    , Just (delim, x'') <- texStripInfix "(" x'
    , Just (body, x''') <- texStripInfix (")" ++ Text.concat (map fromTeXRaw delim) ++ "\"") (concatRaws $ f x'')
    , (suffix, x'''') <- texSpan (\c -> isAlphaNum c || c == '_') x'''
        = Just ([TeXRaw pre] ++ delim ++ [TeXRaw "("] ++ body ++ [TeXRaw ")"] ++ delim ++ [TeXRaw $ "\"" ++ suffix], x'''')
    -- normal:
    | Just (pre, x') <- texStripAnyPrefix ["\"", "u\"", "U\"", "L\"", "u8\""] x
    , Just (body, x'') <- parseBody x'
    , (suffix, x''') <- texSpan (\c -> isAlphaNum c || c == '_') x''
        = Just ([TeXRaw pre] ++ body ++ [TeXRaw $ "\"" ++ suffix], x''')
    | otherwise = Nothing
    where
        f :: LaTeX -> LaTeX
        f [] = []
        f (TeXComm "~" _ [] : more) = TeXRaw "~" : f more
        f (TeXBraces [] : more) = f more
        f (hd : t) = hd : f t
        parseBody :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
        parseBody [] = Nothing
        parseBody (TeXComm "textbackslash" _ [] : more) = parseBody $ concatRaws $ TeXRaw "\\" : more
        parseBody (TeXRaw (Text.unpack -> raw) : more)
            | '\\':'"':t <- raw = first (TeXRaw "\\\"" :) . parseBody (TeXRaw (Text.pack t) : more)
            | "\"" <- raw = Just ([], more)
            | '"':t <- raw = Just ([], TeXRaw (Text.pack t) : more)
            | raw == "" = parseBody more
            | hd:t <- raw = first (TeXRaw (Text.pack [hd]) :) . parseBody (TeXRaw (Text.pack t) : more)
        parseBody (TeXComm "%" ws [] : more) = first (TeXComm "%" ws [] :) . parseBody more
        parseBody (y : more) = first (y :) . parseBody more

parseNumber :: LaTeX -> Maybe (Text, LaTeX)
parseNumber x
    | (raw, more) <- unconsRaw x
    , Just (n, rest) <- (parseStart `parseSeq` (\t -> Just (parseMany parseSuffix t))) raw
        = Just (n, TeXRaw rest : more)
    | otherwise = Nothing
    where
        parseDigit = parseChar isDigit
        parseNonDigit = parseChar (\c -> isAlpha c || c == '_')
        parseStart :: Text -> Maybe (Text, Text)
        parseStart = parseFirstOf [parseChar (== '.') `parseSeq` parseDigit, parseDigit]
        parseSign :: Text -> Maybe (Text, Text)
        parseSign = parseChar (\c -> c == '-' || c == '+')
        parseSuffix :: Text -> Maybe (Text, Text)
        parseSuffix = parseFirstOf
            [ parseDigit
            , parseChar (== '\'') `parseSeq` parseDigit
            , parseChar (== '\'') `parseSeq` parseNonDigit
            , parseChar (`elem` ("eEpP"::String)) `parseSeq` parseSign
            , parseChar (== '.')
            , parseNonDigit
            ]

parseLiteral :: LaTeX -> Maybe (LaTeX, LaTeX)
parseLiteral x
    | Just (number, x') <- parseNumber x = Just ([TeXRaw number], x')
    | Just (lit, x') <- parseCharLiteral x = Just (lit, x')
    | Just (lit, x') <- parseStringLiteral x = Just (lit, x')
    | otherwise = Nothing

parseComment :: LaTeX -> Maybe (LaTeX, LaTeX)
parseComment x
    | Just x' <- texStripPrefix "/*" x, Just (comment, x'') <- texStripInfix "*/" x'
        = Just ([TeXRaw "/*"] ++ comment ++ [TeXRaw "*/"], x'')
    | Just x' <- texStripPrefix "/*" x
        = Just ([TeXRaw "/*"], x')
    | Just x' <- texStripPrefix "*/" x
        = Just ([TeXRaw "*/"], x')
    | Just (comment, x') <- parseSingleLineComment x
        = Just (comment, x')
    | otherwise = Nothing

parseChar :: (Char -> Bool) -> Text -> Maybe (Text, Text)
parseChar p t
    | t /= "", p (Text.head t) = Just (Text.take 1 t, Text.drop 1 t)
    | otherwise = Nothing

parseSeq :: (Text -> Maybe (Text, Text)) -> (Text -> Maybe (Text, Text)) -> Text -> Maybe (Text, Text)
parseSeq p q t
    | Just (x, t') <- p t
    , Just (y, t'') <- q t' = Just (x ++ y, t'')
    | otherwise = Nothing

parseFirstOf :: [Text -> Maybe (a, Text)] -> Text -> Maybe (a, Text)
parseFirstOf [] _ = Nothing
parseFirstOf (p:pp) t
    | Just r <- p t = Just r
    | otherwise = parseFirstOf pp t

parseMany :: (Text -> Maybe (Text, Text)) -> Text -> (Text, Text)
parseMany p t = case p t of
    Nothing -> ("", t)
    Just (x, t') -> first (x++) (parseMany p t')


================================================
FILE: Document.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-}

module Document (
	CellSpan(..), Cell(..), RowSepKind(..), Row(..), Element(..), Paragraph(..),
	Section(..), Chapter(..), Draft(..), Table(..), Figure(..), Item(..), Footnote(..),
	IndexPath, IndexComponent(..), IndexCategory, Index, IndexTree, IndexNode(..),
	ColumnSpec(..), TextAlignment(..), normative, Formula(..), chapterOfSection,
	IndexEntry(..), IndexKind(..), Note(..), Example(..), TeXPara(..), Sentence(..),
	texParaTex, texParaElems, XrefDelta, sectionByAbbr, isDefinitionSection, Abbreviation,
	indexKeyContent, indexCatName, Sections(sections), SectionKind(..), mergeIndices, SourceLocation(..),
	figures, tables, tableByAbbr, figureByAbbr, formulaByAbbr, elemTex, footnotes, allElements,
	LaTeX, makeAbbrMap, formulas) where

import LaTeXBase (LaTeXUnit(..), LaTeX, MathType(Dollar))
import Data.Text (Text, replace)
import qualified Data.Text as Text
import qualified Data.List as List
import Data.IntMap (IntMap)
import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.String (IsString)
import Util ((.), (++), greekAlphabet)

-- Document structure:

data CellSpan = Normal | Multicolumn { width :: Int, colspec :: ColumnSpec } deriving (Eq, Show)
data Cell a = Cell { cellSpan :: CellSpan, content :: a } deriving (Eq, Show)
data RowSepKind = RowSep | CapSep | Clines [(Int, Int)] | NoSep deriving (Eq, Show)
data Row a = Row { rowSep :: RowSepKind, cells :: [Cell a] } deriving (Eq, Show)

data TextAlignment = AlignLeft | AlignRight | AlignCenter | Justify
	deriving Eq

instance Show TextAlignment where
	show AlignLeft = "left"
	show AlignRight = "right"
	show AlignCenter = "center"
	show Justify = "justify"

data ColumnSpec = ColumnSpec
	{ columnAlignment :: TextAlignment
	, columnBorder :: Bool
	, columnWidth :: Maybe Text}
	deriving (Eq, Show)

data Table = Table
	{ tableNumber :: Int
	, tableCaption :: LaTeX
	, columnSpec :: [ColumnSpec]
	, tableAbbr :: Abbreviation
	, tableBody :: [Row [TeXPara]]
	, tableSection :: Section }

instance Show Table where
	show _ = "<table>"

data Figure = Figure
	{ figureNumber :: Int
	, figureName :: LaTeX
	, figureAbbr :: Abbreviation
	, figureSvg :: Text
	, figureSection :: Section }

instance Show Figure where
	show _ = "<figure>"

data Formula = Formula
    { formulaNumber :: Int
    , formulaAbbr :: Abbreviation
    , formulaContent :: LaTeX
    , formulaSection :: Section }

instance Show Formula where
    show _ = "<formula>"

data Item = Item
	{ itemNumber :: Maybe [String]
	, itemLabel :: Maybe LaTeX
	, itemInlineContent :: [Element]
	, itemBlockContent :: [TeXPara] }
	deriving Show

itemElements :: Item -> [Element]
itemElements Item{..} = itemInlineContent ++ allElements itemBlockContent

data Footnote = Footnote
	{ footnoteNumber :: Int
	, footnoteContent :: [TeXPara] }
	deriving Show

data Note = Note { noteNumber :: Int, noteLabel :: Text, noteContent :: [TeXPara] }
	deriving Show

data Example = Example { exampleNumber :: Int, exampleContent :: [TeXPara] }
	deriving Show

data Sentence = Sentence { sentenceNumber :: Maybe Int, sentenceElems :: [Element] }
	deriving Show

newtype TeXPara = TeXPara { sentences :: [Sentence] }
	deriving Show

data Element
	= LatexElement LaTeXUnit
	| Enumerated { enumCmd :: String, enumItems :: [Item] }
	| Bnf String LaTeX
	| TableElement Table
	| Tabbing LaTeX
	| FigureElement Figure
	| FormulaElement Formula
	| Codeblock LaTeXUnit
	| Itemdescr [TeXPara] -- needed because there can be notes in itemdescr envs
	| NoteElement Note
	| ExampleElement Example
	| HtmlElement Text
	deriving Show

normative :: Element -> Bool
normative (NoteElement _) = False
normative (ExampleElement _) = False
normative (LatexElement (TeXComm "index" _ _)) = False
normative _ = True

data SectionKind
	= NormalSection { _level :: Int }
	| DefinitionSection { _level :: Int }
	| InformativeAnnexSection
	| NormativeAnnexSection
	deriving (Eq, Show)

isDefinitionSection :: SectionKind -> Bool
isDefinitionSection (DefinitionSection _) = True
isDefinitionSection _ = False

data Chapter = NormalChapter | InformativeAnnex | NormativeAnnex
	deriving (Eq, Show)

data SourceLocation = SourceLocation
	{ sourceFile :: FilePath
	, sourceLine :: Int }
	deriving (Eq, Show)

data Paragraph = Paragraph
	{ paraNumber :: Maybe Int
	, paraInItemdescr :: Bool
	, paraElems :: [TeXPara]
	, paraSection :: Section
	, paraSourceLoc :: Maybe SourceLocation
	, allParaElems :: [Element] } -- derivable but stored for efficiency
	deriving Show

type Abbreviation = Text -- of a section, figure, or table

data Section = Section
	{ abbreviation :: Abbreviation
	, sectionName :: LaTeX
	, paragraphs :: [Paragraph]
	, sectionFootnotes :: [Footnote]
	, subsections :: [Section]
	, sectionNumber :: Int
	, chapter :: Chapter
	, parents :: [Section] -- if empty, this is the chapter
	, sectionKind :: SectionKind
	, secIndexEntries :: IntMap IndexEntry
	, secIndexEntriesByPath :: Map IndexPath [(Int, IndexEntry)]
	}
	deriving Show

chapterOfSection :: Section -> Section
chapterOfSection s@Section{..}
    | null parents = s
    | otherwise = last parents

instance Eq Section where
	x == y = abbreviation x == abbreviation y

type XrefDelta = [(Abbreviation, [LaTeX])]

data StablyNamedItem
	= StablyNamedTable Table
	| StablyNamedSection Section
	| StablyNamedFigure Figure
	| StablyNamedFormula Formula

data Draft = Draft
	{ commitUrl :: Text
	, chapters  :: [Section]
	, index     :: Index
	, indexEntryMap :: IntMap IndexEntry
	, indexEntriesByPath :: Map IndexPath [(Int, IndexEntry)]
	, xrefDelta :: XrefDelta
	, abbrMap :: Abbreviation -> Maybe StablyNamedItem
	, labels :: Map Text Section }

-- (The index entry maps are derivable but stored for efficiency.)

stablyNamedItems :: Draft -> [(Abbreviation, StablyNamedItem)]
stablyNamedItems d =
	[(abbreviation s, StablyNamedSection s) | s <- sections d] ++
	[(tableAbbr t, StablyNamedTable t) | p <- allParagraphs d, TableElement t <- allParaElems p] ++
	[(formulaAbbr f, StablyNamedFormula f) | p <- allParagraphs d, FormulaElement f <- allParaElems p] ++
	[(figureAbbr f, StablyNamedFigure f) | p <- allParagraphs d, FigureElement f <- allParaElems p]

makeAbbrMap :: Draft -> Abbreviation -> Maybe StablyNamedItem
makeAbbrMap = flip Map.lookup . Map.fromList . stablyNamedItems

-- Indices:

data IndexComponent = IndexComponent { distinctIndexSortKey, indexKey :: LaTeX }
	deriving (Ord, Show)

instance Eq IndexComponent where
	x == y =
		distinctIndexSortKey x == distinctIndexSortKey y &&
		indexKeyContent (indexKey x) == indexKeyContent (indexKey y)

type IndexPath = [IndexComponent]

data IndexKind = See { _also :: Bool, _ref :: LaTeX } | IndexOpen | IndexClose | DefinitionIndexEntry
	deriving (Eq, Show)

type IndexCategory = Text

type Index = Map IndexCategory IndexTree

instance Show IndexEntry where
	show IndexEntry{..} =
		"IndexEntry"
		++ "{indexSection=" ++ show indexEntrySection
		++ ",indexCategory=" ++ show indexCategory
		++ ",indexPath=" ++ show indexPath
		++ ",indexEntryKind=" ++ show indexEntryKind
		++ "}"

data IndexEntry = IndexEntry
	{ indexEntrySection :: Abbreviation
	, indexEntryKind :: Maybe IndexKind
	, indexPath :: IndexPath
	, indexEntryNr :: Maybe Int
	, indexCategory :: Text
	}

type IndexTree = Map IndexComponent IndexNode

data IndexNode = IndexNode
	{ indexEntries :: [IndexEntry]
	, indexSubnodes :: IndexTree }

mergeIndices :: [Index] -> Index
mergeIndices = Map.unionsWith (Map.unionWith mergeIndexNodes)

mergeIndexNodes :: IndexNode -> IndexNode -> IndexNode
mergeIndexNodes x y = IndexNode
	{ indexEntries = indexEntries x ++ indexEntries y
	, indexSubnodes = Map.unionWith mergeIndexNodes (indexSubnodes x) (indexSubnodes y) }

indexKeyContent :: LaTeX -> Text
indexKeyContent = mconcat . map ikc
	where
		ikc :: LaTeXUnit -> Text
		ikc (TeXRaw t) = replace "\n" " " t
		ikc (TeXComm "tcode" _ [(_, x)]) = indexKeyContent x
		ikc (TeXComm "idxcode" _ [(_, x)]) = indexKeyContent x
		ikc (TeXComm "noncxxtcode" _ [(_, x)]) = indexKeyContent x
		ikc (TeXComm "indexedspan" _ [(_, x), _]) = indexKeyContent x
		ikc (TeXComm "text" _ [(_, x)]) = indexKeyContent x
		ikc (TeXComm "texttt" _ [(_, x)]) = indexKeyContent x
		ikc (TeXComm "textit" _ [(_, x)]) = indexKeyContent x
		ikc (TeXComm "textsc" _ [(_, x)]) = indexKeyContent x
		ikc (TeXComm "mathsf" _ [(_, x)]) = indexKeyContent x
		ikc (TeXComm "textsf" _ [(_, x)]) = indexKeyContent x
		ikc (TeXComm "textcolor" _ [_, (_, x)]) = indexKeyContent x
		ikc (TeXComm "xspace" _ []) = "_"
		ikc (TeXComm "Cpp" _ []) = "C++"
		ikc (TeXComm "&" _ []) = "&"
		ikc (TeXComm "%" _ []) = "%"
		ikc (TeXComm "-" _ []) = ""
		ikc (TeXComm "ell" _ []) = "ℓ"
		ikc (TeXComm "~" _ []) = "~"
		ikc (TeXComm "#" _ []) = "#"
		ikc (TeXComm "{" _ []) = "{"
		ikc (TeXComm "}" _ []) = "}"
		ikc (TeXComm "protect" _ []) = ""
		ikc (TeXComm "frenchspacing" _ []) = ""
		ikc (TeXComm "caret" _ []) = "^"
		ikc (TeXComm "tilde" _ []) = "~"
		ikc (TeXComm "^" _ []) = "^"
		ikc (TeXComm "\"" _ []) = "\""
		ikc (TeXComm "" _ []) = ""
		ikc (TeXComm "x" _ []) = "TODO"
		ikc (TeXComm "textbackslash" _ []) = "\\"
		ikc (TeXComm "textunderscore" _ []) = "_"
		ikc (TeXComm "discretionary" _ _) = ""
		ikc (TeXComm "texorpdfstring" _ [_, (_, x)]) = indexKeyContent x
		ikc (TeXComm s _ [])
			| Just c <- List.lookup s greekAlphabet = Text.pack [c]
		ikc (TeXBraces x) = indexKeyContent x
		ikc (TeXMath Dollar x) = indexKeyContent x
		ikc (TeXComm "index" _ _) = ""
		ikc (TeXComm "indexlink" _ ((_, x):_)) = indexKeyContent x
		ikc (TeXComm "hiddenindexlink" _ ((_, x):_)) = indexKeyContent x
		ikc x = error $ "indexKeyContent: unexpected: " ++ show x

indexCatName :: (Eq b, Show b, IsString a, IsString b) => b -> a
indexCatName "impldefindex" = "Index of implementation-defined behavior"
indexCatName "libraryindex" = "Index of library names"
indexCatName "headerindex" = "Index of library headers"
indexCatName "generalindex" = "Index"
indexCatName "grammarindex" = "Index of grammar productions"
indexCatName "conceptindex" = "Index of library concepts"
indexCatName "bibliography" = "Bibliography"
indexCatName x = error $ "indexCatName: " ++ show x

-- Gathering entities:

class Sections a where sections :: a -> [Section]

instance Sections Section where sections s = s : (subsections s >>= sections)
instance Sections Draft where sections = concatMap sections . chapters
instance Sections a => Sections (Maybe a) where sections = maybe [] sections

allParagraphs :: Sections a => a -> [Paragraph]
allParagraphs = (>>= paragraphs) . sections

tables :: Sections a => a -> [(Paragraph, Table)]
tables x = [(p, t) | p <- allParagraphs x, TableElement t <- allParaElems p]

figures :: Sections a => a -> [(Paragraph, Figure)]
figures x = [(p, f) | p <- allParagraphs x, FigureElement f <- allParaElems p]

formulas :: Sections a => a -> [(Paragraph, Formula)]
formulas x = [(p, f) | p <- allParagraphs x, FormulaElement f <- allParaElems p]

footnotes :: Sections a => a -> [(Section, Footnote)]
footnotes x = [(s, f) | s <- sections x, f <- sectionFootnotes s]

allElements :: [TeXPara] -> [Element]
allElements x = x >>= sentences >>= sentenceElems >>= f
	where
		f :: Element -> [Element]
		f e = e : case e of
			Enumerated {..} -> enumItems >>= itemElements
			TableElement Table{..} -> allElements $ tableBody >>= cells >>= content
			NoteElement Note{..} -> allElements noteContent
			Codeblock y -> [LatexElement y]
			ExampleElement Example{..} -> allElements exampleContent
			Tabbing y -> LatexElement . y
			Bnf _ y -> LatexElement . y
			_ -> []

-- Misc:

texParaElems :: TeXPara -> [Element]
texParaElems = (>>= sentenceElems) . sentences

texParaTex :: TeXPara -> LaTeX
texParaTex = (>>= elemTex) . texParaElems

itemTex :: Item -> LaTeX
itemTex Item{..} = (itemInlineContent >>= elemTex) ++ (itemBlockContent >>= texParaTex)

elemTex :: Element -> LaTeX
elemTex (NoteElement n) = noteContent n >>= texParaTex
elemTex (ExampleElement x) = exampleContent x >>= texParaTex
elemTex (LatexElement l) = [l]
elemTex (Enumerated _ e) = e >>= itemTex
elemTex (Bnf _ l) = l
elemTex (Tabbing t) = t
elemTex (Codeblock t) = [t]
elemTex (Itemdescr t) = t >>= texParaTex
elemTex (TableElement Table{..}) = tableCaption ++ (tableBody >>= rowTex)
	where
		rowTex :: Row [TeXPara] -> LaTeX
		rowTex r = content . cells r >>= (>>= texParaTex)
elemTex (FigureElement _) = []
elemTex (FormulaElement f) = formulaContent f
elemTex (HtmlElement _) = []

tableByAbbr :: Draft -> Abbreviation -> Maybe Table
	-- only returns Maybe because some of our tables are broken
tableByAbbr d a = case abbrMap d a of
	Just (StablyNamedTable t) -> Just t
	_ -> Nothing

figureByAbbr :: Draft -> Abbreviation -> Figure
figureByAbbr d a = case abbrMap d a of
	Just (StablyNamedFigure f) -> f
	_ -> error $ "figureByAbbr: " ++ show a

formulaByAbbr :: Draft -> Abbreviation -> Formula
formulaByAbbr d a = case abbrMap d a of
	Just (StablyNamedFormula f) -> f
	_ -> error $ "formulaByAbbr: " ++ show a

sectionByAbbr :: Draft -> Abbreviation -> Maybe Section
sectionByAbbr d a = case abbrMap d a of
	Just (StablyNamedSection s) -> Just s
	_ -> Nothing


================================================
FILE: LICENSE
================================================
All authors involved in the creation of the contents of this package have agreed to release their respective contributions into the Public Domain.


================================================
FILE: LaTeXBase.hs
================================================
{-# LANGUAGE ViewPatterns, OverloadedStrings #-}

module LaTeXBase
 ( MathType(..), LaTeXUnit(..), LaTeX, TeXArg, ArgKind(..), concatRaws, hasCommand, isJustRaw
 , matchCommand, lookForCommand, matchEnv, mapTeX, mapCommandName, renderLaTeX, mapTeXRaw, isTeXEnv, texSpan, unconsRaw
 , trim, trimr, triml, texStripInfix, isCodeblock, isMath, texStripPrefix, texStripAnyPrefix, AllUnits(..) ) where

import Data.String (fromString)
import Prelude hiding ((.), (++), writeFile, dropWhile)
import Data.Text (Text, pack)
import qualified Data.Text as Text
import Data.Char (isSpace)
import Util ((.), (++), textStripInfix)
import Control.Arrow (first, second)

data MathType = Parentheses | Square | Dollar
	deriving (Eq, Show, Ord)

data ArgKind = FixArg | OptArg
	deriving (Eq, Show, Ord)

type TeXArg = (ArgKind, LaTeX)

data LaTeXUnit
	= TeXRaw Text
	| TeXComm String String [TeXArg] -- first string is command name, second is trailing whitespace
	| TeXEnv String [TeXArg] LaTeX
	| TeXMath MathType LaTeX
	| TeXLineBreak
	| TeXBraces LaTeX
	deriving (Eq, Show, Ord)

isTeXEnv :: String -> LaTeXUnit -> Bool
isTeXEnv x (TeXEnv y _ _) = x == y
isTeXEnv _ _ = False

type LaTeX = [LaTeXUnit]

lookForCommand :: String -> LaTeX -> [[TeXArg]]
lookForCommand n = (snd .) . matchCommand (n ==)

class AllUnits a where
	allUnits :: a -> [LaTeXUnit]

instance AllUnits LaTeXUnit where
	allUnits u = u : case u of
		TeXMath _ l -> allUnits l
		TeXBraces l -> allUnits l
		TeXComm _ _ a -> (snd . a) >>= allUnits
		TeXEnv _ a l -> (l : snd . a) >>= allUnits
		_ -> []

instance AllUnits a => AllUnits [a] where
	allUnits = concatMap allUnits

matchCommand :: AllUnits a => (String -> Bool) -> a -> [(String, [TeXArg])]
matchCommand f x = [(str, as) | TeXComm str _ as <- allUnits x, f str]

hasCommand :: (String -> Bool) -> LaTeX -> Bool
hasCommand f = not . null . matchCommand f

matchEnv :: AllUnits a => (String -> Bool) -> a -> [(String, [TeXArg], LaTeX)]
matchEnv f x = [(str, as, l) | TeXEnv str as l <- allUnits x, f str]

mapTeX :: (LaTeXUnit -> Maybe LaTeX) -> LaTeX -> LaTeX
mapTeX f = concatMap g
	where
		g :: LaTeXUnit -> LaTeX
		g (f -> Just x) = x
		g (TeXComm c ws a) = [TeXComm c ws (h . a)]
		g (TeXBraces x) = [TeXBraces (mapTeX f x)]
		g (TeXMath t b) = [TeXMath t (mapTeX f b)]
		g (TeXEnv n a b) = [TeXEnv n (h . a) (mapTeX f b)]
		g x = [x]
		h = second (mapTeX f)

mapCommandName :: (String -> String) -> LaTeX -> LaTeX
mapCommandName f = concatMap g
	where
		g :: LaTeXUnit -> LaTeX
		g (TeXComm c ws a) = [TeXComm (f c) ws (h . a)]
		g (TeXBraces x) = [TeXBraces (mapCommandName f x)]
		g (TeXMath t b) = [TeXMath t (mapCommandName f b)]
		g (TeXEnv n a b) = [TeXEnv n (h . a) (mapCommandName f b)]
		g x = [x]
		h = second (mapCommandName f)

renderLaTeX :: LaTeX -> Text
renderLaTeX = mconcat . (renderUnit .)

renderUnit :: LaTeXUnit -> Text
renderUnit (TeXRaw t) = t
renderUnit (TeXComm "right" _ [(FixArg, [TeXRaw "."])]) = "\\right."
renderUnit (TeXComm name ws [])
	| name `elem` ["left", "sum", "int", "sin", "cos", "right", "bigl", "bigr", "big", "small", "smaller"] = pack $ "\\" <> name <> ws
	| otherwise = "\\" <> fromString name <> "{}"
renderUnit (TeXComm name ws args) = "\\" <> pack (fromString name) <> pack (fromString ws) <> renderArgs args
renderUnit (TeXEnv name args c) =
	"\\begin{" <> fromString name <> "}"
	<> renderArgs args
	<> renderLaTeX c
	<> "\\end{" <> fromString name <> "}"
renderUnit (TeXMath Dollar l) = "$" <> renderLaTeX l <> "$"
renderUnit (TeXMath Square l) = "\\[" <> renderLaTeX l <> "\\]"
renderUnit (TeXMath Parentheses l) = "\\(" <> renderLaTeX l <> "\\)"
renderUnit TeXLineBreak = "\\\\"
renderUnit (TeXBraces l) = "{" <> renderLaTeX l <> "}"

renderArgs :: [TeXArg] -> Text
renderArgs = mconcat . (renderArg .)

renderArg :: TeXArg -> Text
renderArg (FixArg, l) = "{" <> renderLaTeX l <> "}"
renderArg (OptArg, l) = "[" <> renderLaTeX l <> "]"

mapTeXRaw :: (Text -> LaTeXUnit) -> (LaTeX -> LaTeX)
mapTeXRaw f = map go
	where
		go :: LaTeXUnit -> LaTeXUnit
		go (TeXRaw t) = f t
		go (TeXComm s ws args) = TeXComm s ws (second (go .) . args)
		go (TeXEnv s args body) = TeXEnv s (second (go .) . args) (go . body)
		go (TeXBraces l) = TeXBraces $ go . l
		go t@(TeXMath _ _) = t
		go t@TeXLineBreak = t

concatRaws :: LaTeX -> LaTeX
concatRaws (TeXRaw a : TeXRaw b : more) = concatRaws (TeXRaw (a ++ b) : more)
concatRaws (TeXComm s ws args : more) = TeXComm s ws (second concatRaws . args) : concatRaws more
concatRaws (TeXEnv s args bd : more) = TeXEnv s (second concatRaws . args) (concatRaws bd) : concatRaws more
concatRaws (TeXBraces x : more) = TeXBraces (concatRaws x) : concatRaws more
concatRaws (x : more) = x : concatRaws more
concatRaws [] = []

unconsRaw :: LaTeX -> (Text, LaTeX)
unconsRaw (TeXRaw x : y) = first (x ++) (unconsRaw y)
unconsRaw x = ("", x)

texStripPrefix :: Text -> LaTeX -> Maybe LaTeX
texStripPrefix t (TeXRaw s : y) = case Text.stripPrefix t s of
	Just "" -> Just y
	Just s' -> Just (TeXRaw s' : y)
	Nothing -> Nothing
texStripPrefix _ _ = Nothing

texStripAnyPrefix :: [Text] -> LaTeX -> Maybe (Text, LaTeX)
texStripAnyPrefix [] _ = Nothing
texStripAnyPrefix (x:y) z
    | Just a <- texStripPrefix x z = Just (x, a)
    | otherwise = texStripAnyPrefix y z

texStripInfix :: Text -> LaTeX -> Maybe (LaTeX, LaTeX)
texStripInfix t = go
	where
		go [] = Nothing
		go (x : rest)
			| TeXRaw s <- x
			, Just (y, z) <- textStripInfix t s
				= Just (h y, h z ++ rest)
			| otherwise = first (x :) . go rest
		h "" = []
		h x = [TeXRaw x]

texSpan :: (Char -> Bool) -> LaTeX -> (Text, LaTeX)
texSpan p (TeXRaw x : y) = case Text.span p x of
    (stuff, "") ->  first (stuff ++) (texSpan p y)
    (stuff, rest) -> (stuff, TeXRaw rest : y)
texSpan _ x = ("", x)

invisible :: LaTeXUnit -> Bool
invisible (TeXComm "index" _ _) = True
invisible _ = False

dropWhileEnd :: (Char -> Bool) -> LaTeX -> LaTeX
dropWhileEnd _ [] = []
dropWhileEnd p x
	| invisible (last x) = dropWhileEnd p (init x) ++ [last x]
	| TeXRaw y <- last x = init x ++ case Text.dropWhileEnd p y of
		"" -> []
		a -> [TeXRaw a]
	| otherwise = x

trimr, trim :: LaTeX -> LaTeX
trimr = dropWhileEnd isSpace
trim = triml . trimr

triml :: LaTeX -> LaTeX
triml (TeXRaw x : y) = case Text.dropWhile isSpace x of
	"" -> triml y
	x' -> TeXRaw x' : y
triml x = x

isMath :: LaTeXUnit -> Bool
isMath (TeXMath _ _) = True
isMath (TeXComm "ensuremath" _ _) = True
isMath (TeXEnv "eqnarray*" _ _) = True
isMath (TeXEnv "equation*" _ _) = True
isMath _ = False

isCodeblock :: LaTeXUnit -> Bool
isCodeblock (TeXEnv "codeblock" _ _) = True
isCodeblock (TeXEnv "indexedcodeblock" _ _) = True
isCodeblock (TeXEnv "codeblocktu" _ _) = True
isCodeblock (TeXEnv "codeblockdigitsep" _ _) = True
isCodeblock _ = False

isJustRaw :: LaTeX -> Maybe Text
isJustRaw [TeXRaw x] = Just x
isJustRaw _ = Nothing


================================================
FILE: LaTeXParser.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns, TupleSections #-}

module LaTeXParser (parseString,
	Token(Token), Context(..), defaultContext, Signature(..), Macros(..), Environment(..), Command(..), ParseResult(ParseResult),
	defaultMacros,
	nullCmd, storeCmd, codeEnv, normalCmd,
	storeEnv) where

import LaTeXBase (LaTeXUnit(..), LaTeX, TeXArg, ArgKind(..), MathType(..), concatRaws)
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Char (isAlphaNum, isSpace, isAlpha)
import Data.Maybe (fromJust)
import Control.Arrow (first)
import Data.Map (Map)
import qualified Data.Map as Map
import Prelude hiding ((++), (.))
import Util ((.), (++), getDigit, stripInfix)

newtype Token = Token { tokenChars :: String }
	deriving (Eq, Show)

data Environment = Environment (Context -> [Token] -> ParseResult)
data Command = Command { runCommand :: Context -> String {- ws -} -> [Token] -> ParseResult }

data Macros = Macros
	{ commands :: Map Text Command
	, environments :: Map Text Environment
	, counters :: Map Text Int }

newCommand :: Bool {- overwrite -} -> (Text, Command) -> Macros -> Macros
newCommand True (name, cmd) Macros{..} = Macros{commands = Map.insert name cmd commands, ..}
newCommand False (name, cmd) Macros{..} = Macros{commands = Map.insertWith (\_ y -> y) name cmd commands, ..}

instance Semigroup Macros where
	x <> y = Macros
		(commands x ++ commands y)
		(environments x ++ environments y)
		(counters x ++ counters y)

instance Monoid Macros where
	mempty = Macros mempty mempty mempty

data ParseResult = ParseResult
	{ content :: LaTeX
	, newMacros :: Macros
	, remainder :: [Token] }

data Signature = Signature
	{ nrFixArgs :: Int
	, defaultArg :: Maybe [Token] }
	deriving Show

data Context = Context
	{ commentsEnabled :: Bool
	, parsingOptArg :: Bool
	, macros :: Macros }

prependContent :: LaTeX -> ParseResult -> ParseResult
prependContent t p = p{content = t ++ content p}

combineMacros :: Bool {- left biased -} -> Macros -> Macros -> Macros
combineMacros b x y = if b then x ++ y else y ++ x

addMacros :: Bool {- overwrite -} -> Macros -> ParseResult -> ParseResult
addMacros b m p = p{newMacros = combineMacros b m (newMacros p)}

defaultEnvs :: [(Text, Environment)]
defaultEnvs = [outputblockEnv]

codeEnv :: Text -> Signature -> (Text, Environment)
codeEnv name sig = (name, Environment f)
	where
		f :: Context -> [Token] -> ParseResult
		f ctx toks = ParseResult [env] mempty rest'
			where
				(arguments, rest) = parseArgs sig toks
				Just (code, rest') = stripInfix [Token "\\end", Token "{", Token (Text.unpack name), Token "}"] rest
				env = TeXEnv (Text.unpack name) (map ((FixArg, ) . fullParse ctx) arguments) (parseCode name ctx code)

outputblockEnv :: (Text, Environment)
outputblockEnv = ("outputblock", Environment f)
	where
		f :: Context -> [Token] -> ParseResult
		f ctx toks = ParseResult [env] mempty rest
			where
				Just (content, rest) = stripInfix [Token "\\end", Token "{", Token "outputblock", Token "}"] toks
				env = TeXEnv "outputblock" [] (parseOutputBlock ctx content)

parseOutputBlock :: Context -> [Token] -> LaTeX
parseOutputBlock c = concatRaws . go
	where
		go :: [Token] -> LaTeX
		go [] = []
		go (Token "@" : rest) = fullParse c cmd ++ go rest'
			where (cmd, Token "@" : rest') = break (== Token "@") rest
		go s = TeXRaw (Text.pack $ concatMap tokenChars code) : go rest
			where (code, rest) = break (== Token "@") s

storeEnv :: String -> Signature -> (Text, Environment)
storeEnv name sig = (Text.pack name, Environment act)
	where
		act :: Context -> [Token] -> ParseResult
		act ctx toks = ParseResult [env] mempty afterend
			where
				(arguments, rest) = parseArgs sig toks
				ParseResult body _ afterend = parse ctx rest
				env = TeXEnv name (map ((FixArg, ) . fullParse ctx) arguments) (concatRaws body)
							-- todo: not all fixargs

defaultCmds :: [(Text, Command)]
defaultCmds =
	[ ("newcommand", newCommandCommand)
	, ("renewcommand", newCommandCommand)
	, ("DeclareMathOperator", declareMathOperator)
	, ("newcolumntype", newColumnTypeCommand)
	, ("newenvironment", newEnvCommand)
	, ("lstnewenvironment", newEnvCommand)
	, ("raisebox", raiseBoxCommand)
	, ("let", Command $ \ctx _ws rest -> parse ctx (drop 2 rest))
	, beginCommand
	, endCommand
	, oldDefCommand
	]

oldDefCommand :: (Text, Command)
oldDefCommand = ("def", Command pars)
	where
		pars ctx@Context{..} _ws rest
			| (Token ('\\' : name) : rest') <- rest
			, Just (body, rest'') <- balanced ('{', '}') rest' =
				let
					m = Macros (Map.fromList [defCmd (Text.pack name) (Signature 0 Nothing) body]) mempty mempty
					ParseResult p mm r = parse ctx{macros=macros++m} rest''
				in
					ParseResult p (m ++ mm) r
			| otherwise = parse ctx $ snd $ fromJust $ balanced ('{', '}') $ dropWhile (/= Token "{") rest

endCommand :: (Text, Command)
endCommand = ("end", Command $ \c _ws rest ->
	let Just (_, rest') = parseFixArg c rest in ParseResult mempty mempty rest')

beginCommand :: (Text, Command)
beginCommand = ("begin", normalCmd $ Command pars)
	where
		pars c@Context{..} _ws rest
			| Just (Environment f) <- Map.lookup (envname) (environments macros) = f c rest'
			| otherwise = error $ "undefined env: " ++ Text.unpack envname
			where
				Just (arg, rest') = parseFixArg c rest
				[TeXRaw envname] = concatRaws arg

raiseBoxCommand :: Command
raiseBoxCommand = normalCmd $ Command $ \c@Context{..} _ws rest ->
	let
		Just (a0, rest') = balanced ('{', '}') rest
		(a1, rest'') = case parseOptArg rest' of
			Nothing -> (Nothing, rest')
			Just (x, y) -> (Just x, y)
		Just (a2, rest''') = balanced ('{', '}') rest''
		args = [(FixArg, fullParse c a0)]
			++ case a1 of
				Nothing -> []
				Just x -> [(OptArg, fullParse c x)]
			++ [(FixArg, fullParse c a2)]
	in
		ParseResult [TeXComm "raisebox" "" args] mempty rest'''

newCommandCommand :: Command
newCommandCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token ('\\' : name) : Token "}" : rest) ->
	let
		(sig, rest') = parseSignature rest
		Just (body, rest'') = balanced ('{', '}') rest'
		newMacros = newCommand True (defCmd (Text.pack name) sig body) mempty
	in
		ParseResult [] newMacros rest''

declareMathOperator :: Command
declareMathOperator = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token ('\\' : name) : Token "}" : rest) ->
	let
		Just (body, rest') = balanced ('{', '}') rest
		newBody = [Token "\\operatorname", Token "{"] ++ body ++ [Token "}"]
		newMacros = newCommand True (defCmd (Text.pack name) (Signature 0 Nothing) newBody) mempty
	in
		ParseResult [] newMacros rest'

newColumnTypeCommand :: Command
newColumnTypeCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token _ : Token "}" : rest) ->
	let
		(_, rest') = parseSignature rest
		Just (_, rest'') = balanced ('{', '}') rest'
	in
		ParseResult [] mempty rest''

defaultMacros :: Macros
defaultMacros = Macros (Map.fromList defaultCmds) (Map.fromList defaultEnvs) mempty

defaultContext :: Context
defaultContext = Context
	{ commentsEnabled = True
	, parsingOptArg = False
	, macros = defaultMacros }

rmLine :: [Token] -> [Token]
rmLine s = case dropWhile (/= Token "\n") s of
	Token "\n" : x -> x
	x -> x

parseOptArg :: [Token] -> Maybe ([Token], [Token])
parseOptArg = balanced ('[', ']')

parseOptArgs :: [Token] -> ([[Token]], [Token])
parseOptArgs s
	| Just (r, s') <- parseOptArg s = first (r:) (parseOptArgs s')
	| otherwise = ([], s)

parseFixArg :: Context -> [Token] -> Maybe (LaTeX, [Token])
parseFixArg ctx (Token [c] : more) | isSpace c = parseFixArg ctx more
parseFixArg ctx (Token "{" : more) =
	let ParseResult t _macros s = parse ctx more in Just (t, s)
parseFixArg _ _ = Nothing

parseSignature :: [Token] -> (Signature, [Token])
parseSignature t = case optArgs of
	[] -> (Signature 0 Nothing, t')
	[[Token a]] -> (Signature (read a) Nothing, t')
	[[Token a], deflt] -> (Signature (read a) (Just deflt), t')
	_ -> error "unrecognized signature"
	where (optArgs, t') = parseOptArgs t

balanced :: (Char, Char) -> [Token] -> Maybe ([Token], [Token])
balanced (open, close) (dropWhile (all isSpace . tokenChars) -> (Token [o] : s))
	| o == open = Just $ go 0 s
	where
		go :: Int -> [Token] -> ([Token], [Token])
		go 0 [] = ([], [])
		go 0 (Token [c] : x) | c == close = ([], x)
		go n (Token "}" : x) = first (Token "}" :) (go (n-1) x)
		go n (Token "{" : x) = first (Token "{" :) (go (n+1) x)
		go n (x:y) = first (x :) (go n y)
		go n x = error $ "\n\nbalanced: " ++ show (n, x)
balanced oc (dropWhile (all isSpace. tokenChars) -> (Token "%" : x)) = balanced oc (dropWhile (/= Token "\n") x)
balanced _ _ = Nothing

balanced_body :: Context -> String -> [Token] -> ([Token], [Token])
balanced_body ctx end = go 0
	where
		go :: Int -> [Token] -> ([Token], [Token])
		go 0 [] = ([], [])
		go 0 (Token "\\end" : Token "{" : e : Token "}" : x) | fullParse ctx [e] == [TeXRaw $ Text.pack end] = ([], x)
		go n (Token "}" : x) = first (Token "}" :) (go (n-1) x)
		go n (Token "{" : x) = first (Token "{" :) (go (n+1) x)
		go n (x:y) = first (x :) (go n y)
		go n s = error $ "\n\nbalanced: " ++ show (n, s)

parseArgs :: Signature -> [Token] -> ([[Token]], [Token])
parseArgs Signature{..} s = case defaultArg of
	Nothing -> n_balanced ('{', '}') nrFixArgs s
	Just dfl -> case parseOptArg s of
		Nothing ->
			first (dfl :) (n_balanced ('{', '}') (nrFixArgs - 1) s)
		Just (optArg, s') ->
			first (optArg :) (n_balanced ('{', '}') (nrFixArgs - 1) s')

parseArgs2 :: Context -> Signature -> [Token] -> ([TeXArg], [Token])
parseArgs2 c Signature{..} s
	| defaultArg == Nothing = first (map fa) (n_balanced ('{', '}') nrFixArgs s)
	| Just (optArg, s') <- parseOptArg s =
			first (\a -> (OptArg, fullParse c optArg) : map fa a)
			(n_balanced ('{', '}') (nrFixArgs - 1) s')
	| otherwise = first (map fa) (n_balanced ('{', '}') (nrFixArgs - 1) s)
	where
		fa = (FixArg, ) . fullParse c

-- todo: clean up parseArgs/parseArgs2 above

n_balanced :: (Char, Char) -> Int -> [Token] -> ([[Token]], [Token])
n_balanced oc n s
	| n > 0, Just (x, s') <- balanced oc s = first (x:) $ n_balanced oc (n-1) s'
	| otherwise = ([], s)

newEnvCommand :: Command
newEnvCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : (span (/= Token "}") -> (name, Token "}" : rest))) ->
	let
		nameStr = concatMap tokenChars name
		(sig, rest') = parseSignature rest
		Just (begin, rest'') = balanced ('{', '}') rest'
		Just (end, rest''') = balanced ('{', '}') rest''
		pa :: Context -> [Token] -> ParseResult
		pa c' toks = ParseResult replaced mempty toks''
			where
				replaced = fullParse c' $ replArgs args begin ++ body ++ end
				(args, toks') = parseArgs sig toks
				(body, toks'') = balanced_body c' nameStr toks'
		m = Macros mempty (Map.singleton (Text.pack nameStr) (Environment pa)) mempty
	in
		ParseResult [] m rest'''

parseString :: Context -> String -> (LaTeX, Macros, [Token])
parseString c s = (concatRaws x, y, z)
	where ParseResult x y z = parse c (tokenize s)

literal :: String
literal = " @_{}&,%-#/~>!$;:^"

breakComment :: [Token] -> ([Token], [Token])
breakComment x@(Token "\n" : _) = ([], x)
breakComment (Token ('\\' : cmd) : xs)
	| (c, r@(_:_)) <- span (/= '\n') cmd = ([Token ('\\':c)], Token r : xs)
breakComment (Token "%" : Token "\n" : x) = first ((Token "%" :) . (Token "\n" :)) (breakComment x)
breakComment (x : xs) = first (x:) (breakComment xs)
breakComment [] = ([], [])

data LiteralKind = StringLiteral | CharLiteral

parseCode :: Text -> Context -> [Token] -> LaTeX
parseCode envname c = concatRaws . go Nothing
	where
		go :: Maybe LiteralKind -> [Token] -> LaTeX
		go _ [] = []
		go b (Token "@" : rest) = fullParse c cmd ++ go b rest'
			where (cmd, Token "@" : rest') = break (== Token "@") rest
		go (Just StringLiteral) (Token "\"" : rest) = TeXRaw "\"" : go Nothing rest
		go (Just CharLiteral) (Token "'" : rest) = TeXRaw "'" : go Nothing rest
		go Nothing (Token "\"" : rest) = TeXRaw "\"" : (go (Just StringLiteral) lit ++ go Nothing rest')
			where (lit, rest') = stringLiteral rest
		go Nothing (Token "'" : rest)
		       | envname == "codeblockdigitsep" = TeXRaw "'" : go Nothing rest
		       | otherwise = TeXRaw "'" : (go (Just CharLiteral) lit ++ go Nothing rest')
		       where (lit, rest') = charLiteral rest
		go Nothing (Token "/" : Token "/" : (breakComment -> (comment, rest')))
			= TeXComm "comment" "" [(FixArg, TeXRaw "//" : noncode comment)] : go Nothing rest'
		go Nothing (Token "/" : Token "*" : rest)
		    | Just (comment, rest') <- stripInfix [Token "*", Token "/"] rest
		    = TeXComm "comment" "" [(FixArg, [TeXRaw "/*"] ++ noncode comment ++ [TeXRaw "*/"])] : go Nothing rest'
		go b (Token "/" : rest) = TeXRaw "/" : go b rest
		go b s = TeXRaw (Text.pack $ concatMap tokenChars code) : go b rest
			where
			  breakToks = [Token "@", Token "/"] ++ 
			    case b of
			      Nothing -> [Token "\"", Token "'"]
			      Just StringLiteral -> [Token "\""]
			      Just CharLiteral -> [Token "'"]
			  (code, rest) = break (`elem` breakToks) s
		noncode :: [Token] -> LaTeX
		noncode toks =
		  fullParse c nc ++ case more of
		    [] -> []
		    Token "@" : (break (== Token "@") -> (code, _ : rest)) ->
		        TeXComm "tcode" "" [(FixArg, fullParse c code)] : noncode rest
		    _ -> error "no"
		    where (nc, more) = span (/= Token "@") toks
		stringLiteral :: [Token] -> ([Token], [Token])
		stringLiteral (Token "\\" : Token "\"" : x) = first (Token "\\\"" :) (stringLiteral x)
		stringLiteral (Token "\\" : Token "\\" : x) = first (Token "\\\\" :) (stringLiteral x)
		stringLiteral (Token "\"" : x) = ([Token "\""], x)
		stringLiteral (y : x) = first (y :) (stringLiteral x)
		stringLiteral [] = ([], [])
		charLiteral :: [Token] -> ([Token], [Token])
		charLiteral (Token "\\" : Token "'" : x) = first (Token "\\'" :) (charLiteral x)
		charLiteral (Token "\\" : Token "\\" : x) = first (Token "\\\\" :) (charLiteral x)
		charLiteral (Token "'" : x) = ([Token "'"], x)
		charLiteral (y : x) = first (y :) (charLiteral x)
		charLiteral [] = ([], [])

isCommandChar :: Char -> Bool
isCommandChar c = isAlpha c || c == '*'

tokenize :: String -> [Token]
tokenize "" = []
tokenize ('\\':'v':'e':'r':'b': delim : (break (== delim) -> (arg, _ : rest))) =
	Token ("\\verb:" ++ arg) : tokenize rest
tokenize ('\\' : (span isCommandChar -> (cmd@(_:_), (span isSpace -> (ws, rest)))))
	= Token ('\\' : cmd ++ ws) : tokenize rest
tokenize ('\\' : c : rest) = Token ['\\', c] : tokenize rest
tokenize x@((isAlpha -> True): _) = let (a, b) = span isAlphaNum x in Token a : tokenize b
tokenize (x:y) = Token [x] : tokenize y

	-- \verb is handled in tokenize so that the 'balanced' function doesn't
	-- get confused by \verb|{|

	-- Notice how the whitespace following a command like \bla is included in the Token
	-- This lets the parser include it in the TeXComm/TeXCommS's command field, so that
	-- the whitespace is not lost when serializing back to text when sending to MathJax.

replArgs :: [[Token]] -> [Token] -> [Token]
replArgs args = go
	where
		go [] = []
		go (Token "%" : (span (/= Token "\n") -> (x, y))) = Token "%" : x ++ go y
		go (Token "#" : Token "#" : y) = Token "#" : go y
		go (Token "#" : Token [getDigit -> Just i] : y)
			| length args >= i = (args !! (i-1)) ++ go y
			| otherwise = error $ "need more args than " ++ show args ++ " to replace in " ++ show (concatMap tokenChars y)
		go (x:y) = x : go y

nullCmd :: Text -> Signature -> (Text, Command)
nullCmd name sig = defCmd name sig []

storeCmd :: String -> Signature -> (Text, Command)
storeCmd name sig = (Text.pack name, normalCmd $ Command pars)
	where
		pars context ws tokens = ParseResult [TeXComm name ws args] mempty rest
			where (args, rest) = parseArgs2 context sig tokens

defCmd :: Text -> Signature -> [Token] -> (Text, Command)
defCmd name sig body = (name, normalCmd $ Command pars)
	where
		pars context _ws tokens = ParseResult (fullParse context $ replArgs args body) mempty rest
			where (args, rest) = parseArgs sig tokens

normalCmd :: Command -> Command
normalCmd (Command f) = Command $ \ctx ws toks ->
	let ParseResult content newMacros rest = f ctx ws toks
	in addMacros False newMacros (prependContent content (parse ctx{macros=macros ctx ++ newMacros} rest))

consumeMath :: [Token] -> ([Token], [Token])
consumeMath = f 0
    where
        f :: Integer -> [Token] -> ([Token], [Token])
        f 0 (Token "$" : rest) = ([], rest)
        f depth (Token "{" : rest) = first (Token "{" :) (f (depth + 1) rest)
        f depth (Token "}" : rest) = first (Token "}" :) (f (depth - 1) rest)
        f depth (tok : rest) = first (tok :) (f depth rest)
        f _ [] = error "unexpected end of math"

parse :: Context -> [Token] -> ParseResult
parse c (Token "$" : (consumeMath -> (math, rest))) =
	prependContent [TeXMath Dollar (fullParse c math)] (parse c rest)
parse c (Token "\\[" : (span (/= Token "\\]") -> (math, Token "\\]" : rest))) =
	prependContent [TeXMath Square (fullParse c math)] (parse c rest)
parse c (Token "]" : x)
	| parsingOptArg c = ParseResult mempty mempty x
parse _ (Token "}" : x) = ParseResult mempty mempty x
parse c (Token "{" : x) = prependContent [TeXBraces y] $ parse c rest
	where ParseResult y _ rest = parse c x
parse c (Token "%" : x)
	| commentsEnabled c = parse c (rmLine x)
parse _ [] = ParseResult mempty mempty mempty
parse c (Token "\\\\" : x) = prependContent [TeXLineBreak] (parse c x)
parse c (Token ['\\', ch] : x)
	| ch `elem` literal = prependContent [TeXComm [ch] "" []] (parse c x)
parse c (Token ('\\':'v':'e':'r':'b':':':arg) : rest) =
	prependContent [TeXComm "verb" "" [(FixArg, [TeXRaw $ Text.pack arg])]] (parse c rest)
parse c (Token "\\rSec" : Token [getDigit -> Just i] : s)
		= prependContent [TeXComm "rSec" "" args] $ parse c s''
	where
		Just (a, s') = parseOptArg s
		Just (b, s'') = parseFixArg c s'
		args = [(FixArg, [TeXRaw $ Text.pack $ show i]), (FixArg, fullParse c a), (FixArg, b)]
parse c@Context{..} (Token ('\\' : (span (not . isSpace) -> (nos, w))) : rest)
	| Just f <- Map.lookup (Text.pack cmd) (commands macros) = runCommand f c ws rest
	| otherwise = error $
		"\n\nundefined command: " ++ show cmd ++ " at: " ++ take 50 (concatMap tokenChars rest)
	where (cmd, ws) | nos == "", (x : xx) <- w = ([x], xx)
	                | otherwise = (nos, w)
parse ctx (Token c : rest)
	| all isAlphaNum c
		= prependContent [TeXRaw $ Text.pack c] $ parse ctx rest
parse ctx (Token [c] : rest)
		= prependContent [TeXRaw $ Text.pack [c]] $ parse ctx rest
parse _ s = error $ "parse: unexpected: " ++ take 100 (concatMap tokenChars s)

fullParse :: Context -> [Token] -> LaTeX
fullParse c t
	| all isSpace (concatMap tokenChars remainder) = concatRaws content
	| otherwise = error $ "could not fully parse: "
		++ concatMap tokenChars t
		++ "\n\nremainder: "
		++ concatMap tokenChars remainder
	where ParseResult{..} = parse c t


================================================
FILE: Load14882.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE
	OverloadedStrings,
	ScopedTypeVariables,
	RecordWildCards,
	ViewPatterns,
	LambdaCase,
	TupleSections,
	NamedFieldPuns,
	FlexibleInstances,
	FlexibleContexts,
	RankNTypes,
	MultiParamTypeClasses,
	FunctionalDependencies,
	UndecidableInstances,
	RecursiveDo #-}

module Load14882 (parseIndex, load14882) where

import qualified LaTeXParser as Parser
import qualified Data.IntMap as IntMap
import qualified Data.List as List
import Data.IntMap (IntMap)
import LaTeXBase
	( LaTeXUnit(..), TeXArg, ArgKind(..), lookForCommand
	, mapTeX, mapTeXRaw, concatRaws, texStripInfix, allUnits)
import Data.Text (Text, replace, isPrefixOf)
import Data.Text.IO (readFile)
import Text.Regex (mkRegex, matchRegexAll)
import qualified Data.Text as Text
import Control.Monad (forM, when)
import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
import Data.Char (isAlpha)
import Control.Arrow (first)
import Data.Map (Map)
import Data.Maybe (isJust, fromJust)
import qualified Data.Map as Map
import Data.List (unfoldr, (\\), takeWhile)
import System.Process (readProcess)
import System.IO.Unsafe (unsafePerformIO)
import Control.Monad.Fix (MonadFix)
import Control.Monad.State (MonadState, evalState, get, put, liftM2, modify)
import Util ((.), (++), mapLast, stripInfix, measure, textStripInfix)
import RawDocument
import Sentences (splitIntoSentences, isActualSentence, breakSentence)
import Document

getCommitUrl :: IO Text
getCommitUrl = do
	url <- gitGetRemoteUrl
	commit <- gitGetCommitRef
	return $
		( Text.replace "git@github.com:" "http://github.com/"
		$ Text.replace ".git" "/commit/" url)
		++ commit

gitGetRemoteUrl :: IO Text
gitGetRemoteUrl = do
	x <- readProcess "git" ["ls-remote", "--get-url"] ""
	return $ Text.strip $ Text.pack x

gitGetCommitRef :: IO Text
gitGetCommitRef = do
	x <- readProcess "git" ["rev-parse", "HEAD"] ""
	return $ Text.strip $ Text.pack $ x

-- In the LaTeX sources, \definition is often preceded by corresponding \indexdefns.
-- Since we treat definitions like sections (and generate pages for them), we need
-- to move the \indexdefns inside (after) the \definition, so that the index entries
-- don't link to the page for the preceding section.

moveIndexEntriesIntoDefs :: [Text] -> [Text]
moveIndexEntriesIntoDefs [] = []
moveIndexEntriesIntoDefs (x:xs)
	| "\\indexdefn{" `isPrefixOf` x = case moveIndexEntriesIntoDefs xs of
		[] -> [x]
		y:ys
			| "\\definition{" `isPrefixOf` y -> y : x : ys
			| otherwise -> x : y : ys
	| otherwise = x : moveIndexEntriesIntoDefs xs

moveIndexEntriesIntoSecs :: [Text] -> [Text]
moveIndexEntriesIntoSecs = go []
	where
		go x [] = x
		go x (h:t)
		    | "\\indextext{" `isPrefixOf` h = go (h : x) t
		    | "\\rSec" `isPrefixOf` h = h : reverse x ++ go [] t
		    | otherwise = reverse x ++ [h] ++ go [] t

{- The document has a ton of:

  \indexlibraryglobal{bla}%
  \begin{itemdecl}
  void bla();
  \end{itemdecl}

To highlight the whole itemdecl, indexItemDecls converts this to:

  \begin{indexeditemdecl}{
  \indexlibraryglobal{bla}%
  }
  void bla();
  \end{indexeditemdecl}
-}

indexCodeEnvs :: [Text] -> [Text] -> [Text]
indexCodeEnvs envs = go []
	where
		go collected [] = collected
		go collected (x:xs)
			| "\\index" `isPrefixOf` x = go (collected ++ [x]) xs
			| [e] <- [e | e <- envs, ("\\begin{" ++ e ++ "}") `isPrefixOf` x] =
				let (code, _ : rest) = span (not . (("\\end{" ++ e ++ "}") `isPrefixOf`)) xs
				in (if null collected then
						["\\begin{" ++ e ++ "}"]
						++ code
						++ ["\\end{" ++ e ++ "}"]
					else
						["\\begin{indexed" ++ e ++ "}{"] ++ collected ++ ["}"]
						++ code
						++ ["\\end{indexed" ++ e ++ "}"])
				 	++ go [] rest
			| otherwise = collected ++ (x : go [] xs)

data Numbers = Numbers
	{ tableNr, figureNr, footnoteRefNr, footnoteNr, itemDeclNr
	, nextIndexEntryNr, noteNr, exampleNr, nextSentenceNr, formulaNr :: Int }

class AssignNumbers a b | a -> b where
	assignNumbers :: forall m . (Functor m, MonadFix m, MonadState Numbers m) => Section -> a -> m b

instance AssignNumbers TeXArg TeXArg where
	assignNumbers s (y, x) = (y, ) . assignNumbers s x

instance AssignNumbers LaTeXUnit LaTeXUnit where
	assignNumbers s (TeXEnv "itemdecl" [] x) = do
		n <- get
		put n{itemDeclNr = itemDeclNr n + 1}
		TeXEnv "itemdecl" [(FixArg, [TeXRaw $ Text.pack $ show $ itemDeclNr n])] . assignNumbers s x
	assignNumbers s (TeXEnv "indexeditemdecl" indices x) = do
		n <- get
		put n{itemDeclNr = itemDeclNr n + 1}
		liftM2 (TeXEnv "indexeditemdecl") (assignNumbers s indices) (assignNumbers s x)
	assignNumbers s (TeXEnv x y z) = liftM2 (TeXEnv x) (assignNumbers s y) (assignNumbers s z)
	assignNumbers _ (TeXComm "index" ws args) = do
		n <- get
		put n{nextIndexEntryNr = nextIndexEntryNr n + 1}
		return $ TeXComm "index" ws $ (FixArg, [TeXRaw $ Text.pack $ show $ nextIndexEntryNr n]) : args
	assignNumbers _ (TeXComm "defnx" ws args) = do
		n <- get
		put n{nextIndexEntryNr = nextIndexEntryNr n + 1}
		return $ TeXComm "defnx" ws $ (FixArg, [TeXRaw $ Text.pack $ show $ nextIndexEntryNr n]) : args
	assignNumbers _ (TeXComm "footnoteref" ws []) = do
		Numbers{..} <- get
		put Numbers{footnoteRefNr = footnoteRefNr+1, ..}
		return $ TeXComm "footnoteref" ws [(FixArg, [TeXRaw $ Text.pack $ show footnoteRefNr])]
	assignNumbers s (TeXComm x ws args) = TeXComm x ws . assignNumbers s args
	assignNumbers _ x = return x

instance AssignNumbers a b => AssignNumbers (Cell a) (Cell b) where
	assignNumbers s x@Cell{..} = do
		n <- get
		put n{nextSentenceNr=1}
		content' <- assignNumbers s content
		modify $ \m -> m{nextSentenceNr = nextSentenceNr n}
		return x{content=content'}

instance AssignNumbers a b => AssignNumbers (Row a) (Row b) where
	assignNumbers s x@Row{..} = do
		cells' <- assignNumbers s cells
		return x{cells=cells'}

instance AssignNumbers RawTexPara TeXPara where
	assignNumbers s (RawTexPara (splitIntoSentences -> x)) = TeXPara . f x
		where
			f [] = return []
			f (h:t) = do
				h' <- assignNumbers s h
				let actual = isActualSentence h
				n <- get
				put n{nextSentenceNr = nextSentenceNr n + (if actual then 1 else 0)}
				(Sentence (if actual then Just (nextSentenceNr n) else Nothing) h' :) . f t

assignNonInlineItem :: (MonadState Numbers m, MonadFix m) => Section -> RawItem -> m Item
assignNonInlineItem s (RawItem label content) = do
	n <- get
	put n{nextSentenceNr = 1}
	Item Nothing (if null label then Nothing else Just label) [] . assignNumbers s content

breakFirstSentence :: [TeXPara] -> (Sentence, [TeXPara])
breakFirstSentence (TeXPara [x] : z) = (x, z)
breakFirstSentence (TeXPara (x:y) : z) = (x, TeXPara y : z)
breakFirstSentence x = error $ "breakFirstSentence: " ++ show x

assignInlineItem :: (MonadState Numbers m, MonadFix m) => Section -> RawItem -> m Item
assignInlineItem s (RawItem label content) = do
	n <- get
	put n{nextSentenceNr = 1}
	content' <- assignNumbers s content
	let (Sentence _ x, y) = breakFirstSentence content'
	return $ Item Nothing (if null label then Nothing else Just label) x y

endsWithFullStop :: [RawElement] -> Bool
endsWithFullStop = isJust . breakSentence

instance AssignNumbers RawElement Element where
	assignNumbers section RawFigure{..} = do
		Numbers{..} <- get
		put Numbers{figureNr = figureNr+1, ..}
		return $ FigureElement Figure
			{ figureNumber  = figureNr
			, figureName    = rawFigureName
			, figureAbbr    = "fig:" ++ rawFigureAbbr
			, figureSvg     = rawFigureSvg
			, figureSection = section }
	assignNumbers section RawFormula{..} = do
	    Numbers{..} <- get
	    put Numbers{formulaNr = formulaNr + 1, ..}
	    return $ FormulaElement Formula
	        { formulaNumber = formulaNr
	        , formulaAbbr = "eq:" ++ rawFormulaAbbr
	        , formulaContent = rawFormulaContent
	        , formulaSection = section }
	assignNumbers s RawTable{..} = do
		Numbers{..} <- get
		put Numbers{tableNr = tableNr+1, ..}
		tableCaption <- assignNumbers s rawTableCaption
		tableBody <- assignNumbers s rawTableBody
		return $ TableElement Table
			{ tableNumber  = tableNr
			, columnSpec   = rawColumnSpec
			, tableAbbr   = rawTableAbbr
			, tableCaption = tableCaption
			, tableSection = s
			, .. }
	assignNumbers s (RawEnumerated x p) = do
		origNum <- nextSentenceNr . get
		let c = length (filter (any (endsWithFullStop . rawTexParaElems) . rawItemContent) p)
		r <- mapM (if c > 1 then assignNonInlineItem s else assignInlineItem s) p
		modify $ \y -> y{nextSentenceNr = origNum}
		return $ Enumerated x r
	assignNumbers s (RawLatexElement x) = LatexElement . assignNumbers s x
	assignNumbers s (RawBnf x y) = Bnf x . assignNumbers s y
	assignNumbers _ (RawTabbing x) = return $ Tabbing x
	assignNumbers s (RawCodeblock x) = Codeblock . assignNumbers s x
	assignNumbers s (RawItemdescr x) = Itemdescr . assignNumbers s x
	assignNumbers s (RawNote label x) = do
		Numbers{..} <- get
		put Numbers{noteNr = noteNr+1, ..}
		x' <- assignNumbers s x
		return $ NoteElement $ Note noteNr label x'
	assignNumbers s (RawExample x) = do
		Numbers{..} <- get
		put Numbers{exampleNr = exampleNr+1, ..}
		x' <- assignNumbers s x
		return $ ExampleElement $ Example exampleNr x'

instance AssignNumbers RawFootnote Footnote where
	assignNumbers s (RawFootnote t) = do
		Numbers{..} <- get
		put Numbers{footnoteNr = footnoteNr+1, nextSentenceNr = 1, ..}
		t' <- assignNumbers s t
		return $ Footnote{footnoteNumber=footnoteNr,footnoteContent=t'}

lsectionLevel :: LinearSection -> Int
lsectionLevel (lsectionKind -> NormalSection l) = l
lsectionLevel (lsectionKind -> DefinitionSection l) = l
lsectionLevel _ = 0

paraNumbers :: [Bool] -> [Maybe Int]
paraNumbers = f 1
	where
		f _ [] = []
		f i (True : x) = Just i : f (i + 1) x
		f i (False : x) = Nothing : f i x

treeizeChapters :: forall m . (Functor m, MonadFix m, MonadState Numbers m) =>
	Bool -> Int -> [LinearSection] -> m [Section]
treeizeChapters _ _ [] = return []
treeizeChapters annexes secNumber (LinearSection{..} : more) = mdo
		nums <- get
		put nums{formulaNr = 1}
		sectionFootnotes <- assignNumbers newSec lsectionFootnotes
		let
			ie = rawIndexEntriesForSec newSec
			newSec = Section{sectionKind=lsectionKind, secIndexEntries=ie, secIndexEntriesByPath=reverseIndexEntryMap ie, ..}
		let pn = paraNumbers $ paraNumbered . lsectionParagraphs
		paragraphs <- forM (zip pn lsectionParagraphs) $ assignNumbers newSec
		subsections <- treeizeSections 1 chapter [newSec] lsubsections
		(newSec :) . treeizeChapters annexes' (sectionNumber + 1) more'
	where
		sectionNumber = if annexes' /= annexes then 0 else secNumber
		annexes' = chapter /= NormalChapter
		parents = []
		chapter
			| lsectionKind == InformativeAnnexSection = InformativeAnnex
			| lsectionKind == NormativeAnnexSection = NormativeAnnex
			| otherwise = NormalChapter
		abbreviation = lsectionAbbreviation
		sectionName = lsectionName
		(lsubsections, more') = span ((> 0) . lsectionLevel) more

rawIndexEntriesForSec :: Section -> IntMap IndexEntry
rawIndexEntriesForSec s = IntMap.fromList
	[(n, e) | e@IndexEntry{indexEntryNr=Just n} <- sectionIndexEntries s]

reverseIndexEntryMap :: IntMap IndexEntry -> Map IndexPath [(Int, IndexEntry)]
reverseIndexEntryMap m = Map.fromListWith (++) [(indexPath x, [(i, x)])  | (i, x) <- IntMap.assocs m]

assignItemNumbers :: Paragraph -> Paragraph
assignItemNumbers p
	| Just n <- paraNumber p = p{ paraElems = fst $ goParas [n, 1] $ paraElems p }
	| otherwise = p
	where

		goParas :: [Int] -> [TeXPara] -> ([TeXPara], [Int])
		goParas nn [] = ([], nn)
		goParas nn (TeXPara e : pp) = first (TeXPara e' :) (goParas nn' pp)
			where (e', nn') = goSentences nn e

		goSentences :: [Int] -> [Sentence] -> ([Sentence], [Int])
		goSentences nn [] = ([], nn)
		goSentences nn (Sentence m e : ss) = first (Sentence m e' :) (goSentences nn' ss)
			where (e', nn') = goElems nn e

		goElems :: [Int] -> [Element] -> ([Element], [Int])
		goElems nn [] = ([], nn)
		goElems nn (e:ee) = first (e' :) (goElems nn' ee)
			where (e', nn') = goElem nn e

		goElem :: [Int] -> Element -> (Element, [Int])
		goElem nn Enumerated{..} = (Enumerated enumCmd items', mapLast (+ length enumItems) nn)
			where
				items' = map (\(i, Item{..}) ->
					Item
						(Just (map show $ mapLast (+i) nn))
						itemLabel
						(fst $ goElems (mapLast (+i) nn ++ [1]) itemInlineContent)
						(fst $ goParas (mapLast (+i) nn ++ [1]) itemBlockContent)
					) (zip [0..] enumItems)
		goElem nn (NoteElement (Note nr label paras)) = (NoteElement (Note nr label paras'), nn')
			where (paras', nn') = goParas nn paras
		goElem nn (ExampleElement (Example nr paras)) = (ExampleElement (Example nr paras'), nn')
			where (paras', nn') = goParas nn paras
		goElem nn x = (x, nn)

instance AssignNumbers (Maybe Int, RawParagraph) Paragraph where
	assignNumbers paraSection (paraNumber, RawParagraph{..}) = do
		nums <- get
		put nums{nextSentenceNr=if paraNumbered then 1 else nextSentenceNr nums}
		paraElems <- assignNumbers paraSection rawParaElems
		when paraNumbered $ modify $ \newnums -> newnums{nextSentenceNr = nextSentenceNr nums}
		return $ assignItemNumbers Paragraph
		  { paraInItemdescr = rawParaInItemdescr
		  , paraSourceLoc = rawParaSourceLoc
		  , allParaElems = allElements paraElems
		  , .. }

treeizeSections :: forall m . (Functor m, MonadFix m, MonadState Numbers m) =>
	Int -> Chapter -> [Section] -> [LinearSection] -> m [Section]
treeizeSections _ _ _ [] = return []
treeizeSections sectionNumber chapter parents
	(s@LinearSection{..} : (span ((> lsectionLevel s) . lsectionLevel) -> (lsubsections, more'))) = mdo
		let
		  ie = rawIndexEntriesForSec newSec
		  newSec = Section
			{ sectionKind = lsectionKind
			, secIndexEntries = ie
			, secIndexEntriesByPath = reverseIndexEntryMap ie
			, sectionName = lsectionName
			, abbreviation = lsectionAbbreviation
			, .. }
		let pn = paraNumbers $ paraNumbered . lsectionParagraphs
		nums <- get
		put nums{noteNr=1, exampleNr=1, itemDeclNr=1}
		sectionFootnotes <- assignNumbers newSec lsectionFootnotes
		modify $ \n -> n{nextSentenceNr=1}
		paragraphs <- forM (zip pn lsectionParagraphs) $ assignNumbers newSec
		subsections <- treeizeSections 1 chapter (newSec : parents) lsubsections
		(newSec :) . treeizeSections (sectionNumber + 1) chapter parents more'

instance AssignNumbers a b => AssignNumbers [a] [b] where
	assignNumbers s = mapM (assignNumbers s)

resolveGrammarterms :: Parser.Macros -> [Text] -> LinearSection -> LinearSection
resolveGrammarterms macros links LinearSection{..} =
	LinearSection{lsectionParagraphs = map resolve lsectionParagraphs, ..}
	where
		resolveTexPara :: RawTexPara -> RawTexPara
		resolveTexPara RawTexPara{..} = RawTexPara{rawTexParaElems = map resolveRawElem rawTexParaElems, ..}
		resolveRawElem :: RawElement -> RawElement
		resolveRawElem (RawBnf s tex) = RawBnf s (bnfGrammarterms macros links tex)
		resolveRawElem (RawEnumerated s items) = RawEnumerated s (map resolveItem items)
		resolveRawElem y = y
		resolveItem :: RawItem -> RawItem
		resolveItem (RawItem label content) = RawItem label (map resolveTexPara content)
		resolve :: RawParagraph -> RawParagraph
		resolve RawParagraph{..} = RawParagraph{rawParaElems = map resolveTexPara rawParaElems, ..}

bnfGrammarterms :: Parser.Macros -> [Text] -> LaTeX -> LaTeX
bnfGrammarterms macros links = mapTeX go . mapTeX wordify
	where
		wordify :: LaTeXUnit -> Maybe LaTeX
		wordify (TeXRaw stuff) = Just $ map TeXRaw $ unfoldr f stuff
			where
				f s | Text.null s = Nothing
				f s | isName $ Text.head s = Just $ Text.span isName s
				f s = Just $ Text.break isName s

				isName c = isAlpha c || c `elem` ['-', '_']
		wordify _ = Nothing

		go :: LaTeXUnit -> Maybe LaTeX
		go d@(TeXComm cmd _ _) | cmd `elem` ["tcode", "index", "textnormal", "indexlink", "hiddenindexlink", "indexedspan", "terminal", "literalterminal", "noncxxterminal"] = Just [d]
		go (TeXRaw name)
			| name `elem` links = Just $ fst $ RawDocument.doParse macros $ "\\grammarterm{" ++ name ++ "}"
		go _ = Nothing

parseIndex :: LaTeX -> (IndexPath, Maybe IndexKind)
parseIndex = go . mapTeXRaw unescapeIndexPath . concatRaws
	where
		go (texStripInfix "|seealso" -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See True y)
		go (texStripInfix "|see " -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See False y)
		go (texStripInfix "|see" -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See False y)
		go (texStripInfix "|see" -> Just (x, [TeXBraces y, TeXRaw "|idxbfpage"])) = (parseIndexPath x, Just $ See False y)
		go (texStripInfix "|(" -> Just (t, _)) = (parseIndexPath t, Just IndexOpen)
		go (texStripInfix "|)" -> Just (t, _)) = (parseIndexPath t, Just IndexClose)
		go (texStripInfix "|idxbfpage" -> Just (t, _)) = (parseIndexPath t, Just DefinitionIndexEntry)
		go t = (parseIndexPath t, Nothing)

		unescapeIndexPath :: Text -> LaTeXUnit
		unescapeIndexPath = TeXRaw
			. replace "\5" "\""

			. replace "\2" "!"
			. replace "!" "\1"
			. replace "\"!" "\2"

			. replace "\4" "@"
			. replace "@" "\3"
			. replace "\"@" "\4"

			. replace "\"|" "|"
			. replace "\"\"" "\5"

			. (!! 10) . iterate (replace "  " " ")
			. replace "\n" " "

		parseIndexPath :: LaTeX -> IndexPath
		parseIndexPath (texStripInfix "\1" -> Just (x, y)) = parseIndexPath x ++ parseIndexPath y
		parseIndexPath (texStripInfix "\3" -> Just (x, y)) = [IndexComponent x y]
		parseIndexPath t = [IndexComponent [] t]

sectionTexParas :: Section -> [TeXPara]
sectionTexParas s = (paragraphs s >>= paraElems) ++ (sectionFootnotes s >>= footnoteContent)

sectionTex :: Section -> LaTeX
sectionTex s = sectionTexParas s >>= texParaTex

sectionIndexEntries :: Section -> [IndexEntry]
sectionIndexEntries s =
	[ IndexEntry{indexEntrySection=abbreviation sec, ..}
	| sec <- sections s
	, [ (FixArg, [TeXRaw (Text.unpack -> read -> Just -> indexEntryNr)])
	  , (OptArg, [TeXRaw indexCategory]), (FixArg, (parseIndex -> (indexPath, indexEntryKind)))
	  ] <- lookForCommand "index" (sectionTex sec)]

sectionLabels :: Section -> [(Text, Section)]
sectionLabels s =
	[ (label, sec) | sec <- sections s
	, [ (FixArg, [TeXRaw label]) ] <- lookForCommand "label" (sectionTex sec)]

toIndex :: IndexEntry -> Index
toIndex IndexEntry{..} = Map.singleton indexCategory $ go indexPath
	where
		go :: [IndexComponent] -> IndexTree
		go [c] = Map.singleton c (IndexNode [IndexEntry indexEntrySection indexEntryKind indexPath indexEntryNr indexCategory] Map.empty)
		go (c:cs) = Map.singleton c $ IndexNode [] $ go cs
		go _ = error "toIndex"

trackPnums :: FilePath -> Text -> Text
	-- Replaces \pnum with \pnum{file}{line}
trackPnums file = Text.pack . unlines . map (uncurry f) . zip [1..] . lines . Text.unpack
	where
		f :: Integer -> String -> String
		f lineNr line
			| Just (pre, post) <- stripInfix "\\pnum" line
				= pre ++ "\\pnum{" ++ file ++ "}{" ++ show lineNr ++ "}" ++ (if null post then "%" else post)
			| otherwise = line

getFileList :: IO [FilePath]
getFileList =
	(\\ ["front", "back"]) .
	map (Text.unpack . Text.dropEnd 1 . Text.drop (Text.length pre)) .
	filter (pre `isPrefixOf`) .
	Text.lines . readFile "std.tex"
  where pre = "\\include{"

grabBnf :: [String] -> [String]
grabBnf [] = []
grabBnf (line : rest)
    | "\\begin{bnf}" `List.isPrefixOf` line =
        let (x, end : more) = break ("\\end{bnf}" `List.isPrefixOf`) rest
        in ["", line] ++ x ++ [end] ++ grabBnf more
    | "\\gramSec" `List.isPrefixOf` line = ["", line] ++ grabBnf rest
    | otherwise = grabBnf rest

generateStdGramExt :: [FilePath] -> IO Text
generateStdGramExt files =
    Text.pack . unlines . grabBnf . lines . Text.unpack .
    Text.concat . mapM readFile ((++ ".tex") . files)

importExampleFile :: FilePath -> IO Text
importExampleFile =
    (Text.strip .
     Text.unlines .
     takeWhile (/= "\\end{document}") .
     tail .
     dropWhile (/= "\\begin{document}") .
     Text.lines .) .
    readFile

importExamples :: Text -> Text
importExamples x = case matchRegexAll r (Text.unpack x) of
        Nothing -> x
        Just (before, _match, after, subs) ->
            Text.pack before ++
            unsafePerformIO (importExampleFile $ "assets/" ++ (subs !! 1) ++ ".tex") ++
            importExamples (Text.pack after)
    where r = mkRegex "\\\\importexample(\\[[0-9a-zA-Z.-]*\\])?{([a-zA-Z0-9_-]*)}"

parseFiles :: Parser.Macros -> IO ([LinearSection], Parser.Macros)
parseFiles m = do
	files <- getFileList
	stdGramExt <- generateStdGramExt files
	let
		go [] macros = return ([], macros)
		go (c:cc) macros = do
			let p = c ++ ".tex"

			stuff <-
				importExamples .
				replace "multicolfloattable" "floattable" .
				replace "\\indeximpldef{" "\\index[impldefindex]{" .
				Text.unlines .
					indexCodeEnvs ["codeblock", "itemdecl"] .
					moveIndexEntriesIntoSecs .
					moveIndexEntriesIntoDefs .
				Text.lines .
				trackPnums p .
				replace "\\nodiffref\n\\change" "\n\\pnum\\textbf{Change:}\\space" .
				replace "\n\\diffref" "\n\\pnum\\nopnumdiffref" .
					-- Done here because (1) the real \nodiffref is defined with \def in a way
					-- we don't support yet, and (2) this way a source link is generated for the pnum.
				readFile p

			let extra = if c /= "grammar" then "" else replace "\\gramSec" "\\rSec1" stdGramExt
			let (r, macros') = parseFile macros (stuff ++ extra)
			if length r == 0 then undefined else
				first (r ++) . go cc (macros ++ macros')
	
	bib <- fst . parseFile m .
	       fst . fromJust .
	       textStripInfix "\\clearpage" .
	       ("\\rSec0[bibliography]{Bibliography}\n" ++) .
	       readFile "back.tex"

	first (++ bib) . go files m

load14882 :: Text -> IO Draft
load14882 extraMacros = do

	commitUrl <- getCommitUrl

	(macros@Parser.Macros{..}, took) <- measure (loadMacros extraMacros)
	putStrLn $ "Loaded macros in " ++ show (took * 1000) ++ "ms."

	(secs :: [LinearSection], took2) <- measure $ fst . parseFiles macros
	putStrLn $ "Parsed LaTeX in " ++ show (took2 * 1000) ++ "ms."

	xrefDelta <- loadXrefDelta

	(r, took3) <- measure $ if length (show secs) == 0 then undefined else do
		-- force eval before we leave the dir
		let
			grammarNames = [n |
				TeXComm "index" _ [
					(OptArg, [TeXRaw "grammarindex"]) ,
					(FixArg, [TeXRaw _
					 ,TeXComm "textcolor" "" [(FixArg,[TeXRaw "grammar-gray"]),(FixArg,[TeXComm "textsf" _ [(FixArg,[TeXComm "textit" "" [(FixArg,[TeXRaw n])]])]])]
					 ,TeXRaw "|idxbfpage"]
					)] <- allUnits secs]

			secs' = map (resolveGrammarterms macros grammarNames) secs
			chapters = evalState (treeizeChapters False 1 secs') (Numbers 1 1 1 1 0 0 1 1 1 1)
			allEntries :: [IndexEntry]
			allEntries = chapters >>= sectionIndexEntries
			index = mergeIndices $ map toIndex allEntries
			indexEntryMap = IntMap.fromList [(n, e) | e@IndexEntry{indexEntryNr=Just n} <- allEntries]
			indexEntriesByPath = reverseIndexEntryMap indexEntryMap
			labels = Map.fromList $ chapters >>= sectionLabels
	
			abbrMap = makeAbbrMap dr
			dr = Draft{..}
		return dr
	
	putStrLn $ "Processed in " ++ show (took3 * 1000) ++ "ms."
	return r


================================================
FILE: MathJax.hs
================================================
{-# LANGUAGE OverloadedStrings, ViewPatterns #-}

module MathJax (render) where

import Control.Concurrent.MVar (takeMVar, putMVar, newMVar)
import Data.Text (Text)
import qualified Data.Text as Text
import System.IO.Unsafe (unsafePerformIO)
import System.Process (shell, CreateProcess(..), createProcess, StdStream(CreatePipe))
import System.IO (BufferMode(..), hGetLine, hPutStrLn, hSetBuffering)
import Text.Regex (mkRegex, subRegex)
import Prelude hiding ((++))
import Util ((++))
import qualified Data.Map as Map
import Data.Map (Map)

rmTrailingNewline :: Text -> Text
rmTrailingNewline (Text.stripSuffix "\n" -> Just x) = x
rmTrailingNewline x = x

type Renderer = String {- formula -} -> Bool {- inline -} -> Text

data Input = Input { _formula :: String, _inline :: Bool }
    deriving (Eq, Ord)

makeRenderer :: IO Renderer
makeRenderer = do

    (Just stdinPipe, Just stdoutPipe, _, _) <- createProcess (shell "./mathjax-batch")
            {std_in = CreatePipe, std_out = CreatePipe}

    hSetBuffering stdinPipe LineBuffering
    hSetBuffering stdoutPipe LineBuffering

    let
      rm r s = subRegex (mkRegex r) s ""
      readResult = do
        line <- hGetLine stdoutPipe
        if line == "DONE"
          then return ""
          else do
            more <- readResult
            return $ line ++ "\n" ++ more

    mutex <- newMVar (Map.empty :: Map Input Text)

    return $ \formula inline -> unsafePerformIO $ do
        let input = Input formula inline
        cache <- takeMVar mutex
        (result, cache') <- case Map.lookup input cache of
            Just output -> return (output, cache)
            Nothing -> do
                hPutStrLn stdinPipe formula
                hPutStrLn stdinPipe (if inline then "INLINE" else "NONINLINE")
                rawResult <- readResult
                let
                  output
                    = Text.replace " focusable=\"false\"" ""
                    $ rmTrailingNewline -- Prevents artifacts in [rand.adapt.ibits]#4
                    $ Text.pack
                    $ rm " id=\"(MJXc|MathJax)-[0-9A-Za-z-]+\""
                    $ rm " style=\"\""
                    $ rawResult
                return (output, Map.insert input output cache)
        putMVar mutex cache'
        return result

render :: Renderer
render = unsafePerformIO $ makeRenderer


================================================
FILE: Pages.hs
================================================
{-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-}

module Pages (fileContent, pageContent, pagePath, writePage, applyPageStyle, Link(..), outputDir, PageStyle(..)) where

import Prelude hiding ((++), (.), writeFile)
import System.Directory (createDirectoryIfMissing)
import Control.Monad (when)
import qualified Data.Text as Text
import qualified Data.Text.Lazy as LazyText
import qualified Data.Text.Lazy.Builder as TextBuilder
import Util ((++), (.), Text, writeFile)

outputDir :: FilePath
outputDir = "14882/"

data PageStyle = Bare | WithExtension | InSubdir
    deriving (Eq, Read)

fileContent :: TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder
fileContent pathHome title extraHead body =
    "<!DOCTYPE html>" ++
    "<html lang='en'>" ++
        "<head>" ++
            "<title>" ++ title ++ "</title>" ++
            "<meta charset='UTF-8'>" ++
            "<link rel='stylesheet' type='text/css' href='" ++ pathHome ++ "14882.css'>" ++
            "<link rel='stylesheet' type='text/css' href='https://fonts.googleapis.com/css2?family=Noto+Serif'>" ++
            "<link rel='stylesheet' type='text/css' href='https://fonts.googleapis.com/css2?family=Noto+Sans'>" ++
            "<link rel='stylesheet' type='text/css' href='https://fonts.googleapis.com/css2?family=Noto+Sans+Mono'>" ++
            "<link rel='icon' href='icon-light.png' media='(prefers-color-scheme: light)'>" ++
            "<link rel='icon' href='icon-dark.png' media='(prefers-color-scheme: dark)'>" ++
            extraHead ++
        "</head>" ++
        "<body><div class='wrapper'>" ++ body ++ "</div></body>" ++
    "</html>"

data Link = TocToSection | SectionToToc | SectionToSection
	deriving Show

doLink :: PageStyle -> Link -> Text -> Text
doLink sfs l = LazyText.toStrict . TextBuilder.toLazyText . go . Text.splitOn (Text.pack (show l) ++ "/")
	where
		go :: [Text] -> TextBuilder.Builder
		go (x : (Text.break (`elem` ("'#" :: String)) -> (a, b)) : z) = TextBuilder.fromText x ++ f (TextBuilder.fromText a) ++ go (b : z)
		go [x] = TextBuilder.fromText x
		go _ = undefined
		f :: TextBuilder.Builder -> TextBuilder.Builder
		f = case (sfs, l) of
			(Bare, SectionToToc) -> ("./#" ++)
			(Bare, TocToSection) -> id
			(Bare, SectionToSection) -> id
			(InSubdir, SectionToToc) -> ("../#" ++)
			(InSubdir, TocToSection) -> (++ "/")
			(InSubdir, SectionToSection) -> ("../" ++)
			(WithExtension, SectionToToc) -> ("index.html#" ++)
			(WithExtension, TocToSection) -> (++ ".html")
			(WithExtension, SectionToSection) -> (++ ".html")

applyPageStyle :: PageStyle -> Text -> Text
applyPageStyle sfs =
	doLink sfs SectionToSection
	. doLink sfs SectionToToc
	. doLink sfs TocToSection

pagePath :: FilePath -> PageStyle -> String
pagePath n Bare = outputDir ++ n
pagePath n WithExtension = outputDir ++ n ++ ".html"
pagePath n InSubdir = outputDir ++ n ++ "/index.html"

pageContent :: PageStyle -> TextBuilder.Builder -> Text
pageContent sfs content = applyPageStyle sfs $ LazyText.toStrict $ TextBuilder.toLazyText $ content

writePage :: FilePath -> PageStyle -> Text -> IO ()
writePage n sfs content = do
    when (sfs == InSubdir) $ createDirectoryIfMissing True (outputDir ++ n)
    writeFile (pagePath n sfs) content


================================================
FILE: README
================================================
Introduction

  cxxdraft-htmlgen parses the LaTeX sources of the draft,
  and generates static HTML pages from them.

Prerequisites

  - Git
  - The Haskell Platform (https://www.haskell.org/platform/)
  - Graphviz
  - Node.js
  - The 'split' NPM package
  - mathjax-node-cli (https://github.com/mathjax/mathjax-node-cli/)

Usage

  Do:
    git clone https://github.com/Eelis/cxxdraft-htmlgen.git
    cd cxxdraft-htmlgen
    cabal build
    dist/build/cxxdraft-htmlgen/cxxdraft-htmlgen path/to/draft [sectionfilestyle]

  Or with stack:
    stack build
    stack exec cxxdraft-htmlgen path/to/draft [sectionfilestyle]

  The sectionfilestyle parameter is one of:

    Bare            (to generate e.g. intro.execution)
    WithExtension   (to generate e.g. intro.execution.html)
    InSubdir        (to generate e.g. intro.execution/index.html)

  The default is WithExtension, since this is suitable
  for direct browsing on a filesystem without a web server.

  Bare may be used in conjunction with web server configuration
  specifying a default text/html mime type for the directory
  containing the section pages, to get URLs such as:

    temp.res#temp.dep
    temp.dep#3

  InSubdir only requires defaulting to index.html, to give:

    temp.res/#temp.dep
    temp.dep/#3

Custom draft branch

  While cxxdraft-htmlgen works with the official draft sources as-is,
  better results can be obtained by using the following branch:

    https://github.com/Eelis/draft/tree/cxxdraft-htmlgen-fixes

  This branch tracks the official draft sources, but makes some changes to:
    - improve syntax highlighting
    - clean up hyperlinks
    - work around MathJax limitations
    - work around cxxdraft-htmlgen limitations

Output

  The following will be created in ./14882/ :

  - index.html    A table of contents with links to...

  - ~2300 interlinked section pages

    These are named after the section abbreviation, which for
    the Bare section file style look like:

      stmt.goto
      class.member.lookup
      cpp
      iterator.requirements.general
      locale.moneypunct.virtuals

    Since sections nest, content is duplicated at every level.
    This allows one to specify more or less context for a given
    citation. For example, one can link to:

      basic.scope.hiding   (section 6.4.10 "Name hiding" on
                            a page of its own)

      basic.scope#hiding   (the same section highlighted on
                            the page for section 6.4 "Scope")

      basic#scope.hiding   (the same section highlighted on
                            the page for chapter 6 "Basics")

  - full         The entire document (~24 mbyte, or ~2 mbyte compressed).

  - 14882.css    Used by all of the above.

Hidden links

  On any page:

  - defined terms/concepts/nonterminals are links that select themselves;

  - a full stop at the end of a sentence is a link that selects the sentence;

  - moving the mouse over the right margin of a numbered paragraph reveals a link
    to the LaTeX source for that paragraph;

  - moving the mouse over the left margin of an itemdecl or table row reveals a link
    that selects it.


================================================
FILE: RawDocument.hs
================================================
{-# LANGUAGE
	OverloadedStrings,
	RecordWildCards,
	ViewPatterns,
	LambdaCase,
	TupleSections,
	NamedFieldPuns,
	FlexibleInstances,
	FlexibleContexts,
	RankNTypes,
	MultiParamTypeClasses,
	FunctionalDependencies,
	UndecidableInstances,
	RecursiveDo #-}

module RawDocument
	( RawElement(..), RawTexPara(..), RawFootnote(..), RawParagraph(..), LinearSection(..), RawItem(..)
	, loadMacros, parseFile, loadXrefDelta, doParse) where

import qualified LaTeXParser as Parser
import qualified Data.Text as Text
import Data.Text (Text, replace)
import Document (Row(..), SourceLocation(..), RowSepKind(..), SectionKind(..), Cell(..), CellSpan(..), XrefDelta, Abbreviation, ColumnSpec(..), TextAlignment(..))
import Data.Maybe (isJust, fromJust)
import LaTeXParser (Macros(..), Signature(..), nullCmd, storeCmd, storeEnv, Environment(..), Command(..), codeEnv, Token(..), normalCmd, ParseResult(..))
import Data.Text.IO (readFile)
import Text.Regex (mkRegex)
import qualified Data.Map as Map
import Data.Map (Map)
import Data.List (transpose, take, isPrefixOf)
import Util ((.), (++), mapHead, textStripInfix, textSubRegex, splitOn)
import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
import System.IO.Unsafe (unsafePerformIO)
import System.Process (readProcess)
import Control.Arrow (first)
import Data.Char (isSpace, isDigit)
import LaTeXBase

data RawItem = RawItem
	{ rawItemLabel :: LaTeX
	, rawItemContent :: [RawTexPara] }
	deriving (Eq, Show)

data RawElement
	= RawLatexElement LaTeXUnit
	| RawEnumerated String [RawItem]
	| RawCodeblock LaTeXUnit
	| RawExample [RawTexPara]
	| RawNote Text [RawTexPara]
	| RawItemdescr [RawTexPara]
	| RawBnf String LaTeX
	| RawTable
		{ rawTableCaption :: LaTeX
		, rawColumnSpec :: [ColumnSpec]
		, rawTableAbbr :: Abbreviation
		, rawTableBody :: [Row [RawTexPara]] }
	| RawTabbing LaTeX
	| RawFormula { rawFormulaAbbr :: Abbreviation, rawFormulaContent :: LaTeX }
	| RawFigure { rawFigureName :: LaTeX, rawFigureAbbr :: Abbreviation, rawFigureSvg :: Text }
	deriving (Eq, Show)

newtype RawTexPara = RawTexPara { rawTexParaElems :: [RawElement] }
	deriving (Eq, Show)

newtype RawFootnote = RawFootnote [RawTexPara]
	deriving Show

data RawParagraph = RawParagraph
	{ paraNumbered :: Bool
	, rawParaInItemdescr :: Bool
	, rawParaElems :: [RawTexPara]
	, rawParaSourceLoc :: Maybe SourceLocation }
	deriving Show

data LinearSection = LinearSection
	{ lsectionAbbreviation :: Abbreviation
	, lsectionKind :: SectionKind
	, lsectionName :: LaTeX
	, lsectionParagraphs :: [RawParagraph]
	, lsectionFootnotes :: [RawFootnote] }
	deriving Show

instance AllUnits RawElement where
	allUnits (RawLatexElement x) = allUnits x
	allUnits (RawBnf _ x) = allUnits x
	allUnits (RawTabbing x) = allUnits x
	allUnits (RawNote _ x) = allUnits x
	allUnits (RawExample x) = allUnits x
	allUnits (RawCodeblock x) = allUnits x
	allUnits (RawItemdescr x) = allUnits x
	allUnits (RawEnumerated _ x) = allUnits x
	allUnits (RawFormula _ x) = allUnits x
	allUnits RawFigure{} = []
	allUnits RawTable{..} = allUnits rawTableCaption ++ concatMap (allUnits . concat . map content) (map cells rawTableBody)

instance AllUnits RawTexPara where
	allUnits = allUnits . rawTexParaElems

instance AllUnits RawItem where
	allUnits RawItem{..} = allUnits rawItemLabel ++ allUnits rawItemContent

instance AllUnits LinearSection where
	allUnits LinearSection{..} = allUnits lsectionName ++ allUnits lsectionParagraphs ++ allUnits lsectionFootnotes

instance AllUnits RawParagraph where
	allUnits RawParagraph{..} = allUnits rawParaElems

instance AllUnits RawFootnote where
	allUnits (RawFootnote x) = allUnits x

bnfEnvs :: [String]
bnfEnvs = ["bnf", "ncbnf", "bnfkeywordtab", "simplebnf", "ncsimplebnf", "ncrebnf"]

isBnf :: LaTeXUnit -> Bool
isBnf (TeXEnv s _ _)
	| s `elem` bnfEnvs = True
isBnf _ = False

isTable, isTabbing, isFigure :: LaTeXUnit -> Bool
isTable x = isTeXEnv "floattablebasex" x || isTeXEnv "htmlTable" x
isTabbing = isTeXEnv "tabbing"
isFigure = isTeXEnv "importgraphic"

isEnumerate :: LaTeXUnit -> Maybe String
isEnumerate (TeXEnv s _ _)
	| s `elem` ["enumerate", "itemize", "description", "thebibliography"] = Just s
isEnumerate _ = Nothing

isParaEnd :: LaTeXUnit -> Bool
isParaEnd (TeXEnv "itemdecl" _ _) = True
isParaEnd (TeXEnv "indexeditemdecl" _ _) = True
isParaEnd (TeXEnv "itemdescr" _ _) = True
isParaEnd (TeXComm "pnum" _ _) = True
isParaEnd x = isParasEnd x

isParasEnd :: LaTeXUnit -> Bool
isParasEnd (TeXComm "definition" _ _) = True
isParasEnd (TeXComm "rSec" _ _) = True
isParasEnd (TeXComm "infannex" _ _) = True
isParasEnd (TeXComm "normannex" _ _) = True
isParasEnd _ = False

isJunk :: LaTeXUnit -> Bool
isJunk (TeXRaw x) = all isSpace (Text.unpack x)
isJunk (TeXComm "index" _ _) = True
isJunk (TeXComm "setlength" _ _) = True
isJunk _ = False

isItem :: LaTeXUnit -> Maybe LaTeX
isItem (TeXComm "item" _ []) = Just []
isItem (TeXComm "item" _ [(_, label)]) = Just label
isItem (TeXComm "bibitem" _ [(_, [TeXRaw label])]) = Just [TeXRaw $ "bib:" ++ label]
isItem _ = Nothing

parseItems :: LaTeX -> [RawItem]
parseItems [] = []
parseItems (x : rest)
	| isJunk x = mapHead (mapItemContent (mapHead addJunk)) (parseItems rest)
	| Just label <- isItem x, (item, rest') <- break (isJust . isItem) rest =
		RawItem label (parsePara item) : parseItems rest'
	where
		mapItemContent f (RawItem l c) = RawItem l (f c)
		addJunk :: RawTexPara -> RawTexPara
		addJunk (RawTexPara z) = RawTexPara (dropWhile isOnlySpace $ RawLatexElement x : z)
parseItems _ = error "need items or nothing"

doParse :: Macros -> Text -> (LaTeX, Macros)
doParse m t = (x, y)
	where
		(x, y, []) = Parser.parseString ctx (Text.unpack t)
		ctx = initialContext{Parser.macros=m}

nullCmds :: [(Int, String)]
nullCmds =
	[ (0, "clearpage kill rmfamily hfill vfill nocorr small larger noindent itcorrwidth itletterwidth global bigskip begingroup endgroup")
	, (1, "enlargethispage lstset newsavebox vspace input thispagestyle")
	, (2, "glossary settowidth addtolength copypagestyle")
	, (3, "definecolor makeheadrule")
	, (4, "makeoddhead")
	]

storeCmds :: [(Int, String)]
storeCmds =
	[ (0, "today def makeatletter bottomline makeatother Sec bmod mod long prime " ++
			"chapter section paragraph subparagraph fi otextup linebreak newpage log " ++
			"textup edef x BnfIndent par leq bot perp Sigma " ++
			"leftmargini BnfInc BnfRest protect caret sum " ++
			"xspace onelineskip textlangle textrangle tilde raggedright = " ++
			"space copyright textregistered textbackslash hsize br Gamma " ++
			"frenchspacing list leftmargin listparindent itemindent itshape relax " ++
			"nonfrenchspacing endlist upshape ttfamily baselineskip nobreak " ++
			"endfirsthead quad qquad cdot cdots dotsc bnfindentinc footnotemark ldots capsep max min " ++
			"continuedcaption hline endhead footnotesize le times dotsb rightarrow to equiv " ++
			"lfloor rfloor pi geq neq ge lceil rceil ell alpha bigl bigr mu lambda beta " ++
			"tabularnewline exp sigma big delta rho Pi nu infty displaystyle lim sin cos " ++
			"phi int theta zeta FlushAndPrintGrammar break backslash centering " ++
			"normalbaselineskip land lor mapsto normalfont textmu tablerefname figurerefname newline " ++
			"obeyspaces bnfindent vdots tabcolsep columnbreak emergencystretch commentellip " ++
			"gamma widowpenalties sffamily parskip left right `")
	, (1, "hspace footnote textit textrm textnormal texttt textbf ensuremath ref ref* mbox bibitem mathop " ++
			"terminal literalterminal noncxxterminal textsl textsc textsf text term overline " ++
			"tcode noncxxtcode literaltcode footnotetext microtypesetup cline mathtt mathit mathrm mathsf " ++
			"label newlength uline value newcounter mathscr c uppercase iref operatorname textlarger " ++
			"phantom hphantom sqrt ln emph minipage url indexescape changeglossnumformat textasciitilde " ++
			"removedxref deprxref textsuperscript rlap mathrel mathbin nopnumdiffref color ucode uname")
	, (2, "pnum definition addtocounter setcounter frac " ++
			"binom infannex normannex parbox link weblink indexedspan movedxref movedxrefs " ++
			"equal setlength textcolor providecommand")
	, (3, "multicolumn discretionary movedxrefii ifthenelse PackageError NewEnviron")
	, (4, "movedxrefiii indexlink hiddenindexlink")
	]

initialCmds :: Map Text Command
initialCmds = Map.fromList $
	[ storeCmd "item" (Signature 0 (Just []))
	, storeCmd "caption" (Signature 2 (Just []))
	, storeCmd "index" (Signature 2 (Just []))
	, storeCmd "hyperref" (Signature 2 (Just []))
	, nullCmd "makebox" (Signature 2 (Just []))
	, storeCmd "\n" (Signature 0 Nothing)
	, storeCmd "nolinebreak" (Signature 0 (Just []))
	, storeCmd "textsmaller" (Signature 2 (Just []))
	, nullCmd "gramSec" (Signature 2 (Just []))
	, ("kern", normalCmd $ Command $ \_ctx _ws -> ParseResult [] mempty . snd . parseDimen)
	]
	++ [storeCmd c (Signature a Nothing) | (a, l) <- storeCmds, c <- words l]
	++ [nullCmd (Text.pack c) (Signature a Nothing) | (a, l) <- nullCmds, c <- words l]

parseDimen :: [Token] -> ([Token], [Token])
parseDimen toks
    | t@(Token txt) : more <- toks, txt `elem` [".", "pt", "-", "em"] || all isDigit txt = first (t :) (parseDimen more)
    | otherwise = ([], toks)

initialEnvs :: Map Text Environment
initialEnvs = Map.fromList $
	[ (storeEnv e (Signature 0 Nothing))
	| e <- bnfEnvs ++
	       words "indented description itemize center tabbing defnote enumerate eqnarray* equation* itemdescr footnote matrix"
	] ++
	[ storeEnv "example" (Signature 1 (Just []))
	, storeEnv "tailexample" (Signature 1 (Just []))
	, storeEnv "note" (Signature 0 (Just [Token "Note"]))
	, storeEnv "tailnote" (Signature 0 (Just [Token "Note"]))
	, storeEnv "table" (Signature 1 Nothing)
	, storeEnv "tabular" (Signature 1 Nothing)
	, storeEnv "longtable" (Signature 1 Nothing)
	, storeEnv "importgraphic" (Signature 3 Nothing)
	, storeEnv "formula" (Signature 1 Nothing)
	, storeEnv "minipage" (Signature 1 Nothing)
	, storeEnv "thebibliography" (Signature 1 Nothing)
	, codeEnv "indexeditemdecl" (Signature 1 Nothing)
	, codeEnv "itemdecl" (Signature 0 Nothing)
	, codeEnv "indexedcodeblock" (Signature 1 Nothing)
	, codeEnv "codeblock" (Signature 0 Nothing)
	, codeEnv "codeblockdigitsep" (Signature 0 Nothing)
	, codeEnv "codeblocktu" (Signature 1 Nothing)
	, storeEnv "array" (Signature 1 Nothing)
	, storeEnv "floattablebasex" (Signature 4 Nothing)
	, storeEnv "htmlTable" (Signature 3 Nothing)
	]

initialMacros :: Parser.Macros
initialMacros = Parser.defaultMacros ++ mempty{Parser.commands=initialCmds, Parser.environments=initialEnvs}

initialContext :: Parser.Context
initialContext = Parser.defaultContext{Parser.macros=initialMacros}

parseFile :: Macros -> Text -> ([LinearSection], Macros)
parseFile macros =
	first (parseSections 0)
	. doParse macros
	. replace "$$" "$"
	. replace "\\hspace*" "\\hspace"
	. replace "``" "“"
	. textSubRegex (mkRegex "(\\grammarterm\\{[A-Za-z-]*\\})(\\{s\\}|s)") "\\1\\textit{s}"
		-- Mixing italic and upright looks okay in the PDF, but looks bad in browsers,
		-- and our linkification makes clear enough that the plural 's' is not part
		-- of the grammarterm.

loadFigure :: Text -> Text
loadFigure f = unsafePerformIO $ do
		dot <- readFile $ "assets/" ++ p
		svg <- readProcess "dot" ["-Tsvg",
			"-Gbgcolor=transparent",
			"-Gsize=8",
			"-Nfontsize=10",
			"-Gfontsize=10",
			"-Efontsize=10",
			"-Nfontname=Noto Serif",
			"-Efontname=Noto Serif",
			"-Gfontname=Noto Serif"] (Text.unpack $ Text.replace "Courier New" "Noto Sans Mono" $ Text.replace ", fontsize=24" "" dot)
		return $ rmIds $ snd $ Text.breakOn "<svg" $ Text.pack svg
	where
		p = Text.unpack $ Text.replace ".pdf" ".dot" f
		r = mkRegex "<g id=\"[^\"]*\"" 
		rmIds = textSubRegex r "<g"
			-- Without rmIds, if a page has more than one figure, it will
			-- have duplicate 'graph1', 'node1', 'edge1' etc ids.

isOnlySpace :: RawElement -> Bool
isOnlySpace (RawLatexElement x) = triml [x] == []
isOnlySpace _ = False

parsePara :: LaTeX -> [RawTexPara]
parsePara u = RawTexPara . dropWhile isOnlySpace . fmap f . splitElems (trim (filter (not . kill) u))
	where
		kill (TeXComm "hline" _ []) = True
		kill (TeXComm "capsep" _ []) = True
		kill (TeXComm "endhead" _ _) = True
		kill _ = False
		f :: LaTeXUnit -> RawElement
		f e@(TeXEnv k a stuff)
			| isFigure e
			, [(FixArg, rawFigureName), (FixArg, [TeXRaw rawFigureAbbr]), (FixArg, [TeXRaw figureFile])] <- a
				= RawFigure{rawFigureSvg=loadFigure figureFile, ..}
			| k == "formula", [(FixArg, [TeXRaw rawFormulaAbbr])] <- a = RawFormula{rawFormulaContent = stuff, ..}
			| isTable e
			, ((_, cap) : (_, [TeXRaw abbr]) : (_, y) : _) <- a
				= RawTable
				{ rawTableCaption = cap
				, rawColumnSpec = parseColspec y
				, rawTableAbbr = "tab:" ++ abbr
				, rawTableBody = breakMultiCols $ parseTable stuff }
			| isTable e = error $ "other table: " ++ show e
			| isTabbing e = RawTabbing stuff
			| isBnf e = RawBnf (if "nc" `isPrefixOf` k then drop 2 k else k) stuff
			| Just ek <- isEnumerate e = RawEnumerated ek (parseItems stuff)
			| isCodeblock e = RawCodeblock e
			| k `elem` ["note", "defnote", "tailnote"] =
			    let label = case a of [(FixArg, [TeXRaw x])] -> x; _ -> "Note"
			    in RawNote label $ parsePara stuff
			| k `elem` ["example", "tailexample"] = RawExample $ parsePara stuff
			| k == "itemdecl" || k == "minipage" || k == "indexeditemdecl" = RawLatexElement e
			| k == "itemdescr" = RawItemdescr $ parsePara stuff
		f x = RawLatexElement x
		splitElems :: LaTeX -> [LaTeX]
		splitElems [] = []
		splitElems (x:xs)
			| TeXRaw (textStripInfix "\n\n" -> Just (a, (Text.stripStart -> b))) <- x =
				(if a == "" then ([] :) else ([TeXRaw a] :)) $
					splitElems (if b == "" then xs else TeXRaw b : xs)
			| otherwise = case splitElems xs of
				[] -> [[x]]
				a:b -> ((x:a):b)

class ExtractFootnotes a where extractFootnotes :: a -> (a, [RawFootnote])

instance ExtractFootnotes LaTeX where
	extractFootnotes [] = ([], [])
	extractFootnotes (TeXRaw x : t@(TeXEnv "footnote" _ _ : _))
		= (TeXRaw (Text.stripEnd x) : t', ft)
		where (t', ft) = extractFootnotes t
			-- stripEnd here implements the footnote's \unskip
	extractFootnotes (h:t) = (h' : t', fh ++ ft)
		where
			(h', fh) = extractFootnotes h
			(t', ft) = extractFootnotes t

instance ExtractFootnotes LaTeXUnit where
	extractFootnotes (TeXEnv "footnote" [] content) =
		(TeXComm "footnoteref" "" [], [RawFootnote $ parsePara content])
	extractFootnotes (TeXComm "footnotemark" _ []) =
		(TeXComm "footnoteref" "" [], [])
	extractFootnotes (TeXComm "footnotetext" _ [(_, content)]) =
		(TeXRaw "" {- todo.. -}, [RawFootnote $ parsePara content])
	extractFootnotes (TeXComm a ws [(FixArg, content)]) =
		first (\c -> TeXComm a ws [(FixArg, c)]) (extractFootnotes content)
	extractFootnotes (TeXEnv env args content) = first (TeXEnv env args) (extractFootnotes content)
	extractFootnotes other = (other, [])

parseParas :: LaTeX -> ([RawParagraph], [RawFootnote], LaTeX {- rest -})
parseParas (break isParasEnd -> (extractFootnotes -> (stuff, fs), rest))
		= (collectParas stuff, fs, rest)
	where
		collectParas :: LaTeX -> [RawParagraph]
		collectParas (t@(TeXEnv "indexeditemdecl" _ _) : more) =
			RawParagraph False False (parsePara [t]) Nothing : collectParas more
		collectParas (t@(TeXEnv "itemdecl" _ _) : more) =
			RawParagraph False False (parsePara [t]) Nothing : collectParas more
		collectParas (TeXEnv "itemdescr" _ desc : more) =
			map (\p -> p{rawParaInItemdescr=True}) (collectParas desc)
			++ collectParas more
		collectParas (TeXComm "pnum" _
			[ (FixArg, [TeXRaw (Text.unpack -> file)])
			, (FixArg, [TeXRaw (Text.unpack -> read -> lineNr)])] : more) =
				(\(p : x) -> p{paraNumbered=True, rawParaSourceLoc=Just (SourceLocation file lineNr)} : x)
				(collectParas more)
		collectParas (TeXComm "pnum" _ [] : more) =
				(\(p : x) -> p{paraNumbered=True, rawParaSourceLoc=Nothing} : x)
				(collectParas more)
		collectParas [] = []
		collectParas x = (if null p then id else (RawParagraph False False p Nothing :)) (collectParas more)
			where (parsePara -> p, more) = break isParaEnd x

parseSections :: Int -> LaTeX -> [LinearSection]
parseSections level (TeXComm "textlarger" _ _ : more) = parseSections level more
parseSections level
	(TeXComm c _ args : (parseParas -> (lsectionParagraphs, lsectionFootnotes, more)))
	| ((FixArg, isJustRaw -> fromJust -> lsectionAbbreviation), (FixArg, lsectionName), lsectionKind, level') <- case (c, args) of
		("normannex", [abbr, name]) -> (abbr, name, NormativeAnnexSection, level)
		("infannex", [abbr, name]) -> (abbr, name, InformativeAnnexSection, level)
		("definition", [name, abbr]) -> (abbr, name, DefinitionSection (level + 1), level)
		("rSec", [(FixArg, [TeXRaw (Text.unpack -> read -> l)]), abbr, name]) ->
			(abbr, name, NormalSection l, l)
		_ -> error $ "not a section command: " ++ show (c, args)
	= LinearSection{..} : parseSections level' more
parseSections _ [] = []
parseSections l (x:xx)
	| TeXRaw t <- x, all isSpace (Text.unpack t) = parseSections l xx
	| otherwise = error $ "parseSections: " ++ show x

parseTable :: LaTeX -> [Row [RawTexPara]]
parseTable latex
	| triml latex == [] = []
	| triml row == [] = parseTable $ tail rest
	| hasCommand (== "endfirsthead") row = parseTable $ findEndHead rest
	| hasCommand (`elem` ["caption", "bottomline"]) row = parseTable rest
	| otherwise = makeRow row : parseTable rest
	where
		(row, rest) = break (== TeXLineBreak) latex
		findEndHead l
			| row' == [] = findEndHead $ tail rest'
			| hasCommand (== "endhead") row' = l
			| otherwise = findEndHead rest'
			where
				(row', rest') = break (== TeXLineBreak) l

columnBreakCell :: Cell [RawTexPara]
columnBreakCell = Cell Normal [RawTexPara [RawLatexElement (TeXComm "columnbreak" "" [])]]
isColumnBreakCell :: Cell [RawTexPara] -> Bool
isColumnBreakCell (Cell Normal [RawTexPara [RawLatexElement (TeXComm "columnbreak" _ [])]]) = True
isColumnBreakCell _ = False

makeRectangular :: a -> [[a]] -> [[a]]
makeRectangular filler rows = (take numCols . (++ repeat filler)) . rows
    where numCols = maximum (length . rows)
        -- Todo: Remove this when the bugs in Chrome's collapsed border rendering are fixed.

breakMultiCols :: [Row [RawTexPara]] -> [Row [RawTexPara]]
    -- implements the multicolfloattable environment's \columnbreak, which is left intact by parseTable
breakMultiCols rows
    | all (\Row{..} -> length cells == 1 && rowSep == NoSep) rows =
        Row NoSep . makeRectangular (Cell Normal []) (transpose $ splitOn isColumnBreakCell $ separateColumnBreaks $ (head . cells) . rows)
    | otherwise = rows
    where
        separateColumnBreaks :: [Cell [RawTexPara]] -> [Cell [RawTexPara]]
        separateColumnBreaks = concatMap f
            where
                f :: Cell [RawTexPara] -> [Cell [RawTexPara]]
                f c@Cell{..} | [RawTexPara (RawLatexElement (TeXComm "columnbreak" _ []) : rest)] <- content =
                        [columnBreakCell, c{content = [RawTexPara rest]}]
                    | otherwise = [c]

makeRow :: LaTeX -> Row [RawTexPara]
makeRow l = Row sep $ makeRowCells l
	where
		sep
			| hasCommand (== "hline") l = RowSep
			| hasCommand (== "capsep") l = CapSep
			| hasCommand (== "cline") l = Clines $ clines $ lookForCommand "cline" l
			| otherwise = NoSep

		clines [] = []
		clines (([(FixArg, [TeXRaw c])]) : rest) = (begin, end) : clines rest
			where
				(begin', end') = Text.breakOn "-" c
				begin = read $ Text.unpack begin' :: Int
				end = read $ Text.unpack $ Text.tail end' :: Int
		clines other = error $ "Unexpected \\clines syntax: " ++ show other

parseWidth :: LaTeX -> (Maybe Text, LaTeX)
parseWidth (TeXRaw "" : x) = parseWidth x
parseWidth (TeXBraces [TeXRaw x] : rest) = (Just x, rest)
parseWidth (TeXBraces [TeXRaw x, TeXComm "hsize" "" []] : rest) =
	(Just $ Text.pack (show (round ((read ("0" ++ Text.unpack x) :: Double) * 100) :: Int)) ++ "%", rest)
parseWidth (TeXBraces _ : rest) = (Nothing, rest) -- remaining cases unsupported for now
parseWidth x = (Nothing, x)

parseColspec :: LaTeX -> [ColumnSpec]
parseColspec = \x -> case x of
		[] -> []
		TeXRaw (Text.unpack -> '|' : z) : y -> go (TeXRaw (Text.pack z) : y)
		_ -> go x
	where
		go :: LaTeX -> [ColumnSpec]
		go [] = []
		go [TeXRaw "|"] = []
		go (TeXRaw "@" : TeXBraces _ : x) = go x -- unimplemented
		go (TeXRaw ">" : TeXBraces _ : x) = go x -- unimplemented
		go (TeXRaw "" : y) = go y
		go (TeXRaw (Text.uncons -> Just (letter, rest)) : y)
		    | letter == ' ' = go (TeXRaw rest : y)
		    | letter == '|' = mapHead (\(ColumnSpec x _ z) -> ColumnSpec x True z) $ go (TeXRaw rest : y)
		    | otherwise =
		    	let (w, rest') = parseWidth (TeXRaw rest : y)
		    	in ColumnSpec (colClass letter) False w : go rest'
		go x = error ("parseColspec: " ++ show x)
		
		colClass :: Char -> TextAlignment
		colClass x | x `elem` ['l', 'm', 'x'] = AlignLeft
		colClass 'p' = Justify
		colClass 'r' = AlignRight
		colClass 'c' = AlignCenter
		colClass other = error $ "Unexpected column type " ++ (other : [])

makeRowCells :: LaTeX -> [Cell [RawTexPara]]
makeRowCells [] = []
makeRowCells latex =
	case rest of
		[] -> [makeCell cell]
		_ : r ->
			(makeCell $ cell <> [TeXRaw cell']) : makeRowCells (TeXRaw rest'' : r)
	where
		(cell, rest) = break isColEnd latex
		isColEnd (TeXRaw c) = isJust $ Text.find (== '&') c
		isColEnd _ = False

		(cell', rest') = Text.break (== '&') $ getText rest
		rest'' = Text.drop 1 rest'
		getText (TeXRaw s : _) = s
		getText other = error $ "Didn't expect " ++ show other

		makeCell content
			| [[(FixArg, [TeXRaw w]), (FixArg, cs), (FixArg, content')]] <- lookForCommand "multicolumn" content =
				Cell (Multicolumn (read $ Text.unpack w) (head $ parseColspec cs)) $ parsePara content'
			| otherwise =
				Cell Normal $ parsePara content

rmExplSyntax :: Text -> Text
rmExplSyntax = Text.unlines . f . Text.lines
    where
        f [] = []
        f ("\\ExplSyntaxOn" : (dropWhile (/= "\\ExplSyntaxOff") -> (_ : x))) = f x
        f (h : t) = h : f t

loadMacros :: Text -> IO Macros
loadMacros extraMacros =
	(initialMacros ++)
	. snd
	. doParse initialMacros
	. replace "\\indeximpldef{" "\\index[impldefindex]{"
	. textSubRegex (mkRegex "\\\\penalty[0-9]+{}") ""
	. textSubRegex (mkRegex "\\\\verbtocs{[\\a-zA-Z]*}\\|[^|]*\\|") ""
	. rmExplSyntax
	. (++ extraMacros)
	. mconcat
	. mapM readFile
	["config.tex", "macros.tex", "tables.tex"]

loadXrefDelta :: IO XrefDelta
loadXrefDelta = do
	(tex, _, _) <- Parser.parseString initialContext . Text.unpack . readFile "xrefdelta.tex"
	let lfc c = lookForCommand c tex
	return $
		[ (fromJust $ isJustRaw $ snd from, [snd to])
			| [from, to] <- lfc "movedxrefs" ] ++
		[ (fromJust $ isJustRaw $ snd from, (:[]) . TeXComm "ref" "" . (:[]) . tos)
			| from : tos <- lfc "movedxref" ++ lfc "movedxrefii" ++ lfc "movedxrefiii" ] ++
		[ (abbr, [])
			| [(_, [TeXRaw abbr])] <- lfc "removedxref" ] ++
		[ (abbr, [[TeXComm "ref" "" [(FixArg, [TeXRaw ("depr." ++ abbr)])]]])
			| [(_, [TeXRaw abbr])] <- lfc "deprxref" ]


================================================
FILE: Render.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE
	OverloadedStrings,
	RecordWildCards,
	TupleSections,
	ViewPatterns,
	NamedFieldPuns,
	LambdaCase,
	TypeSynonymInstances,
	FlexibleInstances #-}

module Render (
	Render(render), concatRender, renderTab, renderFig, renderIndex, simpleRender, simpleRender2, squareAbbr,
	linkToSection, secnum, Page(..), parentLink, abbrHref,
	defaultRenderContext, isSectionPage,
	RenderContext(..), renderLatexParas
	) where

import Load14882 (parseIndex) -- todo: bad
import Document (
	CellSpan(..), Cell(..), RowSepKind(..), Row(..), Element(..), Draft(..), Footnote(..),
	TeXPara(..), Sentence(..), Abbreviation, sectionByAbbr, footnotes, ColumnSpec(..),
	Section(..), Chapter(..), Table(..), Figure(..), Sections(..), figures, formulas, tables, Item(..),
	IndexComponent(..), IndexTree, IndexNode(..), IndexKind(..), IndexEntry(..), Formula(..),
	IndexPath, indexKeyContent, tableByAbbr, figureByAbbr, formulaByAbbr, Paragraph(..), Note(..), Example(..),
	chapterOfSection)
import LaTeXBase (LaTeX, LaTeXUnit(..), ArgKind(..), MathType(..), lookForCommand, concatRaws,
    renderLaTeX, trim, isMath, isCodeblock, texStripPrefix, texSpan, mapTeX, mapCommandName)
import qualified Data.IntMap as IntMap
import Data.Text (isPrefixOf)
import qualified Data.Text.Lazy.Builder as TextBuilder
import Debug.Trace (trace)
import qualified Data.Text as Text
import qualified Data.Text.Lazy as LazyText
import qualified Text.HTML.TagSoup as Soup
import Data.Char (isAlpha, isSpace, isAlphaNum, toLower, isUpper, ord, isDigit, toUpper)
import Control.Arrow (second)
import qualified Prelude ()
import qualified MathJax
import Prelude hiding (take, (.), (++), writeFile)
import Data.List (find, nub, intersperse, (\\), sortOn, dropWhileEnd)
import qualified Data.Map as Map
import Data.Maybe (isJust, fromJust)
import Pages (Link(..))
import Sentences (linkifyFullStop)
import Util ((.), (++), replace, Text, xml, spanTag, anchor, Anchor(..), greekAlphabet,
    urlChars, intercalateBuilders, replaceXmlChars, spanJust, h, partitionBy, mapHead)
import CxxParser (parseCppDirective, parseLiteral, parseComment)

kill, literal :: [String]
kill = words $
	"clearpage renewcommand newcommand enlargethispage noindent indent vfill pagebreak setlength " ++
	"caption capsep continuedcaption bottomline hline rowsep hspace endlist cline " ++
	"hfill nocorr small endhead kill footnotesize rmfamily microtypesetup nobreak nolinebreak " ++
	"topline FlushAndPrintGrammar left right protect = ! @ - xspace obeyspaces"
literal = ["#", "{", "}", "~", "%", ""]

simpleMacros :: [(String, Text)]
simpleMacros =
	[ (","              , "<span style='white-space:nowrap'>&thinsp;</span>")
	                           -- thin, non-breaking, non-stretching space
	, ("\""             , "\"")
	, ("`"              , "`")
	, ("prime"          , "'")
	, ("caret"          , "^")
	, ("copyright"      , "&copy;")
	, ("textregistered" , "&reg;")
	, ("Cpp"            , "C++")
	, ("sum"            , "∑")
	, ("bot"            , "⊥")
	, ("perp"           , "⊥")
	, ("ell"            , "ℓ")
	, ("shr"            , ">>")
	, ("cv"             , "cv")
	, ("shl"            , "&lt;&lt;")
	, ("br"             , "<br/>")
	, ("linebreak"      , "<br/>")
	, ("sim"            , "~")
	, ("quad"           , "&emsp;&ensp;")
	, ("qquad"          , "&emsp;&emsp;")
	, ("indent"         , "&emsp;")
	, ("unun"           , "__")
	, ("^"              , "^")
	, ("ldots"          , "&hellip;")
	, ("vdots"          , "&#8942;")
	, ("dotsc"          , "&hellip;")
	, ("times"          , "&times;")
	, ("&"              , "&amp;")
	, ("$"              , "&#36;")
	, ("backslash"      , "\\")
	, ("textbackslash"  , "\\")
	, ("colcol"         , "::")
	, ("tilde"          , "~")
	, ("textasciitilde" , "~")
	, ("hspace"         , " ")
	, ("space"          , " ")
	, ("equiv"          , "&equiv;")
	, ("le"             , "&ensp;≤&ensp;")
	, ("leq"            , "&ensp;≤&ensp;")
	, ("ge"             , "&ensp;≥&ensp;")
	, ("geq"            , "&ensp;≥&ensp;")
	, ("neq"            , "&ensp;≠&ensp;")
	, ("land"           , "&ensp;∧&ensp;")
	, ("lor"            , "&ensp;∨&ensp;")
	, ("cdot"           , "·")
	, ("cdots"          , "⋯")
	, ("to"             , "→")
	, ("rightarrow"     , "→")
	, ("mapsto"         , "↦")
	, ("sqrt"           , "√")
	, ("lfloor"         , "⌊")
	, ("rfloor"         , "⌋")
	, ("lceil"          , "⌈")
	, ("rceil"          , "⌉")
	, (";"              , " ")
	, ("min"            , "<span class=\"mathrm\">min</span>")
	, ("max"            , "<span class=\"mathrm\">max</span>")
	, ("bmod"           , "<span class=\"mathrm\">mod</span>")
	, ("exp"            , "<span class=\"mathrm\">exp</span>")
	, ("ln"             , "<span class=\"mathrm\">ln</span>")
	, ("log"            , "<span class=\"mathrm\">log</span>")
	, ("opt"            , "<sub><small>opt</small></sub>")
	, ("rightshift"     , "<span class=\"mathsf\">rshift</span>")
	, ("textlangle"     , "&langle;")
	, ("textrangle"     , "&rangle;")
	, ("textmu"         , "μ")
	, ("tablerefname"   , "Table")
	, ("figurerefname"  , "Figure")
	, ("newline"        , "<br>")
	, (">"              , "&#9;")
	, ("bnfindent"      , "&emsp;&emsp;&emsp;")
	, ("\n"             , "\n")
	]
	++ [(n, Text.pack [c]) | (n, c) <- greekAlphabet]

zwsp :: Text
zwsp = "&#x200b;" -- U+200B ZERO WIDTH SPACE

makeSpan, makeDiv :: [String]
makeSpan = words "center mbox mathsf emph textsc phantom term mathtt textnormal textrm descr textsl textit mathit indented"
makeDiv = words "definition cvqual emph exitnote footnote mathit paras ttfamily TableBase table tabular longtable"

indexPathString :: IndexPath -> Text
indexPathString =
	replace " " "_" . -- HTML forbids space.
	Text.intercalate "," .
	map (indexKeyContent . indexKey)

indexShortName :: Text -> Maybe IndexKind -> Text
indexShortName "grammarindex" (Just DefinitionIndexEntry) = "nt"
indexShortName "grammarindex" Nothing = "ntref"
indexShortName "conceptindex" (Just DefinitionIndexEntry) = "concept"
indexShortName "conceptindex" Nothing = "conceptref"
indexShortName "headerindex" (Just DefinitionIndexEntry) = "header"
indexShortName "headerindex" Nothing = "headerref"
indexShortName "generalindex" (Just DefinitionIndexEntry) = "def"
indexShortName "generalindex" _ = ""
indexShortName "libraryindex" _ = "lib"
indexShortName "impldefindex" _ = ""
indexShortName "bibliography" _ = "bib"
indexShortName cat _ = error $ "indexShortName: unrecognized category: " ++ Text.unpack cat

indexPathId :: Text -> Maybe IndexKind -> IndexPath -> Text
indexPathId category kind =
	(indexShortName category kind ++) .
	(":" ++) .
	replace " "  "%20" .
	replace "'" "&#39;" .
	replace "&" "&amp;" .
	indexPathString

indexPathId2 :: RenderContext -> Int -> Text -> IndexPath -> Maybe IndexKind -> Text
indexPathId2 ctx entryNr cat path kind = indexPathId cat kind path ++ indexOccurrenceSuffix ctx entryNr

indexPathId3 :: RenderContext -> LaTeX -> Text
indexPathId3 ctx indices = indexPathId2 ctx inum icat ipath ikind
	where (icat, ipath, inum, ikind) : _ = indexPaths indices

indexPathHref :: Text -> Maybe IndexKind -> IndexPath -> Text
indexPathHref cat kind = (("#" ++ indexShortName cat kind ++ ":") ++) . urlChars . replace "&" "&amp;" . indexPathString

asId :: LaTeX -> Text
asId = mconcat . map f
	where
		f :: LaTeXUnit -> Text
		f (TeXRaw t) = replace "\n" "_" $ replace " " "_" t
		f (TeXComm "tcode" _ [(_, x)]) = asId x
		f (TeXComm "noncxxtcode" _ [(_, x)]) = asId x
		f (TeXComm "texttt" _ [(_, x)]) = asId x
		f (TeXComm "textit" _ [(_, x)]) = asId x
		f (TeXComm "mathsf" _ [(_, x)]) = asId x
		f (TeXComm "xspace" _ []) = "_"
		f (TeXBraces x) = asId x
		f (TeXMath Dollar x) = asId x
		f (TeXComm "texorpdfstring" _ [_, (_, x)]) = asId x
		f x = error $ "asId: unexpected: " ++ show x

instance Render Anchor where
	render Anchor{..} _ =
		xml "a" ([("class", aClass) | aClass /= ""] ++
		         [("href" , aHref ) | aHref  /= ""] ++
		         [("id"   , aId   ) | aId    /= ""] ++
		         [("title", aTitle) | aTitle /= ""] ++
		         [("style", aStyle) | aStyle /= ""]) aText

class Render a where render :: a -> RenderContext -> TextBuilder.Builder

concatRender :: Render a => [a] -> RenderContext -> TextBuilder.Builder
concatRender x c = mconcat $ map (\y -> render y c) x

instance Render Char where render c _ = TextBuilder.singleton c

instance (Render a, Render b) => Render (a, b) where
	render (x, y) = render x ++ render y

renderCodeblock :: String -> [(ArgKind, LaTeX)] -> LaTeX -> RenderContext -> TextBuilder.Builder
renderCodeblock env args code ctx =
    (case (env, args) of
      ("codeblocktu", [(FixArg, title)]) -> (("<p>" ++ render title ctx ++ ":") ++)
      ("indexedcodeblock", [(FixArg, indices)]) ->
      	let
      		link = anchor
      			{ aClass = "itemDeclLink"
      			, aHref = "#" ++ urlChars (indexPathId3 ctx indices)
      			, aText = "🔗" }
      	in	renderIndexed ctx "span" indices .
      		(xml "div" [("class", "marginalizedparent")] (render link ctx) ++)
      _ -> id) $
    xml "span" [("class", "codeblock")] (
        highlightLines ctx{rawTilde=True, rawHyphens=True, rawSpace=True, inCodeBlock=True} $
        concatRaws $ expandTcode (dropInitialNewline code))
  where
    dropInitialNewline :: LaTeX -> LaTeX
    dropInitialNewline (TeXRaw (Text.uncons -> Just ('\n', rest)) : more) = TeXRaw rest : more
    dropInitialNewline x = x
    expandTcode :: LaTeX -> LaTeX
    expandTcode [] = []
    expandTcode (TeXComm "tcode" _ [(FixArg, x)] : y) = expandTcode (x ++ y)
    expandTcode (x : y) = x : expandTcode y

renderOutputblock :: LaTeX -> RenderContext -> TextBuilder.Builder
renderOutputblock code ctx = xml "pre" [("class", "outputblock")] $
    render code ctx{rawTilde=True, rawHyphens=True, rawSpace=True}

sameIdNamespace :: Maybe IndexKind -> Maybe IndexKind -> Bool
sameIdNamespace Nothing (Just IndexOpen) = True
sameIdNamespace (Just IndexOpen) Nothing = True
sameIdNamespace x y = x == y

isFullPage :: Page -> Bool
isFullPage FullPage = True
isFullPage _ = False

abbrIsOnPage :: Abbreviation -> Page -> Bool
abbrIsOnPage _ FullPage = True
abbrIsOnPage abbr TablesPage = "tab:" `isPrefixOf` abbr
abbrIsOnPage abbr FiguresPage = "fig:" `isPrefixOf` abbr
abbrIsOnPage abbr (FigurePage Figure{..}) = abbr == figureAbbr
abbrIsOnPage abbr (TablePage Table{..}) = abbr == tableAbbr
abbrIsOnPage abbr (SectionPage sec)
	| "fig:" `isPrefixOf` abbr = abbr `elem` (figureAbbr . snd . figures sec)
	| "eq:" `isPrefixOf` abbr = abbr `elem` (formulaAbbr . snd . formulas sec)
	| "tab:" `isPrefixOf` abbr = abbr `elem` (tableAbbr . snd . tables sec)
	| otherwise = abbr `elem` (abbreviation . sections sec)
abbrIsOnPage _ _ = False

pageIndexEntries :: RenderContext -> IntMap.IntMap IndexEntry
pageIndexEntries c
    | SectionPage s <- page c = secIndexEntries s
    | otherwise = indexEntryMap (draft c)

indexOccurrenceSuffix :: RenderContext -> Int -> Text
	-- Returns the _ that distinguishes expr#def:object_expression from
	-- expr#def:object_expression_ ([expr] has two definitions of 'object expression',
	-- one for E1.E2 and one for E1.*E2.)
indexOccurrenceSuffix c indexNum = underscores
	where
		Just theEntry = IntMap.lookup indexNum (pageIndexEntries c)
		ies
			| SectionPage s <- page c = secIndexEntriesByPath s
			| otherwise = indexEntriesByPath (draft c)
		underscores = Text.pack
			[ '_' | (i, e) <- fromJust (Map.lookup (indexPath theEntry) ies)
			     , indexCategory e == indexCategory theEntry
			     , sameIdNamespace (indexEntryKind e) (indexEntryKind theEntry)
			     , i < indexNum ]

instance Render LaTeX where
	render (TeXComm "textbackslash" _ [] : y)
		| (TeXRaw s : rest) <- y  = \sec -> "\\" ++ render (TeXRaw $ if rawSpace sec then s else unspace s) sec ++ render rest sec
		where
			unspace s
				| Just (c, cc) <- Text.uncons s, isSpace c = cc
				| otherwise = s
	render (TeXComm "itshape" _ [] : x) = ("<i>" ++) . (++ "</i>") . render x
	render (x : y) = render x ++ render y
	render [] = return ""

keywords :: [Text]
keywords = map Text.pack $ words $
    "char8_t char16_t char32_t namespace struct void operator friend template typedef long short class double public extern " ++
    "using char new union unsigned sizeof alignas typename virtual this return const_cast delete noexcept static_cast " ++
    "reinterpret_cast mutable bool private protected inline constexpr consteval final volatile default explicit enum export asm " ++
    "typeid dynamic_cast throw if else for do while goto auto concept requires decltype try catch static_assert wchar_t " ++
    "case switch alignof break continue signed audit axiom override const register thread_local int float static module import " ++
    "co_return co_await co_yield constinit contract_assert"
    -- todo: read the real keyword table instead

highlightLines :: RenderContext -> LaTeX -> TextBuilder.Builder
highlightLines ctx x
    | (spaces, x') <- texSpan (== ' ') x, spaces /= "" = TextBuilder.fromText spaces ++ highlightLines ctx x'
    | Just (directive, x') <- parseCppDirective x = spanTag "preprocessordirective" (render directive ctx) ++ highlight ctx x'
    | TeXComm (Text.pack -> c) _ [(FixArg, y)] : more <- x, c `elem` ["terminal"] = spanTag c (highlightLines ctx y) ++ highlight ctx more
    | i@(TeXComm cmd _ _) : more <- x, cmd `elem` ["index", "obeyspaces"] = render i ctx ++ highlightLines ctx more
    | otherwise = highlight ctx x

highlightUnit :: RenderContext -> LaTeXUnit -> TextBuilder.Builder
highlightUnit ctx x = case x of
    TeXComm "rlap" _ [(FixArg, text)] ->
        spanTag "rlap" (highlight ctx text)
    TeXComm "indexedspan" _ [(FixArg, text), (FixArg, indices)] ->
        renderIndexed ctx "span" indices (highlight ctx text)
    TeXComm "terminal" _ [(FixArg, y)] ->
        spanTag "terminal" (highlight ctx y)
    TeXComm c _ []
        | c `elem` ["%", "&", "caret", "~"] -> spanTag "operator" (render x ctx)
        | c == "#" -> spanTag "preprocessordirective" (render x ctx)
        | c `elem` ["{", "}"] -> spanTag "curlybracket" (render x ctx)
    TeXBraces y -> highlight ctx y
    _ -> render x ctx

highlight :: RenderContext -> LaTeX -> TextBuilder.Builder
highlight ctx (TeXRaw kwd : TeXComm "-" "" [] : x)
    | kwd `elem` keywords = TextBuilder.fromText kwd ++ highlight ctx x
highlight ctx (TeXRaw kwd : rest@(TeXComm "textit" _ [(FixArg, (TeXRaw (Text.uncons -> Just (isAlphaNum -> True, _)) : _))] : _))
    | kwd `elem` keywords = TextBuilder.fromText kwd ++ highlight ctx rest
highlight ctx x
    | Just x' <- texStripPrefix "\n" x = "\n" ++ highlightLines ctx x'
    | (TeXRaw "" : t) <- x = highlight ctx t
    | Just (lit, x') <- parseLiteral x = spanTag "literal" (render lit ctx) ++ highlight ctx x'
    | Just (comment, x') <- parseComment x = spanTag "comment" (render comment ctx{inComment=True, rawTilde=False}) ++ highlightLines ctx x'
    | Just x' <- texStripPrefix "<new>" x = spanTag "operator""<" ++ "new" ++ spanTag "operator" ">" ++ highlight ctx x'
    -- keywords
    | (a, x') <- texSpan p x, a /= "" = (case () of
        _ | a `elem` keywords -> spanTag "keyword"
        _ | a `elem` ["defined", "__has_include", "__has_embed", "__has_cpp_attribute", "_Pragma"] -> spanTag "preprocessordirective"
        _ | a `elem` ["nullptr", "true", "false"] -> spanTag "literal"
        _ | otherwise -> id) (render (TeXRaw a) ctx) ++ highlight ctx x'
    where p c = isAlphaNum c || c == '_'
highlight ctx (TeXRaw x : more)
    | Text.head x `elem` ("'\"" :: String) = render (TeXRaw $ Text.take 1 x) ctx ++ highlight ctx (TeXRaw (Text.tail x) : more)
    | Text.head x `elem` ("()"::String) = spanTag "parenthesis" (render (TeXRaw $ Text.take 1 x) ctx) ++ highlight ctx (TeXRaw (Text.tail x) : more)
    | Text.head x `elem` ("{}"::String) = spanTag "curlybracket" (render (TeXRaw $ Text.take 1 x) ctx) ++ highlight ctx (TeXRaw (Text.tail x) : more)
    | Text.head x `elem` ("[]"::String) = spanTag "squarebracket" (render (TeXRaw $ Text.take 1 x) ctx) ++ highlight ctx (TeXRaw (Text.tail x) : more)
    | Text.head x `elem` ("<>"::String) = spanTag "anglebracket" (render (TeXRaw $ Text.take 1 x) ctx) ++ highlight ctx (TeXRaw (Text.tail x) : more)
    | Text.head x == '#' = spanTag "preprocessordirective" "#" ++ highlight ctx (TeXRaw (Text.tail x) : more)
    | Text.take 2 x == "::"
        = spanTag "operator" (render (TeXRaw "::") ctx) ++ highlight ctx (TeXRaw (Text.drop 2 x) : more)
    | Text.head x `elem` ("*&^.-+/!=|:?%~#"::String)
        = spanTag "operator" (render (TeXRaw (Text.take 1 x)) ctx) ++ highlight ctx (TeXRaw (Text.tail x) : more)
    | (a, x') <- Text.span (\c -> not (isAlphaNum c || c `elem` ("#%_(){}[]<>.*:?'\"+=-/|&!^~\n" :: String))) x, a /= ""
        = render (TeXRaw a) ctx ++ highlight ctx (TeXRaw x' : more)
    | otherwise = error ("shit: " ++ show x)
highlight ctx (x : more) = highlightUnit ctx x ++ highlight ctx more
highlight _ [] = ""

indexPaths :: LaTeX -> [(Text, IndexPath, Int, Maybe IndexKind)]
indexPaths indices =
	[ (cat, path, entryNr, kind)
	| [ (FixArg, [TeXRaw (Text.unpack -> read -> entryNr)])
	   , (OptArg, [TeXRaw cat])
	   , (FixArg, (parseIndex -> (path, kind))) ] <- lookForCommand "index" indices]

renderIndexed :: RenderContext -> Text -> LaTeX -> TextBuilder.Builder -> TextBuilder.Builder
renderIndexed ctx thing indices body = foldl f body (indexPaths indices)
	where f t (cat, path, entryNr, kind) = xml thing [("id", indexPathId2 ctx entryNr cat path kind)] t

commasAnd :: [TextBuilder.Builder] -> TextBuilder.Builder
commasAnd [] = undefined
commasAnd [x] = x
commasAnd [x, y] = x ++ " and " ++ y
commasAnd [x, y, z] = x ++ ", " ++ y ++ ", and " ++ z
commasAnd (x : y) = x ++ ", " ++ commasAnd y

abbrTitle :: Text -> Bool -> RenderContext -> Text
abbrTitle "bibliography" _ _ = "Bibliography"
abbrTitle abbr includeAbbr ctx
	| "tab:" `isPrefixOf` abbr
	, Just Table{..} <- tableByAbbr (draft ctx) abbr =
			"Table " ++ Text.pack (show tableNumber) ++ ": " ++
			LazyText.toStrict (TextBuilder.toLazyText $ render tableCaption ctx{noTags=True})
	| Just sec@Section{..} <- sectionByAbbr (draft ctx) abbr =
		LazyText.toStrict $ TextBuilder.toLazyText $
			secnumText sec ++ "&emsp;" ++
			render sectionName ctx{noTags=True} ++
			TextBuilder.fromText (if includeAbbr then "&emsp;[" ++ abbr ++ "]" else "")
	| otherwise = ""

renderBreak :: RenderContext -> TextBuilder.Builder
renderBreak ctx = if noTags ctx then "\n" else "<br>"

renderIndexLink :: String -> [(ArgKind, [LaTeXUnit])] -> RenderContext -> TextBuilder.Builder
renderIndexLink cmd [(FixArg, txt), (FixArg, [TeXRaw cat]), (FixArg, rawIndexPath), (FixArg, abbr_arg)] ctx
	| not (noTags ctx)
	, Just abbr <- mabbr = render anchor
		{ aText = render txt ctx{inLink=True}
		, aHref = (if abbrIsOnPage abbr (page ctx) then "" else linkToSectionHref SectionToSection abbr)
			++ indexPathHref cat kind p
		, aTitle = abbrTitle abbr True ctx
		, aClass = if cmd == "hiddenindexlink" then "hidden_link" else ""
		} ctx
	| otherwise = render txt ctx
	where
		(p, kind) = parseIndex rawIndexPath
		resolved :: Maybe Text
		resolved = case Map.lookup p $ indexEntriesByPath (draft ctx) of
				Just entries
					| (hd:_) <- [ abbreviation
					            | (_, IndexEntry{indexEntrySection=abbreviation, indexEntryKind}) <- entries
					            , indexEntryKind == kind
					            , not ("gram." `isPrefixOf` abbreviation) ] -> Just hd
				_ -> Nothing
		traceIfBad
			| resolved == Nothing, cat /= "grammarindex", cat /= "bibliography" =
				trace $ "\nbad index link: " ++ show (cat, rawIndexPath)
					++ "\nlookup result: " ++ show (Map.lookup p $ indexEntriesByPath (draft ctx))
			| otherwise = id
		mabbr = traceIfBad $ case abbr_arg of
				[] -> resolved
				[TeXRaw x] -> Just x
				y -> error $ "bad indexlink arg: " ++ show y
renderIndexLink _ _ _ = error "bad indexlink"

instance Render LaTeXUnit where
	render (TeXRaw x                 ) = \RenderContext{..} -> TextBuilder.fromText
	    $ (if rawHyphens then id else replace "--" "–" . replace "---" "—")
	    $ (if not inCodeBlock then replace "''" "”" else id)
	    $ (if rawTilde then id else replace "~" " ")
	    $ (if not insertBreaks then id else
	    	replace "::" (zwsp ++ "::" ++ zwsp) .
	    	replace "\1" "__" .
	    	replace "_" (if noTags then "_&shy;" else "_<span class='shy'></span>") .
	    	replace "__" "\1")
	    $ (if replXmlChars then replaceXmlChars else id)
	    $ x
	render (TeXComm "br" _ _         ) = renderBreak
	render  TeXLineBreak               = renderBreak
	render (TeXComm "break" _ []     ) = renderBreak
	render (TeXBraces t              ) = render t
	render m@(TeXMath _ _            ) = renderMath [m]
	render (TeXComm "commentellip" _ []) = const $ spanTag "comment" "/* ... */"
	render (TeXComm "ensuremath" _ [(FixArg, x)]) = renderMath x
	render (TeXComm "hyperref" _ [_, (FixArg, x)]) = render x
	render (TeXComm "label" _ [(FixArg, [TeXRaw x])]) = render anchor{aId = x, aClass = "index"}
	render (TeXComm "ref*" x y) = render (TeXComm "ref" x y)
	render (TeXComm "ref" _ [(FixArg, concatRaws -> [TeXRaw abbr])]) = \ctx@RenderContext{..} ->
		let
			linkText :: TextBuilder.Builder
			linkText
				| "tab:" `isPrefixOf` abbr
				, Just Table{..} <- tableByAbbr draft abbr = TextBuilder.fromString $ show tableNumber
				| "fig:" `isPrefixOf` abbr
				, Figure{..} <- figureByAbbr draft abbr = TextBuilder.fromString $ show figureNumber
				| "eq:" `isPrefixOf` abbr
				, f@Formula{..} <- formulaByAbbr draft abbr = TextBuilder.fromText $ fullFormulaNumber f
				| otherwise = squareAbbr (not noTags) abbr
			renderLabelRef sec =
			   simpleRender2 anchor{
			     aHref = abbrHref (abbreviation sec) ctx ++ "#" ++ abbr,
			     aText = squareAbbr (not noTags) (abbreviation sec),
			     aTitle = abbrTitle (abbreviation sec) False ctx }
			renderSectionRef =
			  simpleRender2 anchor{
			    aHref = abbrHref abbr ctx,
			    aText = linkText,
			    aTitle = abbrTitle abbr False ctx }
	    in if noTags then linkText else
	        case Map.lookup abbr (labels draft) of
	            Just sec -> renderLabelRef sec
	            Nothing
	                | SectionPage pageSec <- page, abbreviation pageSec == abbr -> linkText
	                | otherwise -> renderSectionRef
	render (TeXComm "iref" _ [(FixArg, [TeXRaw abbrs])]) = \ctx ->
	    let renderAbbr abbr = render (TeXComm "ref" "" [(FixArg, [TeXRaw abbr])]) ctx
	    in " (" ++ mconcat (intersperse ", " $ map (renderAbbr . Text.strip) $ Text.splitOn "," abbrs) ++ ")"
	render (TeXComm "nopnumdiffref" _ [(FixArg, [TeXRaw (Text.splitOn "," -> abbrs)])]) = \ctx ->
	    let f abbr = simpleRender2 anchor{aHref = abbrHref abbr ctx, aText = squareAbbr True abbr}
	    in "<b>Affected " ++ (if length abbrs == 1 then "subclause" else "subclauses") ++ ":</b> "
	        ++ commasAnd (map f abbrs)
	render (TeXComm "weblink" _ [(FixArg, text), (FixArg, href)])
		= render anchor
			{ aText = simpleRender2 text
			, aHref = simpleRender href}
	render (TeXComm "url" _ [(FixArg, u)])
		= render anchor
			{ aText = simpleRender2 u
			, aHref = simpleRender u }
	render (TeXComm "link" _ [(FixArg, txt), (FixArg, [TeXRaw abbr])])
		= \ctx -> if noTags ctx then render txt ctx else render anchor{
			aHref = abbrHref abbr ctx,
			aText = render txt ctx{inLink=True},
			aTitle = abbrTitle abbr True ctx} ctx
	render (TeXComm c _ l) | c `elem` ["indexlink", "hiddenindexlink"] = renderIndexLink c l
	render (TeXComm "color" _ _) = const ""
	render (TeXComm "textcolor" _ [_, (FixArg, x)]) = render x
	render (TeXComm "textsmaller" _ [_, (FixArg, x)]) = render x
	render (TeXComm "terminal" _ [(FixArg, x)]) = spanTag "terminal" . flip highlightLines x
	render (TeXComm "texttt" _ [(FixArg, x)]) = \ctx ->
		(if noTags ctx then id else spanTag "texttt") $ render x ctx{rawHyphens = True, insertBreaks = True}
	render (TeXComm "literaltcode" _ [(FixArg, x)]) = spanTag "literal" . spanTag "texttt" . render x
	render (TeXComm cmd _ [(FixArg, x)])
		| cmd `elem` ["tcode"] = \ctx ->
		if noTags ctx then render x ctx{rawHyphens=True, insertBreaks=True}
		else spanTag (if inCodeBlock ctx then "tcode_in_codeblock" else "texttt") $
			if not (inComment ctx) && not (inLink ctx) && not (inSectionTitle ctx)
			then highlightLines ctx{rawHyphens=True, insertBreaks=True} x
			else render x ctx{rawHyphens=True, insertBreaks=True}
	render (TeXComm "noncxxtcode" _ [(FixArg, x)]) = \ctx ->
		spanTag (if inCodeBlock ctx then "tcode_in_codeblock" else "texttt") $
		    render x ctx{rawHyphens=True, insertBreaks=True}
	render (TeXComm "textbf" _ [(FixArg, x)]) = ("<b>" ++) . (++ "</b>") . render x
	render (TeXComm "index" _
			[ (FixArg, [TeXRaw (Text.unpack -> read -> entryNr)])
			, (OptArg, [TeXRaw category])
			, (FixArg, (parseIndex -> (p, kind)))
			])
		= \ctx -> if noTags ctx then "" else case kind of
			Just IndexClose -> ""
			Just (See _ _) -> ""
			_ -> render anchor
						{ aId = indexPathId2 ctx entryNr category p kind
						, aClass = "index"} ctx
	render (TeXComm "indexedspan" _ [(FixArg, text), (FixArg, indices)]) =
		\ctx -> (if noTags ctx then id else renderIndexed ctx "span" indices) $ render text ctx
	render (TeXEnv "indexeditemdecl" [(FixArg, indices)] t) = \ctx ->
		let
			link = anchor
				{ aClass = "itemDeclLink"
				, aHref = "#" ++ urlChars (indexPathId3 ctx indices)
				, aText = "🔗" }
		in
			renderIndexed ctx "div" indices $
			xml "div" [("class", "itemdecl")] $
			xml "div" [("class", "marginalizedparent")] (render link ctx) ++
			xml "code" [("class", "itemdeclcode")] (TextBuilder.fromText $ Text.dropWhile (== '\n') $ LazyText.toStrict $ TextBuilder.toLazyText $ highlightLines ctx{rawTilde=True, rawHyphens=True} t)
	render (TeXComm "discretionary" _ _) = const (TextBuilder.fromText zwsp)
	render (TeXComm "ifthenelse" _ [_, _, (FixArg, x)]) = render x
	render (TeXComm "multicolumn" _ [(FixArg, [TeXRaw n]), _, (FixArg, content)]) = xml "td" [("colspan", n)] . render content
	render (TeXComm "leftshift" _ [(FixArg, content)]) =
		(spanTag "mathsf" "lshift" ++) . xml "sub" [("class", "math")] . render content
	render (TeXComm "verb" _ [(FixArg, a)]) = \c -> xml "code" [] $ render a c{rawTilde=True, rawHyphens=True}
	render (TeXComm "footnoteref" _ [(FixArg, [TeXRaw n])]) = \ctx -> flip render ctx $ anchor
		{ aClass = "footnoteref"
		, aText  = TextBuilder.fromText n
		, aId    = "footnoteref-" ++ n
		, aTitle = (!! 3) $ iterate (Text.replace "  " " ")
				 $ Text.replace "\n" " "
				 $ Text.replace "'" "&apos;"
				 $ LazyText.toStrict $ TextBuilder.toLazyText
				 $ mconcat $ map (flip render ctx{noTags = True})
				 $ footnoteContent $ snd
				 $ footnotes (draft ctx) !! ((read (Text.unpack n) :: Int) - 1)
		, aHref  =
			(if isFullPage (page ctx) || isSectionPage (page ctx) then "" else "SectionToSection/" ++ paraUrl ctx)
			++ "#footnote-" ++ n }
	render (TeXComm "raisebox" _ args)
		| (FixArg, concatRaws -> [TeXRaw d]) <- head args
		, (FixArg, content) <- Prelude.last args =
			let neg s
				| Text.head s == '-' = Text.tail s
				| otherwise = "-" ++ s
			in xml "span" [("style", "position: relative; top: " ++ neg d)] . render content
	render (TeXComm "parbox" _ [_, (FixArg, x)]) = render x
	render (TeXComm "term" _ [(FixArg, x)]) =
		\sec ->
			let
				i = "def:" ++ asId x
				-- It's tempting to use 'term:' instead of 'def:' here, but if we do that,
				-- URLs break when upstream promotes a \term to a \defn.
			in render anchor
				{ aText  = "<i>" ++ render x sec ++ "</i>"
				, aId    = i
				, aHref  = "#" ++ urlChars i
				, aClass = "hidden_link" } sec
	render (TeXComm "texorpdfstring" _ [_, (FixArg, x)]) = render x
	render (TeXComm " " _ [])            = return "&nbsp;"
	render (TeXComm "\n" _ [])           = return "\n"
	render (TeXComm "textit" _ [(FixArg, x)]) = \c -> (if noTags c then id else xml "i" []) $ render x c{rawTilde = False}
	render (TeXComm "c" _ [(FixArg, [TeXRaw "t"])]) = return "ţ"
	render (TeXComm s _ [])
	    | s == "caret"                 = return "^"
	    | s `elem` literal             = return $ TextBuilder.fromString s
	    | Just x <-
	       lookup s simpleMacros       = return $ TextBuilder.fromText x
	    | s `elem` kill                = return ""
	    | otherwise                    = return $ spanTag (Text.pack s) ""
	render (TeXComm "class" _ [(FixArg, [TeXRaw cls]), (FixArg, [TeXComm "href" _ [(FixArg, [TeXRaw href]), (FixArg, text)]])])
	    = \ctx -> render anchor{aHref=href, aText=render text ctx, aClass=cls} ctx
	render (TeXComm "class" _ [(FixArg, [TeXRaw cls]), (FixArg, x)])
	    = \ctx -> spanTag cls $ render x ctx
	render (TeXComm "href" _ [(FixArg, [TeXRaw href]), (FixArg, text)])
	    = \ctx -> render anchor{aHref=href, aText=render text ctx} ctx
	render (TeXComm "ucode" _ [(FixArg, code)]) = spanTag "ucode" . render (TeXRaw "U+" : code)
	render (TeXComm x _ s)
	    | x `elem` kill                = return ""
	    | null s, Just y <-
	       lookup x simpleMacros       = return $ TextBuilder.fromText y
	    | [(FixArg, z)] <- s, Just y <-
	       lookup x simpleMacros       = (TextBuilder.fromText y ++) . render z
	    | otherwise                    = \ctx -> (if noTags ctx then id else spanTag (Text.pack x)) $ render (s >>= snd) ctx
	render (TeXEnv "itemdecl" [(FixArg, [TeXRaw num])] t) = \c ->
		let
			i = case [(icat, ipath) | (icat, ipath, _inum, Just DefinitionIndexEntry) <- indexPaths t] of
				[(icat, ipath)] -> indexPathId icat (Just DefinitionIndexEntry) ipath
				_ -> mconcat (idPrefixes c) ++ "itemdecl:" ++ num
			link = anchor{aClass="itemDeclLink", aHref="#" ++ urlChars i, aText="🔗"}
		in
			xml "div" [("class", "itemdecl"), ("id", i)] $
			xml "div" [("class", "marginalizedparent")] (render link c) ++
			xml "code" [("class", "itemdeclcode")] (TextBuilder.fromText $ Text.dropWhile (== '\n') $ LazyText.toStrict $ TextBuilder.toLazyText $ highlightLines c{rawTilde=True, rawHyphens=True} t)
	render env@(TeXEnv e args t)
	    | e `elem` makeSpan            = \ctx -> (if noTags ctx then id else spanTag (Text.pack e)) (render t ctx)
	    | e `elem` makeDiv             = xml "div" [("class", Text.pack e)] . render t
	    | isMath env && hasComplexMath True [env] = renderComplexMath [env]
	    | isCodeblock env              = renderCodeblock e args t
		| e == "minipage", [e2@(TeXEnv _ _ cb)] <- trim t, isCodeblock e2 =
			xml "div" [("class", "minipage")] . renderCodeblock "codeblock" [] cb
		| e == "outputblock"           = renderOutputblock t
		| e == "itemdescr"             = render t
		| e == "thebibliography"       = render t
	    | otherwise                    = error $ "render: unexpected " ++ show env

instance Render Int where render = return . TextBuilder.fromString . show

instance Render IndexComponent where
	render IndexComponent{..} = render indexKey

instance Render IndexEntry where
	render IndexEntry{indexEntryKind=Just (See also x), ..} = \ctx ->
		"<i>" ++ (if also then "see also" else "see") ++ "</i> " ++
			 render (anchor
				 { aHref = "#:" ++
				 (urlChars $
				  replace " " "_" $
				  replace ", " "," $
				  indexKeyContent x)
				 , aText = render x ctx}) ctx
	render IndexEntry{indexEntryKind=Just IndexClose} = return ""
	render IndexEntry{..} =
		return $ simpleRender2 anchor
			{ aHref = "SectionToSection/" ++ urlChars indexEntrySection
				++ indexPathHref indexCategory indexEntryKind indexPath
			, aText = (if indexEntryKind == Just DefinitionIndexEntry then xml "b" [] else id) $ squareAbbr True indexEntrySection }

indexDisplayOrder :: IndexComponent -> (([(Int, Int)], Int), ([(Int, Int)], Int))
indexDisplayOrder y = (f (indexSortKey y), f (indexKey y))
	where
		g :: Char -> (Int, Int)
		g c
			| isDigit c = (1, ord c)
			| isAlpha c = (2, ord (toLower c))
			| otherwise = (0, ord c)
		he :: String -> ([(Int, Int)], Int)
		he x = (map g x, if isUpper (head x) then 0 else 1)
		f = he . Text.unpack . indexKeyContent

instance Render [(IndexComponent, IndexNode)] where
	render tree ctx = go [] tree
		where
			IndexPage cat = page ctx
			go :: IndexPath -> [(IndexComponent, IndexNode)] -> TextBuilder.Builder
			go up x = mconcat $ f up . (sortOn (indexDisplayOrder . fst) x)

			f :: IndexPath -> (IndexComponent, IndexNode) -> TextBuilder.Builder
			f up (comp, IndexNode{..}) =
				let
					up' = up ++ [comp]
				in
					xml "div" [("id", indexPathId cat Nothing up')] $
					xml "div" [("class", "indexitems")] $
					TextBuilder.fromText (
					Text.intercalate ", " (nub $ filter (/= "") $ map (LazyText.toStrict . TextBuilder.toLazyText) $ render comp ctx : flip render ctx . indexEntries)) ++
					go up' (Map.toList indexSubnodes)

data IndexHeading = Symbols | Numbers | Letter Char
	deriving (Eq, Ord)

instance Show IndexHeading where
	show Symbols = "Symbols"
	show Numbers = "Numbers"
	show (Letter c) = [c]

indexHeading :: IndexComponent -> IndexHeading
indexHeading (indexSortKey -> indexKeyContent -> Text.head -> c)
	| isDigit c = Numbers
	| isAlpha c = Letter (toUpper c)
	| otherwise = Symbols

indexSortKey :: IndexComponent -> LaTeX
indexSortKey IndexComponent{..}
	| distinctIndexSortKey /= [] = distinctIndexSortKey
	| otherwise = indexKey

renderIndex :: RenderContext -> IndexTree -> TextBuilder.Builder
renderIndex ctx tree
	| name `elem` ["generalindex", "libraryindex"] = mconcat $ ["<hr>"] ++ linklines ++ ["<hr>"] ++ map sub p
	| otherwise = render (Map.toList tree) ctx
	where
		IndexPage name = page ctx
		p = partitionBy (indexHeading . fst) $ Map.toList tree
		sub (n, ii) = h 2 (render anchor{aText=TextBuilder.fromText $ Text.pack (show n), aId=Text.pack (show n)} ctx) ++ render ii ctx
		(symnum, rest) = splitAt 2 p
		linklines = map (h 2 . mconcat . intersperse " " . map (li . fst)) [symnum, rest]
		li n = render anchor{aText = TextBuilder.fromText $ Text.pack (show n), aHref = "#" ++ Text.pack (show n)} ctx

renderTab :: Bool -> Table -> Text -> Bool -> Bool -> RenderContext -> TextBuilder.Builder
renderTab stripTab Table{..} href boldCaption linkifyTableNum ctx =
	xml "div" [("class", "numberedTable"), ("id", id_)] $ -- todo: multiple abbrs?
		(if boldCaption then "<b>" else "") ++
		"Table " ++ tableNumF (render tableNumber ctx) ++ " &mdash; " ++
		render tableCaption ctx ++
		"&emsp;" ++
		render anchor{aText="[" ++ TextBuilder.fromText tableAbbr ++ "]", aHref=href} ctx ++
		(if boldCaption then "</b>" else "") ++
		"<br>" ++
		renderTable columnSpec tableBody ctx
	where
		tableNumF = if linkifyTableNum then linkify anchor{aHref = "#" ++ id_} ctx else id
		id_ = (if stripTab then replace "tab:" "" else id) tableAbbr

linkify :: Anchor -> RenderContext -> TextBuilder.Builder -> TextBuilder.Builder
linkify a ctx txt = render a{aText=txt} ctx

renderFig :: Bool -> Figure -> Text -> Bool -> Bool -> RenderContext -> TextBuilder.Builder
renderFig stripFig Figure{..} href boldCaption linkifyFigureNum ctx =
	xml "div" [("class", "figure"), ("id", id_)] $
		TextBuilder.fromText figureSvg ++ "<br>" ++
		(if boldCaption then "<b>" else "") ++
		"Figure " ++ figureNumF (render figureNumber ctx) ++ " &mdash; " ++
		render figureName ctx ++ "&emsp;&ensp;" ++
		render anchor{aText=squareAbbr False figureAbbr, aHref=href} ctx ++
		(if boldCaption then "</b>" else "")
	where
		figureNumF = if linkifyFigureNum then linkify anchor{aHref="#" ++ id_} ctx else id
		id_ = (if stripFig then replace "fig:" "" else id) figureAbbr

data RenderItem = RenderItem { listOrdered :: Bool, item :: Item }

spacedJoin :: TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder
spacedJoin x y
	| TextBuilder.toLazyText x == "" = y
	| TextBuilder.toLazyText y == "" = x
	| otherwise = x ++ " " ++ y

instance Render RenderItem where
	render RenderItem{item=Item Nothing mlabel elems paras} ctx
		= xml "li" attrs $ render elems ctx ++ renderLatexParas paras ctx
			where
			    attrs
			        | Just [TeXRaw l] <- mlabel = [("id", l)]
			        | otherwise = []

	render RenderItem{item=Item (Just nn) mlabel elems paras, ..} ctx
		| listOrdered =
			xml "tr" [("id", thisId)] $
				(xml "td" [] (case mlabel of
					Nothing -> render link ctx'
					Just label -> render anchor{aHref = linkHref, aText=simpleRender2 label} ctx' ++ " ")) ++
				(xml "td" [] content)
		| otherwise =
			xml "li" [("id", thisId)] $ case mlabel of
				Nothing -> xml "div" [("class", "marginalizedparent"), ("style", "left:" ++ left)] (render link ctx') ++ content
				Just label ->
					render anchor{aHref = linkHref, aText=simpleRender2 label} ctx'
					++ " " ++ content
		where
			content = spacedJoin (render elems ctx') (renderLatexParas paras ctx')
			left
				| listOrdered = "-4.5em"
				| otherwise = simpleRender (-marginalizedParentLeft - ulPaddingLeft * (length nn - 1) - extraIndentation ctx) ++ "mm"
			ulPaddingLeft = 9
			marginalizedParentLeft = 18
			thisId = mconcat (idPrefixes ctx) ++ Text.pack (Prelude.last nn)
			ctx' = ctx{ idPrefixes = idPrefixes ctx ++ [Text.pack (Prelude.last nn) ++ "."] }
			dottedNumber = Text.intercalate "." (Text.pack . nn)
			linkText
				| listOrdered =
					let
						s = Prelude.last nn
						punct
							| isAlpha (head s) = ")"
							| otherwise = "."
					in
						Text.pack $ s ++ punct
				| otherwise = "(" ++ dottedNumber ++ ")"
			linkClass
				| listOrdered = "enumerated_item_num"
				| otherwise = "marginalized"
			linkHref = "#" ++ thisId
			link = anchor{aClass=linkClass, aHref=linkHref, aText=TextBuilder.fromText linkText}

paraUrl :: RenderContext -> Text
paraUrl RenderContext{..} = urlChars $ abbreviation $ case nearestEnclosing of
	Left p -> paraSection p
	Right s -> s

prependSentence :: Sentence -> TeXPara -> TeXPara
prependSentence s (TeXPara ss) = TeXPara (s : ss)

instance Render Footnote where
	render (Footnote n content) ctx =
			xml "div" [("class", "footnote"), ("id", i)] $
			renderParas (mapHead (prependSentence footnoteNum) content)
		where
			footnoteNum = Sentence Nothing [HtmlElement (LazyText.toStrict $ TextBuilder.toLazyText $ render (link, backlink) ctx)]
			ctx' = ctx{idPrefixes = [i ++ "."]}
			backlink = anchor{aText = linkText, aHref = "#footnoteref-" ++ num, aClass = "footnoteBacklink"}
			renderParas [] = ""
			renderParas (p:pp) = xml "div" [("class", "texpara")] (render p ctx') ++ renderParas pp
			num = Text.pack $ show n
			i = "footnote-" ++ num
			footnoteIsOnPage = isFullPage (page ctx) || isSectionPage (page ctx)
			linkText = TextBuilder.fromText $ num ++ ")"
			link = anchor
				{ aText = linkText
				, aClass = "footnotenum"
				, aHref =
					(if footnoteIsOnPage then "" else "SectionToSection/" ++ paraUrl ctx)
					++ "#" ++ i }

noWrapSpace :: TextBuilder.Builder
noWrapSpace = "&nbsp;"

instance Render Note where
	render Note{..} ctx = xml "div" [("id", i), ("class", "note")] (renderParas True noteContent)
		where
			prefix = "[<i>" ++ TextBuilder.fromText noteLabel ++ "&nbsp;" ++ render link ctx ++ "</i>:&ensp;"
			suffix = " —" ++ noWrapSpace ++ "<i>end note</i>]"
			renderParas _ [] = ""
			renderParas isFirst (p:pp) = xml "div" [("class", "texpara")] ((if isFirst then prefix else "") ++ render p ctx ++ (if null pp then suffix else "")) ++ renderParas False pp
			i = mconcat (dropWhileEnd (isDigit . Text.head) (idPrefixes ctx)) ++ "note-" ++ noteNum
			noteNum = Text.pack $ show noteNumber
			link = anchor{aHref = "#" ++ i, aText = TextBuilder.fromText noteNum }

instance Render Example where
	render Example{..} ctx
		| noTags ctx =
			"[Example: "
			++ renderLatexParas exampleContent ctx
			++ " —&nbsp;end&nbsp;example] "
		| otherwise = xml "div" [("id", i), ("class", "example")] (renderParas True exampleContent)
		where
			prefix = "[<i>Example&nbsp;" ++ render link ctx ++ "</i>:&ensp;"
			suffix = " —" ++ noWrapSpace ++ "<i>end example</i>]"
			renderParas _ [] = ""
			renderParas isFirst (p:pp) = xml "div" [("class", "texpara")] ((if isFirst then prefix else "") ++ render p ctx ++ (if null pp then suffix else "")) ++ renderParas False pp
			i = mconcat (dropWhileEnd (isDigit . Text.head) (idPrefixes ctx)) ++ "example-" ++ exNum
			exNum = Text.pack $ show exampleNumber
			link = anchor{aHref = "#" ++ i, aText = TextBuilder.fromText exNum }

instance Render Formula where
    render f@Formula{..} ctx =
            xml "div" [("class", "formula"), ("id", formulaAbbr)] $
                doRenderComplexMath [TeXMath Square ([tag] ++ formulaContent)] ctx
        where tag = TeXComm "tag" "" [(FixArg, [TeXRaw (fullFormulaNumber f)])]

fullFormulaNumber :: Formula -> Text
fullFormulaNumber Formula{..} = Text.pack $ show chapterNum ++ "." ++ show formulaNumber
  where chapterNum = sectionNumber $ chapterOfSection formulaSection

nontermDef :: LaTeX -> Maybe Text
nontermDef t
	| [n] <- [n | ("grammarindex", [IndexComponent{distinctIndexSortKey=[TeXRaw n]}], _inum, Just DefinitionIndexEntry) <- indexPaths t] = Just n
	| otherwise = Nothing
		
instance Render Element where
	render (HtmlElement html) = const $ TextBuilder.fromText html
	render (LatexElement x) = render x
	render (Codeblock x) = render x
	render (Itemdescr x) = xml "div" [("class", "itemdescr")] . renderLatexParas x
	render (NoteElement x) = render x
	render (ExampleElement x) = render x
	render (Bnf e t) = xml "div" ([("class", Text.pack e)] ++ idattr) . render t
		where
			idattr
				| Just nt <- nontermDef t = [("id", "nt:" ++ nt)]
				| otherwise = []
	render (TableElement t) = \ctx ->
			renderTab False t ("./SectionToSection/" ++ tableAbbr t) False True ctx{idPrefixes=[tableAbbr t++"-"]}
	render (FigureElement f) = renderFig False f ("./SectionToSection/" ++ figureAbbr f) False True
	render (FormulaElement f) = render f
	render (Tabbing t) =
		xml "pre" [] . TextBuilder.fromText . htmlTabs . LazyText.toStrict . TextBuilder.toLazyText . render (preprocessPre t) -- todo: this is horrible
	render Enumerated{..} = xml t [("class", Text.pack enumCmd)] .
			concatRender (RenderItem (enumCmd == "enumerate") . enumItems)
		where
			t = case enumCmd of
				"enumerate" -> "table"
				"itemize" -> "ul"
				"description" -> "ul"
				"thebibliography" -> "ul"
				_ -> undefined

class HasComplexMath a where
    hasComplexMath :: Bool -> a -> Bool

instance HasComplexMath LaTeXUnit where
    hasComplexMath mathMode (TeXRaw x) = mathMode && Text.any (`elem` ("+-*/^_=' " :: String)) (Text.strip x)
    hasComplexMath m (TeXComm c _ args)
        | c `elem` words "frac sum binom int sqrt lfloor rfloor lceil rceil log mathscr mapsto cdot bmod" = True
        | c `elem` words "tcode" = hasComplexMath False (map snd args)
        | otherwise = hasComplexMath m (map snd args)
    hasComplexMath _ (TeXMath _ x) = hasComplexMath True x
    hasComplexMath m (TeXBraces x) = hasComplexMath m x
    hasComplexMath m (TeXEnv e _ args)
        | e `elem` ["array", "eqnarray"] = True
        | otherwise = hasComplexMath m args
    hasComplexMath _ TeXLineBreak = False

instance HasComplexMath a => HasComplexMath [a] where
    hasComplexMath m = any (hasComplexMath m)

data Page
	= SectionPage Section
	| TablePage Table
	| FigurePage Figure
	| FullPage
	| IndexPage Text {- category -}
	| XrefDeltaPage
	| FootnotesPage
	| TablesPage
	| FiguresPage
	| TocPage
	| ExpandedTocPage

isSectionPage :: Page -> Bool
isSectionPage (SectionPage _) = True
isSectionPage _ = False

data RenderContext = RenderContext
	{ page :: Page
	, draft :: Draft
	, nearestEnclosing :: Either Paragraph Section
	, rawHyphens :: Bool -- in real code envs /and/ in \texttt
	, rawTilde :: Bool   -- in real code envs but not in \texttt
	, rawSpace :: Bool
	, insertBreaks :: Bool
	, inLink :: Bool -- so as not to linkify grammarterms that appear as part of a defined/linkified term/phrase
	, inCodeBlock :: Bool -- in codeblocks, some commands like \tcode have a different meaning
	, inComment :: Bool -- in comments, \tcode should not be highlighted
	, inSectionTitle :: Bool -- in section titles, there should be no highlighting
	, replXmlChars :: Bool -- replace < with &lt;, etc
	, noTags :: Bool -- means we're rendering the contents of e.g. a "title" attribute which cannot contain tags/elements
	, extraIndentation :: Int -- in em
	, idPrefixes :: [Text] }

defaultRenderContext :: RenderContext
defaultRenderContext = RenderContext
	{ page = error "no page"
	, draft = error "no draft"
	, nearestEnclosing = error "no para/sec"
	, rawHyphens = False
	, rawTilde = False
	, rawSpace = False
	, insertBreaks = False
	, inLink = False
	, inCodeBlock = False
	, inComment = False
	, inSectionTitle = False
	, replXmlChars = True
	, noTags = False
	, extraIndentation = 0
	, idPrefixes = [] }

squareAbbr :: Bool -> Abbreviation -> TextBuilder.Builder
squareAbbr softHyphens =
	("[" ++) . (++ "]") .
	TextBuilder.fromText .
	(if softHyphens then Text.replace "." ".<span class='shy'></span>" else id)

parentLink :: Section -> Abbreviation -> Text
parentLink parent child
	| Just sub <- Text.stripPrefix (abbreviation parent ++ ".") child = sub
	| otherwise = child

abbrHref :: Abbreviation -> RenderContext -> Text
abbrHref abbr RenderContext{..}
	| SectionPage sec <- page, abbreviation sec == abbr = "#"
	| abbrIsOnPage abbr page = "#" ++ case page of
	    SectionPage sec -> urlChars (parentLink sec abbr)
	    TablesPage | Just abbr' <- Text.stripPrefix "tab:" abbr -> urlChars abbr'
	    _ -> urlChars abbr
	| "fig:" `isPrefixOf` abbr =
		let Figure{figureSection=Section{..}, ..} = figureByAbbr draft abbr
		in "SectionToSection/" ++ urlChars abbr ++ "#" ++ urlChars figureAbbr
	| "eq:" `isPrefixOf` abbr =
		let Formula{formulaSection=Section{..}, ..} = formulaByAbbr draft abbr
		in "SectionToSection/" ++ urlChars abbr ++ "#" ++ urlChars formulaAbbr
	| "tab:" `isPrefixOf` abbr =
		case tableByAbbr draft abbr of
			Just Table{tableSection=Section{..}, ..} -> "SectionToSection/" ++ urlChars abbreviation ++ "#" ++ urlChars tableAbbr
			_ -> "#" ++ urlChars abbr
	| otherwise = linkToSectionHref SectionToSection abbr

prepMath :: LaTeX -> String
prepMath = Text.unpack . renderLaTeX . (>>= cleanup) . replaceTcode
  where
    replaceTcode = mapCommandName (\x -> if x == "tcode" then "texttt" else x)
    cleanupText :: LaTeX -> LaTeX -- MathJax does not support \, in \text
    cleanupText [] = []
    cleanupText (TeXComm "," _ [] : x) = TeXRaw " " : cleanupText x
    cleanupText (x : y) = cleanup x ++ cleanupText y
    cleanup :: LaTeXUnit -> LaTeX
    cleanup (TeXComm "texttt" _ [(FixArg, TeXComm "texttt" "" [(FixArg, x)] : y)]) =
            cleanup (TeXComm "texttt" "" [(FixArg, x ++ y)])
    cleanup (TeXComm "texttt" _ [(FixArg, TeXRaw x : y)]) =
            TeXComm "texttt" "" [(FixArg, [TeXRaw x])] : cleanup (TeXComm "texttt" "" [(FixArg, y)])
    cleanup (TeXComm "texttt" _ [(FixArg, TeXComm "textit" "" x : y)]) =
        [TeXComm "class" "" [(FixArg, [TeXRaw "textit"]), (FixArg, [TeXComm "texttt" "" x])]]
        ++ cleanup (TeXComm "texttt" "" [(FixArg, y)])
        -- \texttt{\textit{x}y} -> \class{textit}{\texttt{x}}\texttt{y}
        -- MathJax does not support \textit inside \texttt
    cleanup (TeXComm "nontcode" _ x) = [TeXComm "texttt" "" (map (second (>>= cleanup)) x)]
    cleanup (TeXComm "ensuremath" _ [(FixArg, x)]) = x >>= cleanup
    cleanup (TeXComm "discretionary" _ _) = []
    cleanup (TeXComm "hfill" _ []) = []
    cleanup (TeXComm "text" ws [(FixArg, x)]) = [TeXComm "text" ws [(FixArg, cleanupText x)]]
    cleanup (TeXComm "break" _ []) = []
    cleanup (TeXComm "br" _ []) = []
    cleanup (TeXComm "-" _ []) = []
    cleanup (TeXComm "quad" _ []) = [TeXRaw " "] -- because MathJax does not support \quad
    cleanup (TeXComm x ws y) = [TeXComm x ws (map (second (>>= cleanup)) y)]
    cleanup x@(TeXRaw _) = [x]
    cleanup (TeXBraces x) = [TeXBraces (x >>= cleanup)]
    cleanup (TeXEnv x y z) = [TeXEnv x (map (second (>>= cleanup)) y) (z >>= cleanup)]
    cleanup (TeXMath Dollar [c@(TeXComm "text" _ _)]) = cleanup c
        -- because the draft sources have \bigoh{$\text{bla}$}, which MathJax doesn't support
    cleanup (TeXMath x y) = [TeXMath x (y >>= cleanup)]
    cleanup x@TeXLineBreak = [x]

renderMath :: LaTeX -> RenderContext -> TextBuilder.Builder
renderMath [TeXMath Dollar (c@(TeXComm "noncxxtcode" _ _) : more)] ctx =
  render c ctx ++ renderMath [TeXMath Dollar more] ctx
renderMath m ctx
	| noTags ctx = renderSimpleMath m ctx
	| hasComplexMath True m = renderComplexMath (mapTeX replaceNonCxxTcode m) ctx
	| otherwise = spanTag (mathKind m) $ renderSimpleMath m ctx
	where
		mathKind [TeXMath Square _] = "mathblock"
		mathKind _ = "math"
		replaceNonCxxTcode :: LaTeXUnit -> Maybe LaTeX
		replaceNonCxxTcode (TeXComm "noncxxtcode" _ args) = Just [TeXComm "tcode" "" args]
		replaceNonCxxTcode _ = Nothing

renderSimpleMath :: LaTeX -> RenderContext -> TextBuilder.Builder
renderSimpleMath [] _ = ""
renderSimpleMath (TeXRaw s : rest) sec
	| tlast `elem` ["^", "_"] = if noTags sec then "�" else
		renderSimpleMathUnit (TeXRaw $ Text.reverse $ Text.drop 1 s') sec
		++ xml tag [] (renderSimpleMath content sec)
		++ renderSimpleMath rest' sec
	| otherwise = renderSimpleMathUnit (TeXRaw s) sec ++ renderSimpleMath rest sec
	where
		s' = Text.reverse s
		tlast = Text.take 1 s'
		tag = case tlast of
			"^" -> "sup"
			"_" -> "sub"
			_ -> error ""
		(content, rest') = case rest of
			(a : b) -> ([a], b)
			other -> (other, [])
renderSimpleMath (TeXComm "frac" _ [(FixArg, num)] : rest) sec =
	"[" ++ renderSimpleMath num sec ++ "] / [" ++ renderSimpleMath den sec ++ "]" ++ renderSimpleMath rest' sec
	where
		(den, rest') = findDenum rest
		findDenum (TeXBraces d : r) = (d, r)
		findDenum (_ : r) = findDenum r
		findDenum r = (r, [])
renderSimpleMath (x : y) ctx = renderSimpleMathUnit x ctx ++ renderSimpleMath y ctx

renderSimpleMathUnit :: LaTeXUnit -> RenderContext -> TextBuilder.Builder
renderSimpleMathUnit (TeXRaw s) sec =
	case suffix of
		Just ('^', rest) -> if noTags sec then "�" else italicise prefix ++ output "sup" rest
		Just ('_', rest) -> if noTags sec then "�" else italicise prefix ++ output "sub" rest
		_ -> italicise s
	where
		(prefix, suffix') = Text.break (`elem` ['^', '_']) s
		suffix = Text.uncons suffix'

		output :: Text -> Text -> TextBuilder.Builder
		output tag rest =
			case Text.uncons rest of
				Just (c, rest') -> xml tag [] (italicise $ Text.singleton c) ++ (renderSimpleMathUnit (TeXRaw rest') sec)
				Nothing -> error "Malformed math"

		italicise :: Text -> TextBuilder.Builder
		italicise t = if noTags sec then TextBuilder.fromText t else
			case Text.span isAlpha t of
				("", "") -> TextBuilder.fromString ""
				("", rest) ->
					case Text.uncons rest of
						Just (c, rest') -> entities c ++ italicise rest'
						Nothing -> error ""
				(alpha, rest) -> spanTag "mathalpha" (TextBuilder.fromText alpha) ++ italicise rest

		entities :: Char -> TextBuilder.Builder
		entities '<' = "&lt;"
		entities '>' = "&gt;"
		entities c = TextBuilder.singleton c
renderSimpleMathUnit (TeXComm "mathtt" _ [(FixArg, x)]) ctx = spanTag "mathtt" (highlight ctx x)
renderSimpleMathUnit (TeXBraces x) sec = renderSimpleMath x sec
renderSimpleMathUnit (TeXMath Dollar m) sec = renderSimpleMath (trim m) sec
renderSimpleMathUnit (TeXMath _ m) sec = renderSimpleMath m sec
renderSimpleMathUnit other sec = render other sec

mathKey :: LaTeX -> (String, Bool)
mathKey m = case m of
		[TeXMath kind t] -> (prepMath t, kind == Dollar)
		[TeXEnv "eqnarray*" [] _] -> (prepMath m, False)
		[TeXEnv "equation*" [] _] -> (prepMath m, False)
		_ -> (prepMath m, True)

highlightCodeInMath :: RenderContext -> [Soup.Tag Text] -> TextBuilder.Builder
highlightCodeInMath ctx
    ( open@(Soup.TagOpen "span" (("class", cls) : _))
    : Soup.TagText code
    : close@(Soup.TagClose "span")
    : more )
      | cls `elem` ["mjx-char MJXc-TeX-type-R", "mjx-charbox MJXc-TeX-type-R"]
        = TextBuilder.fromText (Soup.renderTags [open])
        ++ highlight ctx [TeXRaw code]
        ++ TextBuilder.fromText (Soup.renderTags [close])
        ++ highlightCodeInMath ctx more
highlightCodeInMath ctx (a:b) = TextBuilder.fromText (Soup.renderTags [a]) ++ highlightCodeInMath ctx b
highlightCodeInMath _ [] = ""

{- Unfortunately, for:    \class{hidden_link}{\href{url}{bla}}
MathJax generates:        <a href="url"><span class="yada hidden_link">bla</span></a>

But CSS does not let you say "apply the following style to 'a' elements that have a 'span' child with class 'hidden_link'".

So fixHiddenLinks moves the "hidden_link" class from the span to the a... -}

fixHiddenLinks :: [Soup.Tag Text] -> [Soup.Tag Text]
fixHiddenLinks (Soup.TagOpen "a" attrs : Soup.TagOpen "span" [("class", Text.words -> cls)] : rest)
    | "hidden_link" `elem` cls
        = Soup.TagOpen "a" (("class", "hidden_link") : attrs) :
          Soup.TagOpen "span" [("class", Text.unwords $ cls \\ ["hidden_link"])] : rest
fixHiddenLinks (x:y) = x : fixHiddenLinks y
fixHiddenLinks [] = []

removeAriaLabel :: Soup.Tag Text -> Soup.Tag Text
removeAriaLabel (Soup.TagOpen x attrs) = Soup.TagOpen x (filter ((/= "aria-label") . fst) attrs)
removeAriaLabel x = x

doRenderComplexMath :: LaTeX -> RenderContext -> TextBuilder.Builder
doRenderComplexMath math ctx =
        (if inComment ctx then TextBuilder.fromText . Soup.renderTags else highlightCodeInMath ctx) $
          fixHiddenLinks $ map removeAriaLabel $ Soup.parseTags $ MathJax.render formula inline
    where (formula, inline) = mathKey math

renderComplexMath :: LaTeX -> RenderContext -> TextBuilder.Builder
renderComplexMath math ctx = (if inline then "" else "<br>") ++ spanTag "math" (doRenderComplexMath math ctx)
    where (_, inline) = mathKey math

cssClasses :: ColumnSpec -> Text
cssClasses (ColumnSpec alignment border _) =
	(if border then "border " else "") ++ Text.pack (show alignment)

cssStyle :: ColumnSpec -> Maybe Text
cssStyle (ColumnSpec _ _ (Just w)) = Just $ "width:" ++ w
cssStyle _ = Nothing

renderTable :: [ColumnSpec] -> [Row [TeXPara]] -> RenderContext -> TextBuilder.Builder
renderTable colspec a = \ctx -> xml "table" [] $ mconcat (renderRow ctx . zip [1..] a)
	where
		combine (ColumnSpec x False w) (ColumnSpec _ True _) = ColumnSpec x True w
		combine x _ = x

		renderRow :: RenderContext -> (Integer, Row [TeXPara]) -> TextBuilder.Builder
		renderRow ctx (rowNum, Row{..}) =
			xml "tr" ([("id", rowId)] ++ cls) (renderCols ctx' rowId colspec 1 clines cells)
			where
				rowId = mconcat (idPrefixes ctx) ++ "row-" ++ Text.pack (show rowNum)
				ctx' = ctx{idPrefixes = idPrefixes ctx ++ ["row-" ++ Text.pack (show rowNum) ++ "-"]}
				cls | RowSep <- rowSep = [("class", "rowsep")]
				    | CapSep <- rowSep = [("class", "capsep")]
				    | otherwise = []
				clines
					| Clines clns <- rowSep = clns
					| otherwise = []

		renderCols :: RenderContext -> Text -> [ColumnSpec] -> Int -> [(Int, Int)] -> [Cell [TeXPara]] -> TextBuilder.Builder
		renderCols _ _ _ _ _ [] = ""
		renderCols ctx rowId (c : cs) colnum clines (Cell{..} : rest)
			| length cs < length rest = undefined
			| Multicolumn w cs' <- cellSpan =
				let
					cs'' = combine cs' c
					colspan
						| null rest = length cs + 1
						| otherwise = w
				in
					renderCell colspan (cssClasses cs'' ++ clineClass colnum clines) (cssStyle cs'') cnt
					++ renderCols ctx rowId (drop (colspan - 1) cs) (colnum + colspan) clines rest
			| otherwise =
				renderCell 1 (cssClasses c ++ clineClass colnum clines) (cssStyle c) cnt
				++ renderCols ctx rowId cs (colnum + 1) clines rest
			where
				marginalizedLink = xml "div" [("class", "marginalizedparent")] (render link ctx)
				link = anchor{aClass="itemDeclLink", aHref="#" ++ urlChars rowId, aText="🔗"}
				cnt = (if colnum == 1 then marginalizedLink else "") ++ renderLatexParas content ctx'
				ctx' = ctx{idPrefixes = idPrefixes ctx ++ ["column-" ++ Text.pack (show colnum) ++ "-"]}
		renderCols _ _ [] _ _ (_ : _) = error "Too many columns"

		clineClass n clines
			| isJust $ find (\(begin, end) -> begin <= n && n <= end) clines =
				" cline"
			| otherwise = ""

renderCell :: Int -> Text -> Maybe Text -> TextBuilder.Builder -> TextBuilder.Builder
renderCell colspan classes style content = xml "td" attrs content
    where
        classes' = if TextBuilder.toLazyText content == "" then "empty " ++ classes else classes
        attrs = [("colspan", Text.pack $ show colspan) | colspan /= 1]
            ++ [("class", classes')]
            ++ [("style", s) | Just s <- [style]]

instance Render TeXPara where
	render = (mconcat .) . (intersperse " " .) . mapM render . sentences

instance Render [Element] where
    render l@(LatexElement _ : _) ctx = render (spanJust l p) ctx
        where
            p (LatexElement e) = Just e
            p _ = Nothing
    render (x : y) ctx = render x ctx ++ render y ctx
    render [] _ = ""

instance Render Sentence where
	render Sentence{..} ctx
			| (Enumerated _ _ : _) <- sentenceElems = render sentenceElems ctx -- not a real sentence
			| not (noTags ctx), Just v <- i
				= xml "div" [("id", v), ("class", "sentence")] $
					render (case linkifyFullStop link sentenceElems of Just x -> x; Nothing -> sentenceElems) ctx
			| otherwise = render sentenceElems ctx
		where
			i = case sentenceNumber of
				Just v -> Just $ mconcat (idPrefixes ctx) ++ "sentence-" ++ Text.pack (show v)
				Nothing -> Nothing
			link = TeXComm "class" ""
			    [ (FixArg, [TeXRaw "hidden_link"])
			    , (FixArg, [TeXComm "href" "" [(FixArg, [TeXRaw ("#" ++ fromJust i)]), (FixArg, [TeXRaw "."])]])
			    ] -- in math, \class and \href are recognized by mathjax

renderLatexParas :: [TeXPara] -> RenderContext -> TextBuilder.Builder
renderLatexParas pp ctx = mconcat $ map (xml "div" [("class", "texpara")] . flip render ctx) pp

-- Explicit <br/>'s are redundant in <pre>, so strip them.
preprocessPre :: LaTeX -> LaTeX
preprocessPre = concatMap f
	where
		f TeXLineBreak = []
		f (TeXComm "br" _ []) = []
		f (TeXEnv e a c) = [TeXEnv e a (preprocessPre c)]
		f x = [x]

htmlTabs :: Text -> Text
htmlTabs = replace "\t" "&#9;" -- todo: still necessary?

linkToSectionHref :: Link -> Abbreviation -> Text
linkToSectionHref link abbr = Text.pack (show link) ++ "/" ++ urlChars abbr

linkToSection :: Link -> Abbreviation -> Anchor
linkToSection link abbr = anchor{ aHref = linkToSectionHref link abbr, aText = squareAbbr True abbr }

--url :: Text -> Text
--url = urlChars . LazyText.toStrict . TextBuilder.toLazyText . flip render defaultRenderContext{replXmlChars = False}

simpleRender :: Render a => a -> Text
simpleRender = LazyText.toStrict . TextBuilder.toLazyText . simpleRender2

simpleRender2 :: Render a => a -> TextBuilder.Builder
simpleRender2 = flip render defaultRenderContext

secnum :: Int -> Text -> Section -> TextBuilder.Builder
secnum reduceIndent href se@Section{..} =
	simpleRender2 (anchor{aClass=c, aHref=href, aText=secnumText se, aStyle=Text.pack style})
	where
		style = "min-width:" ++ show (50 + (length parents - reduceIndent) * 15) ++ "pt"
		c	| chapter /= NormalChapter, null parents = "annexnum"
			| otherwise = "secnum"

secnumText :: Section -> TextBuilder.Builder
secnumText Section{sectionNumber=n,..}
	| chapter == InformativeAnnex, null parents = "Annex " ++ chap ++ "&emsp;(informative)"
	| chapter == NormativeAnnex, null parents = "Annex " ++ chap ++ "&emsp;(normative)"
	| otherwise = intercalateBuilders "." (chap : simpleRender2 . tail ns)
	where
		ns = reverse $ n : sectionNumber . parents
		chap :: TextBuilder.Builder
		chap
			| chapter == NormalChapter = simpleRender2 (head ns)
			| otherwise = TextBuilder.singleton $ ['A'..] !! head ns


================================================
FILE: SectionPages.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE OverloadedStrings, RecordWildCards, TupleSections, ViewPatterns, NamedFieldPuns #-}

module SectionPages
	( writeSectionFiles
	, writeSingleSectionFile
	, writeFiguresFile
	, writeFigureFiles
	, writeTablesFile
	, writeTableFiles
	, writeIndexFiles
	, writeFootnotesFile
	, writeCssFile
	, writeXrefDeltaFiles
	) where

import Prelude hiding ((++), (.), writeFile)
import System.Directory (createDirectoryIfMissing)
import Control.Monad (when, forM_)
import Control.Arrow (first)
import Data.Maybe (fromJust)
import System.Process (readProcess)
import qualified Data.Map as Map
import qualified Data.Text as Text
import qualified Data.Text.Lazy.Builder as TextBuilder
import LaTeXBase (LaTeXUnit(..))
import Pages (writePage, pageContent, pagePath, PageStyle(..), fileContent, outputDir, Link(..))
import Render (render, concatRender, simpleRender2, renderFig, abbrHref,
	defaultRenderContext, renderTab, RenderContext(..), Page(..),linkToSection, squareAbbr,
	secnum, renderLatexParas, isSectionPage, parentLink, renderIndex)
import Document
import Util (urlChars, (++), (.), h, anchor, xml, Anchor(..), Text, writeFile, intercalateBuilders)

renderParagraph :: RenderContext -> TextBuilder.Builder
renderParagraph ctx@RenderContext{nearestEnclosing=Left Paragraph{..}, draft=Draft{..}} =
		(case paraNumber of
			Just i -> renderNumbered (Text.pack $ show i)
			Nothing -> id)
		$ (if paraInItemdescr then xml "div" [("class", "itemdescr")] else id)
		$ (sourceLink
		  ++ renderLatexParas paraElems ctx'{extraIndentation=if paraInItemdescr then 12 else 0})
			-- the 12 here must match div.itemdescr's margin-left value in mm
	where
		urlBase = Text.replace "/commit/" "/tree/" commitUrl ++ "/source/"
		sourceLink :: TextBuilder.Builder
		sourceLink
			| Just SourceLocation{..} <- paraSourceLoc =
				xml "div" [("class", "sourceLinkParent")]
				$ simpleRender2 $ anchor
					{ aClass = "sourceLink"
					, aText = "#"
					, aHref = urlBase ++ Text.pack (sourceFile ++ "#L" ++ show sourceLine) }
			| otherwise = ""

		renderNumbered :: Text -> TextBuilder.Builder -> TextBuilder.Builder
		renderNumbered n =
			let
				idTag = if isSectionPage (page ctx) then [("id", mconcat (idPrefixes ctx) ++ n)] else []
				a = anchor
					{ aClass = "marginalized"
					, aHref  =
						if isSectionPage (page ctx)
							then "#" ++ urlChars (mconcat (idPrefixes ctx)) ++ n
							else "SectionToSection/" ++ urlChars (abbreviation paraSection) ++ "#" ++ n
					, aText  = TextBuilder.fromText n }
				classes = "para" ++
					(if all (not . normative) (paraElems >>= sentences >>= sentenceElems)
						then " nonNormativeOnly"
						else "")
			in
				xml "div" (("class", classes) : idTag) .
				(xml "div" [("class", "marginalizedparent")] (render a ctx') ++)
		ctx' = case paraNumber of
			Just n -> ctx{ idPrefixes = idPrefixes ctx ++ [Text.pack (show n) ++ "."] }
			Nothing -> ctx
renderParagraph _ = undefined

tocSection :: RenderContext -> Section -> TextBuilder.Builder
tocSection _ Section{sectionKind=DefinitionSection _} = ""
tocSection ctx s@Section{..} = header ++ mconcat (tocSection ctx . subsections)
  where
    header = h (min 4 $ 1 + length parents) $
        secnum 0 "" s ++ " "
        ++ render ( sectionName ++ [TeXRaw " "]
                  , anchor{ aHref = abbrHref abbreviation ctx, aText = squareAbbr True abbreviation, aClass="abbr_ref" })
                  ctx{ inSectionTitle = True }
        ++ "<div style='clear:right'></div>"

renderSection :: RenderContext -> Maybe Section -> Bool -> Section -> (TextBuilder.Builder, Bool)
renderSection context specific parasEmitted s@Section{abbreviation, subsections, sectionFootnotes, paragraphs}
	| full = (, True) $
		idDiv header ++
		(if specific == Just s && any (not . isDefinitionSection . sectionKind) subsections then toc else "") ++
		mconcat (map
			(\p -> renderParagraph (context{nearestEnclosing=Left p,idPrefixes=if parasEmitted then [secOnPage ++ "-"] else []}))
			paragraphs) ++
		(if null sectionFootnotes then "" else "<div class='footnoteSeparator'></div>") ++
		concatRender sectionFootnotes context{nearestEnclosing=Right s} ++
		mconcat (fst . renderSection context Nothing True . subsections)
	| not anysubcontent = ("", False)
	| otherwise =
		( header ++
		  mconcat (fst . renderSection context specific False . subsections)
		, anysubcontent )
	where
		idDiv
			| specific == Just s = id
			| otherwise = xml "div" [("id", secOnPage), ("class", "section")]
		secOnPage :: Text
		secOnPage = case page context of
			SectionPage parent -> parentLink parent abbreviation
			_ -> abbreviation
		full = specific == Nothing || specific == Just s
		reduceHeaderIndent = case page context of
		    SectionPage p | specific == Nothing -> length (parents p) + 1
		    _ -> 0
		header = sectionHeader reduceHeaderIndent (min 4 $ 1 + length (parents s)) s
			(if specific == Nothing && isSectionPage (page context) then "#" ++ urlChars secOnPage else "")
			abbr context
		toc = "<hr>" ++ mconcat (tocSection context . subsections) ++ "<hr>"
		abbr
			| specific == Just s && not (null (parents s))
				= anchor
			| Just sp <- specific, sp /= s, not (null (parents s))
				= anchor{aHref = "SectionToSection/" ++ urlChars abbreviation ++ "#" ++ parentLink s (Document.abbreviation sp)}
			| otherwise = linkToSection
					(if null (parents s) then SectionToToc else SectionToSection)
					abbreviation
		anysubcontent =
			or $ map (snd . renderSection context specific True)
			   $ subsections

sectionFileContent :: PageStyle -> TextBuilder.Builder -> TextBuilder.Builder -> Text
sectionFileContent sfs title body = pageContent sfs $ fileContent pathHome title sectionPageCss body
  where
    pathHome = if sfs == InSubdir then "../" else ""
    sectionPageCss =
        "<link rel='stylesheet' type='text/css' href='" ++ pathHome ++ "expanded.css' title='Normal'>" ++
        "<link rel='alternate stylesheet' type='text/css' href='" ++ pathHome ++ "colored.css' title='Notes and examples colored'>" ++
        "<link rel='alternate stylesheet' type='text/css' href='" ++ pathHome ++ "normative-only.css' title='Notes and examples hidden'>"

writeSectionFile :: FilePath -> PageStyle -> TextBuilder.Builder -> TextBuilder.Builder -> IO ()
writeSectionFile n sfs title body = writePage n sfs (sectionFileContent sfs title body)

sectionHeader :: Int -> Int -> Section -> Text -> Anchor -> RenderContext -> TextBuilder.Builder
sectionHeader reduceIndent hLevel s@Section{..} secnumHref abbr_ref ctx
    | isDef = xml "h4" [("style", "margin-bottom:3pt")] $ num ++ abbrR ++ name
    | abbreviation == "bibliography" = h hLevel name
    | otherwise = h hLevel $ num ++ " " ++ name ++ " " ++ abbrR
  where
    num = secnum reduceIndent secnumHref s
    abbrR = simpleRender2 abbr_ref{aClass = "abbr_ref", aText = squareAbbr False abbreviation}
    name = render sectionName ctx{inSectionTitle=True}
    isDef = isDefinitionSection sectionKind

writeFiguresFile :: PageStyle -> Draft -> IO ()
writeFiguresFile sfs draft = writeSectionFile "fig" sfs "14882: Figures" $
	"<h1>Figures <a href='SectionToToc/fig' class='abbr_ref'>[fig]</a></h1>"
	++ mconcat (uncurry r . figures draft)
	where
		r :: Paragraph -> Figure -> TextBuilder.Builder
		r p f@Figure{..} =
			renderFig True f ("./SectionToSection/" ++ urlChars figureAbbr) False True ctx
			where ctx = defaultRenderContext{draft=draft, nearestEnclosing=Left p, page=FiguresPage}

writeTablesFile :: PageStyle -> Draft -> IO ()
writeTablesFile sfs draft = writeSectionFile "tab" sfs "14882: Tables" $
	"<h1>Tables <a href='SectionToToc/tab' class='abbr_ref'>[tab]</a></h1>"
	++ mconcat (uncurry r . tables draft)
	where
		r :: Paragraph -> Table -> TextBuilder.Builder
		r p t@Table{tableSection=Section{..}, ..} =
			renderTab True t ("./SectionToSection/" ++ urlChars tableAbbr) False True ctx
			where ctx = defaultRenderContext{
				draft = draft,
				nearestEnclosing = Left p,
				page = TablesPage,
				idPrefixes = [fromJust (Text.stripPrefix "tab:" tableAbbr) ++ "-"]}

writeFootnotesFile :: PageStyle -> Draft -> IO ()
writeFootnotesFile sfs draft = writeSectionFile "footnotes" sfs "14882: Footnotes" $
	"<h1>List of Footnotes</h1>"
	++ mconcat (uncurry r . footnotes draft)
	where
		r :: Section -> Footnote -> TextBuilder.Builder
		r s fn = render fn defaultRenderContext{draft=draft, nearestEnclosing = Right s, page=FootnotesPage}

writeSingleSectionFile :: PageStyle -> Draft -> String -> IO ()
writeSingleSectionFile sfs draft abbr = do
	let Just section@Section{..} = Document.sectionByAbbr draft (Text.pack abbr)
	let baseFilename = Text.unpack abbreviation
	writeSectionFile baseFilename sfs (squareAbbr False abbreviation) $ mconcat $ fst . renderSection (defaultRenderContext{draft=draft,page=SectionPage section}) (Just section) False . chapters draft
	putStrLn $ "  " ++ baseFilename

writeTableFiles :: PageStyle -> Draft -> IO ()
writeTableFiles sfs draft =
	forM_ (snd . tables draft) $ \tab@Table{..} -> do
		let
			context = defaultRenderContext{draft=draft, page=TablePage tab, nearestEnclosing=Right tableSection}
			header :: Section -> TextBuilder.Builder
			header sec = sectionHeader 0 (min 4 $ 1 + length (parents sec)) sec "" anchor{aHref=href} context
				where href="SectionToSection/" ++ urlChars (abbreviation sec) ++ "#" ++ urlChars tableAbbr
			headers = mconcat $ map header $ reverse $ tableSection : parents tableSection
		writeSectionFile (Text.unpack tableAbbr) sfs (TextBuilder.fromText $ "[" ++ tableAbbr ++ "]") $
			headers ++ renderTab True tab "" True False context

writeFigureFiles :: PageStyle -> Draft -> IO ()
writeFigureFiles sfs draft =
	forM_ (snd . figures draft) $ \fig@Figure{..} -> do
		let
			context = defaultRenderContext{draft=draft, page=FigurePage fig, nearestEnclosing=Right figureSection}
			header :: Section -> TextBuilder.Builder
			header sec = sectionHeader 0 (min 4 $ 1 + length (parents sec)) sec "" anchor{aHref=href} context
				where href="SectionToSection/" ++ urlChars (abbreviation sec) ++ "#" ++ urlChars figureAbbr
			headers = mconcat $ map header $ reverse $ figureSection : parents figureSection
		writeSectionFile (Text.unpack figureAbbr) sfs (TextBuilder.fromText $ "[" ++ figureAbbr ++ "]") $
			headers ++ renderFig True fig "" True False context

writeSectionFiles :: PageStyle -> Draft -> [IO ()]
writeSectionFiles sfs draft = flip map (zip names contents) $ \(n, content) -> do
		when (sfs == InSubdir) $ createDirectoryIfMissing True (outputDir ++ n)
		writeFile (pagePath n sfs) content
	where
		secs = Document.sections draft
		renSec section@Section{..} = (Text.unpack abbreviation, sectionFileContent sfs title body)
		  where
			title = squareAbbr False abbreviation
			body = mconcat $ fst . renderSection (defaultRenderContext{draft=draft,page=SectionPage section}) (Just section) False . chapters draft
		fullbody = mconcat $ fst . renderSection defaultRenderContext{draft=draft, page=FullPage} Nothing True . chapters draft
		fullfile = ("full", sectionFileContent sfs "14882" fullbody)
		files = fullfile : map renSec secs
		names = fst . files
		contents = snd . files

writeIndexFile :: PageStyle -> Draft -> String -> IndexTree -> IO ()
writeIndexFile sfs draft cat index =
	writeSectionFile cat sfs ("14882: " ++ indexCatName cat) $
		h 1 (indexCatName cat) ++ renderIndex defaultRenderContext{page=IndexPage (Text.pack cat), draft=draft} index

writeIndexFiles :: PageStyle -> Draft -> Index -> [IO ()]
writeIndexFiles sfs draft index = flip map (Map.toList index) $ uncurry (writeIndexFile sfs draft) . first Text.unpack

writeCssFile :: IO ()
writeCssFile = do
	base <- Text.pack . readFile "14882.css"
	let
		replaceFonts =
			Text.replace
				".MJXc-TeX-sans-R {font-family: MJXc-TeX-sans-R,MJXc-TeX-sans-Rw}"
				".MJXc-TeX-sans-R {font-family: 'Noto Sans'; font-size: 10pt; }" .
			Text.replace
				".MJXc-TeX-type-R {font-family: MJXc-TeX-type-R,MJXc-TeX-type-Rw}"
				".MJXc-TeX-type-R {font-family: 'Noto Sans Mono'; font-size: 10pt; }" .
			Text.replace
				".MJXc-TeX-main-R {font-family: MJXc-TeX-main-R,MJXc-TeX-main-Rw}"
				".MJXc-TeX-main-R {}" .
			Text.replace
				".MJXc-TeX-math-I {font-family: MJXc-TeX-math-I,MJXc-TeX-math-Ix,MJXc-TeX-math-Iw}"
				".MJXc-TeX-math-I {font-style: italic}" .
			Text.replace
				".MJXc-TeX-main-I {font-family: MJXc-TeX-main-I,MJXc-TeX-main-Ix,MJXc-TeX-main-Iw}"
				".MJXc-TeX-main-I {font-style: italic}"
		-- Replace fonts to make sure code in formulas matches code in code blocks, etc.
	mjx <- Text.replace "display: block" "display: block;background:inherit" . replaceFonts . Text.pack .
		readProcess "tex2html" ["--css", ""] ""
	writeFile (outputDir ++ "/14882.css") (base ++ mjx)

writeXrefDeltaFiles :: PageStyle -> Draft -> [IO ()]
writeXrefDeltaFiles sfs draft = flip map (xrefDelta draft) $ \(from, to) ->
	writeSectionFile (Text.unpack from) sfs (squareAbbr False from) $
		if to == []
			then "Subclause " ++ squareAbbr False from ++ " was removed."
			else "See " ++ intercalateBuilders ", " (flip render ctx . to) ++ "."
	where ctx = defaultRenderContext{draft=draft, page=XrefDeltaPage}


================================================
FILE: Sentences.hs
================================================
{-# LANGUAGE OverloadedStrings, ViewPatterns, LambdaCase, TypeSynonymInstances, FlexibleInstances #-}

module Sentences (splitIntoSentences, isActualSentence, linkifyFullStop, breakSentence) where

import LaTeXBase (LaTeXUnit(..), triml, ArgKind(FixArg))
import Data.Text (isPrefixOf, isSuffixOf, stripPrefix, Text)
import qualified Data.Text as Text
import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
import Data.Char (isSpace, isDigit, isAlphaNum, isUpper, isLower)
import Control.Arrow (first)
import Data.Maybe (isNothing)
import Util ((++), textStripInfix, dropTrailingWs, (.))
import RawDocument
import Document

startsSentence :: RawElement -> Bool
startsSentence (RawLatexElement e) | [TeXRaw x] <- triml [e], x /= "" = isUpper (Text.head x)
startsSentence _ = False

unitContinuesSentence :: LaTeXUnit -> Bool
unitContinuesSentence (TeXComm " " _ []) = True
unitContinuesSentence (TeXRaw txt) = "," `isPrefixOf` txt
unitContinuesSentence _ = False

elemContinuesSentence :: RawElement -> Bool
elemContinuesSentence (RawLatexElement u) = unitContinuesSentence u
elemContinuesSentence _ = False

elemsContinueSentence :: [RawElement] -> Bool
elemsContinueSentence (RawLatexElement (TeXRaw "") : more) = elemsContinueSentence more
elemsContinueSentence (x : _) = elemContinuesSentence x
elemsContinueSentence _ = False

simpleHead :: [RawElement] -> Maybe Char
simpleHead [] = Nothing
simpleHead (RawLatexElement (TeXRaw x) : more)
	| x == "" = simpleHead more
	| otherwise = Just (Text.head x)
simpleHead (RawLatexElement (TeXComm " " "" []) : _) = Just ' '
simpleHead (RawLatexElement (TeXComm "tcode" _ [(_, x)]) : more) = simpleHead (map RawLatexElement x ++ more)
simpleHead (RawLatexElement (TeXComm "index" _ _) : more) = simpleHead more
simpleHead (RawLatexElement (TeXComm "footnoteref" _ _) : _) = Nothing -- hmm
simpleHead (RawLatexElement TeXLineBreak : _) = Nothing
simpleHead (RawLatexElement (TeXComm "br" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "linebreak" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "newline" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "par" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "nolinebreak" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "iref" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "," _ _) : _) = Just ','
simpleHead x = error $ "simpleHead: " ++ show x

splitIntoSentences :: [RawElement] -> [[RawElement]]
splitIntoSentences = go []
	where
		go [] [] = []
		go [] (RawLatexElement (TeXRaw "\n") : y) = go [] y
		go [] (x@(RawExample _) : y) = [x] : go [] y
		go [] (x@(RawNote _ _) : y) = [x] : go [] y
		go partial (x@(RawCodeblock _) : y) | z : _ <- rmIndices y, startsSentence z = (partial ++ [x]) : go [] y
		go x [] = [x]
		go x z@(e : y)
			| Just (s, rest) <- breakSentence z = (x ++ s) : go [] rest
			| otherwise = go (x ++ [e]) y
		rmIndices (RawLatexElement (TeXRaw "\n") : RawLatexElement (TeXComm "index" _ _) : x) = rmIndices x
		rmIndices x = x

breakSentence :: [RawElement] -> Maybe ([RawElement] {- sentence -}, [RawElement] {- remainder -})
breakSentence (e@(RawLatexElement (TeXMath _ math)) : more)
    | f (reverse math) = Just ([e], more)
    | otherwise = first (e :) . breakSentence more
    where
        f :: LaTeX -> Bool
        f (TeXRaw y : z) | all isSpace (Text.unpack y) = f z
        f (TeXComm "text" _ [(FixArg, a)] : _) = f (reverse a)
        f (TeXComm "mbox" _ [(FixArg, a)] : _) = f (reverse a)
        f (TeXRaw ".\n" : TeXComm "right" "" [] : y) = f y
        f (TeXRaw y : _) = "." `isSuffixOf` (Text.pack $ dropTrailingWs $ Text.unpack y)
        f _ = False
breakSentence (b@(RawLatexElement TeXLineBreak) : more) = Just ([b], more)
breakSentence (RawLatexElement (TeXBraces x) : more) = breakSentence (map RawLatexElement x ++ more)
breakSentence (e@(RawLatexElement (TeXEnv "eqnarray*" _ _)) : more) = first (e :) . breakSentence more
breakSentence (b@(RawLatexElement (TeXComm cmd _ _)) : more) =
	if cmd `elem` ["break"]
		then Just ([b], more)
		else (first (b :)) . breakSentence more
breakSentence (e@(RawLatexElement (TeXRaw (textStripInfix "." -> (Just ((++ ".") -> pr, po))))) : more)
    = f pr po
  where
   f :: Text -> Text -> Maybe ([RawElement], [RawElement])
   f pre post
    | "''" `isPrefixOf` post = f (pre ++ "''") (Text.drop 2 post)
    | not (("(." `isSuffixOf` pre) && (")" `isPrefixOf` post))
    , not ("" == post && maybe False (\c -> isLower c || isDigit c) (simpleHead more))
    , not ("" == post && length more /= 0 && head more == RawLatexElement (TeXComm " " "" []))
    , not (Text.length post > 0 && ((Text.head post == '.')
                                    || isLower (Text.head post)
                                    || isDigit (Text.head post)))
    , not (Text.length pre > 1 && Text.length post > 0 && isAlphaNum (Text.last $ Text.init pre) && isDigit (Text.head post))
    , not (elemsContinueSentence (RawLatexElement (TeXRaw post) : more))
    , not (Text.length pre >= 2 && ("." `isSuffixOf` pre) && isUpper (Text.last $ Text.init pre))
    , not ("e.g." `isSuffixOf` pre)
    , not ("i.e." `isSuffixOf` pre) =
        let
            post' = Text.stripStart post
            (pre', post'') = case stripPrefix ")" post' of
                Just z -> (pre ++ ")" , Text.stripStart z)
                Nothing -> (pre, post')
            more' = if post'' == "" then more else RawLatexElement (TeXRaw post'') : more
            (maybefootnote, more'') = case more' of
                fn@(RawLatexElement (TeXComm "footnoteref" _ _)) : z -> ([fn], z)
                _ -> ([], more')
            sentence = [RawLatexElement (TeXRaw pre')] ++ maybefootnote
        in
            Just (sentence, more'')
    | Just ((++ ".") -> pre', post') <- textStripInfix "." post = f (pre ++ pre') post'
    | otherwise = first (e :) . breakSentence more
breakSentence (e@(RawLatexElement (TeXRaw _)) : more) = first (e :) . breakSentence more
breakSentence (enum@(RawEnumerated _ (last -> rawItemContent -> (_ : _ : _))) : more)
    = Just ([enum], more)
breakSentence (enum@(RawEnumerated _ (last -> rawItemContent -> [RawTexPara y])) : more)
    | Just _ <- breakSentence y = Just ([enum], more)
breakSentence _ = Nothing

isActualSentence :: [RawElement] -> Bool
isActualSentence (RawEnumerated _ _ : _) = False
isActualSentence l = any p l
	where
		yes = words $
			"link tcode noncxxtcode textit ref grammarterm indexedspan " ++
			"defnx textbf textrm textsl textsc indexlink hiddenindexlink"

		q :: LaTeXUnit -> Bool
		q (TeXRaw s) = not $ all isSpace $ Text.unpack s
		q (TeXComm c _ _) | c `elem` yes = True
		q (TeXEnv c _ _) | c `elem` yes = True
		q (TeXEnv "indexed" _ body) = any q body
		q (TeXBraces body) = any q body
		q _ = False

		p :: RawElement -> Bool
		p (RawLatexElement u) = q u
		p RawEnumerated{} = True
		p _ = False

class LinkifyFullStop a where
    linkifyFullStop :: LaTeXUnit -> a -> Maybe a

instance LinkifyFullStop LaTeX where
    linkifyFullStop link l = reverse . f (reverse l)
      where
        f [] = Nothing
        f (x@(TeXRaw ".\n") : y@(TeXComm "right" _ _) : more) = ([x, y] ++) . f more
        f (u : uu)
            | Just u' <- inUnit u = Just (reverse u' ++ uu)
            | otherwise = (u :) . f uu
        inUnit :: LaTeXUnit -> Maybe LaTeX -- returns content in regular order
        inUnit (TeXEnv "array" args body)
            | Just body' <- linkifyFullStop link body = Just [TeXEnv "array" args body']
        inUnit (TeXEnv "indented" [] body)
            | Just body' <- linkifyFullStop link body = Just [TeXEnv "indented" [] body']
        inUnit (TeXComm "text" ws [(FixArg, x)])
            | Just x' <- linkifyFullStop link x = Just (moveStuffOutsideText (TeXComm "text" ws [(FixArg, x')]))
            | otherwise = Nothing
        inUnit (TeXComm "mbox" ws [(FixArg, x)])
            | Just x' <- linkifyFullStop link x = Just (moveStuffOutsideText (TeXComm "mbox" ws [(FixArg, x')]))
            | otherwise = Nothing
        inUnit (TeXMath kind m)
            | Just m' <- linkifyFullStop link m = Just [TeXMath kind m']
        inUnit (TeXRaw (Text.dropWhileEnd (=='\n') -> Text.stripSuffix "." -> Just s)) = Just [TeXRaw s, link]
        inUnit (TeXRaw (Text.stripSuffix ".)" -> Just s)) = Just [TeXRaw s, link, TeXRaw ")"]
        inUnit (TeXRaw (Text.stripSuffix ".''" -> Just s)) = Just [TeXRaw s, link, TeXRaw "''"]
        inUnit _ = Nothing

instance LinkifyFullStop Item where
    linkifyFullStop link it@Item{itemInlineContent=e}
        | Just y <- linkifyFullStop link e
            = Just it{itemInlineContent=y}
    linkifyFullStop _ _ = Nothing

instance LinkifyFullStop [Element] where
    linkifyFullStop link = (reverse .) . f . reverse
      where
        f :: [Element] -> Maybe [Element]
        f (Enumerated cmd (reverse -> (lastItem : moreItems)) : more)
            | all (isNothing . linkifyFullStop link) moreItems
            , Just lastItem' <- linkifyFullStop link lastItem
            = Just $ Enumerated cmd (reverse (lastItem' : moreItems)) : more
        f (LatexElement u : more)
            | Just u' <- linkifyFullStop link [u] = Just $ map LatexElement (reverse u') ++ more
            | otherwise = (LatexElement u :) . f more
        f _ = Nothing

moveStuffOutsideText :: LaTeXUnit -> LaTeX
    -- Turns \text{ \class{bla} } into \text{ }\class{\text{bla}}\text{ }, and similar for \href,
    -- because MathJax does not support \class and \href in \text.
moveStuffOutsideText (TeXComm parent pws [(FixArg, [TeXComm nested nws [x, y]])])
    | parent `elem` ["text", "mbox"]
    , nested `elem` ["class", "href"] = [TeXComm nested nws [x, (FixArg, moveStuffOutsideText (TeXComm parent pws [y]))]]
moveStuffOutsideText (TeXComm parent pws [(FixArg, t)])
    | parent `elem` ["text", "mbox"]
    , length t >= 2 = concatMap (\u -> moveStuffOutsideText $ TeXComm parent pws [(FixArg, [u])]) t
moveStuffOutsideText u = [u]


================================================
FILE: Setup.hs
================================================
import Distribution.Simple
main = defaultMain


================================================
FILE: Toc.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE RecordWildCards, OverloadedStrings, ViewPatterns, NamedFieldPuns #-}

module Toc (writeTocFiles) where

import qualified Data.Text as Text
import qualified Data.Text.Lazy as LazyText
import qualified Data.Text.Lazy.Builder as TextBuilder
import Data.Time.Format (formatTime, defaultTimeLocale)
import Data.Time.Clock (getCurrentTime, UTCTime)
import Prelude hiding ((.), (++), writeFile)
import LaTeXBase (LaTeXUnit(..))
import Pages (Link(..), fileContent, applyPageStyle, PageStyle(..), outputDir, writePage)
import Render (secnum, linkToSection, simpleRender2, RenderContext(..), render, defaultRenderContext, Page(..))
import Util
import Document (Section(..), Draft(..), SectionKind(..), indexCatName, isDefinitionSection)

tocSection :: Draft -> Bool -> Section -> TextBuilder.Builder
tocSection _ _ Section{sectionKind=DefinitionSection _} = ""
tocSection draft expanded s@Section{..} =
	xml "div" [("id", abbreviation)] $ header ++ mconcat (tocSection draft expanded . subsections)
  where
  	header = h (min 4 $ 2 + length parents) $
		secnum 0 (if expanded then "#" ++ urlChars abbreviation else "") s ++ " "
		++ render ( sectionName ++ [TeXRaw " "]
		          , (linkToSection (if expanded then SectionToSection else TocToSection) abbreviation){aClass="abbr_ref"})
		          defaultRenderContext{page=if expanded then ExpandedTocPage else TocPage, inSectionTitle=True, draft=draft}
		++ "<div style='clear:right'></div>"

tocChapter :: Draft -> Bool -> Section -> TextBuilder.Builder
tocChapter draft expanded s@Section{abbreviation, sectionName, subsections, parents} =
	xml "div" [("id", abbreviation)] $
	h (min 4 $ 2 + length parents) header ++
	xml "div" [("class", "tocChapter")] (mconcat (tocSection draft expanded . subsections))
  where
	href
	    | expanded = "SectionToSection/" ++ urlChars abbreviation
	    | otherwise = (if any (not . isDefinitionSection . sectionKind) subsections then "#" else "TocToSection/") ++ urlChars abbreviation
	link = anchor{
		aClass = "folded_abbr_ref",
		aText = TextBuilder.fromText $ "[" ++ abbreviation ++ "]",
		aHref = href}
	header
	  | abbreviation == "bibliography" =
	      render anchor{aText = "Bibliography", aHref = href}
	        defaultRenderContext{inSectionTitle=True, draft=draft}
	  | otherwise =
	      secnum 0 (if expanded then "#" ++ urlChars abbreviation else "") s ++ " " ++
	      render (sectionName ++ [TeXRaw " "], link) defaultRenderContext{inSectionTitle=True, draft=draft} ++
	      (if expanded then "" else simpleRender2 (linkToSection TocToSection abbreviation){aClass="unfolded_abbr_ref"})

tocHeader :: UTCTime -> Text -> Text
tocHeader date commitUrl =
	"(Generated on " ++ Text.pack (formatTime defaultTimeLocale "%F" date)
	++ " from the <a href='" ++ commitUrl ++ "'>LaTeX sources</a>"
	++ " by <a href='https://github.com/Eelis/cxxdraft-htmlgen'>cxxdraft-htmlgen</a>."
	++ " This is <em>not</em> an ISO publication.)"
	++ "<br><br>"
	++ "<b>Note: this is an early draft. It's known to be incomplet and incorrekt, and it has lots of"
	++ " b<span style='position:relative;left:-1.2pt'>a</span><span style='position:relative;left:1pt'>d</span>"
	++ " for<span style='position:relative;left:-3pt'>matti<span style='position:relative;bottom:0.15ex'>n</span>g.</span></b>"

writeTocFiles :: PageStyle -> Draft -> IO ()
writeTocFiles sfs draft@Draft{..} = do
	date <- getCurrentTime
	tocCss <- readFile "toc.css"
	let
	    descMeta = "<meta name='description' content='Browser-friendly rendering of a recent draft of the C++ standard'>"
	    tocStyle = "<style>" ++ TextBuilder.fromString tocCss ++ "</style>"
	writeFile (outputDir ++ "/index.html") $ applyPageStyle sfs $ LazyText.toStrict $ TextBuilder.toLazyText $
		fileContent "" "Draft C++ Standard: Contents" (descMeta ++ tocStyle) $
			"<h1 style='text-align:center; hyphens:none; margin: 1cm'>Working Draft<br>Programming Languages &mdash; C++</h1>" ++
			xml "div" [("class", "tocHeader")] (TextBuilder.fromText $ tocHeader date commitUrl) ++
			"<br><h1>Contents</h1>" ++
			mconcat (tocChapter draft False . chapters) ++
			mconcat (h 2
				. (\cat -> simpleRender2 anchor{aHref="TocToSection/" ++ cat, aText=indexCatName cat})
				. ["generalindex", "grammarindex", "headerindex", "libraryindex", "conceptindex", "impldefindex"])

	fullTocCss <- readFile "fulltoc.css"
	let
	    fullTocStyle = "<style>" ++ TextBuilder.fromString fullTocCss ++ "</style>"
	    pathHome = if sfs == InSubdir then "../" else ""
	writePage "fulltoc" sfs $ applyPageStyle sfs $ LazyText.toStrict $ TextBuilder.toLazyText $
		fileContent pathHome "Draft C++ Standard: Contents" (descMeta ++ fullTocStyle) $
			"<h1 style='text-align:center; hyphens:none; margin: 1cm'>Working Draft<br>Programming Languages &mdash; C++</h1>" ++
			xml "div" [("class", "tocHeader")] (TextBuilder.fromText $ tocHeader date commitUrl) ++
			"<br><h1>Contents</h1>" ++
			mconcat (tocChapter draft True . chapters) ++
			mconcat (h 2
				. (\cat -> simpleRender2 anchor{aHref="SectionToSection/" ++ cat, aText=indexCatName cat})
				. ["generalindex", "grammarindex", "headerindex", "libraryindex", "conceptindex", "impldefindex"])


================================================
FILE: Util.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE TupleSections, OverloadedStrings, ViewPatterns #-}

module Util (
	mconcat, (.), (++), Text, replace, xml, spanTag, h, getDigit, startsWith, urlChars,
	anchor, Anchor(..), writeFile, readFile, greekAlphabet, mapLast, mapHead, stripInfix, dropTrailingWs,
	textStripInfix, textSubRegex, splitOn, intercalateBuilders, replaceXmlChars, stripAnyPrefix, trimString,
	spanJust, measure, partitionBy
	) where

import Prelude hiding ((.), (++), writeFile)
import qualified Data.Text as Text
import qualified Data.Map as Map
import Data.List (stripPrefix, intersperse)
import Data.Char (ord, isDigit, isSpace)
import Data.Text (Text, replace)
import Data.Text.IO (writeFile)
import Data.Time (getCurrentTime, diffUTCTime)
import Control.Arrow (first)
import Text.Regex (subRegex, Regex)
import qualified Data.Text.Lazy.Builder as TextBuilder

(.) :: Functor f => (a -> b) -> (f a -> f b)
(.) = fmap

(++) :: Monoid a => a -> a -> a
(++) = mappend

xml :: Text -> [(Text, Text)] -> TextBuilder.Builder -> TextBuilder.Builder
xml t attrs = (TextBuilder.fromText ("<" ++ t ++ " " ++ Text.unwords (map f attrs) ++ ">") ++) . (++ TextBuilder.fromText ("</" ++ t ++ ">"))
	where
		f (n, v) = n ++ "='" ++ v ++ "'"

spanTag :: Text -> TextBuilder.Builder -> TextBuilder.Builder
spanTag = xml "span" . (:[]) . ("class",)

h :: Int -> TextBuilder.Builder -> TextBuilder.Builder
h = flip xml [] . ("h" ++) . Text.pack . show

data Anchor = Anchor
    { aClass, aId, aHref :: Text
    , aText :: TextBuilder.Builder
    , aStyle, aTitle :: Text }

intercalateBuilders :: TextBuilder.Builder -> [TextBuilder.Builder] -> TextBuilder.Builder
intercalateBuilders x y = mconcat $ intersperse x y

anchor :: Anchor
anchor = Anchor{aClass="", aId="", aHref="", aText=TextBuilder.fromText "", aStyle="", aTitle=""}

greekAlphabet :: [(String, Char)]
greekAlphabet =
	[ ("alpha"          , 'α')
	, ("beta"           , 'β')
	, ("gamma"          , 'γ')
	, ("delta"          , 'δ')
	, ("mu"             , 'μ')
	, ("nu"             , 'ν')
	, ("lambda"         , 'λ')
	, ("pi"             , 'π')
	, ("phi"            , 'φ')
	, ("rho"            , 'ρ')
	, ("sigma"          , 'σ')
	, ("theta"          , 'θ')
	, ("zeta"           , 'ζ')

	, ("Gamma"          , 'Γ')
	, ("Sigma"          , 'Σ')
	, ("Pi"             , 'Π') ]

mapLast :: (a -> a) -> [a] -> [a]
mapLast _ [] = []
mapLast f [x] = [f x]
mapLast f (x:xx) = x : mapLast f xx

mapHead :: (a -> a) -> [a] -> [a]
mapHead f (x:y) = f x : y
mapHead _ [] = []

getDigit :: Char -> Maybe Int
getDigit c
	| isDigit c = Just $ ord c - ord '0'
	| otherwise = Nothing

stripInfix :: Eq a => [a] -> [a] -> Maybe ([a], [a])
stripInfix p s | Just r <- stripPrefix p s = Just ([], r)
stripInfix p (hd:t) = first (hd:) . stripInfix p t
stripInfix _ _  = Nothing

textStripInfix :: Text -> Text -> Maybe (Text, Text)
textStripInfix inf (Text.breakOn inf -> (a, b))
	| b == "" = Nothing
	| otherwise = Just (a, Text.drop (Text.length inf) b)

startsWith :: (Char -> Bool) -> (Text -> Bool)
startsWith _ "" = False
startsWith p t = p (Text.head t)

dropTrailingWs :: String -> String
dropTrailingWs = reverse . dropWhile isSpace . reverse

urlChars :: Text -> Text
urlChars =
	replace "'"  "&#39;" .
	replace "<"  "%3c" .
	replace ">"  "%3e" .
	replace "\"" "%22" .
	replace "#"  "%23" .
	replace "{"  "%7b" .
	replace "|"  "%7c" .
	replace "}"  "%7d" .
	replace "["  "%5b" .
	replace "\\" "%5c" .
	replace "]"  "%5d" .
	replace "^"  "%5e" .
	replace " "  "%20" .
	replace "%"  "%25"

textSubRegex :: Regex -> String -> Text -> Text
textSubRegex pat repl txt = Text.pack $ subRegex pat (Text.unpack txt) repl

splitOn :: (a -> Bool) -> [a] -> [[a]]
splitOn _ [] = [[]]
splitOn sep (x:y)
    | sep x = [] : splitOn sep y
    | otherwise = mapHead (x :) $ splitOn sep y

replaceXmlChars :: Text -> Text
replaceXmlChars =
	replace ">" "&gt;" .
	replace "<" "&lt;" .
	replace "&" "&amp;"

stripAnyPrefix :: [Text] -> Text -> Maybe (Text, Text)
stripAnyPrefix [] _ = Nothing
stripAnyPrefix (x:y) z
    | Just a <- Text.stripPrefix x z = Just (x, a)
    | otherwise = stripAnyPrefix y z

trimString :: String -> String
trimString = reverse . dropWhile isSpace . reverse . dropWhile isSpace

spanJust :: [a] -> (a -> Maybe b) -> ([b], [a])
spanJust (x : z) f
    | Just y <- f x = first (y :) (spanJust z f)
spanJust z _ = ([], z)

measure :: IO a -> IO (a, Float)
measure f = do
	start <- getCurrentTime
	r <- f
	end <- getCurrentTime
	return (r, realToFrac $ diffUTCTime end start)

partitionBy :: (Ord b, Eq b) => (a -> b) -> [a] -> [(b, [a])]
partitionBy f l = Map.assocs $ Map.fromListWith (flip (++)) [(f x, [x]) | x <- l]


================================================
FILE: colored.css
================================================
div.example {
	display: block;
	margin-top: 5pt;
	margin-bottom: 5pt;
	font-size: 11pt;
	color: #bb00bb;
}

div.note {
	display: block;
	margin-top: 5pt;
	margin-bottom: 5pt;
	font-size: 11pt;
	color: #bb00bb;
}


================================================
FILE: cxxdraft-htmlgen.cabal
================================================
name:                cxxdraft-htmlgen
version:             0
synopsis:            Converts C++ Standard draft documents from their LaTeX sources to HTML
license:             PublicDomain
license-file:        LICENSE
author:              Eelis
maintainer:          eelis@eelis.net
category:            Text
build-type:          Simple
extra-source-files:  README
cabal-version:       >=1.10

executable cxxdraft-htmlgen
  main-is:             genhtml.hs
  other-modules:       Load14882, Render, Util, SectionPages, Toc, Document, LaTeXBase, LaTeXParser, RawDocument, MathJax, Sentences, CxxParser, Pages
  other-extensions:    OverloadedStrings, RecordWildCards, TupleSections, ViewPatterns
  build-depends: base >=4.6
               , text >=1.2
               , process >=1.1
               , directory >=1.2
               , hashable >=1.2
               , containers >=0.5
               , mtl >=2.2
               , time >=1.4
               , regex-compat-tdfa
               , temporary
               , parallel
               , tagsoup
               , monad-parallel
  hs-source-dirs:      .
  default-language:    Haskell2010
  ghc-options:         -Wall -fno-warn-tabs -threaded "-with-rtsopts=-N"


================================================
FILE: expanded.css
================================================
div.example {
	display: block;
	margin-top: 5pt;
	margin-bottom: 5pt;
	font-size: 9pt;
}

div.note {
	display: block;
	margin-top: 5pt;
	margin-bottom: 5pt;
	font-size: 9pt;
}

div.note .texttt { font-size: 9pt; }
div.example .texttt { font-size: 9pt; }

div.note .textsf { font-family: 'Noto Sans'; font-size: 9pt; }
div.example .textsf { font-family: 'Noto Sans'; font-size: 9pt; }

div.note .math { font-size: 9pt; }
div.example .math { font-size: 9pt; }


================================================
FILE: fulltoc.css
================================================
h1 { margin: 0.2em 5pt 0.2em 5pt; line-height: 1.5; }
h2 { margin: 0.2em 5pt 0.2em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; }
h3 { margin: 0.2em 5pt 0.2em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; }
h4 { margin: 0.1em 5pt 0.1em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; }
:target h2 { border-bottom: none; }
.tocHeader { text-align: center; }
:target > div.tocChapter { display: block; }

@media (prefers-color-scheme: dark) {
    h2 { border-bottom-color: #b0b0b05a; }
    h3 { border-bottom-color: #b0b0b05a; }
    h4 { border-bottom-color: #b0b0b05a; }
}


================================================
FILE: genhtml.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE LambdaCase, ViewPatterns, RecordWildCards, OverloadedStrings #-}

import Document (Draft(..))
import Load14882 (load14882)
import Prelude hiding ((++), (.), writeFile, readFile)
import System.Directory (createDirectoryIfMissing, setCurrentDirectory, getCurrentDirectory, copyFile)
import System.Environment (getArgs)
import Control.Monad (forM_)
import Data.Text.IO (readFile)
import qualified Control.Monad.Parallel as ParallelMonad
import Util hiding (readFile)
import Toc (writeTocFiles)
import Pages (outputDir, PageStyle(..))
import SectionPages

data CmdLineArgs = CmdLineArgs
	{ repo :: FilePath
	, sectionFileStyle :: PageStyle
	, sectionToWrite :: Maybe String }

readCmdLineArgs :: [String] -> CmdLineArgs
readCmdLineArgs = \case
	[repo, read -> sectionFileStyle, sec] -> CmdLineArgs{sectionToWrite=Just sec, ..}
	[repo, read -> sectionFileStyle] -> CmdLineArgs{sectionToWrite=Nothing,..}
	[repo] -> CmdLineArgs{sectionFileStyle=WithExtension,sectionToWrite=Nothing,..}
	_ -> error "param: path/to/repo"

main :: IO ()
main = do
	cwd <- getCurrentDirectory
	CmdLineArgs{..} <- readCmdLineArgs . getArgs

	extraMacros <- readFile "macros.tex"

	setCurrentDirectory $ repo ++ "/source"
	draft@Draft{..} <- load14882 extraMacros

	setCurrentDirectory cwd
	createDirectoryIfMissing True outputDir
	copyFile "icon-light.png" (outputDir ++ "/icon-light.png")
	copyFile "icon-dark.png" (outputDir ++ "/icon-dark.png")
	forM_ ["expanded.css", "colored.css", "normative-only.css"] $
		\f -> do
			copyFile f (outputDir ++ "/" ++ f)
	case sectionToWrite of
		Just abbr -> writeSingleSectionFile sectionFileStyle draft abbr
		Nothing -> do
			let acts =
				[ writeTocFiles sectionFileStyle draft
				, writeCssFile
				, writeFiguresFile sectionFileStyle draft
				, writeFigureFiles sectionFileStyle draft
				, writeFootnotesFile sectionFileStyle draft
				, writeTablesFile sectionFileStyle draft
				, writeTableFiles sectionFileStyle draft
				] ++
				writeXrefDeltaFiles sectionFileStyle draft ++
				writeIndexFiles sectionFileStyle draft index ++
				writeSectionFiles sectionFileStyle draft

			((), took) <- measure $ ParallelMonad.sequence_ acts
			putStrLn $ "Wrote files to " ++ outputDir ++ " in " ++ show (took * 1000) ++ "ms."


================================================
FILE: macros.tex
================================================
%% cxxdraft-htmlgen builtins:
%
% \link
%   Link to section.
%   arg 0: link text
%   arg 1: section abbreviation
%
% \weblink
%   arg 0: link text
%   arg 1: URL
%
% \indexlink
%   Link to indexed position.
%   arg 0: link text
%   arg 1: index category
%   arg 2: index key
%   arg 3: abbreviation of section to link to (empty to auto-resolve)
%
% \hiddenindexlink
%   Hidden link to indexed position.
%   arg 0: link text
%   arg 1: index category
%   arg 2: index key
%   arg 3: abbreviation of section to link to (empty to auto-resolve)
%
% \indexedspan
%   arg 0: text
%   arg 1: indices (zero or more \index commands)
	
%% cxxdraft-htmlgen derived macros:

\newcommand{\linkx}[3]{\indexlink{#1}{generalindex}{#2}{#3}}
	% Link to indexed position.
	%   arg 0: link text
	%   arg 1: generalindex key
	%   arg 2: section abbreviation

\newcommand{\deflinkx}[3]{\indexlink{#1}{generalindex}{#2|idxbfpage}{#3}}
	% Link to definition.
	%   arg 0: link text
	%   arg 1: definition key
	%   arg 2: section abbreviation

\newcommand{\deflink}[2]{\deflinkx{#1}{#1}{#2}}
	% Convenience macro for when the link
	% text is also the definition key.

\newcommand{\libmemberrefx}[3]{\indexlink{\tcode{#1}}{libraryindex}{\idxcode{#2}!\idxcode{#3}}{}}
\newcommand{\libglobalref}[1]{\libglobalrefx{#1}{#1}}
\newcommand{\libglobalrefx}[2]{\indexlink{\tcode{#1}}{libraryindex}{\idxcode{#2}}{}}
\newcommand{\noncxxtcode}[1]{\tcode{#1}}
\newcommand{\literaltcode}[1]{\tcode{#1}}
\newcommand{\literalterminal}[1]{\terminal{##1}}
\newcommand{\noncxxterminal}[1]{\terminal{##1}}
\newcommand{\oldconceptref}[1]{\indexlink{\oldconcept{#1}}{generalindex}{\idxoldconcept{#1}}{}}

%% replacements for existing macros:

\newcommand{\defnoldconcept}[1]{\indexedspan{\oldconcept{#1}}{\indextext{\idxoldconcept{#1}}}}
\newcommand{\indexdefn}[1]{\indextext{#1|idxbfpage}}
\newcommand{\idxcode}[1]{#1@\tcode{#1}}
\newcommand{\nontermdef}[1]{\hiddenindexlink{\indexedspan{#1\textnormal{:}}{\indexgrammar{\idxgram{#1}}}}{grammarindex}{\idxgram{#1}|idxbfpage}{}}
\newcommand{\renontermdef}[1]{#1\,\textnormal{::}}
\newcommand{\fmtnontermdef}[1]{#1\,\textnormal{:}}
\newcommand{\locnontermdef}[1]{#1\,\textnormal{:}}
\newcommand{\grammarterm}[1]{\indexlink{\indexedspan{\gterm{#1}}{\indexgram{\idxgram{#1}}}}{grammarindex}{\idxgram{#1}|idxbfpage}{}}
\newcommand{\cite}[1]{\indexlink{[bib]}{bibliography}{#1}{bibliography}}
\newcommand{\libglobal}[1]{\indexedspan{\hiddenindexlink{#1}{libraryindex}{\idxcode{#1}}{}}{\indexlibraryglobal{#1}}}
\newcommand{\libmember}[2]{\indexedspan{\hiddenindexlink{#1}{libraryindex}{\idxcode{#2}!\idxcode{#1}}{}}{\indexlibrarymember{#1}{#2}}}
\newcommand{\libheader}[1]{\indexlink{\indexedspan{\tcode{<#1>}}{\indexhdr{#1}}}{headerindex}{\idxhdr{#1}|idxbfpage}{}}
\newcommand{\libheaderdef}[1]{\indexedspan{\tcode{<#1>}}{\indexheader{#1}}}
\newcommand{\libheaderrefx}[2]{\libheader{#1}}
\newcommand{\libconceptx}[2]{\indexlink{\indexedspan{\cname{#1}}{\indexconcept{\idxconcept{#2}}}}{conceptindex}{\idxconcept{#2}|idxbfpage}{}}
\newcommand{\libmacro}[1]{\indexedspan{\tcode{#1}}{\indexlibraryglobal{#1}}}
\newcommand{\libxmacro}[1]{\indexedspan{\tcode{__#1}}{\indexlibraryglobal{__#1}}}
\newcommand{\Range}[4]{#1\tcode{#3,\penalty2000{} #4}#2}
\newcommand{\deflibconcept}[1]{\hiddenindexlink{\indexedspan{\cname{#1}}{\indexlibrary{\idxconcept{#1}}\indexconcept{\idxconcept{#1}|idxbfpage}}}{conceptindex}{\idxconcept{#1}|idxbfpage}{}}
\newcommand{\defexposconcept}[1]{\hiddenindexlink{\indexedspan{\ecname{#1}}{\indexconcept{\idxexposconcept{#1}|idxbfpage}}}{conceptindex}{\idxexposconcept{#1}|idxbfpage}{}}
\newcommand{\defexposconceptnc}[1]{\defexposconcept{#1}}
\newcommand{\exposconcept}[1]{\indexlink{\indexedspan{\ecname{#1}}{\indexconcept{\idxexposconcept{#1}}}}{conceptindex}{\idxexposconcept{#1}|idxbfpage}{}}
\newcommand{\exposconceptx}[2]{\indexedspan{\ecname{#1}}{\indexconcept{\idxexposconcept{#2}}}}
\newcommand{\exposconceptnc}[1]{\exposconcept{#1}}
\newcommand{\keyword}[1]{\indexedspan{\tcode{#1}}{\indextext{\idxcode{#1}}}}
\newcommand{\itcorr}[1][]{}
\newcommand{\diffdef}[1]{\break\diffhead{#1}}
\newcommand{\defnx}[2]{\hiddenindexlink{\indexedspan{\textit{#1}}{\indexdefn{#2}}}{generalindex}{#2|idxbfpage}{}}
\newcommand{\defnxname}[1]{\indexedspan{\xname{#1}}{\indextext{\idxxname{#1}}}}
\newcommand{\defnadj}[2]{\indextext{#1 #2|see{#2, #1}}\defnx{#1 #2}{#2!#1}}
\newcommand{\defnadjx}[3]{\indextext{#1 #3|see{#3, #1}}\defnx{#1 #2}{#3!#1}}
\newcommand{\defnlibxname}[1]{\indexedspan{\xname{#1}}{\indexlibrary{\idxxname{#1}}}}
\newcommand{\descr}[1]{\textnormal{#1}}
\newcommand{\cv}{\mathit{cv}}
\newcommand{\texorpdfstring}[2]{#2}
\newcommand{\textunderscore}{_}
\newcommand{\emo}[1]{#1}
\newcommand{\bm}[1]{\textbf{#1}}

\newenvironment{LongTable}[3]
{
 \newcommand{\continuedcaption}{\caption[]{#1 (continued)}}
 \begin{htmlTable}{#1}{#2}{#3}
 \begin{TableBase}
}
{
 \bottomline
 \end{TableBase}
 \end{htmltable}
}


================================================
FILE: mathjax-batch
================================================
#! /usr/bin/env node

var mjAPI = require("mathjax-node");
var split = require("split");

mjAPI.config(
  { extensions: ""
  , fontURL: "https://cdn.mathjax.org/mathjax/latest/fonts/HTML-CSS" });
mjAPI.start();

var math = '';
function processLine(line)
{
    var format;
    if (line == "NONINLINE") format = "TeX";
    else if (line == "INLINE") format = "inline-TeX";
    else
    {
        if (math != '') math += '\n';
        math += line;
        return;
    }

    mjAPI.typeset({
      math: math, format: format, html: true, css: false,
      speakText: true, ex: 6, width: 100, linebreaks: true
    }, function (data) {
      // todo: if (data.errors) abort
      console.log(data.html)
      console.log("DONE")
    });

    math = '';
}

process.stdin.pipe(split(/\n/, null, {trailing: false})).on('data', processLine)


================================================
FILE: normative-only.css
================================================
div.example { display: none; }
div.note { display: none; }

a.footnotenum { display: none; }
div.footnote { display: none; }
div.footnoteSeparator { display: none; }
.footnoteref { display: none; }

div.nonNormativeOnly { display: none; }


================================================
FILE: stack.yaml
================================================
resolver: lts-12.17
packages:
- .

extra-deps: []

flags: {}

extra-package-dbs: []


================================================
FILE: toc.css
================================================
h1 { margin: 0.2em 5pt 0.2em 5pt; line-height: 1.5; }
h2 { margin: 0.2em 5pt 0.2em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; }
h3 { margin: 0.2em 5pt 0.2em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; }
h4 { margin: 0.1em 5pt 0.1em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; }
:target h2 { border-bottom: none; }
.tocHeader { text-align: center; }
div.tocChapter { display: none; }
:target > div.tocChapter { display: block; }

@media (prefers-color-scheme: dark) {
    h2 { border-bottom-color: #b0b0b05a; }
    h3 { border-bottom-color: #b0b0b05a; }
    h4 { border-bottom-color: #b0b0b05a; }
}