Showing preview only (222K chars total). Download the full file or copy to clipboard to get everything.
Repository: Eelis/cxxdraft-htmlgen
Branch: master
Commit: 63bc5929dc4f
Files: 28
Total size: 213.0 KB
Directory structure:
gitextract_qdd768we/
├── .gitignore
├── 14882.css
├── CxxParser.hs
├── Document.hs
├── LICENSE
├── LaTeXBase.hs
├── LaTeXParser.hs
├── Load14882.hs
├── MathJax.hs
├── Pages.hs
├── README
├── RawDocument.hs
├── Render.hs
├── SectionPages.hs
├── Sentences.hs
├── Setup.hs
├── Toc.hs
├── Util.hs
├── colored.css
├── cxxdraft-htmlgen.cabal
├── expanded.css
├── fulltoc.css
├── genhtml.hs
├── macros.tex
├── mathjax-batch
├── normative-only.css
├── stack.yaml
└── toc.css
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
14882
dist
tags
node_modules
.stack-work/
================================================
FILE: 14882.css
================================================
body {
font-family: 'Noto Serif';
hyphens: auto;
line-height: 1.5;
margin-left: 20mm;
margin-right: 16mm;
margin-top: 12mm;
margin-bottom: 12mm;
font-size: 10pt;
}
div {
background: inherit;
}
div.wrapper {
max-width: 20cm;
margin: auto;
}
div.texpara {
text-align: justify;
margin-top: 3pt;
margin-bottom: 3pt;
}
table div.texpara {
margin-top: 0;
margin-bottom: 0;
}
table.enumerate div.texpara {
margin-top: 3pt;
margin-bottom: 3pt;
}
ul {
list-style-type: none;
padding-left: 9mm;
margin-top: 0;
margin-bottom: 0;
}
ol {
margin-top: 0;
margin-bottom: 0;
}
a { text-decoration: none; }
a.hidden_link {
text-decoration: none;
color: inherit;
}
li {
margin-top: 3pt;
margin-bottom: 3pt;
}
h1 {
line-height: 1;
font-size: 20pt;
margin-top: 10pt;
margin-bottom: 10pt;
}
h2 {
line-height: 1;
font-size: 16pt;
margin-top: 10pt;
margin-bottom: 10pt;
}
h2::after {
content: "";
clear: both;
display: table;
}
h3 {
line-height: 1;
font-size: 12pt;
margin-top: 10pt;
margin-bottom: 10pt;
}
h3::after {
content: "";
clear: both;
display: table;
}
h4 {
line-height: 1;
font-size: 11pt;
margin-top: 10pt;
margin-bottom: 10pt;
}
h4::after {
content: "";
clear: both;
display: table;
}
ul > li:before {
content: "\2014";
position: absolute;
margin-left: -1.5em;
}
.shy:before {
content: "\00ad";
/* This is U+00AD SOFT HYPHEN, same as ­, but we put it in :before
to stop it from being included when the text is copied to the clipboard
with Firefox, which is especially annoying when copying to a terminal,
where the hyphen characters will show up. */
}
:target { background-color: #C9FBC9; }
:target .codeblock { background-color: #C9FBC9; }
:target ul { background-color: #C9FBC9; }
.abbr_ref { float: right; }
.folded_abbr_ref { float: right; }
:target .folded_abbr_ref { display: none; }
:target .unfolded_abbr_ref { float: right; display: inherit; }
.unfolded_abbr_ref { display: none; }
.secnum { display: inline-block; min-width: 35pt; }
.annexnum { display: block; }
div.sourceLinkParent {
float: right;
}
a.sourceLink {
position: absolute;
opacity: 0;
margin-left: 10pt;
}
a.sourceLink:hover {
opacity: 1;
}
a.itemDeclLink {
position: absolute;
font-size: 75%;
text-align: right;
width: 5em;
opacity: 0;
}
a.itemDeclLink:hover { opacity: 1; }
div.marginalizedparent {
position: relative;
text-align: left;
left: -18mm;
}
a.marginalized {
width: 15mm;
position: absolute;
top: 0.4mm;
font-size: 7pt;
text-align: right;
}
a.enumerated_item_num {
display: block;
margin-top: 3pt;
margin-bottom: 3pt;
margin-right: 6pt;
}
div.para {
position: relative;
left: -14mm;
padding-left: 14mm;
width: 100%;
margin-bottom: 6pt;
margin-top: 6pt;
text-align: justify;
min-height: 1.2em;
}
div.section { text-align: justify; }
div.sentence { display: inline; }
a.index {
position: relative;
float: right;
right: -1em;
display: none;
}
a.index:before {
position: absolute;
content: "⟵";
background-color: #C9FBC9;
}
a.index:target {
display: inline;
}
.indexitems {
margin-left: 2em;
text-indent: -2em;
}
div.itemdescr {
margin-left: 12mm;
}
.bnf {
font-family: 'Noto Sans';
font-size: 10pt;
font-style: italic;
margin-left: 25pt;
margin-right: -15mm;
margin-top: 0.5em;
margin-bottom: 0.5em;
text-indent: -3em;
padding-left: 3em;
line-height: 1.5;
}
div.bnf span.texttt { font-family: 'Noto Sans Mono'; font-style: normal; }
.rebnf {
font-family: 'Noto Serif';
font-style: italic;
margin-top: 0.5em;
margin-bottom: 0.5em;
margin-left: 30pt;
text-indent: -3em;
padding-left: 3em;
line-height: 1.5;
}
.simplebnf {
font-family: 'Noto Serif';
font-style: italic;
font-size: 10pt;
margin-top: 0.5em;
margin-bottom: 0.5em;
margin-left: 30pt;
line-height: 1.5;
}
span.textnormal {
font-style: normal;
font-family: 'Noto Serif';
font-size: 10pt;
white-space: normal;
}
.bnf span.textnormal {
font-style: normal;
font-family: 'Noto Serif';
font-size: 10pt;
white-space: normal;
}
p {
margin-top: 4pt;
margin-bottom: 4pt;
}
span.rlap {
display: inline-block;
width: 0px;
text-indent: 0;
}
span.terminal {
font-family: 'Noto Sans Mono';
font-style: normal;
font-size: 9pt;
white-space: pre-wrap;
}
span.noncxxterminal {
font-family: 'Noto Sans Mono';
font-style: normal;
font-size: 9pt;
}
span.term { font-style: italic; }
span.tcode { font-family: 'Noto Sans Mono'; font-style: normal; }
span.textbf { font-weight: bold; }
span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
div.footnote span.textsf { font-family: 'Noto Sans'; font-size: 8pt; }
.bnf span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
.simplebnf span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
.example span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
span.textsc { font-variant: small-caps; }
span.nontermdef { font-style: italic; font-family: 'Noto Sans'; font-size: 10pt; }
span.emph { font-style: italic; }
span.techterm { font-style: italic; }
span.mathit { font-style: italic; }
span.mathsf { font-family: 'Noto Sans'; }
span.mathrm { font-family: 'Noto Serif'; font-style: normal; }
span.textrm { font-family: 'Noto Serif'; font-size: 10pt; }
span.textsl { font-style: italic; }
span.mathtt { font-family: 'Noto Sans Mono'; font-style: normal; }
span.mbox { font-family: 'Noto Serif'; font-style: normal; }
span.ungap { display: inline-block; width: 2pt; }
span.texttt { font-family: 'Noto Sans Mono'; }
span.textit { font-style: italic; }
div.footnote span.texttt { font-family: 'Noto Sans Mono'; }
span.tcode_in_codeblock { font-family: 'Noto Sans Mono'; font-style: normal; font-size: 9pt; }
span.phantom { color: white; }
/* Unfortunately, this way the text is still selectable. Another
option is display:none, but then we lose the nice layout.
Todo: find proper solution. */
span.math {
font-style: normal;
font-family: 'Noto Serif';
font-size: 10pt;
}
span.mathblock {
display: block;
margin-left: auto;
margin-right: auto;
margin-top: 1.2em;
margin-bottom: 1.2em;
text-align: center;
}
span.mathalpha {
font-style: italic;
}
span.synopsis {
font-weight: bold;
margin-top: 0.5em;
display: block;
}
span.definition {
font-weight: bold;
display: block;
}
.codeblock {
font-family: 'Noto Sans Mono';
margin-left: 1.2em;
line-height: 1.5;
font-size: 9pt;
white-space: pre;
display: block;
margin-top: 3pt;
margin-bottom: 3pt;
overflow: auto;
margin-right: -15mm;
}
table .codeblock { margin-right: 0; }
.outputblock {
margin-left: 1.2em;
line-height: 1.5;
font-family: 'Noto Sans Mono';
font-size: 9pt;
}
code {
font-family: 'Noto Sans Mono';
font-style: normal;
}
div.itemdecl {
margin-top: 2ex;
}
code.itemdeclcode {
white-space: pre;
font-family: 'Noto Sans Mono';
font-size: 9pt;
display: block;
overflow: auto;
margin-right: -15mm;
}
.comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 10pt; }
.footnote .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 8pt; }
.example .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 9pt; }
.note .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 9pt; }
span.keyword { color: #00607c; font-style: normal; }
span.parenthesis { color: #af1915; }
span.curlybracket { color: #af1915; }
span.squarebracket { color: #af1915; }
span.literal { color: #9F6807; }
span.literalterminal { color: #9F6807; font-family: 'Noto Sans Mono'; font-style: normal; }
span.operator { color: #570057; }
span.anglebracket { color: #570057; }
span.preprocessordirective { color: #6F4E37; }
span.textsuperscript {
vertical-align: super;
font-size: smaller;
line-height: 0;
}
.footnoteref {
vertical-align: super;
font-size: smaller;
line-height: 0;
}
.footnote {
font-size: 8pt;
}
.footnote .math {
font-size: 8pt;
}
.footnotenum {
display: inline-block;
text-align: right;
margin-right: 1mm;
width: 4ch;
}
.footnoteBacklink {
display: none;
}
:target .footnoteBacklink {
display: inline-block;
text-align: right;
margin-right: 1mm;
width: 4ch;
}
:target .footnotenum {
display: none;
}
.footnoteSeparator {
background: black;
margin-top: 5mm;
height: 1px;
width: 6cm;
}
div.minipage {
display: inline-block;
margin-right: 3em;
}
div.numberedTable {
text-align: center;
margin-left: 1em;
margin-right: 1em;
margin-bottom: 12pt;
margin-top: 8pt;
}
div.figure {
text-align: center;
margin-left: 2em;
margin-right: 2em;
margin-bottom: 12pt;
margin-top: 3pt;
}
table {
border: 1px solid black;
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
margin-top: 7pt;
text-align: left;
}
td, th {
padding-left: 8pt;
padding-right: 8pt;
vertical-align: top;
}
td.empty {
padding: 0px;
padding-left: 1px;
}
td.left {
text-align: left;
}
td.hidden {
padding: 0;
width: 0;
}
td.right {
text-align: right;
}
td.center {
text-align: center;
}
td.justify {
text-align: justify;
}
td.border {
border-left: 1px solid black;
}
tr.rowsep, td.cline {
border-top: 1px solid black;
}
tr.capsep {
border-top: 3px solid black;
border-top-style: double;
}
th {
border-bottom: 1px solid black;
}
span.centry {
font-weight: bold;
}
div.table {
display: block;
margin-left: auto;
margin-right: auto;
text-align: center;
width: 90%;
}
span.indented {
background: inherit;
display: block;
margin-left: 2em;
margin-bottom: 1em;
margin-top: 1em;
}
span.uppercase {
text-transform: uppercase;
}
span.ucode {
font-variant: small-caps;
text-transform: uppercase;
font-size: 90%;
}
span.uname {
font-variant: small-caps;
text-transform: uppercase;
font-size: 90%;
}
table.enumerate {
border: 0;
margin: 0;
}
table.enumerate td {
padding: 0;
}
table.enumerate td:first-child {
width: 1cm;
text-align: right;
}
@media (prefers-color-scheme: dark) {
body {
background-color: #171717;
color: #e0e0e0;
}
span.mjx-mstyle { color: #e0e0e0 !important }
a:link { color: #74bdff; }
a:visited { color: #c38aff; }
a.hidden_link {
text-decoration: none;
color: inherit;
}
span.phantom { color: #171717; }
a.index:before { color: #e0e0e0; background-color: #4b6353; }
.comment { color: #40f040; }
.footnote .comment { color: #60ff60; }
.example .comment { color: #60ff60; }
.note .comment { color: #60ff60; }
span.keyword { color: #32eade; }
span.parenthesis { color: #ff7070; }
span.curlybracket { color: #ff7070; }
span.squarebracket { color: #ff7070; }
span.literal { color: #ffd867; }
span.literalterminal { color: #ffd867; }
span.operator { color: #dac6d9; }
span.anglebracket { color: #dac6d9; }
span.preprocessordirective { color: #c28c68; }
table { border-color: #e0e0e0; }
td.border { border-color: #e0e0e0; }
td.border { border-left-color: #e0e0e0; }
tr.rowsep, td.cline { border-top-color: #e0e0e0; }
tr.capsep { border-top-color: #e0e0e0; }
th { border-bottom-color: #e0e0e0; }
.footnoteSeparator { background-color: #e0e0e0; }
text { fill: #e0e0e0; }
path { stroke: #e0e0e0; }
polygon { stroke: #e0e0e0; fill: #e0e0e0; }
ellipse { stroke: #e0e0e0; }
:target { background-color: #4b6345; color: #ffffff; }
:target .codeblock { background-color: #4b6345; }
:target ul { background-color: #4b6345; }
:target a:link { color: #9fcdff; }
:target a:visited { color: #d39aff; }
:target a.hidden_link { text-decoration: none; color: inherit; }
:target span.keyword { color: #52faee; }
:target span.parenthesis { color: #ff4060; font-weight: bold; }
:target span.curlybracket { color: #ff4060; font-weight: bold; }
:target span.squarebracket { color: #ff4060; font-weight: bold; }
:target span.literal { color: #ffe070; }
:target span.literalterminal { color: #ffe070; }
:target span.operator { color: #ffffff; }
:target span.anglebracket { color: #ffffff; }
:target span.preprocessordirective { color: #e0968f; }
:target .comment { color: #75ff20; }
:target .footnote .comment { color: #75ff20; }
:target .example .comment { color: #75ff20; }
:target .note .comment { color: #75ff20; }
}
================================================
FILE: CxxParser.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE
OverloadedStrings,
RecordWildCards,
TupleSections,
ViewPatterns,
LambdaCase,
TypeSynonymInstances,
FlexibleInstances #-}
module CxxParser (parseLiteral, parseComment, parseCppDirective) where
import LaTeXBase (LaTeX, LaTeXUnit(..), ArgKind(..), concatRaws,
texStripPrefix, texStripAnyPrefix, texStripInfix, texSpan, unconsRaw)
import qualified Data.Text as Text
import Data.Char (isAlpha, isSpace, isAlphaNum, isDigit)
import Control.Arrow (first)
import Prelude hiding ((.), (++))
import Util ((.), (++), Text)
texStripHash :: LaTeX -> Maybe LaTeX
texStripHash x
| Just x' <- texStripPrefix "#" x = Just x'
| TeXComm "#" _ [] : x' <- x = Just x'
| otherwise = Nothing
cppDirectives :: [Text]
cppDirectives = Text.words "include define elifndef elifdef ifndef endif ifdef pragma error undef line elif warning else if embed"
spanLiteralChars :: String -> (String, String {- rest without the closing ' -})
spanLiteralChars [] = ([], [])
spanLiteralChars ('\\' : '\'' : rest) = first ("\\'"++) (spanLiteralChars rest)
spanLiteralChars ('\\' : '\\' : rest) = first ("\\\\"++) (spanLiteralChars rest)
spanLiteralChars ('\'' : x) = ([], x)
spanLiteralChars (c : rest) = first (c :) (spanLiteralChars rest)
parseLiteralChars :: LaTeX -> (LaTeX, LaTeX)
parseLiteralChars [] = ([], [])
parseLiteralChars (TeXRaw s : rest) = case spanLiteralChars (Text.unpack s) of
(x, []) -> first (TeXRaw (Text.pack x) :) (parseLiteralChars rest)
(x, more) -> ([TeXRaw (Text.pack x)], TeXRaw (Text.pack more) : rest)
parseLiteralChars (x : rest) = first (x :) (parseLiteralChars rest)
parseCharLiteral :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
parseCharLiteral x
| Just (pre, x') <- texStripAnyPrefix ["'", "u'", "L'", "U'", "u8'"] x
, (before, x'') <- parseLiteralChars x'
, (suffix, x''') <- texSpan (\c -> isAlphaNum c || c == '_') x''
= Just ([TeXRaw pre] ++ before ++ [TeXRaw $ "'" ++ suffix], x''')
| otherwise = Nothing
parseCppDirective :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
parseCppDirective x
| Just x'' <- texStripHash x
, (spaces, x''') <- texSpan isSpace x''
, Just (directive, x'''') <- texStripAnyPrefix cppDirectives x'''
= Just ([TeXRaw ("#" ++ spaces ++ directive)], x'''')
| otherwise = Nothing
parseSingleLineComment :: LaTeX -> Maybe (LaTeX {- comment -}, LaTeX {- subsequent lines -})
parseSingleLineComment x
| Just x' <- texStripPrefix "//" x = Just $ case texStripInfix "\n" x' of
Just (commentLine, moreLines) -> (TeXRaw "//" : commentLine, TeXRaw "\n" : moreLines)
Nothing -> (x, [])
| rlap@(TeXComm "rlap" _ [(FixArg, [TeXComm "textnormal" _ [(FixArg,[TeXComm "textit" _ [(FixArg,[TeXRaw "//"])]])]])]) : more <- x
, Just (commentLine, moreLines) <- texStripInfix "\n" more
= Just ([rlap, TeXComm "tcode" "" [(FixArg, commentLine)]], TeXRaw "\n" : moreLines)
| TeXComm "comment" _ [(FixArg, c)] : x' <- x = Just (c, x')
| otherwise = Nothing
fromTeXRaw :: LaTeXUnit -> Text
fromTeXRaw (TeXRaw x) = x
fromTeXRaw x = error $ "fromTeXRaw (" ++ show x ++ ")"
parseStringLiteral :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
parseStringLiteral x
-- raw:
| Just (pre, x') <- texStripAnyPrefix ["R\"", "u8R\"", "uR\"", "UR\"", "LR\""] x
, Just (delim, x'') <- texStripInfix "(" x'
, Just (body, x''') <- texStripInfix (")" ++ Text.concat (map fromTeXRaw delim) ++ "\"") (concatRaws $ f x'')
, (suffix, x'''') <- texSpan (\c -> isAlphaNum c || c == '_') x'''
= Just ([TeXRaw pre] ++ delim ++ [TeXRaw "("] ++ body ++ [TeXRaw ")"] ++ delim ++ [TeXRaw $ "\"" ++ suffix], x'''')
-- normal:
| Just (pre, x') <- texStripAnyPrefix ["\"", "u\"", "U\"", "L\"", "u8\""] x
, Just (body, x'') <- parseBody x'
, (suffix, x''') <- texSpan (\c -> isAlphaNum c || c == '_') x''
= Just ([TeXRaw pre] ++ body ++ [TeXRaw $ "\"" ++ suffix], x''')
| otherwise = Nothing
where
f :: LaTeX -> LaTeX
f [] = []
f (TeXComm "~" _ [] : more) = TeXRaw "~" : f more
f (TeXBraces [] : more) = f more
f (hd : t) = hd : f t
parseBody :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
parseBody [] = Nothing
parseBody (TeXComm "textbackslash" _ [] : more) = parseBody $ concatRaws $ TeXRaw "\\" : more
parseBody (TeXRaw (Text.unpack -> raw) : more)
| '\\':'"':t <- raw = first (TeXRaw "\\\"" :) . parseBody (TeXRaw (Text.pack t) : more)
| "\"" <- raw = Just ([], more)
| '"':t <- raw = Just ([], TeXRaw (Text.pack t) : more)
| raw == "" = parseBody more
| hd:t <- raw = first (TeXRaw (Text.pack [hd]) :) . parseBody (TeXRaw (Text.pack t) : more)
parseBody (TeXComm "%" ws [] : more) = first (TeXComm "%" ws [] :) . parseBody more
parseBody (y : more) = first (y :) . parseBody more
parseNumber :: LaTeX -> Maybe (Text, LaTeX)
parseNumber x
| (raw, more) <- unconsRaw x
, Just (n, rest) <- (parseStart `parseSeq` (\t -> Just (parseMany parseSuffix t))) raw
= Just (n, TeXRaw rest : more)
| otherwise = Nothing
where
parseDigit = parseChar isDigit
parseNonDigit = parseChar (\c -> isAlpha c || c == '_')
parseStart :: Text -> Maybe (Text, Text)
parseStart = parseFirstOf [parseChar (== '.') `parseSeq` parseDigit, parseDigit]
parseSign :: Text -> Maybe (Text, Text)
parseSign = parseChar (\c -> c == '-' || c == '+')
parseSuffix :: Text -> Maybe (Text, Text)
parseSuffix = parseFirstOf
[ parseDigit
, parseChar (== '\'') `parseSeq` parseDigit
, parseChar (== '\'') `parseSeq` parseNonDigit
, parseChar (`elem` ("eEpP"::String)) `parseSeq` parseSign
, parseChar (== '.')
, parseNonDigit
]
parseLiteral :: LaTeX -> Maybe (LaTeX, LaTeX)
parseLiteral x
| Just (number, x') <- parseNumber x = Just ([TeXRaw number], x')
| Just (lit, x') <- parseCharLiteral x = Just (lit, x')
| Just (lit, x') <- parseStringLiteral x = Just (lit, x')
| otherwise = Nothing
parseComment :: LaTeX -> Maybe (LaTeX, LaTeX)
parseComment x
| Just x' <- texStripPrefix "/*" x, Just (comment, x'') <- texStripInfix "*/" x'
= Just ([TeXRaw "/*"] ++ comment ++ [TeXRaw "*/"], x'')
| Just x' <- texStripPrefix "/*" x
= Just ([TeXRaw "/*"], x')
| Just x' <- texStripPrefix "*/" x
= Just ([TeXRaw "*/"], x')
| Just (comment, x') <- parseSingleLineComment x
= Just (comment, x')
| otherwise = Nothing
parseChar :: (Char -> Bool) -> Text -> Maybe (Text, Text)
parseChar p t
| t /= "", p (Text.head t) = Just (Text.take 1 t, Text.drop 1 t)
| otherwise = Nothing
parseSeq :: (Text -> Maybe (Text, Text)) -> (Text -> Maybe (Text, Text)) -> Text -> Maybe (Text, Text)
parseSeq p q t
| Just (x, t') <- p t
, Just (y, t'') <- q t' = Just (x ++ y, t'')
| otherwise = Nothing
parseFirstOf :: [Text -> Maybe (a, Text)] -> Text -> Maybe (a, Text)
parseFirstOf [] _ = Nothing
parseFirstOf (p:pp) t
| Just r <- p t = Just r
| otherwise = parseFirstOf pp t
parseMany :: (Text -> Maybe (Text, Text)) -> Text -> (Text, Text)
parseMany p t = case p t of
Nothing -> ("", t)
Just (x, t') -> first (x++) (parseMany p t')
================================================
FILE: Document.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-}
module Document (
CellSpan(..), Cell(..), RowSepKind(..), Row(..), Element(..), Paragraph(..),
Section(..), Chapter(..), Draft(..), Table(..), Figure(..), Item(..), Footnote(..),
IndexPath, IndexComponent(..), IndexCategory, Index, IndexTree, IndexNode(..),
ColumnSpec(..), TextAlignment(..), normative, Formula(..), chapterOfSection,
IndexEntry(..), IndexKind(..), Note(..), Example(..), TeXPara(..), Sentence(..),
texParaTex, texParaElems, XrefDelta, sectionByAbbr, isDefinitionSection, Abbreviation,
indexKeyContent, indexCatName, Sections(sections), SectionKind(..), mergeIndices, SourceLocation(..),
figures, tables, tableByAbbr, figureByAbbr, formulaByAbbr, elemTex, footnotes, allElements,
LaTeX, makeAbbrMap, formulas) where
import LaTeXBase (LaTeXUnit(..), LaTeX, MathType(Dollar))
import Data.Text (Text, replace)
import qualified Data.Text as Text
import qualified Data.List as List
import Data.IntMap (IntMap)
import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.String (IsString)
import Util ((.), (++), greekAlphabet)
-- Document structure:
data CellSpan = Normal | Multicolumn { width :: Int, colspec :: ColumnSpec } deriving (Eq, Show)
data Cell a = Cell { cellSpan :: CellSpan, content :: a } deriving (Eq, Show)
data RowSepKind = RowSep | CapSep | Clines [(Int, Int)] | NoSep deriving (Eq, Show)
data Row a = Row { rowSep :: RowSepKind, cells :: [Cell a] } deriving (Eq, Show)
data TextAlignment = AlignLeft | AlignRight | AlignCenter | Justify
deriving Eq
instance Show TextAlignment where
show AlignLeft = "left"
show AlignRight = "right"
show AlignCenter = "center"
show Justify = "justify"
data ColumnSpec = ColumnSpec
{ columnAlignment :: TextAlignment
, columnBorder :: Bool
, columnWidth :: Maybe Text}
deriving (Eq, Show)
data Table = Table
{ tableNumber :: Int
, tableCaption :: LaTeX
, columnSpec :: [ColumnSpec]
, tableAbbr :: Abbreviation
, tableBody :: [Row [TeXPara]]
, tableSection :: Section }
instance Show Table where
show _ = "<table>"
data Figure = Figure
{ figureNumber :: Int
, figureName :: LaTeX
, figureAbbr :: Abbreviation
, figureSvg :: Text
, figureSection :: Section }
instance Show Figure where
show _ = "<figure>"
data Formula = Formula
{ formulaNumber :: Int
, formulaAbbr :: Abbreviation
, formulaContent :: LaTeX
, formulaSection :: Section }
instance Show Formula where
show _ = "<formula>"
data Item = Item
{ itemNumber :: Maybe [String]
, itemLabel :: Maybe LaTeX
, itemInlineContent :: [Element]
, itemBlockContent :: [TeXPara] }
deriving Show
itemElements :: Item -> [Element]
itemElements Item{..} = itemInlineContent ++ allElements itemBlockContent
data Footnote = Footnote
{ footnoteNumber :: Int
, footnoteContent :: [TeXPara] }
deriving Show
data Note = Note { noteNumber :: Int, noteLabel :: Text, noteContent :: [TeXPara] }
deriving Show
data Example = Example { exampleNumber :: Int, exampleContent :: [TeXPara] }
deriving Show
data Sentence = Sentence { sentenceNumber :: Maybe Int, sentenceElems :: [Element] }
deriving Show
newtype TeXPara = TeXPara { sentences :: [Sentence] }
deriving Show
data Element
= LatexElement LaTeXUnit
| Enumerated { enumCmd :: String, enumItems :: [Item] }
| Bnf String LaTeX
| TableElement Table
| Tabbing LaTeX
| FigureElement Figure
| FormulaElement Formula
| Codeblock LaTeXUnit
| Itemdescr [TeXPara] -- needed because there can be notes in itemdescr envs
| NoteElement Note
| ExampleElement Example
| HtmlElement Text
deriving Show
normative :: Element -> Bool
normative (NoteElement _) = False
normative (ExampleElement _) = False
normative (LatexElement (TeXComm "index" _ _)) = False
normative _ = True
data SectionKind
= NormalSection { _level :: Int }
| DefinitionSection { _level :: Int }
| InformativeAnnexSection
| NormativeAnnexSection
deriving (Eq, Show)
isDefinitionSection :: SectionKind -> Bool
isDefinitionSection (DefinitionSection _) = True
isDefinitionSection _ = False
data Chapter = NormalChapter | InformativeAnnex | NormativeAnnex
deriving (Eq, Show)
data SourceLocation = SourceLocation
{ sourceFile :: FilePath
, sourceLine :: Int }
deriving (Eq, Show)
data Paragraph = Paragraph
{ paraNumber :: Maybe Int
, paraInItemdescr :: Bool
, paraElems :: [TeXPara]
, paraSection :: Section
, paraSourceLoc :: Maybe SourceLocation
, allParaElems :: [Element] } -- derivable but stored for efficiency
deriving Show
type Abbreviation = Text -- of a section, figure, or table
data Section = Section
{ abbreviation :: Abbreviation
, sectionName :: LaTeX
, paragraphs :: [Paragraph]
, sectionFootnotes :: [Footnote]
, subsections :: [Section]
, sectionNumber :: Int
, chapter :: Chapter
, parents :: [Section] -- if empty, this is the chapter
, sectionKind :: SectionKind
, secIndexEntries :: IntMap IndexEntry
, secIndexEntriesByPath :: Map IndexPath [(Int, IndexEntry)]
}
deriving Show
chapterOfSection :: Section -> Section
chapterOfSection s@Section{..}
| null parents = s
| otherwise = last parents
instance Eq Section where
x == y = abbreviation x == abbreviation y
type XrefDelta = [(Abbreviation, [LaTeX])]
data StablyNamedItem
= StablyNamedTable Table
| StablyNamedSection Section
| StablyNamedFigure Figure
| StablyNamedFormula Formula
data Draft = Draft
{ commitUrl :: Text
, chapters :: [Section]
, index :: Index
, indexEntryMap :: IntMap IndexEntry
, indexEntriesByPath :: Map IndexPath [(Int, IndexEntry)]
, xrefDelta :: XrefDelta
, abbrMap :: Abbreviation -> Maybe StablyNamedItem
, labels :: Map Text Section }
-- (The index entry maps are derivable but stored for efficiency.)
stablyNamedItems :: Draft -> [(Abbreviation, StablyNamedItem)]
stablyNamedItems d =
[(abbreviation s, StablyNamedSection s) | s <- sections d] ++
[(tableAbbr t, StablyNamedTable t) | p <- allParagraphs d, TableElement t <- allParaElems p] ++
[(formulaAbbr f, StablyNamedFormula f) | p <- allParagraphs d, FormulaElement f <- allParaElems p] ++
[(figureAbbr f, StablyNamedFigure f) | p <- allParagraphs d, FigureElement f <- allParaElems p]
makeAbbrMap :: Draft -> Abbreviation -> Maybe StablyNamedItem
makeAbbrMap = flip Map.lookup . Map.fromList . stablyNamedItems
-- Indices:
data IndexComponent = IndexComponent { distinctIndexSortKey, indexKey :: LaTeX }
deriving (Ord, Show)
instance Eq IndexComponent where
x == y =
distinctIndexSortKey x == distinctIndexSortKey y &&
indexKeyContent (indexKey x) == indexKeyContent (indexKey y)
type IndexPath = [IndexComponent]
data IndexKind = See { _also :: Bool, _ref :: LaTeX } | IndexOpen | IndexClose | DefinitionIndexEntry
deriving (Eq, Show)
type IndexCategory = Text
type Index = Map IndexCategory IndexTree
instance Show IndexEntry where
show IndexEntry{..} =
"IndexEntry"
++ "{indexSection=" ++ show indexEntrySection
++ ",indexCategory=" ++ show indexCategory
++ ",indexPath=" ++ show indexPath
++ ",indexEntryKind=" ++ show indexEntryKind
++ "}"
data IndexEntry = IndexEntry
{ indexEntrySection :: Abbreviation
, indexEntryKind :: Maybe IndexKind
, indexPath :: IndexPath
, indexEntryNr :: Maybe Int
, indexCategory :: Text
}
type IndexTree = Map IndexComponent IndexNode
data IndexNode = IndexNode
{ indexEntries :: [IndexEntry]
, indexSubnodes :: IndexTree }
mergeIndices :: [Index] -> Index
mergeIndices = Map.unionsWith (Map.unionWith mergeIndexNodes)
mergeIndexNodes :: IndexNode -> IndexNode -> IndexNode
mergeIndexNodes x y = IndexNode
{ indexEntries = indexEntries x ++ indexEntries y
, indexSubnodes = Map.unionWith mergeIndexNodes (indexSubnodes x) (indexSubnodes y) }
indexKeyContent :: LaTeX -> Text
indexKeyContent = mconcat . map ikc
where
ikc :: LaTeXUnit -> Text
ikc (TeXRaw t) = replace "\n" " " t
ikc (TeXComm "tcode" _ [(_, x)]) = indexKeyContent x
ikc (TeXComm "idxcode" _ [(_, x)]) = indexKeyContent x
ikc (TeXComm "noncxxtcode" _ [(_, x)]) = indexKeyContent x
ikc (TeXComm "indexedspan" _ [(_, x), _]) = indexKeyContent x
ikc (TeXComm "text" _ [(_, x)]) = indexKeyContent x
ikc (TeXComm "texttt" _ [(_, x)]) = indexKeyContent x
ikc (TeXComm "textit" _ [(_, x)]) = indexKeyContent x
ikc (TeXComm "textsc" _ [(_, x)]) = indexKeyContent x
ikc (TeXComm "mathsf" _ [(_, x)]) = indexKeyContent x
ikc (TeXComm "textsf" _ [(_, x)]) = indexKeyContent x
ikc (TeXComm "textcolor" _ [_, (_, x)]) = indexKeyContent x
ikc (TeXComm "xspace" _ []) = "_"
ikc (TeXComm "Cpp" _ []) = "C++"
ikc (TeXComm "&" _ []) = "&"
ikc (TeXComm "%" _ []) = "%"
ikc (TeXComm "-" _ []) = ""
ikc (TeXComm "ell" _ []) = "ℓ"
ikc (TeXComm "~" _ []) = "~"
ikc (TeXComm "#" _ []) = "#"
ikc (TeXComm "{" _ []) = "{"
ikc (TeXComm "}" _ []) = "}"
ikc (TeXComm "protect" _ []) = ""
ikc (TeXComm "frenchspacing" _ []) = ""
ikc (TeXComm "caret" _ []) = "^"
ikc (TeXComm "tilde" _ []) = "~"
ikc (TeXComm "^" _ []) = "^"
ikc (TeXComm "\"" _ []) = "\""
ikc (TeXComm "" _ []) = ""
ikc (TeXComm "x" _ []) = "TODO"
ikc (TeXComm "textbackslash" _ []) = "\\"
ikc (TeXComm "textunderscore" _ []) = "_"
ikc (TeXComm "discretionary" _ _) = ""
ikc (TeXComm "texorpdfstring" _ [_, (_, x)]) = indexKeyContent x
ikc (TeXComm s _ [])
| Just c <- List.lookup s greekAlphabet = Text.pack [c]
ikc (TeXBraces x) = indexKeyContent x
ikc (TeXMath Dollar x) = indexKeyContent x
ikc (TeXComm "index" _ _) = ""
ikc (TeXComm "indexlink" _ ((_, x):_)) = indexKeyContent x
ikc (TeXComm "hiddenindexlink" _ ((_, x):_)) = indexKeyContent x
ikc x = error $ "indexKeyContent: unexpected: " ++ show x
indexCatName :: (Eq b, Show b, IsString a, IsString b) => b -> a
indexCatName "impldefindex" = "Index of implementation-defined behavior"
indexCatName "libraryindex" = "Index of library names"
indexCatName "headerindex" = "Index of library headers"
indexCatName "generalindex" = "Index"
indexCatName "grammarindex" = "Index of grammar productions"
indexCatName "conceptindex" = "Index of library concepts"
indexCatName "bibliography" = "Bibliography"
indexCatName x = error $ "indexCatName: " ++ show x
-- Gathering entities:
class Sections a where sections :: a -> [Section]
instance Sections Section where sections s = s : (subsections s >>= sections)
instance Sections Draft where sections = concatMap sections . chapters
instance Sections a => Sections (Maybe a) where sections = maybe [] sections
allParagraphs :: Sections a => a -> [Paragraph]
allParagraphs = (>>= paragraphs) . sections
tables :: Sections a => a -> [(Paragraph, Table)]
tables x = [(p, t) | p <- allParagraphs x, TableElement t <- allParaElems p]
figures :: Sections a => a -> [(Paragraph, Figure)]
figures x = [(p, f) | p <- allParagraphs x, FigureElement f <- allParaElems p]
formulas :: Sections a => a -> [(Paragraph, Formula)]
formulas x = [(p, f) | p <- allParagraphs x, FormulaElement f <- allParaElems p]
footnotes :: Sections a => a -> [(Section, Footnote)]
footnotes x = [(s, f) | s <- sections x, f <- sectionFootnotes s]
allElements :: [TeXPara] -> [Element]
allElements x = x >>= sentences >>= sentenceElems >>= f
where
f :: Element -> [Element]
f e = e : case e of
Enumerated {..} -> enumItems >>= itemElements
TableElement Table{..} -> allElements $ tableBody >>= cells >>= content
NoteElement Note{..} -> allElements noteContent
Codeblock y -> [LatexElement y]
ExampleElement Example{..} -> allElements exampleContent
Tabbing y -> LatexElement . y
Bnf _ y -> LatexElement . y
_ -> []
-- Misc:
texParaElems :: TeXPara -> [Element]
texParaElems = (>>= sentenceElems) . sentences
texParaTex :: TeXPara -> LaTeX
texParaTex = (>>= elemTex) . texParaElems
itemTex :: Item -> LaTeX
itemTex Item{..} = (itemInlineContent >>= elemTex) ++ (itemBlockContent >>= texParaTex)
elemTex :: Element -> LaTeX
elemTex (NoteElement n) = noteContent n >>= texParaTex
elemTex (ExampleElement x) = exampleContent x >>= texParaTex
elemTex (LatexElement l) = [l]
elemTex (Enumerated _ e) = e >>= itemTex
elemTex (Bnf _ l) = l
elemTex (Tabbing t) = t
elemTex (Codeblock t) = [t]
elemTex (Itemdescr t) = t >>= texParaTex
elemTex (TableElement Table{..}) = tableCaption ++ (tableBody >>= rowTex)
where
rowTex :: Row [TeXPara] -> LaTeX
rowTex r = content . cells r >>= (>>= texParaTex)
elemTex (FigureElement _) = []
elemTex (FormulaElement f) = formulaContent f
elemTex (HtmlElement _) = []
tableByAbbr :: Draft -> Abbreviation -> Maybe Table
-- only returns Maybe because some of our tables are broken
tableByAbbr d a = case abbrMap d a of
Just (StablyNamedTable t) -> Just t
_ -> Nothing
figureByAbbr :: Draft -> Abbreviation -> Figure
figureByAbbr d a = case abbrMap d a of
Just (StablyNamedFigure f) -> f
_ -> error $ "figureByAbbr: " ++ show a
formulaByAbbr :: Draft -> Abbreviation -> Formula
formulaByAbbr d a = case abbrMap d a of
Just (StablyNamedFormula f) -> f
_ -> error $ "formulaByAbbr: " ++ show a
sectionByAbbr :: Draft -> Abbreviation -> Maybe Section
sectionByAbbr d a = case abbrMap d a of
Just (StablyNamedSection s) -> Just s
_ -> Nothing
================================================
FILE: LICENSE
================================================
All authors involved in the creation of the contents of this package have agreed to release their respective contributions into the Public Domain.
================================================
FILE: LaTeXBase.hs
================================================
{-# LANGUAGE ViewPatterns, OverloadedStrings #-}
module LaTeXBase
( MathType(..), LaTeXUnit(..), LaTeX, TeXArg, ArgKind(..), concatRaws, hasCommand, isJustRaw
, matchCommand, lookForCommand, matchEnv, mapTeX, mapCommandName, renderLaTeX, mapTeXRaw, isTeXEnv, texSpan, unconsRaw
, trim, trimr, triml, texStripInfix, isCodeblock, isMath, texStripPrefix, texStripAnyPrefix, AllUnits(..) ) where
import Data.String (fromString)
import Prelude hiding ((.), (++), writeFile, dropWhile)
import Data.Text (Text, pack)
import qualified Data.Text as Text
import Data.Char (isSpace)
import Util ((.), (++), textStripInfix)
import Control.Arrow (first, second)
data MathType = Parentheses | Square | Dollar
deriving (Eq, Show, Ord)
data ArgKind = FixArg | OptArg
deriving (Eq, Show, Ord)
type TeXArg = (ArgKind, LaTeX)
data LaTeXUnit
= TeXRaw Text
| TeXComm String String [TeXArg] -- first string is command name, second is trailing whitespace
| TeXEnv String [TeXArg] LaTeX
| TeXMath MathType LaTeX
| TeXLineBreak
| TeXBraces LaTeX
deriving (Eq, Show, Ord)
isTeXEnv :: String -> LaTeXUnit -> Bool
isTeXEnv x (TeXEnv y _ _) = x == y
isTeXEnv _ _ = False
type LaTeX = [LaTeXUnit]
lookForCommand :: String -> LaTeX -> [[TeXArg]]
lookForCommand n = (snd .) . matchCommand (n ==)
class AllUnits a where
allUnits :: a -> [LaTeXUnit]
instance AllUnits LaTeXUnit where
allUnits u = u : case u of
TeXMath _ l -> allUnits l
TeXBraces l -> allUnits l
TeXComm _ _ a -> (snd . a) >>= allUnits
TeXEnv _ a l -> (l : snd . a) >>= allUnits
_ -> []
instance AllUnits a => AllUnits [a] where
allUnits = concatMap allUnits
matchCommand :: AllUnits a => (String -> Bool) -> a -> [(String, [TeXArg])]
matchCommand f x = [(str, as) | TeXComm str _ as <- allUnits x, f str]
hasCommand :: (String -> Bool) -> LaTeX -> Bool
hasCommand f = not . null . matchCommand f
matchEnv :: AllUnits a => (String -> Bool) -> a -> [(String, [TeXArg], LaTeX)]
matchEnv f x = [(str, as, l) | TeXEnv str as l <- allUnits x, f str]
mapTeX :: (LaTeXUnit -> Maybe LaTeX) -> LaTeX -> LaTeX
mapTeX f = concatMap g
where
g :: LaTeXUnit -> LaTeX
g (f -> Just x) = x
g (TeXComm c ws a) = [TeXComm c ws (h . a)]
g (TeXBraces x) = [TeXBraces (mapTeX f x)]
g (TeXMath t b) = [TeXMath t (mapTeX f b)]
g (TeXEnv n a b) = [TeXEnv n (h . a) (mapTeX f b)]
g x = [x]
h = second (mapTeX f)
mapCommandName :: (String -> String) -> LaTeX -> LaTeX
mapCommandName f = concatMap g
where
g :: LaTeXUnit -> LaTeX
g (TeXComm c ws a) = [TeXComm (f c) ws (h . a)]
g (TeXBraces x) = [TeXBraces (mapCommandName f x)]
g (TeXMath t b) = [TeXMath t (mapCommandName f b)]
g (TeXEnv n a b) = [TeXEnv n (h . a) (mapCommandName f b)]
g x = [x]
h = second (mapCommandName f)
renderLaTeX :: LaTeX -> Text
renderLaTeX = mconcat . (renderUnit .)
renderUnit :: LaTeXUnit -> Text
renderUnit (TeXRaw t) = t
renderUnit (TeXComm "right" _ [(FixArg, [TeXRaw "."])]) = "\\right."
renderUnit (TeXComm name ws [])
| name `elem` ["left", "sum", "int", "sin", "cos", "right", "bigl", "bigr", "big", "small", "smaller"] = pack $ "\\" <> name <> ws
| otherwise = "\\" <> fromString name <> "{}"
renderUnit (TeXComm name ws args) = "\\" <> pack (fromString name) <> pack (fromString ws) <> renderArgs args
renderUnit (TeXEnv name args c) =
"\\begin{" <> fromString name <> "}"
<> renderArgs args
<> renderLaTeX c
<> "\\end{" <> fromString name <> "}"
renderUnit (TeXMath Dollar l) = "$" <> renderLaTeX l <> "$"
renderUnit (TeXMath Square l) = "\\[" <> renderLaTeX l <> "\\]"
renderUnit (TeXMath Parentheses l) = "\\(" <> renderLaTeX l <> "\\)"
renderUnit TeXLineBreak = "\\\\"
renderUnit (TeXBraces l) = "{" <> renderLaTeX l <> "}"
renderArgs :: [TeXArg] -> Text
renderArgs = mconcat . (renderArg .)
renderArg :: TeXArg -> Text
renderArg (FixArg, l) = "{" <> renderLaTeX l <> "}"
renderArg (OptArg, l) = "[" <> renderLaTeX l <> "]"
mapTeXRaw :: (Text -> LaTeXUnit) -> (LaTeX -> LaTeX)
mapTeXRaw f = map go
where
go :: LaTeXUnit -> LaTeXUnit
go (TeXRaw t) = f t
go (TeXComm s ws args) = TeXComm s ws (second (go .) . args)
go (TeXEnv s args body) = TeXEnv s (second (go .) . args) (go . body)
go (TeXBraces l) = TeXBraces $ go . l
go t@(TeXMath _ _) = t
go t@TeXLineBreak = t
concatRaws :: LaTeX -> LaTeX
concatRaws (TeXRaw a : TeXRaw b : more) = concatRaws (TeXRaw (a ++ b) : more)
concatRaws (TeXComm s ws args : more) = TeXComm s ws (second concatRaws . args) : concatRaws more
concatRaws (TeXEnv s args bd : more) = TeXEnv s (second concatRaws . args) (concatRaws bd) : concatRaws more
concatRaws (TeXBraces x : more) = TeXBraces (concatRaws x) : concatRaws more
concatRaws (x : more) = x : concatRaws more
concatRaws [] = []
unconsRaw :: LaTeX -> (Text, LaTeX)
unconsRaw (TeXRaw x : y) = first (x ++) (unconsRaw y)
unconsRaw x = ("", x)
texStripPrefix :: Text -> LaTeX -> Maybe LaTeX
texStripPrefix t (TeXRaw s : y) = case Text.stripPrefix t s of
Just "" -> Just y
Just s' -> Just (TeXRaw s' : y)
Nothing -> Nothing
texStripPrefix _ _ = Nothing
texStripAnyPrefix :: [Text] -> LaTeX -> Maybe (Text, LaTeX)
texStripAnyPrefix [] _ = Nothing
texStripAnyPrefix (x:y) z
| Just a <- texStripPrefix x z = Just (x, a)
| otherwise = texStripAnyPrefix y z
texStripInfix :: Text -> LaTeX -> Maybe (LaTeX, LaTeX)
texStripInfix t = go
where
go [] = Nothing
go (x : rest)
| TeXRaw s <- x
, Just (y, z) <- textStripInfix t s
= Just (h y, h z ++ rest)
| otherwise = first (x :) . go rest
h "" = []
h x = [TeXRaw x]
texSpan :: (Char -> Bool) -> LaTeX -> (Text, LaTeX)
texSpan p (TeXRaw x : y) = case Text.span p x of
(stuff, "") -> first (stuff ++) (texSpan p y)
(stuff, rest) -> (stuff, TeXRaw rest : y)
texSpan _ x = ("", x)
invisible :: LaTeXUnit -> Bool
invisible (TeXComm "index" _ _) = True
invisible _ = False
dropWhileEnd :: (Char -> Bool) -> LaTeX -> LaTeX
dropWhileEnd _ [] = []
dropWhileEnd p x
| invisible (last x) = dropWhileEnd p (init x) ++ [last x]
| TeXRaw y <- last x = init x ++ case Text.dropWhileEnd p y of
"" -> []
a -> [TeXRaw a]
| otherwise = x
trimr, trim :: LaTeX -> LaTeX
trimr = dropWhileEnd isSpace
trim = triml . trimr
triml :: LaTeX -> LaTeX
triml (TeXRaw x : y) = case Text.dropWhile isSpace x of
"" -> triml y
x' -> TeXRaw x' : y
triml x = x
isMath :: LaTeXUnit -> Bool
isMath (TeXMath _ _) = True
isMath (TeXComm "ensuremath" _ _) = True
isMath (TeXEnv "eqnarray*" _ _) = True
isMath (TeXEnv "equation*" _ _) = True
isMath _ = False
isCodeblock :: LaTeXUnit -> Bool
isCodeblock (TeXEnv "codeblock" _ _) = True
isCodeblock (TeXEnv "indexedcodeblock" _ _) = True
isCodeblock (TeXEnv "codeblocktu" _ _) = True
isCodeblock (TeXEnv "codeblockdigitsep" _ _) = True
isCodeblock _ = False
isJustRaw :: LaTeX -> Maybe Text
isJustRaw [TeXRaw x] = Just x
isJustRaw _ = Nothing
================================================
FILE: LaTeXParser.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns, TupleSections #-}
module LaTeXParser (parseString,
Token(Token), Context(..), defaultContext, Signature(..), Macros(..), Environment(..), Command(..), ParseResult(ParseResult),
defaultMacros,
nullCmd, storeCmd, codeEnv, normalCmd,
storeEnv) where
import LaTeXBase (LaTeXUnit(..), LaTeX, TeXArg, ArgKind(..), MathType(..), concatRaws)
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Char (isAlphaNum, isSpace, isAlpha)
import Data.Maybe (fromJust)
import Control.Arrow (first)
import Data.Map (Map)
import qualified Data.Map as Map
import Prelude hiding ((++), (.))
import Util ((.), (++), getDigit, stripInfix)
newtype Token = Token { tokenChars :: String }
deriving (Eq, Show)
data Environment = Environment (Context -> [Token] -> ParseResult)
data Command = Command { runCommand :: Context -> String {- ws -} -> [Token] -> ParseResult }
data Macros = Macros
{ commands :: Map Text Command
, environments :: Map Text Environment
, counters :: Map Text Int }
newCommand :: Bool {- overwrite -} -> (Text, Command) -> Macros -> Macros
newCommand True (name, cmd) Macros{..} = Macros{commands = Map.insert name cmd commands, ..}
newCommand False (name, cmd) Macros{..} = Macros{commands = Map.insertWith (\_ y -> y) name cmd commands, ..}
instance Semigroup Macros where
x <> y = Macros
(commands x ++ commands y)
(environments x ++ environments y)
(counters x ++ counters y)
instance Monoid Macros where
mempty = Macros mempty mempty mempty
data ParseResult = ParseResult
{ content :: LaTeX
, newMacros :: Macros
, remainder :: [Token] }
data Signature = Signature
{ nrFixArgs :: Int
, defaultArg :: Maybe [Token] }
deriving Show
data Context = Context
{ commentsEnabled :: Bool
, parsingOptArg :: Bool
, macros :: Macros }
prependContent :: LaTeX -> ParseResult -> ParseResult
prependContent t p = p{content = t ++ content p}
combineMacros :: Bool {- left biased -} -> Macros -> Macros -> Macros
combineMacros b x y = if b then x ++ y else y ++ x
addMacros :: Bool {- overwrite -} -> Macros -> ParseResult -> ParseResult
addMacros b m p = p{newMacros = combineMacros b m (newMacros p)}
defaultEnvs :: [(Text, Environment)]
defaultEnvs = [outputblockEnv]
codeEnv :: Text -> Signature -> (Text, Environment)
codeEnv name sig = (name, Environment f)
where
f :: Context -> [Token] -> ParseResult
f ctx toks = ParseResult [env] mempty rest'
where
(arguments, rest) = parseArgs sig toks
Just (code, rest') = stripInfix [Token "\\end", Token "{", Token (Text.unpack name), Token "}"] rest
env = TeXEnv (Text.unpack name) (map ((FixArg, ) . fullParse ctx) arguments) (parseCode name ctx code)
outputblockEnv :: (Text, Environment)
outputblockEnv = ("outputblock", Environment f)
where
f :: Context -> [Token] -> ParseResult
f ctx toks = ParseResult [env] mempty rest
where
Just (content, rest) = stripInfix [Token "\\end", Token "{", Token "outputblock", Token "}"] toks
env = TeXEnv "outputblock" [] (parseOutputBlock ctx content)
parseOutputBlock :: Context -> [Token] -> LaTeX
parseOutputBlock c = concatRaws . go
where
go :: [Token] -> LaTeX
go [] = []
go (Token "@" : rest) = fullParse c cmd ++ go rest'
where (cmd, Token "@" : rest') = break (== Token "@") rest
go s = TeXRaw (Text.pack $ concatMap tokenChars code) : go rest
where (code, rest) = break (== Token "@") s
storeEnv :: String -> Signature -> (Text, Environment)
storeEnv name sig = (Text.pack name, Environment act)
where
act :: Context -> [Token] -> ParseResult
act ctx toks = ParseResult [env] mempty afterend
where
(arguments, rest) = parseArgs sig toks
ParseResult body _ afterend = parse ctx rest
env = TeXEnv name (map ((FixArg, ) . fullParse ctx) arguments) (concatRaws body)
-- todo: not all fixargs
defaultCmds :: [(Text, Command)]
defaultCmds =
[ ("newcommand", newCommandCommand)
, ("renewcommand", newCommandCommand)
, ("DeclareMathOperator", declareMathOperator)
, ("newcolumntype", newColumnTypeCommand)
, ("newenvironment", newEnvCommand)
, ("lstnewenvironment", newEnvCommand)
, ("raisebox", raiseBoxCommand)
, ("let", Command $ \ctx _ws rest -> parse ctx (drop 2 rest))
, beginCommand
, endCommand
, oldDefCommand
]
oldDefCommand :: (Text, Command)
oldDefCommand = ("def", Command pars)
where
pars ctx@Context{..} _ws rest
| (Token ('\\' : name) : rest') <- rest
, Just (body, rest'') <- balanced ('{', '}') rest' =
let
m = Macros (Map.fromList [defCmd (Text.pack name) (Signature 0 Nothing) body]) mempty mempty
ParseResult p mm r = parse ctx{macros=macros++m} rest''
in
ParseResult p (m ++ mm) r
| otherwise = parse ctx $ snd $ fromJust $ balanced ('{', '}') $ dropWhile (/= Token "{") rest
endCommand :: (Text, Command)
endCommand = ("end", Command $ \c _ws rest ->
let Just (_, rest') = parseFixArg c rest in ParseResult mempty mempty rest')
beginCommand :: (Text, Command)
beginCommand = ("begin", normalCmd $ Command pars)
where
pars c@Context{..} _ws rest
| Just (Environment f) <- Map.lookup (envname) (environments macros) = f c rest'
| otherwise = error $ "undefined env: " ++ Text.unpack envname
where
Just (arg, rest') = parseFixArg c rest
[TeXRaw envname] = concatRaws arg
raiseBoxCommand :: Command
raiseBoxCommand = normalCmd $ Command $ \c@Context{..} _ws rest ->
let
Just (a0, rest') = balanced ('{', '}') rest
(a1, rest'') = case parseOptArg rest' of
Nothing -> (Nothing, rest')
Just (x, y) -> (Just x, y)
Just (a2, rest''') = balanced ('{', '}') rest''
args = [(FixArg, fullParse c a0)]
++ case a1 of
Nothing -> []
Just x -> [(OptArg, fullParse c x)]
++ [(FixArg, fullParse c a2)]
in
ParseResult [TeXComm "raisebox" "" args] mempty rest'''
newCommandCommand :: Command
newCommandCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token ('\\' : name) : Token "}" : rest) ->
let
(sig, rest') = parseSignature rest
Just (body, rest'') = balanced ('{', '}') rest'
newMacros = newCommand True (defCmd (Text.pack name) sig body) mempty
in
ParseResult [] newMacros rest''
declareMathOperator :: Command
declareMathOperator = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token ('\\' : name) : Token "}" : rest) ->
let
Just (body, rest') = balanced ('{', '}') rest
newBody = [Token "\\operatorname", Token "{"] ++ body ++ [Token "}"]
newMacros = newCommand True (defCmd (Text.pack name) (Signature 0 Nothing) newBody) mempty
in
ParseResult [] newMacros rest'
newColumnTypeCommand :: Command
newColumnTypeCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token _ : Token "}" : rest) ->
let
(_, rest') = parseSignature rest
Just (_, rest'') = balanced ('{', '}') rest'
in
ParseResult [] mempty rest''
defaultMacros :: Macros
defaultMacros = Macros (Map.fromList defaultCmds) (Map.fromList defaultEnvs) mempty
defaultContext :: Context
defaultContext = Context
{ commentsEnabled = True
, parsingOptArg = False
, macros = defaultMacros }
rmLine :: [Token] -> [Token]
rmLine s = case dropWhile (/= Token "\n") s of
Token "\n" : x -> x
x -> x
parseOptArg :: [Token] -> Maybe ([Token], [Token])
parseOptArg = balanced ('[', ']')
parseOptArgs :: [Token] -> ([[Token]], [Token])
parseOptArgs s
| Just (r, s') <- parseOptArg s = first (r:) (parseOptArgs s')
| otherwise = ([], s)
parseFixArg :: Context -> [Token] -> Maybe (LaTeX, [Token])
parseFixArg ctx (Token [c] : more) | isSpace c = parseFixArg ctx more
parseFixArg ctx (Token "{" : more) =
let ParseResult t _macros s = parse ctx more in Just (t, s)
parseFixArg _ _ = Nothing
parseSignature :: [Token] -> (Signature, [Token])
parseSignature t = case optArgs of
[] -> (Signature 0 Nothing, t')
[[Token a]] -> (Signature (read a) Nothing, t')
[[Token a], deflt] -> (Signature (read a) (Just deflt), t')
_ -> error "unrecognized signature"
where (optArgs, t') = parseOptArgs t
balanced :: (Char, Char) -> [Token] -> Maybe ([Token], [Token])
balanced (open, close) (dropWhile (all isSpace . tokenChars) -> (Token [o] : s))
| o == open = Just $ go 0 s
where
go :: Int -> [Token] -> ([Token], [Token])
go 0 [] = ([], [])
go 0 (Token [c] : x) | c == close = ([], x)
go n (Token "}" : x) = first (Token "}" :) (go (n-1) x)
go n (Token "{" : x) = first (Token "{" :) (go (n+1) x)
go n (x:y) = first (x :) (go n y)
go n x = error $ "\n\nbalanced: " ++ show (n, x)
balanced oc (dropWhile (all isSpace. tokenChars) -> (Token "%" : x)) = balanced oc (dropWhile (/= Token "\n") x)
balanced _ _ = Nothing
balanced_body :: Context -> String -> [Token] -> ([Token], [Token])
balanced_body ctx end = go 0
where
go :: Int -> [Token] -> ([Token], [Token])
go 0 [] = ([], [])
go 0 (Token "\\end" : Token "{" : e : Token "}" : x) | fullParse ctx [e] == [TeXRaw $ Text.pack end] = ([], x)
go n (Token "}" : x) = first (Token "}" :) (go (n-1) x)
go n (Token "{" : x) = first (Token "{" :) (go (n+1) x)
go n (x:y) = first (x :) (go n y)
go n s = error $ "\n\nbalanced: " ++ show (n, s)
parseArgs :: Signature -> [Token] -> ([[Token]], [Token])
parseArgs Signature{..} s = case defaultArg of
Nothing -> n_balanced ('{', '}') nrFixArgs s
Just dfl -> case parseOptArg s of
Nothing ->
first (dfl :) (n_balanced ('{', '}') (nrFixArgs - 1) s)
Just (optArg, s') ->
first (optArg :) (n_balanced ('{', '}') (nrFixArgs - 1) s')
parseArgs2 :: Context -> Signature -> [Token] -> ([TeXArg], [Token])
parseArgs2 c Signature{..} s
| defaultArg == Nothing = first (map fa) (n_balanced ('{', '}') nrFixArgs s)
| Just (optArg, s') <- parseOptArg s =
first (\a -> (OptArg, fullParse c optArg) : map fa a)
(n_balanced ('{', '}') (nrFixArgs - 1) s')
| otherwise = first (map fa) (n_balanced ('{', '}') (nrFixArgs - 1) s)
where
fa = (FixArg, ) . fullParse c
-- todo: clean up parseArgs/parseArgs2 above
n_balanced :: (Char, Char) -> Int -> [Token] -> ([[Token]], [Token])
n_balanced oc n s
| n > 0, Just (x, s') <- balanced oc s = first (x:) $ n_balanced oc (n-1) s'
| otherwise = ([], s)
newEnvCommand :: Command
newEnvCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : (span (/= Token "}") -> (name, Token "}" : rest))) ->
let
nameStr = concatMap tokenChars name
(sig, rest') = parseSignature rest
Just (begin, rest'') = balanced ('{', '}') rest'
Just (end, rest''') = balanced ('{', '}') rest''
pa :: Context -> [Token] -> ParseResult
pa c' toks = ParseResult replaced mempty toks''
where
replaced = fullParse c' $ replArgs args begin ++ body ++ end
(args, toks') = parseArgs sig toks
(body, toks'') = balanced_body c' nameStr toks'
m = Macros mempty (Map.singleton (Text.pack nameStr) (Environment pa)) mempty
in
ParseResult [] m rest'''
parseString :: Context -> String -> (LaTeX, Macros, [Token])
parseString c s = (concatRaws x, y, z)
where ParseResult x y z = parse c (tokenize s)
literal :: String
literal = " @_{}&,%-#/~>!$;:^"
breakComment :: [Token] -> ([Token], [Token])
breakComment x@(Token "\n" : _) = ([], x)
breakComment (Token ('\\' : cmd) : xs)
| (c, r@(_:_)) <- span (/= '\n') cmd = ([Token ('\\':c)], Token r : xs)
breakComment (Token "%" : Token "\n" : x) = first ((Token "%" :) . (Token "\n" :)) (breakComment x)
breakComment (x : xs) = first (x:) (breakComment xs)
breakComment [] = ([], [])
data LiteralKind = StringLiteral | CharLiteral
parseCode :: Text -> Context -> [Token] -> LaTeX
parseCode envname c = concatRaws . go Nothing
where
go :: Maybe LiteralKind -> [Token] -> LaTeX
go _ [] = []
go b (Token "@" : rest) = fullParse c cmd ++ go b rest'
where (cmd, Token "@" : rest') = break (== Token "@") rest
go (Just StringLiteral) (Token "\"" : rest) = TeXRaw "\"" : go Nothing rest
go (Just CharLiteral) (Token "'" : rest) = TeXRaw "'" : go Nothing rest
go Nothing (Token "\"" : rest) = TeXRaw "\"" : (go (Just StringLiteral) lit ++ go Nothing rest')
where (lit, rest') = stringLiteral rest
go Nothing (Token "'" : rest)
| envname == "codeblockdigitsep" = TeXRaw "'" : go Nothing rest
| otherwise = TeXRaw "'" : (go (Just CharLiteral) lit ++ go Nothing rest')
where (lit, rest') = charLiteral rest
go Nothing (Token "/" : Token "/" : (breakComment -> (comment, rest')))
= TeXComm "comment" "" [(FixArg, TeXRaw "//" : noncode comment)] : go Nothing rest'
go Nothing (Token "/" : Token "*" : rest)
| Just (comment, rest') <- stripInfix [Token "*", Token "/"] rest
= TeXComm "comment" "" [(FixArg, [TeXRaw "/*"] ++ noncode comment ++ [TeXRaw "*/"])] : go Nothing rest'
go b (Token "/" : rest) = TeXRaw "/" : go b rest
go b s = TeXRaw (Text.pack $ concatMap tokenChars code) : go b rest
where
breakToks = [Token "@", Token "/"] ++
case b of
Nothing -> [Token "\"", Token "'"]
Just StringLiteral -> [Token "\""]
Just CharLiteral -> [Token "'"]
(code, rest) = break (`elem` breakToks) s
noncode :: [Token] -> LaTeX
noncode toks =
fullParse c nc ++ case more of
[] -> []
Token "@" : (break (== Token "@") -> (code, _ : rest)) ->
TeXComm "tcode" "" [(FixArg, fullParse c code)] : noncode rest
_ -> error "no"
where (nc, more) = span (/= Token "@") toks
stringLiteral :: [Token] -> ([Token], [Token])
stringLiteral (Token "\\" : Token "\"" : x) = first (Token "\\\"" :) (stringLiteral x)
stringLiteral (Token "\\" : Token "\\" : x) = first (Token "\\\\" :) (stringLiteral x)
stringLiteral (Token "\"" : x) = ([Token "\""], x)
stringLiteral (y : x) = first (y :) (stringLiteral x)
stringLiteral [] = ([], [])
charLiteral :: [Token] -> ([Token], [Token])
charLiteral (Token "\\" : Token "'" : x) = first (Token "\\'" :) (charLiteral x)
charLiteral (Token "\\" : Token "\\" : x) = first (Token "\\\\" :) (charLiteral x)
charLiteral (Token "'" : x) = ([Token "'"], x)
charLiteral (y : x) = first (y :) (charLiteral x)
charLiteral [] = ([], [])
isCommandChar :: Char -> Bool
isCommandChar c = isAlpha c || c == '*'
tokenize :: String -> [Token]
tokenize "" = []
tokenize ('\\':'v':'e':'r':'b': delim : (break (== delim) -> (arg, _ : rest))) =
Token ("\\verb:" ++ arg) : tokenize rest
tokenize ('\\' : (span isCommandChar -> (cmd@(_:_), (span isSpace -> (ws, rest)))))
= Token ('\\' : cmd ++ ws) : tokenize rest
tokenize ('\\' : c : rest) = Token ['\\', c] : tokenize rest
tokenize x@((isAlpha -> True): _) = let (a, b) = span isAlphaNum x in Token a : tokenize b
tokenize (x:y) = Token [x] : tokenize y
-- \verb is handled in tokenize so that the 'balanced' function doesn't
-- get confused by \verb|{|
-- Notice how the whitespace following a command like \bla is included in the Token
-- This lets the parser include it in the TeXComm/TeXCommS's command field, so that
-- the whitespace is not lost when serializing back to text when sending to MathJax.
replArgs :: [[Token]] -> [Token] -> [Token]
replArgs args = go
where
go [] = []
go (Token "%" : (span (/= Token "\n") -> (x, y))) = Token "%" : x ++ go y
go (Token "#" : Token "#" : y) = Token "#" : go y
go (Token "#" : Token [getDigit -> Just i] : y)
| length args >= i = (args !! (i-1)) ++ go y
| otherwise = error $ "need more args than " ++ show args ++ " to replace in " ++ show (concatMap tokenChars y)
go (x:y) = x : go y
nullCmd :: Text -> Signature -> (Text, Command)
nullCmd name sig = defCmd name sig []
storeCmd :: String -> Signature -> (Text, Command)
storeCmd name sig = (Text.pack name, normalCmd $ Command pars)
where
pars context ws tokens = ParseResult [TeXComm name ws args] mempty rest
where (args, rest) = parseArgs2 context sig tokens
defCmd :: Text -> Signature -> [Token] -> (Text, Command)
defCmd name sig body = (name, normalCmd $ Command pars)
where
pars context _ws tokens = ParseResult (fullParse context $ replArgs args body) mempty rest
where (args, rest) = parseArgs sig tokens
normalCmd :: Command -> Command
normalCmd (Command f) = Command $ \ctx ws toks ->
let ParseResult content newMacros rest = f ctx ws toks
in addMacros False newMacros (prependContent content (parse ctx{macros=macros ctx ++ newMacros} rest))
consumeMath :: [Token] -> ([Token], [Token])
consumeMath = f 0
where
f :: Integer -> [Token] -> ([Token], [Token])
f 0 (Token "$" : rest) = ([], rest)
f depth (Token "{" : rest) = first (Token "{" :) (f (depth + 1) rest)
f depth (Token "}" : rest) = first (Token "}" :) (f (depth - 1) rest)
f depth (tok : rest) = first (tok :) (f depth rest)
f _ [] = error "unexpected end of math"
parse :: Context -> [Token] -> ParseResult
parse c (Token "$" : (consumeMath -> (math, rest))) =
prependContent [TeXMath Dollar (fullParse c math)] (parse c rest)
parse c (Token "\\[" : (span (/= Token "\\]") -> (math, Token "\\]" : rest))) =
prependContent [TeXMath Square (fullParse c math)] (parse c rest)
parse c (Token "]" : x)
| parsingOptArg c = ParseResult mempty mempty x
parse _ (Token "}" : x) = ParseResult mempty mempty x
parse c (Token "{" : x) = prependContent [TeXBraces y] $ parse c rest
where ParseResult y _ rest = parse c x
parse c (Token "%" : x)
| commentsEnabled c = parse c (rmLine x)
parse _ [] = ParseResult mempty mempty mempty
parse c (Token "\\\\" : x) = prependContent [TeXLineBreak] (parse c x)
parse c (Token ['\\', ch] : x)
| ch `elem` literal = prependContent [TeXComm [ch] "" []] (parse c x)
parse c (Token ('\\':'v':'e':'r':'b':':':arg) : rest) =
prependContent [TeXComm "verb" "" [(FixArg, [TeXRaw $ Text.pack arg])]] (parse c rest)
parse c (Token "\\rSec" : Token [getDigit -> Just i] : s)
= prependContent [TeXComm "rSec" "" args] $ parse c s''
where
Just (a, s') = parseOptArg s
Just (b, s'') = parseFixArg c s'
args = [(FixArg, [TeXRaw $ Text.pack $ show i]), (FixArg, fullParse c a), (FixArg, b)]
parse c@Context{..} (Token ('\\' : (span (not . isSpace) -> (nos, w))) : rest)
| Just f <- Map.lookup (Text.pack cmd) (commands macros) = runCommand f c ws rest
| otherwise = error $
"\n\nundefined command: " ++ show cmd ++ " at: " ++ take 50 (concatMap tokenChars rest)
where (cmd, ws) | nos == "", (x : xx) <- w = ([x], xx)
| otherwise = (nos, w)
parse ctx (Token c : rest)
| all isAlphaNum c
= prependContent [TeXRaw $ Text.pack c] $ parse ctx rest
parse ctx (Token [c] : rest)
= prependContent [TeXRaw $ Text.pack [c]] $ parse ctx rest
parse _ s = error $ "parse: unexpected: " ++ take 100 (concatMap tokenChars s)
fullParse :: Context -> [Token] -> LaTeX
fullParse c t
| all isSpace (concatMap tokenChars remainder) = concatRaws content
| otherwise = error $ "could not fully parse: "
++ concatMap tokenChars t
++ "\n\nremainder: "
++ concatMap tokenChars remainder
where ParseResult{..} = parse c t
================================================
FILE: Load14882.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE
OverloadedStrings,
ScopedTypeVariables,
RecordWildCards,
ViewPatterns,
LambdaCase,
TupleSections,
NamedFieldPuns,
FlexibleInstances,
FlexibleContexts,
RankNTypes,
MultiParamTypeClasses,
FunctionalDependencies,
UndecidableInstances,
RecursiveDo #-}
module Load14882 (parseIndex, load14882) where
import qualified LaTeXParser as Parser
import qualified Data.IntMap as IntMap
import qualified Data.List as List
import Data.IntMap (IntMap)
import LaTeXBase
( LaTeXUnit(..), TeXArg, ArgKind(..), lookForCommand
, mapTeX, mapTeXRaw, concatRaws, texStripInfix, allUnits)
import Data.Text (Text, replace, isPrefixOf)
import Data.Text.IO (readFile)
import Text.Regex (mkRegex, matchRegexAll)
import qualified Data.Text as Text
import Control.Monad (forM, when)
import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
import Data.Char (isAlpha)
import Control.Arrow (first)
import Data.Map (Map)
import Data.Maybe (isJust, fromJust)
import qualified Data.Map as Map
import Data.List (unfoldr, (\\), takeWhile)
import System.Process (readProcess)
import System.IO.Unsafe (unsafePerformIO)
import Control.Monad.Fix (MonadFix)
import Control.Monad.State (MonadState, evalState, get, put, liftM2, modify)
import Util ((.), (++), mapLast, stripInfix, measure, textStripInfix)
import RawDocument
import Sentences (splitIntoSentences, isActualSentence, breakSentence)
import Document
getCommitUrl :: IO Text
getCommitUrl = do
url <- gitGetRemoteUrl
commit <- gitGetCommitRef
return $
( Text.replace "git@github.com:" "http://github.com/"
$ Text.replace ".git" "/commit/" url)
++ commit
gitGetRemoteUrl :: IO Text
gitGetRemoteUrl = do
x <- readProcess "git" ["ls-remote", "--get-url"] ""
return $ Text.strip $ Text.pack x
gitGetCommitRef :: IO Text
gitGetCommitRef = do
x <- readProcess "git" ["rev-parse", "HEAD"] ""
return $ Text.strip $ Text.pack $ x
-- In the LaTeX sources, \definition is often preceded by corresponding \indexdefns.
-- Since we treat definitions like sections (and generate pages for them), we need
-- to move the \indexdefns inside (after) the \definition, so that the index entries
-- don't link to the page for the preceding section.
moveIndexEntriesIntoDefs :: [Text] -> [Text]
moveIndexEntriesIntoDefs [] = []
moveIndexEntriesIntoDefs (x:xs)
| "\\indexdefn{" `isPrefixOf` x = case moveIndexEntriesIntoDefs xs of
[] -> [x]
y:ys
| "\\definition{" `isPrefixOf` y -> y : x : ys
| otherwise -> x : y : ys
| otherwise = x : moveIndexEntriesIntoDefs xs
moveIndexEntriesIntoSecs :: [Text] -> [Text]
moveIndexEntriesIntoSecs = go []
where
go x [] = x
go x (h:t)
| "\\indextext{" `isPrefixOf` h = go (h : x) t
| "\\rSec" `isPrefixOf` h = h : reverse x ++ go [] t
| otherwise = reverse x ++ [h] ++ go [] t
{- The document has a ton of:
\indexlibraryglobal{bla}%
\begin{itemdecl}
void bla();
\end{itemdecl}
To highlight the whole itemdecl, indexItemDecls converts this to:
\begin{indexeditemdecl}{
\indexlibraryglobal{bla}%
}
void bla();
\end{indexeditemdecl}
-}
indexCodeEnvs :: [Text] -> [Text] -> [Text]
indexCodeEnvs envs = go []
where
go collected [] = collected
go collected (x:xs)
| "\\index" `isPrefixOf` x = go (collected ++ [x]) xs
| [e] <- [e | e <- envs, ("\\begin{" ++ e ++ "}") `isPrefixOf` x] =
let (code, _ : rest) = span (not . (("\\end{" ++ e ++ "}") `isPrefixOf`)) xs
in (if null collected then
["\\begin{" ++ e ++ "}"]
++ code
++ ["\\end{" ++ e ++ "}"]
else
["\\begin{indexed" ++ e ++ "}{"] ++ collected ++ ["}"]
++ code
++ ["\\end{indexed" ++ e ++ "}"])
++ go [] rest
| otherwise = collected ++ (x : go [] xs)
data Numbers = Numbers
{ tableNr, figureNr, footnoteRefNr, footnoteNr, itemDeclNr
, nextIndexEntryNr, noteNr, exampleNr, nextSentenceNr, formulaNr :: Int }
class AssignNumbers a b | a -> b where
assignNumbers :: forall m . (Functor m, MonadFix m, MonadState Numbers m) => Section -> a -> m b
instance AssignNumbers TeXArg TeXArg where
assignNumbers s (y, x) = (y, ) . assignNumbers s x
instance AssignNumbers LaTeXUnit LaTeXUnit where
assignNumbers s (TeXEnv "itemdecl" [] x) = do
n <- get
put n{itemDeclNr = itemDeclNr n + 1}
TeXEnv "itemdecl" [(FixArg, [TeXRaw $ Text.pack $ show $ itemDeclNr n])] . assignNumbers s x
assignNumbers s (TeXEnv "indexeditemdecl" indices x) = do
n <- get
put n{itemDeclNr = itemDeclNr n + 1}
liftM2 (TeXEnv "indexeditemdecl") (assignNumbers s indices) (assignNumbers s x)
assignNumbers s (TeXEnv x y z) = liftM2 (TeXEnv x) (assignNumbers s y) (assignNumbers s z)
assignNumbers _ (TeXComm "index" ws args) = do
n <- get
put n{nextIndexEntryNr = nextIndexEntryNr n + 1}
return $ TeXComm "index" ws $ (FixArg, [TeXRaw $ Text.pack $ show $ nextIndexEntryNr n]) : args
assignNumbers _ (TeXComm "defnx" ws args) = do
n <- get
put n{nextIndexEntryNr = nextIndexEntryNr n + 1}
return $ TeXComm "defnx" ws $ (FixArg, [TeXRaw $ Text.pack $ show $ nextIndexEntryNr n]) : args
assignNumbers _ (TeXComm "footnoteref" ws []) = do
Numbers{..} <- get
put Numbers{footnoteRefNr = footnoteRefNr+1, ..}
return $ TeXComm "footnoteref" ws [(FixArg, [TeXRaw $ Text.pack $ show footnoteRefNr])]
assignNumbers s (TeXComm x ws args) = TeXComm x ws . assignNumbers s args
assignNumbers _ x = return x
instance AssignNumbers a b => AssignNumbers (Cell a) (Cell b) where
assignNumbers s x@Cell{..} = do
n <- get
put n{nextSentenceNr=1}
content' <- assignNumbers s content
modify $ \m -> m{nextSentenceNr = nextSentenceNr n}
return x{content=content'}
instance AssignNumbers a b => AssignNumbers (Row a) (Row b) where
assignNumbers s x@Row{..} = do
cells' <- assignNumbers s cells
return x{cells=cells'}
instance AssignNumbers RawTexPara TeXPara where
assignNumbers s (RawTexPara (splitIntoSentences -> x)) = TeXPara . f x
where
f [] = return []
f (h:t) = do
h' <- assignNumbers s h
let actual = isActualSentence h
n <- get
put n{nextSentenceNr = nextSentenceNr n + (if actual then 1 else 0)}
(Sentence (if actual then Just (nextSentenceNr n) else Nothing) h' :) . f t
assignNonInlineItem :: (MonadState Numbers m, MonadFix m) => Section -> RawItem -> m Item
assignNonInlineItem s (RawItem label content) = do
n <- get
put n{nextSentenceNr = 1}
Item Nothing (if null label then Nothing else Just label) [] . assignNumbers s content
breakFirstSentence :: [TeXPara] -> (Sentence, [TeXPara])
breakFirstSentence (TeXPara [x] : z) = (x, z)
breakFirstSentence (TeXPara (x:y) : z) = (x, TeXPara y : z)
breakFirstSentence x = error $ "breakFirstSentence: " ++ show x
assignInlineItem :: (MonadState Numbers m, MonadFix m) => Section -> RawItem -> m Item
assignInlineItem s (RawItem label content) = do
n <- get
put n{nextSentenceNr = 1}
content' <- assignNumbers s content
let (Sentence _ x, y) = breakFirstSentence content'
return $ Item Nothing (if null label then Nothing else Just label) x y
endsWithFullStop :: [RawElement] -> Bool
endsWithFullStop = isJust . breakSentence
instance AssignNumbers RawElement Element where
assignNumbers section RawFigure{..} = do
Numbers{..} <- get
put Numbers{figureNr = figureNr+1, ..}
return $ FigureElement Figure
{ figureNumber = figureNr
, figureName = rawFigureName
, figureAbbr = "fig:" ++ rawFigureAbbr
, figureSvg = rawFigureSvg
, figureSection = section }
assignNumbers section RawFormula{..} = do
Numbers{..} <- get
put Numbers{formulaNr = formulaNr + 1, ..}
return $ FormulaElement Formula
{ formulaNumber = formulaNr
, formulaAbbr = "eq:" ++ rawFormulaAbbr
, formulaContent = rawFormulaContent
, formulaSection = section }
assignNumbers s RawTable{..} = do
Numbers{..} <- get
put Numbers{tableNr = tableNr+1, ..}
tableCaption <- assignNumbers s rawTableCaption
tableBody <- assignNumbers s rawTableBody
return $ TableElement Table
{ tableNumber = tableNr
, columnSpec = rawColumnSpec
, tableAbbr = rawTableAbbr
, tableCaption = tableCaption
, tableSection = s
, .. }
assignNumbers s (RawEnumerated x p) = do
origNum <- nextSentenceNr . get
let c = length (filter (any (endsWithFullStop . rawTexParaElems) . rawItemContent) p)
r <- mapM (if c > 1 then assignNonInlineItem s else assignInlineItem s) p
modify $ \y -> y{nextSentenceNr = origNum}
return $ Enumerated x r
assignNumbers s (RawLatexElement x) = LatexElement . assignNumbers s x
assignNumbers s (RawBnf x y) = Bnf x . assignNumbers s y
assignNumbers _ (RawTabbing x) = return $ Tabbing x
assignNumbers s (RawCodeblock x) = Codeblock . assignNumbers s x
assignNumbers s (RawItemdescr x) = Itemdescr . assignNumbers s x
assignNumbers s (RawNote label x) = do
Numbers{..} <- get
put Numbers{noteNr = noteNr+1, ..}
x' <- assignNumbers s x
return $ NoteElement $ Note noteNr label x'
assignNumbers s (RawExample x) = do
Numbers{..} <- get
put Numbers{exampleNr = exampleNr+1, ..}
x' <- assignNumbers s x
return $ ExampleElement $ Example exampleNr x'
instance AssignNumbers RawFootnote Footnote where
assignNumbers s (RawFootnote t) = do
Numbers{..} <- get
put Numbers{footnoteNr = footnoteNr+1, nextSentenceNr = 1, ..}
t' <- assignNumbers s t
return $ Footnote{footnoteNumber=footnoteNr,footnoteContent=t'}
lsectionLevel :: LinearSection -> Int
lsectionLevel (lsectionKind -> NormalSection l) = l
lsectionLevel (lsectionKind -> DefinitionSection l) = l
lsectionLevel _ = 0
paraNumbers :: [Bool] -> [Maybe Int]
paraNumbers = f 1
where
f _ [] = []
f i (True : x) = Just i : f (i + 1) x
f i (False : x) = Nothing : f i x
treeizeChapters :: forall m . (Functor m, MonadFix m, MonadState Numbers m) =>
Bool -> Int -> [LinearSection] -> m [Section]
treeizeChapters _ _ [] = return []
treeizeChapters annexes secNumber (LinearSection{..} : more) = mdo
nums <- get
put nums{formulaNr = 1}
sectionFootnotes <- assignNumbers newSec lsectionFootnotes
let
ie = rawIndexEntriesForSec newSec
newSec = Section{sectionKind=lsectionKind, secIndexEntries=ie, secIndexEntriesByPath=reverseIndexEntryMap ie, ..}
let pn = paraNumbers $ paraNumbered . lsectionParagraphs
paragraphs <- forM (zip pn lsectionParagraphs) $ assignNumbers newSec
subsections <- treeizeSections 1 chapter [newSec] lsubsections
(newSec :) . treeizeChapters annexes' (sectionNumber + 1) more'
where
sectionNumber = if annexes' /= annexes then 0 else secNumber
annexes' = chapter /= NormalChapter
parents = []
chapter
| lsectionKind == InformativeAnnexSection = InformativeAnnex
| lsectionKind == NormativeAnnexSection = NormativeAnnex
| otherwise = NormalChapter
abbreviation = lsectionAbbreviation
sectionName = lsectionName
(lsubsections, more') = span ((> 0) . lsectionLevel) more
rawIndexEntriesForSec :: Section -> IntMap IndexEntry
rawIndexEntriesForSec s = IntMap.fromList
[(n, e) | e@IndexEntry{indexEntryNr=Just n} <- sectionIndexEntries s]
reverseIndexEntryMap :: IntMap IndexEntry -> Map IndexPath [(Int, IndexEntry)]
reverseIndexEntryMap m = Map.fromListWith (++) [(indexPath x, [(i, x)]) | (i, x) <- IntMap.assocs m]
assignItemNumbers :: Paragraph -> Paragraph
assignItemNumbers p
| Just n <- paraNumber p = p{ paraElems = fst $ goParas [n, 1] $ paraElems p }
| otherwise = p
where
goParas :: [Int] -> [TeXPara] -> ([TeXPara], [Int])
goParas nn [] = ([], nn)
goParas nn (TeXPara e : pp) = first (TeXPara e' :) (goParas nn' pp)
where (e', nn') = goSentences nn e
goSentences :: [Int] -> [Sentence] -> ([Sentence], [Int])
goSentences nn [] = ([], nn)
goSentences nn (Sentence m e : ss) = first (Sentence m e' :) (goSentences nn' ss)
where (e', nn') = goElems nn e
goElems :: [Int] -> [Element] -> ([Element], [Int])
goElems nn [] = ([], nn)
goElems nn (e:ee) = first (e' :) (goElems nn' ee)
where (e', nn') = goElem nn e
goElem :: [Int] -> Element -> (Element, [Int])
goElem nn Enumerated{..} = (Enumerated enumCmd items', mapLast (+ length enumItems) nn)
where
items' = map (\(i, Item{..}) ->
Item
(Just (map show $ mapLast (+i) nn))
itemLabel
(fst $ goElems (mapLast (+i) nn ++ [1]) itemInlineContent)
(fst $ goParas (mapLast (+i) nn ++ [1]) itemBlockContent)
) (zip [0..] enumItems)
goElem nn (NoteElement (Note nr label paras)) = (NoteElement (Note nr label paras'), nn')
where (paras', nn') = goParas nn paras
goElem nn (ExampleElement (Example nr paras)) = (ExampleElement (Example nr paras'), nn')
where (paras', nn') = goParas nn paras
goElem nn x = (x, nn)
instance AssignNumbers (Maybe Int, RawParagraph) Paragraph where
assignNumbers paraSection (paraNumber, RawParagraph{..}) = do
nums <- get
put nums{nextSentenceNr=if paraNumbered then 1 else nextSentenceNr nums}
paraElems <- assignNumbers paraSection rawParaElems
when paraNumbered $ modify $ \newnums -> newnums{nextSentenceNr = nextSentenceNr nums}
return $ assignItemNumbers Paragraph
{ paraInItemdescr = rawParaInItemdescr
, paraSourceLoc = rawParaSourceLoc
, allParaElems = allElements paraElems
, .. }
treeizeSections :: forall m . (Functor m, MonadFix m, MonadState Numbers m) =>
Int -> Chapter -> [Section] -> [LinearSection] -> m [Section]
treeizeSections _ _ _ [] = return []
treeizeSections sectionNumber chapter parents
(s@LinearSection{..} : (span ((> lsectionLevel s) . lsectionLevel) -> (lsubsections, more'))) = mdo
let
ie = rawIndexEntriesForSec newSec
newSec = Section
{ sectionKind = lsectionKind
, secIndexEntries = ie
, secIndexEntriesByPath = reverseIndexEntryMap ie
, sectionName = lsectionName
, abbreviation = lsectionAbbreviation
, .. }
let pn = paraNumbers $ paraNumbered . lsectionParagraphs
nums <- get
put nums{noteNr=1, exampleNr=1, itemDeclNr=1}
sectionFootnotes <- assignNumbers newSec lsectionFootnotes
modify $ \n -> n{nextSentenceNr=1}
paragraphs <- forM (zip pn lsectionParagraphs) $ assignNumbers newSec
subsections <- treeizeSections 1 chapter (newSec : parents) lsubsections
(newSec :) . treeizeSections (sectionNumber + 1) chapter parents more'
instance AssignNumbers a b => AssignNumbers [a] [b] where
assignNumbers s = mapM (assignNumbers s)
resolveGrammarterms :: Parser.Macros -> [Text] -> LinearSection -> LinearSection
resolveGrammarterms macros links LinearSection{..} =
LinearSection{lsectionParagraphs = map resolve lsectionParagraphs, ..}
where
resolveTexPara :: RawTexPara -> RawTexPara
resolveTexPara RawTexPara{..} = RawTexPara{rawTexParaElems = map resolveRawElem rawTexParaElems, ..}
resolveRawElem :: RawElement -> RawElement
resolveRawElem (RawBnf s tex) = RawBnf s (bnfGrammarterms macros links tex)
resolveRawElem (RawEnumerated s items) = RawEnumerated s (map resolveItem items)
resolveRawElem y = y
resolveItem :: RawItem -> RawItem
resolveItem (RawItem label content) = RawItem label (map resolveTexPara content)
resolve :: RawParagraph -> RawParagraph
resolve RawParagraph{..} = RawParagraph{rawParaElems = map resolveTexPara rawParaElems, ..}
bnfGrammarterms :: Parser.Macros -> [Text] -> LaTeX -> LaTeX
bnfGrammarterms macros links = mapTeX go . mapTeX wordify
where
wordify :: LaTeXUnit -> Maybe LaTeX
wordify (TeXRaw stuff) = Just $ map TeXRaw $ unfoldr f stuff
where
f s | Text.null s = Nothing
f s | isName $ Text.head s = Just $ Text.span isName s
f s = Just $ Text.break isName s
isName c = isAlpha c || c `elem` ['-', '_']
wordify _ = Nothing
go :: LaTeXUnit -> Maybe LaTeX
go d@(TeXComm cmd _ _) | cmd `elem` ["tcode", "index", "textnormal", "indexlink", "hiddenindexlink", "indexedspan", "terminal", "literalterminal", "noncxxterminal"] = Just [d]
go (TeXRaw name)
| name `elem` links = Just $ fst $ RawDocument.doParse macros $ "\\grammarterm{" ++ name ++ "}"
go _ = Nothing
parseIndex :: LaTeX -> (IndexPath, Maybe IndexKind)
parseIndex = go . mapTeXRaw unescapeIndexPath . concatRaws
where
go (texStripInfix "|seealso" -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See True y)
go (texStripInfix "|see " -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See False y)
go (texStripInfix "|see" -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See False y)
go (texStripInfix "|see" -> Just (x, [TeXBraces y, TeXRaw "|idxbfpage"])) = (parseIndexPath x, Just $ See False y)
go (texStripInfix "|(" -> Just (t, _)) = (parseIndexPath t, Just IndexOpen)
go (texStripInfix "|)" -> Just (t, _)) = (parseIndexPath t, Just IndexClose)
go (texStripInfix "|idxbfpage" -> Just (t, _)) = (parseIndexPath t, Just DefinitionIndexEntry)
go t = (parseIndexPath t, Nothing)
unescapeIndexPath :: Text -> LaTeXUnit
unescapeIndexPath = TeXRaw
. replace "\5" "\""
. replace "\2" "!"
. replace "!" "\1"
. replace "\"!" "\2"
. replace "\4" "@"
. replace "@" "\3"
. replace "\"@" "\4"
. replace "\"|" "|"
. replace "\"\"" "\5"
. (!! 10) . iterate (replace " " " ")
. replace "\n" " "
parseIndexPath :: LaTeX -> IndexPath
parseIndexPath (texStripInfix "\1" -> Just (x, y)) = parseIndexPath x ++ parseIndexPath y
parseIndexPath (texStripInfix "\3" -> Just (x, y)) = [IndexComponent x y]
parseIndexPath t = [IndexComponent [] t]
sectionTexParas :: Section -> [TeXPara]
sectionTexParas s = (paragraphs s >>= paraElems) ++ (sectionFootnotes s >>= footnoteContent)
sectionTex :: Section -> LaTeX
sectionTex s = sectionTexParas s >>= texParaTex
sectionIndexEntries :: Section -> [IndexEntry]
sectionIndexEntries s =
[ IndexEntry{indexEntrySection=abbreviation sec, ..}
| sec <- sections s
, [ (FixArg, [TeXRaw (Text.unpack -> read -> Just -> indexEntryNr)])
, (OptArg, [TeXRaw indexCategory]), (FixArg, (parseIndex -> (indexPath, indexEntryKind)))
] <- lookForCommand "index" (sectionTex sec)]
sectionLabels :: Section -> [(Text, Section)]
sectionLabels s =
[ (label, sec) | sec <- sections s
, [ (FixArg, [TeXRaw label]) ] <- lookForCommand "label" (sectionTex sec)]
toIndex :: IndexEntry -> Index
toIndex IndexEntry{..} = Map.singleton indexCategory $ go indexPath
where
go :: [IndexComponent] -> IndexTree
go [c] = Map.singleton c (IndexNode [IndexEntry indexEntrySection indexEntryKind indexPath indexEntryNr indexCategory] Map.empty)
go (c:cs) = Map.singleton c $ IndexNode [] $ go cs
go _ = error "toIndex"
trackPnums :: FilePath -> Text -> Text
-- Replaces \pnum with \pnum{file}{line}
trackPnums file = Text.pack . unlines . map (uncurry f) . zip [1..] . lines . Text.unpack
where
f :: Integer -> String -> String
f lineNr line
| Just (pre, post) <- stripInfix "\\pnum" line
= pre ++ "\\pnum{" ++ file ++ "}{" ++ show lineNr ++ "}" ++ (if null post then "%" else post)
| otherwise = line
getFileList :: IO [FilePath]
getFileList =
(\\ ["front", "back"]) .
map (Text.unpack . Text.dropEnd 1 . Text.drop (Text.length pre)) .
filter (pre `isPrefixOf`) .
Text.lines . readFile "std.tex"
where pre = "\\include{"
grabBnf :: [String] -> [String]
grabBnf [] = []
grabBnf (line : rest)
| "\\begin{bnf}" `List.isPrefixOf` line =
let (x, end : more) = break ("\\end{bnf}" `List.isPrefixOf`) rest
in ["", line] ++ x ++ [end] ++ grabBnf more
| "\\gramSec" `List.isPrefixOf` line = ["", line] ++ grabBnf rest
| otherwise = grabBnf rest
generateStdGramExt :: [FilePath] -> IO Text
generateStdGramExt files =
Text.pack . unlines . grabBnf . lines . Text.unpack .
Text.concat . mapM readFile ((++ ".tex") . files)
importExampleFile :: FilePath -> IO Text
importExampleFile =
(Text.strip .
Text.unlines .
takeWhile (/= "\\end{document}") .
tail .
dropWhile (/= "\\begin{document}") .
Text.lines .) .
readFile
importExamples :: Text -> Text
importExamples x = case matchRegexAll r (Text.unpack x) of
Nothing -> x
Just (before, _match, after, subs) ->
Text.pack before ++
unsafePerformIO (importExampleFile $ "assets/" ++ (subs !! 1) ++ ".tex") ++
importExamples (Text.pack after)
where r = mkRegex "\\\\importexample(\\[[0-9a-zA-Z.-]*\\])?{([a-zA-Z0-9_-]*)}"
parseFiles :: Parser.Macros -> IO ([LinearSection], Parser.Macros)
parseFiles m = do
files <- getFileList
stdGramExt <- generateStdGramExt files
let
go [] macros = return ([], macros)
go (c:cc) macros = do
let p = c ++ ".tex"
stuff <-
importExamples .
replace "multicolfloattable" "floattable" .
replace "\\indeximpldef{" "\\index[impldefindex]{" .
Text.unlines .
indexCodeEnvs ["codeblock", "itemdecl"] .
moveIndexEntriesIntoSecs .
moveIndexEntriesIntoDefs .
Text.lines .
trackPnums p .
replace "\\nodiffref\n\\change" "\n\\pnum\\textbf{Change:}\\space" .
replace "\n\\diffref" "\n\\pnum\\nopnumdiffref" .
-- Done here because (1) the real \nodiffref is defined with \def in a way
-- we don't support yet, and (2) this way a source link is generated for the pnum.
readFile p
let extra = if c /= "grammar" then "" else replace "\\gramSec" "\\rSec1" stdGramExt
let (r, macros') = parseFile macros (stuff ++ extra)
if length r == 0 then undefined else
first (r ++) . go cc (macros ++ macros')
bib <- fst . parseFile m .
fst . fromJust .
textStripInfix "\\clearpage" .
("\\rSec0[bibliography]{Bibliography}\n" ++) .
readFile "back.tex"
first (++ bib) . go files m
load14882 :: Text -> IO Draft
load14882 extraMacros = do
commitUrl <- getCommitUrl
(macros@Parser.Macros{..}, took) <- measure (loadMacros extraMacros)
putStrLn $ "Loaded macros in " ++ show (took * 1000) ++ "ms."
(secs :: [LinearSection], took2) <- measure $ fst . parseFiles macros
putStrLn $ "Parsed LaTeX in " ++ show (took2 * 1000) ++ "ms."
xrefDelta <- loadXrefDelta
(r, took3) <- measure $ if length (show secs) == 0 then undefined else do
-- force eval before we leave the dir
let
grammarNames = [n |
TeXComm "index" _ [
(OptArg, [TeXRaw "grammarindex"]) ,
(FixArg, [TeXRaw _
,TeXComm "textcolor" "" [(FixArg,[TeXRaw "grammar-gray"]),(FixArg,[TeXComm "textsf" _ [(FixArg,[TeXComm "textit" "" [(FixArg,[TeXRaw n])]])]])]
,TeXRaw "|idxbfpage"]
)] <- allUnits secs]
secs' = map (resolveGrammarterms macros grammarNames) secs
chapters = evalState (treeizeChapters False 1 secs') (Numbers 1 1 1 1 0 0 1 1 1 1)
allEntries :: [IndexEntry]
allEntries = chapters >>= sectionIndexEntries
index = mergeIndices $ map toIndex allEntries
indexEntryMap = IntMap.fromList [(n, e) | e@IndexEntry{indexEntryNr=Just n} <- allEntries]
indexEntriesByPath = reverseIndexEntryMap indexEntryMap
labels = Map.fromList $ chapters >>= sectionLabels
abbrMap = makeAbbrMap dr
dr = Draft{..}
return dr
putStrLn $ "Processed in " ++ show (took3 * 1000) ++ "ms."
return r
================================================
FILE: MathJax.hs
================================================
{-# LANGUAGE OverloadedStrings, ViewPatterns #-}
module MathJax (render) where
import Control.Concurrent.MVar (takeMVar, putMVar, newMVar)
import Data.Text (Text)
import qualified Data.Text as Text
import System.IO.Unsafe (unsafePerformIO)
import System.Process (shell, CreateProcess(..), createProcess, StdStream(CreatePipe))
import System.IO (BufferMode(..), hGetLine, hPutStrLn, hSetBuffering)
import Text.Regex (mkRegex, subRegex)
import Prelude hiding ((++))
import Util ((++))
import qualified Data.Map as Map
import Data.Map (Map)
rmTrailingNewline :: Text -> Text
rmTrailingNewline (Text.stripSuffix "\n" -> Just x) = x
rmTrailingNewline x = x
type Renderer = String {- formula -} -> Bool {- inline -} -> Text
data Input = Input { _formula :: String, _inline :: Bool }
deriving (Eq, Ord)
makeRenderer :: IO Renderer
makeRenderer = do
(Just stdinPipe, Just stdoutPipe, _, _) <- createProcess (shell "./mathjax-batch")
{std_in = CreatePipe, std_out = CreatePipe}
hSetBuffering stdinPipe LineBuffering
hSetBuffering stdoutPipe LineBuffering
let
rm r s = subRegex (mkRegex r) s ""
readResult = do
line <- hGetLine stdoutPipe
if line == "DONE"
then return ""
else do
more <- readResult
return $ line ++ "\n" ++ more
mutex <- newMVar (Map.empty :: Map Input Text)
return $ \formula inline -> unsafePerformIO $ do
let input = Input formula inline
cache <- takeMVar mutex
(result, cache') <- case Map.lookup input cache of
Just output -> return (output, cache)
Nothing -> do
hPutStrLn stdinPipe formula
hPutStrLn stdinPipe (if inline then "INLINE" else "NONINLINE")
rawResult <- readResult
let
output
= Text.replace " focusable=\"false\"" ""
$ rmTrailingNewline -- Prevents artifacts in [rand.adapt.ibits]#4
$ Text.pack
$ rm " id=\"(MJXc|MathJax)-[0-9A-Za-z-]+\""
$ rm " style=\"\""
$ rawResult
return (output, Map.insert input output cache)
putMVar mutex cache'
return result
render :: Renderer
render = unsafePerformIO $ makeRenderer
================================================
FILE: Pages.hs
================================================
{-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-}
module Pages (fileContent, pageContent, pagePath, writePage, applyPageStyle, Link(..), outputDir, PageStyle(..)) where
import Prelude hiding ((++), (.), writeFile)
import System.Directory (createDirectoryIfMissing)
import Control.Monad (when)
import qualified Data.Text as Text
import qualified Data.Text.Lazy as LazyText
import qualified Data.Text.Lazy.Builder as TextBuilder
import Util ((++), (.), Text, writeFile)
outputDir :: FilePath
outputDir = "14882/"
data PageStyle = Bare | WithExtension | InSubdir
deriving (Eq, Read)
fileContent :: TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder
fileContent pathHome title extraHead body =
"<!DOCTYPE html>" ++
"<html lang='en'>" ++
"<head>" ++
"<title>" ++ title ++ "</title>" ++
"<meta charset='UTF-8'>" ++
"<link rel='stylesheet' type='text/css' href='" ++ pathHome ++ "14882.css'>" ++
"<link rel='stylesheet' type='text/css' href='https://fonts.googleapis.com/css2?family=Noto+Serif'>" ++
"<link rel='stylesheet' type='text/css' href='https://fonts.googleapis.com/css2?family=Noto+Sans'>" ++
"<link rel='stylesheet' type='text/css' href='https://fonts.googleapis.com/css2?family=Noto+Sans+Mono'>" ++
"<link rel='icon' href='icon-light.png' media='(prefers-color-scheme: light)'>" ++
"<link rel='icon' href='icon-dark.png' media='(prefers-color-scheme: dark)'>" ++
extraHead ++
"</head>" ++
"<body><div class='wrapper'>" ++ body ++ "</div></body>" ++
"</html>"
data Link = TocToSection | SectionToToc | SectionToSection
deriving Show
doLink :: PageStyle -> Link -> Text -> Text
doLink sfs l = LazyText.toStrict . TextBuilder.toLazyText . go . Text.splitOn (Text.pack (show l) ++ "/")
where
go :: [Text] -> TextBuilder.Builder
go (x : (Text.break (`elem` ("'#" :: String)) -> (a, b)) : z) = TextBuilder.fromText x ++ f (TextBuilder.fromText a) ++ go (b : z)
go [x] = TextBuilder.fromText x
go _ = undefined
f :: TextBuilder.Builder -> TextBuilder.Builder
f = case (sfs, l) of
(Bare, SectionToToc) -> ("./#" ++)
(Bare, TocToSection) -> id
(Bare, SectionToSection) -> id
(InSubdir, SectionToToc) -> ("../#" ++)
(InSubdir, TocToSection) -> (++ "/")
(InSubdir, SectionToSection) -> ("../" ++)
(WithExtension, SectionToToc) -> ("index.html#" ++)
(WithExtension, TocToSection) -> (++ ".html")
(WithExtension, SectionToSection) -> (++ ".html")
applyPageStyle :: PageStyle -> Text -> Text
applyPageStyle sfs =
doLink sfs SectionToSection
. doLink sfs SectionToToc
. doLink sfs TocToSection
pagePath :: FilePath -> PageStyle -> String
pagePath n Bare = outputDir ++ n
pagePath n WithExtension = outputDir ++ n ++ ".html"
pagePath n InSubdir = outputDir ++ n ++ "/index.html"
pageContent :: PageStyle -> TextBuilder.Builder -> Text
pageContent sfs content = applyPageStyle sfs $ LazyText.toStrict $ TextBuilder.toLazyText $ content
writePage :: FilePath -> PageStyle -> Text -> IO ()
writePage n sfs content = do
when (sfs == InSubdir) $ createDirectoryIfMissing True (outputDir ++ n)
writeFile (pagePath n sfs) content
================================================
FILE: README
================================================
Introduction
cxxdraft-htmlgen parses the LaTeX sources of the draft,
and generates static HTML pages from them.
Prerequisites
- Git
- The Haskell Platform (https://www.haskell.org/platform/)
- Graphviz
- Node.js
- The 'split' NPM package
- mathjax-node-cli (https://github.com/mathjax/mathjax-node-cli/)
Usage
Do:
git clone https://github.com/Eelis/cxxdraft-htmlgen.git
cd cxxdraft-htmlgen
cabal build
dist/build/cxxdraft-htmlgen/cxxdraft-htmlgen path/to/draft [sectionfilestyle]
Or with stack:
stack build
stack exec cxxdraft-htmlgen path/to/draft [sectionfilestyle]
The sectionfilestyle parameter is one of:
Bare (to generate e.g. intro.execution)
WithExtension (to generate e.g. intro.execution.html)
InSubdir (to generate e.g. intro.execution/index.html)
The default is WithExtension, since this is suitable
for direct browsing on a filesystem without a web server.
Bare may be used in conjunction with web server configuration
specifying a default text/html mime type for the directory
containing the section pages, to get URLs such as:
temp.res#temp.dep
temp.dep#3
InSubdir only requires defaulting to index.html, to give:
temp.res/#temp.dep
temp.dep/#3
Custom draft branch
While cxxdraft-htmlgen works with the official draft sources as-is,
better results can be obtained by using the following branch:
https://github.com/Eelis/draft/tree/cxxdraft-htmlgen-fixes
This branch tracks the official draft sources, but makes some changes to:
- improve syntax highlighting
- clean up hyperlinks
- work around MathJax limitations
- work around cxxdraft-htmlgen limitations
Output
The following will be created in ./14882/ :
- index.html A table of contents with links to...
- ~2300 interlinked section pages
These are named after the section abbreviation, which for
the Bare section file style look like:
stmt.goto
class.member.lookup
cpp
iterator.requirements.general
locale.moneypunct.virtuals
Since sections nest, content is duplicated at every level.
This allows one to specify more or less context for a given
citation. For example, one can link to:
basic.scope.hiding (section 6.4.10 "Name hiding" on
a page of its own)
basic.scope#hiding (the same section highlighted on
the page for section 6.4 "Scope")
basic#scope.hiding (the same section highlighted on
the page for chapter 6 "Basics")
- full The entire document (~24 mbyte, or ~2 mbyte compressed).
- 14882.css Used by all of the above.
Hidden links
On any page:
- defined terms/concepts/nonterminals are links that select themselves;
- a full stop at the end of a sentence is a link that selects the sentence;
- moving the mouse over the right margin of a numbered paragraph reveals a link
to the LaTeX source for that paragraph;
- moving the mouse over the left margin of an itemdecl or table row reveals a link
that selects it.
================================================
FILE: RawDocument.hs
================================================
{-# LANGUAGE
OverloadedStrings,
RecordWildCards,
ViewPatterns,
LambdaCase,
TupleSections,
NamedFieldPuns,
FlexibleInstances,
FlexibleContexts,
RankNTypes,
MultiParamTypeClasses,
FunctionalDependencies,
UndecidableInstances,
RecursiveDo #-}
module RawDocument
( RawElement(..), RawTexPara(..), RawFootnote(..), RawParagraph(..), LinearSection(..), RawItem(..)
, loadMacros, parseFile, loadXrefDelta, doParse) where
import qualified LaTeXParser as Parser
import qualified Data.Text as Text
import Data.Text (Text, replace)
import Document (Row(..), SourceLocation(..), RowSepKind(..), SectionKind(..), Cell(..), CellSpan(..), XrefDelta, Abbreviation, ColumnSpec(..), TextAlignment(..))
import Data.Maybe (isJust, fromJust)
import LaTeXParser (Macros(..), Signature(..), nullCmd, storeCmd, storeEnv, Environment(..), Command(..), codeEnv, Token(..), normalCmd, ParseResult(..))
import Data.Text.IO (readFile)
import Text.Regex (mkRegex)
import qualified Data.Map as Map
import Data.Map (Map)
import Data.List (transpose, take, isPrefixOf)
import Util ((.), (++), mapHead, textStripInfix, textSubRegex, splitOn)
import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
import System.IO.Unsafe (unsafePerformIO)
import System.Process (readProcess)
import Control.Arrow (first)
import Data.Char (isSpace, isDigit)
import LaTeXBase
data RawItem = RawItem
{ rawItemLabel :: LaTeX
, rawItemContent :: [RawTexPara] }
deriving (Eq, Show)
data RawElement
= RawLatexElement LaTeXUnit
| RawEnumerated String [RawItem]
| RawCodeblock LaTeXUnit
| RawExample [RawTexPara]
| RawNote Text [RawTexPara]
| RawItemdescr [RawTexPara]
| RawBnf String LaTeX
| RawTable
{ rawTableCaption :: LaTeX
, rawColumnSpec :: [ColumnSpec]
, rawTableAbbr :: Abbreviation
, rawTableBody :: [Row [RawTexPara]] }
| RawTabbing LaTeX
| RawFormula { rawFormulaAbbr :: Abbreviation, rawFormulaContent :: LaTeX }
| RawFigure { rawFigureName :: LaTeX, rawFigureAbbr :: Abbreviation, rawFigureSvg :: Text }
deriving (Eq, Show)
newtype RawTexPara = RawTexPara { rawTexParaElems :: [RawElement] }
deriving (Eq, Show)
newtype RawFootnote = RawFootnote [RawTexPara]
deriving Show
data RawParagraph = RawParagraph
{ paraNumbered :: Bool
, rawParaInItemdescr :: Bool
, rawParaElems :: [RawTexPara]
, rawParaSourceLoc :: Maybe SourceLocation }
deriving Show
data LinearSection = LinearSection
{ lsectionAbbreviation :: Abbreviation
, lsectionKind :: SectionKind
, lsectionName :: LaTeX
, lsectionParagraphs :: [RawParagraph]
, lsectionFootnotes :: [RawFootnote] }
deriving Show
instance AllUnits RawElement where
allUnits (RawLatexElement x) = allUnits x
allUnits (RawBnf _ x) = allUnits x
allUnits (RawTabbing x) = allUnits x
allUnits (RawNote _ x) = allUnits x
allUnits (RawExample x) = allUnits x
allUnits (RawCodeblock x) = allUnits x
allUnits (RawItemdescr x) = allUnits x
allUnits (RawEnumerated _ x) = allUnits x
allUnits (RawFormula _ x) = allUnits x
allUnits RawFigure{} = []
allUnits RawTable{..} = allUnits rawTableCaption ++ concatMap (allUnits . concat . map content) (map cells rawTableBody)
instance AllUnits RawTexPara where
allUnits = allUnits . rawTexParaElems
instance AllUnits RawItem where
allUnits RawItem{..} = allUnits rawItemLabel ++ allUnits rawItemContent
instance AllUnits LinearSection where
allUnits LinearSection{..} = allUnits lsectionName ++ allUnits lsectionParagraphs ++ allUnits lsectionFootnotes
instance AllUnits RawParagraph where
allUnits RawParagraph{..} = allUnits rawParaElems
instance AllUnits RawFootnote where
allUnits (RawFootnote x) = allUnits x
bnfEnvs :: [String]
bnfEnvs = ["bnf", "ncbnf", "bnfkeywordtab", "simplebnf", "ncsimplebnf", "ncrebnf"]
isBnf :: LaTeXUnit -> Bool
isBnf (TeXEnv s _ _)
| s `elem` bnfEnvs = True
isBnf _ = False
isTable, isTabbing, isFigure :: LaTeXUnit -> Bool
isTable x = isTeXEnv "floattablebasex" x || isTeXEnv "htmlTable" x
isTabbing = isTeXEnv "tabbing"
isFigure = isTeXEnv "importgraphic"
isEnumerate :: LaTeXUnit -> Maybe String
isEnumerate (TeXEnv s _ _)
| s `elem` ["enumerate", "itemize", "description", "thebibliography"] = Just s
isEnumerate _ = Nothing
isParaEnd :: LaTeXUnit -> Bool
isParaEnd (TeXEnv "itemdecl" _ _) = True
isParaEnd (TeXEnv "indexeditemdecl" _ _) = True
isParaEnd (TeXEnv "itemdescr" _ _) = True
isParaEnd (TeXComm "pnum" _ _) = True
isParaEnd x = isParasEnd x
isParasEnd :: LaTeXUnit -> Bool
isParasEnd (TeXComm "definition" _ _) = True
isParasEnd (TeXComm "rSec" _ _) = True
isParasEnd (TeXComm "infannex" _ _) = True
isParasEnd (TeXComm "normannex" _ _) = True
isParasEnd _ = False
isJunk :: LaTeXUnit -> Bool
isJunk (TeXRaw x) = all isSpace (Text.unpack x)
isJunk (TeXComm "index" _ _) = True
isJunk (TeXComm "setlength" _ _) = True
isJunk _ = False
isItem :: LaTeXUnit -> Maybe LaTeX
isItem (TeXComm "item" _ []) = Just []
isItem (TeXComm "item" _ [(_, label)]) = Just label
isItem (TeXComm "bibitem" _ [(_, [TeXRaw label])]) = Just [TeXRaw $ "bib:" ++ label]
isItem _ = Nothing
parseItems :: LaTeX -> [RawItem]
parseItems [] = []
parseItems (x : rest)
| isJunk x = mapHead (mapItemContent (mapHead addJunk)) (parseItems rest)
| Just label <- isItem x, (item, rest') <- break (isJust . isItem) rest =
RawItem label (parsePara item) : parseItems rest'
where
mapItemContent f (RawItem l c) = RawItem l (f c)
addJunk :: RawTexPara -> RawTexPara
addJunk (RawTexPara z) = RawTexPara (dropWhile isOnlySpace $ RawLatexElement x : z)
parseItems _ = error "need items or nothing"
doParse :: Macros -> Text -> (LaTeX, Macros)
doParse m t = (x, y)
where
(x, y, []) = Parser.parseString ctx (Text.unpack t)
ctx = initialContext{Parser.macros=m}
nullCmds :: [(Int, String)]
nullCmds =
[ (0, "clearpage kill rmfamily hfill vfill nocorr small larger noindent itcorrwidth itletterwidth global bigskip begingroup endgroup")
, (1, "enlargethispage lstset newsavebox vspace input thispagestyle")
, (2, "glossary settowidth addtolength copypagestyle")
, (3, "definecolor makeheadrule")
, (4, "makeoddhead")
]
storeCmds :: [(Int, String)]
storeCmds =
[ (0, "today def makeatletter bottomline makeatother Sec bmod mod long prime " ++
"chapter section paragraph subparagraph fi otextup linebreak newpage log " ++
"textup edef x BnfIndent par leq bot perp Sigma " ++
"leftmargini BnfInc BnfRest protect caret sum " ++
"xspace onelineskip textlangle textrangle tilde raggedright = " ++
"space copyright textregistered textbackslash hsize br Gamma " ++
"frenchspacing list leftmargin listparindent itemindent itshape relax " ++
"nonfrenchspacing endlist upshape ttfamily baselineskip nobreak " ++
"endfirsthead quad qquad cdot cdots dotsc bnfindentinc footnotemark ldots capsep max min " ++
"continuedcaption hline endhead footnotesize le times dotsb rightarrow to equiv " ++
"lfloor rfloor pi geq neq ge lceil rceil ell alpha bigl bigr mu lambda beta " ++
"tabularnewline exp sigma big delta rho Pi nu infty displaystyle lim sin cos " ++
"phi int theta zeta FlushAndPrintGrammar break backslash centering " ++
"normalbaselineskip land lor mapsto normalfont textmu tablerefname figurerefname newline " ++
"obeyspaces bnfindent vdots tabcolsep columnbreak emergencystretch commentellip " ++
"gamma widowpenalties sffamily parskip left right `")
, (1, "hspace footnote textit textrm textnormal texttt textbf ensuremath ref ref* mbox bibitem mathop " ++
"terminal literalterminal noncxxterminal textsl textsc textsf text term overline " ++
"tcode noncxxtcode literaltcode footnotetext microtypesetup cline mathtt mathit mathrm mathsf " ++
"label newlength uline value newcounter mathscr c uppercase iref operatorname textlarger " ++
"phantom hphantom sqrt ln emph minipage url indexescape changeglossnumformat textasciitilde " ++
"removedxref deprxref textsuperscript rlap mathrel mathbin nopnumdiffref color ucode uname")
, (2, "pnum definition addtocounter setcounter frac " ++
"binom infannex normannex parbox link weblink indexedspan movedxref movedxrefs " ++
"equal setlength textcolor providecommand")
, (3, "multicolumn discretionary movedxrefii ifthenelse PackageError NewEnviron")
, (4, "movedxrefiii indexlink hiddenindexlink")
]
initialCmds :: Map Text Command
initialCmds = Map.fromList $
[ storeCmd "item" (Signature 0 (Just []))
, storeCmd "caption" (Signature 2 (Just []))
, storeCmd "index" (Signature 2 (Just []))
, storeCmd "hyperref" (Signature 2 (Just []))
, nullCmd "makebox" (Signature 2 (Just []))
, storeCmd "\n" (Signature 0 Nothing)
, storeCmd "nolinebreak" (Signature 0 (Just []))
, storeCmd "textsmaller" (Signature 2 (Just []))
, nullCmd "gramSec" (Signature 2 (Just []))
, ("kern", normalCmd $ Command $ \_ctx _ws -> ParseResult [] mempty . snd . parseDimen)
]
++ [storeCmd c (Signature a Nothing) | (a, l) <- storeCmds, c <- words l]
++ [nullCmd (Text.pack c) (Signature a Nothing) | (a, l) <- nullCmds, c <- words l]
parseDimen :: [Token] -> ([Token], [Token])
parseDimen toks
| t@(Token txt) : more <- toks, txt `elem` [".", "pt", "-", "em"] || all isDigit txt = first (t :) (parseDimen more)
| otherwise = ([], toks)
initialEnvs :: Map Text Environment
initialEnvs = Map.fromList $
[ (storeEnv e (Signature 0 Nothing))
| e <- bnfEnvs ++
words "indented description itemize center tabbing defnote enumerate eqnarray* equation* itemdescr footnote matrix"
] ++
[ storeEnv "example" (Signature 1 (Just []))
, storeEnv "tailexample" (Signature 1 (Just []))
, storeEnv "note" (Signature 0 (Just [Token "Note"]))
, storeEnv "tailnote" (Signature 0 (Just [Token "Note"]))
, storeEnv "table" (Signature 1 Nothing)
, storeEnv "tabular" (Signature 1 Nothing)
, storeEnv "longtable" (Signature 1 Nothing)
, storeEnv "importgraphic" (Signature 3 Nothing)
, storeEnv "formula" (Signature 1 Nothing)
, storeEnv "minipage" (Signature 1 Nothing)
, storeEnv "thebibliography" (Signature 1 Nothing)
, codeEnv "indexeditemdecl" (Signature 1 Nothing)
, codeEnv "itemdecl" (Signature 0 Nothing)
, codeEnv "indexedcodeblock" (Signature 1 Nothing)
, codeEnv "codeblock" (Signature 0 Nothing)
, codeEnv "codeblockdigitsep" (Signature 0 Nothing)
, codeEnv "codeblocktu" (Signature 1 Nothing)
, storeEnv "array" (Signature 1 Nothing)
, storeEnv "floattablebasex" (Signature 4 Nothing)
, storeEnv "htmlTable" (Signature 3 Nothing)
]
initialMacros :: Parser.Macros
initialMacros = Parser.defaultMacros ++ mempty{Parser.commands=initialCmds, Parser.environments=initialEnvs}
initialContext :: Parser.Context
initialContext = Parser.defaultContext{Parser.macros=initialMacros}
parseFile :: Macros -> Text -> ([LinearSection], Macros)
parseFile macros =
first (parseSections 0)
. doParse macros
. replace "$$" "$"
. replace "\\hspace*" "\\hspace"
. replace "``" "“"
. textSubRegex (mkRegex "(\\grammarterm\\{[A-Za-z-]*\\})(\\{s\\}|s)") "\\1\\textit{s}"
-- Mixing italic and upright looks okay in the PDF, but looks bad in browsers,
-- and our linkification makes clear enough that the plural 's' is not part
-- of the grammarterm.
loadFigure :: Text -> Text
loadFigure f = unsafePerformIO $ do
dot <- readFile $ "assets/" ++ p
svg <- readProcess "dot" ["-Tsvg",
"-Gbgcolor=transparent",
"-Gsize=8",
"-Nfontsize=10",
"-Gfontsize=10",
"-Efontsize=10",
"-Nfontname=Noto Serif",
"-Efontname=Noto Serif",
"-Gfontname=Noto Serif"] (Text.unpack $ Text.replace "Courier New" "Noto Sans Mono" $ Text.replace ", fontsize=24" "" dot)
return $ rmIds $ snd $ Text.breakOn "<svg" $ Text.pack svg
where
p = Text.unpack $ Text.replace ".pdf" ".dot" f
r = mkRegex "<g id=\"[^\"]*\""
rmIds = textSubRegex r "<g"
-- Without rmIds, if a page has more than one figure, it will
-- have duplicate 'graph1', 'node1', 'edge1' etc ids.
isOnlySpace :: RawElement -> Bool
isOnlySpace (RawLatexElement x) = triml [x] == []
isOnlySpace _ = False
parsePara :: LaTeX -> [RawTexPara]
parsePara u = RawTexPara . dropWhile isOnlySpace . fmap f . splitElems (trim (filter (not . kill) u))
where
kill (TeXComm "hline" _ []) = True
kill (TeXComm "capsep" _ []) = True
kill (TeXComm "endhead" _ _) = True
kill _ = False
f :: LaTeXUnit -> RawElement
f e@(TeXEnv k a stuff)
| isFigure e
, [(FixArg, rawFigureName), (FixArg, [TeXRaw rawFigureAbbr]), (FixArg, [TeXRaw figureFile])] <- a
= RawFigure{rawFigureSvg=loadFigure figureFile, ..}
| k == "formula", [(FixArg, [TeXRaw rawFormulaAbbr])] <- a = RawFormula{rawFormulaContent = stuff, ..}
| isTable e
, ((_, cap) : (_, [TeXRaw abbr]) : (_, y) : _) <- a
= RawTable
{ rawTableCaption = cap
, rawColumnSpec = parseColspec y
, rawTableAbbr = "tab:" ++ abbr
, rawTableBody = breakMultiCols $ parseTable stuff }
| isTable e = error $ "other table: " ++ show e
| isTabbing e = RawTabbing stuff
| isBnf e = RawBnf (if "nc" `isPrefixOf` k then drop 2 k else k) stuff
| Just ek <- isEnumerate e = RawEnumerated ek (parseItems stuff)
| isCodeblock e = RawCodeblock e
| k `elem` ["note", "defnote", "tailnote"] =
let label = case a of [(FixArg, [TeXRaw x])] -> x; _ -> "Note"
in RawNote label $ parsePara stuff
| k `elem` ["example", "tailexample"] = RawExample $ parsePara stuff
| k == "itemdecl" || k == "minipage" || k == "indexeditemdecl" = RawLatexElement e
| k == "itemdescr" = RawItemdescr $ parsePara stuff
f x = RawLatexElement x
splitElems :: LaTeX -> [LaTeX]
splitElems [] = []
splitElems (x:xs)
| TeXRaw (textStripInfix "\n\n" -> Just (a, (Text.stripStart -> b))) <- x =
(if a == "" then ([] :) else ([TeXRaw a] :)) $
splitElems (if b == "" then xs else TeXRaw b : xs)
| otherwise = case splitElems xs of
[] -> [[x]]
a:b -> ((x:a):b)
class ExtractFootnotes a where extractFootnotes :: a -> (a, [RawFootnote])
instance ExtractFootnotes LaTeX where
extractFootnotes [] = ([], [])
extractFootnotes (TeXRaw x : t@(TeXEnv "footnote" _ _ : _))
= (TeXRaw (Text.stripEnd x) : t', ft)
where (t', ft) = extractFootnotes t
-- stripEnd here implements the footnote's \unskip
extractFootnotes (h:t) = (h' : t', fh ++ ft)
where
(h', fh) = extractFootnotes h
(t', ft) = extractFootnotes t
instance ExtractFootnotes LaTeXUnit where
extractFootnotes (TeXEnv "footnote" [] content) =
(TeXComm "footnoteref" "" [], [RawFootnote $ parsePara content])
extractFootnotes (TeXComm "footnotemark" _ []) =
(TeXComm "footnoteref" "" [], [])
extractFootnotes (TeXComm "footnotetext" _ [(_, content)]) =
(TeXRaw "" {- todo.. -}, [RawFootnote $ parsePara content])
extractFootnotes (TeXComm a ws [(FixArg, content)]) =
first (\c -> TeXComm a ws [(FixArg, c)]) (extractFootnotes content)
extractFootnotes (TeXEnv env args content) = first (TeXEnv env args) (extractFootnotes content)
extractFootnotes other = (other, [])
parseParas :: LaTeX -> ([RawParagraph], [RawFootnote], LaTeX {- rest -})
parseParas (break isParasEnd -> (extractFootnotes -> (stuff, fs), rest))
= (collectParas stuff, fs, rest)
where
collectParas :: LaTeX -> [RawParagraph]
collectParas (t@(TeXEnv "indexeditemdecl" _ _) : more) =
RawParagraph False False (parsePara [t]) Nothing : collectParas more
collectParas (t@(TeXEnv "itemdecl" _ _) : more) =
RawParagraph False False (parsePara [t]) Nothing : collectParas more
collectParas (TeXEnv "itemdescr" _ desc : more) =
map (\p -> p{rawParaInItemdescr=True}) (collectParas desc)
++ collectParas more
collectParas (TeXComm "pnum" _
[ (FixArg, [TeXRaw (Text.unpack -> file)])
, (FixArg, [TeXRaw (Text.unpack -> read -> lineNr)])] : more) =
(\(p : x) -> p{paraNumbered=True, rawParaSourceLoc=Just (SourceLocation file lineNr)} : x)
(collectParas more)
collectParas (TeXComm "pnum" _ [] : more) =
(\(p : x) -> p{paraNumbered=True, rawParaSourceLoc=Nothing} : x)
(collectParas more)
collectParas [] = []
collectParas x = (if null p then id else (RawParagraph False False p Nothing :)) (collectParas more)
where (parsePara -> p, more) = break isParaEnd x
parseSections :: Int -> LaTeX -> [LinearSection]
parseSections level (TeXComm "textlarger" _ _ : more) = parseSections level more
parseSections level
(TeXComm c _ args : (parseParas -> (lsectionParagraphs, lsectionFootnotes, more)))
| ((FixArg, isJustRaw -> fromJust -> lsectionAbbreviation), (FixArg, lsectionName), lsectionKind, level') <- case (c, args) of
("normannex", [abbr, name]) -> (abbr, name, NormativeAnnexSection, level)
("infannex", [abbr, name]) -> (abbr, name, InformativeAnnexSection, level)
("definition", [name, abbr]) -> (abbr, name, DefinitionSection (level + 1), level)
("rSec", [(FixArg, [TeXRaw (Text.unpack -> read -> l)]), abbr, name]) ->
(abbr, name, NormalSection l, l)
_ -> error $ "not a section command: " ++ show (c, args)
= LinearSection{..} : parseSections level' more
parseSections _ [] = []
parseSections l (x:xx)
| TeXRaw t <- x, all isSpace (Text.unpack t) = parseSections l xx
| otherwise = error $ "parseSections: " ++ show x
parseTable :: LaTeX -> [Row [RawTexPara]]
parseTable latex
| triml latex == [] = []
| triml row == [] = parseTable $ tail rest
| hasCommand (== "endfirsthead") row = parseTable $ findEndHead rest
| hasCommand (`elem` ["caption", "bottomline"]) row = parseTable rest
| otherwise = makeRow row : parseTable rest
where
(row, rest) = break (== TeXLineBreak) latex
findEndHead l
| row' == [] = findEndHead $ tail rest'
| hasCommand (== "endhead") row' = l
| otherwise = findEndHead rest'
where
(row', rest') = break (== TeXLineBreak) l
columnBreakCell :: Cell [RawTexPara]
columnBreakCell = Cell Normal [RawTexPara [RawLatexElement (TeXComm "columnbreak" "" [])]]
isColumnBreakCell :: Cell [RawTexPara] -> Bool
isColumnBreakCell (Cell Normal [RawTexPara [RawLatexElement (TeXComm "columnbreak" _ [])]]) = True
isColumnBreakCell _ = False
makeRectangular :: a -> [[a]] -> [[a]]
makeRectangular filler rows = (take numCols . (++ repeat filler)) . rows
where numCols = maximum (length . rows)
-- Todo: Remove this when the bugs in Chrome's collapsed border rendering are fixed.
breakMultiCols :: [Row [RawTexPara]] -> [Row [RawTexPara]]
-- implements the multicolfloattable environment's \columnbreak, which is left intact by parseTable
breakMultiCols rows
| all (\Row{..} -> length cells == 1 && rowSep == NoSep) rows =
Row NoSep . makeRectangular (Cell Normal []) (transpose $ splitOn isColumnBreakCell $ separateColumnBreaks $ (head . cells) . rows)
| otherwise = rows
where
separateColumnBreaks :: [Cell [RawTexPara]] -> [Cell [RawTexPara]]
separateColumnBreaks = concatMap f
where
f :: Cell [RawTexPara] -> [Cell [RawTexPara]]
f c@Cell{..} | [RawTexPara (RawLatexElement (TeXComm "columnbreak" _ []) : rest)] <- content =
[columnBreakCell, c{content = [RawTexPara rest]}]
| otherwise = [c]
makeRow :: LaTeX -> Row [RawTexPara]
makeRow l = Row sep $ makeRowCells l
where
sep
| hasCommand (== "hline") l = RowSep
| hasCommand (== "capsep") l = CapSep
| hasCommand (== "cline") l = Clines $ clines $ lookForCommand "cline" l
| otherwise = NoSep
clines [] = []
clines (([(FixArg, [TeXRaw c])]) : rest) = (begin, end) : clines rest
where
(begin', end') = Text.breakOn "-" c
begin = read $ Text.unpack begin' :: Int
end = read $ Text.unpack $ Text.tail end' :: Int
clines other = error $ "Unexpected \\clines syntax: " ++ show other
parseWidth :: LaTeX -> (Maybe Text, LaTeX)
parseWidth (TeXRaw "" : x) = parseWidth x
parseWidth (TeXBraces [TeXRaw x] : rest) = (Just x, rest)
parseWidth (TeXBraces [TeXRaw x, TeXComm "hsize" "" []] : rest) =
(Just $ Text.pack (show (round ((read ("0" ++ Text.unpack x) :: Double) * 100) :: Int)) ++ "%", rest)
parseWidth (TeXBraces _ : rest) = (Nothing, rest) -- remaining cases unsupported for now
parseWidth x = (Nothing, x)
parseColspec :: LaTeX -> [ColumnSpec]
parseColspec = \x -> case x of
[] -> []
TeXRaw (Text.unpack -> '|' : z) : y -> go (TeXRaw (Text.pack z) : y)
_ -> go x
where
go :: LaTeX -> [ColumnSpec]
go [] = []
go [TeXRaw "|"] = []
go (TeXRaw "@" : TeXBraces _ : x) = go x -- unimplemented
go (TeXRaw ">" : TeXBraces _ : x) = go x -- unimplemented
go (TeXRaw "" : y) = go y
go (TeXRaw (Text.uncons -> Just (letter, rest)) : y)
| letter == ' ' = go (TeXRaw rest : y)
| letter == '|' = mapHead (\(ColumnSpec x _ z) -> ColumnSpec x True z) $ go (TeXRaw rest : y)
| otherwise =
let (w, rest') = parseWidth (TeXRaw rest : y)
in ColumnSpec (colClass letter) False w : go rest'
go x = error ("parseColspec: " ++ show x)
colClass :: Char -> TextAlignment
colClass x | x `elem` ['l', 'm', 'x'] = AlignLeft
colClass 'p' = Justify
colClass 'r' = AlignRight
colClass 'c' = AlignCenter
colClass other = error $ "Unexpected column type " ++ (other : [])
makeRowCells :: LaTeX -> [Cell [RawTexPara]]
makeRowCells [] = []
makeRowCells latex =
case rest of
[] -> [makeCell cell]
_ : r ->
(makeCell $ cell <> [TeXRaw cell']) : makeRowCells (TeXRaw rest'' : r)
where
(cell, rest) = break isColEnd latex
isColEnd (TeXRaw c) = isJust $ Text.find (== '&') c
isColEnd _ = False
(cell', rest') = Text.break (== '&') $ getText rest
rest'' = Text.drop 1 rest'
getText (TeXRaw s : _) = s
getText other = error $ "Didn't expect " ++ show other
makeCell content
| [[(FixArg, [TeXRaw w]), (FixArg, cs), (FixArg, content')]] <- lookForCommand "multicolumn" content =
Cell (Multicolumn (read $ Text.unpack w) (head $ parseColspec cs)) $ parsePara content'
| otherwise =
Cell Normal $ parsePara content
rmExplSyntax :: Text -> Text
rmExplSyntax = Text.unlines . f . Text.lines
where
f [] = []
f ("\\ExplSyntaxOn" : (dropWhile (/= "\\ExplSyntaxOff") -> (_ : x))) = f x
f (h : t) = h : f t
loadMacros :: Text -> IO Macros
loadMacros extraMacros =
(initialMacros ++)
. snd
. doParse initialMacros
. replace "\\indeximpldef{" "\\index[impldefindex]{"
. textSubRegex (mkRegex "\\\\penalty[0-9]+{}") ""
. textSubRegex (mkRegex "\\\\verbtocs{[\\a-zA-Z]*}\\|[^|]*\\|") ""
. rmExplSyntax
. (++ extraMacros)
. mconcat
. mapM readFile
["config.tex", "macros.tex", "tables.tex"]
loadXrefDelta :: IO XrefDelta
loadXrefDelta = do
(tex, _, _) <- Parser.parseString initialContext . Text.unpack . readFile "xrefdelta.tex"
let lfc c = lookForCommand c tex
return $
[ (fromJust $ isJustRaw $ snd from, [snd to])
| [from, to] <- lfc "movedxrefs" ] ++
[ (fromJust $ isJustRaw $ snd from, (:[]) . TeXComm "ref" "" . (:[]) . tos)
| from : tos <- lfc "movedxref" ++ lfc "movedxrefii" ++ lfc "movedxrefiii" ] ++
[ (abbr, [])
| [(_, [TeXRaw abbr])] <- lfc "removedxref" ] ++
[ (abbr, [[TeXComm "ref" "" [(FixArg, [TeXRaw ("depr." ++ abbr)])]]])
| [(_, [TeXRaw abbr])] <- lfc "deprxref" ]
================================================
FILE: Render.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE
OverloadedStrings,
RecordWildCards,
TupleSections,
ViewPatterns,
NamedFieldPuns,
LambdaCase,
TypeSynonymInstances,
FlexibleInstances #-}
module Render (
Render(render), concatRender, renderTab, renderFig, renderIndex, simpleRender, simpleRender2, squareAbbr,
linkToSection, secnum, Page(..), parentLink, abbrHref,
defaultRenderContext, isSectionPage,
RenderContext(..), renderLatexParas
) where
import Load14882 (parseIndex) -- todo: bad
import Document (
CellSpan(..), Cell(..), RowSepKind(..), Row(..), Element(..), Draft(..), Footnote(..),
TeXPara(..), Sentence(..), Abbreviation, sectionByAbbr, footnotes, ColumnSpec(..),
Section(..), Chapter(..), Table(..), Figure(..), Sections(..), figures, formulas, tables, Item(..),
IndexComponent(..), IndexTree, IndexNode(..), IndexKind(..), IndexEntry(..), Formula(..),
IndexPath, indexKeyContent, tableByAbbr, figureByAbbr, formulaByAbbr, Paragraph(..), Note(..), Example(..),
chapterOfSection)
import LaTeXBase (LaTeX, LaTeXUnit(..), ArgKind(..), MathType(..), lookForCommand, concatRaws,
renderLaTeX, trim, isMath, isCodeblock, texStripPrefix, texSpan, mapTeX, mapCommandName)
import qualified Data.IntMap as IntMap
import Data.Text (isPrefixOf)
import qualified Data.Text.Lazy.Builder as TextBuilder
import Debug.Trace (trace)
import qualified Data.Text as Text
import qualified Data.Text.Lazy as LazyText
import qualified Text.HTML.TagSoup as Soup
import Data.Char (isAlpha, isSpace, isAlphaNum, toLower, isUpper, ord, isDigit, toUpper)
import Control.Arrow (second)
import qualified Prelude ()
import qualified MathJax
import Prelude hiding (take, (.), (++), writeFile)
import Data.List (find, nub, intersperse, (\\), sortOn, dropWhileEnd)
import qualified Data.Map as Map
import Data.Maybe (isJust, fromJust)
import Pages (Link(..))
import Sentences (linkifyFullStop)
import Util ((.), (++), replace, Text, xml, spanTag, anchor, Anchor(..), greekAlphabet,
urlChars, intercalateBuilders, replaceXmlChars, spanJust, h, partitionBy, mapHead)
import CxxParser (parseCppDirective, parseLiteral, parseComment)
kill, literal :: [String]
kill = words $
"clearpage renewcommand newcommand enlargethispage noindent indent vfill pagebreak setlength " ++
"caption capsep continuedcaption bottomline hline rowsep hspace endlist cline " ++
"hfill nocorr small endhead kill footnotesize rmfamily microtypesetup nobreak nolinebreak " ++
"topline FlushAndPrintGrammar left right protect = ! @ - xspace obeyspaces"
literal = ["#", "{", "}", "~", "%", ""]
simpleMacros :: [(String, Text)]
simpleMacros =
[ ("," , "<span style='white-space:nowrap'> </span>")
-- thin, non-breaking, non-stretching space
, ("\"" , "\"")
, ("`" , "`")
, ("prime" , "'")
, ("caret" , "^")
, ("copyright" , "©")
, ("textregistered" , "®")
, ("Cpp" , "C++")
, ("sum" , "∑")
, ("bot" , "⊥")
, ("perp" , "⊥")
, ("ell" , "ℓ")
, ("shr" , ">>")
, ("cv" , "cv")
, ("shl" , "<<")
, ("br" , "<br/>")
, ("linebreak" , "<br/>")
, ("sim" , "~")
, ("quad" , "  ")
, ("qquad" , "  ")
, ("indent" , " ")
, ("unun" , "__")
, ("^" , "^")
, ("ldots" , "…")
, ("vdots" , "⋮")
, ("dotsc" , "…")
, ("times" , "×")
, ("&" , "&")
, ("$" , "$")
, ("backslash" , "\\")
, ("textbackslash" , "\\")
, ("colcol" , "::")
, ("tilde" , "~")
, ("textasciitilde" , "~")
, ("hspace" , " ")
, ("space" , " ")
, ("equiv" , "≡")
, ("le" , " ≤ ")
, ("leq" , " ≤ ")
, ("ge" , " ≥ ")
, ("geq" , " ≥ ")
, ("neq" , " ≠ ")
, ("land" , " ∧ ")
, ("lor" , " ∨ ")
, ("cdot" , "·")
, ("cdots" , "⋯")
, ("to" , "→")
, ("rightarrow" , "→")
, ("mapsto" , "↦")
, ("sqrt" , "√")
, ("lfloor" , "⌊")
, ("rfloor" , "⌋")
, ("lceil" , "⌈")
, ("rceil" , "⌉")
, (";" , " ")
, ("min" , "<span class=\"mathrm\">min</span>")
, ("max" , "<span class=\"mathrm\">max</span>")
, ("bmod" , "<span class=\"mathrm\">mod</span>")
, ("exp" , "<span class=\"mathrm\">exp</span>")
, ("ln" , "<span class=\"mathrm\">ln</span>")
, ("log" , "<span class=\"mathrm\">log</span>")
, ("opt" , "<sub><small>opt</small></sub>")
, ("rightshift" , "<span class=\"mathsf\">rshift</span>")
, ("textlangle" , "⟨")
, ("textrangle" , "⟩")
, ("textmu" , "μ")
, ("tablerefname" , "Table")
, ("figurerefname" , "Figure")
, ("newline" , "<br>")
, (">" , "	")
, ("bnfindent" , "   ")
, ("\n" , "\n")
]
++ [(n, Text.pack [c]) | (n, c) <- greekAlphabet]
zwsp :: Text
zwsp = "​" -- U+200B ZERO WIDTH SPACE
makeSpan, makeDiv :: [String]
makeSpan = words "center mbox mathsf emph textsc phantom term mathtt textnormal textrm descr textsl textit mathit indented"
makeDiv = words "definition cvqual emph exitnote footnote mathit paras ttfamily TableBase table tabular longtable"
indexPathString :: IndexPath -> Text
indexPathString =
replace " " "_" . -- HTML forbids space.
Text.intercalate "," .
map (indexKeyContent . indexKey)
indexShortName :: Text -> Maybe IndexKind -> Text
indexShortName "grammarindex" (Just DefinitionIndexEntry) = "nt"
indexShortName "grammarindex" Nothing = "ntref"
indexShortName "conceptindex" (Just DefinitionIndexEntry) = "concept"
indexShortName "conceptindex" Nothing = "conceptref"
indexShortName "headerindex" (Just DefinitionIndexEntry) = "header"
indexShortName "headerindex" Nothing = "headerref"
indexShortName "generalindex" (Just DefinitionIndexEntry) = "def"
indexShortName "generalindex" _ = ""
indexShortName "libraryindex" _ = "lib"
indexShortName "impldefindex" _ = ""
indexShortName "bibliography" _ = "bib"
indexShortName cat _ = error $ "indexShortName: unrecognized category: " ++ Text.unpack cat
indexPathId :: Text -> Maybe IndexKind -> IndexPath -> Text
indexPathId category kind =
(indexShortName category kind ++) .
(":" ++) .
replace " " "%20" .
replace "'" "'" .
replace "&" "&" .
indexPathString
indexPathId2 :: RenderContext -> Int -> Text -> IndexPath -> Maybe IndexKind -> Text
indexPathId2 ctx entryNr cat path kind = indexPathId cat kind path ++ indexOccurrenceSuffix ctx entryNr
indexPathId3 :: RenderContext -> LaTeX -> Text
indexPathId3 ctx indices = indexPathId2 ctx inum icat ipath ikind
where (icat, ipath, inum, ikind) : _ = indexPaths indices
indexPathHref :: Text -> Maybe IndexKind -> IndexPath -> Text
indexPathHref cat kind = (("#" ++ indexShortName cat kind ++ ":") ++) . urlChars . replace "&" "&" . indexPathString
asId :: LaTeX -> Text
asId = mconcat . map f
where
f :: LaTeXUnit -> Text
f (TeXRaw t) = replace "\n" "_" $ replace " " "_" t
f (TeXComm "tcode" _ [(_, x)]) = asId x
f (TeXComm "noncxxtcode" _ [(_, x)]) = asId x
f (TeXComm "texttt" _ [(_, x)]) = asId x
f (TeXComm "textit" _ [(_, x)]) = asId x
f (TeXComm "mathsf" _ [(_, x)]) = asId x
f (TeXComm "xspace" _ []) = "_"
f (TeXBraces x) = asId x
f (TeXMath Dollar x) = asId x
f (TeXComm "texorpdfstring" _ [_, (_, x)]) = asId x
f x = error $ "asId: unexpected: " ++ show x
instance Render Anchor where
render Anchor{..} _ =
xml "a" ([("class", aClass) | aClass /= ""] ++
[("href" , aHref ) | aHref /= ""] ++
[("id" , aId ) | aId /= ""] ++
[("title", aTitle) | aTitle /= ""] ++
[("style", aStyle) | aStyle /= ""]) aText
class Render a where render :: a -> RenderContext -> TextBuilder.Builder
concatRender :: Render a => [a] -> RenderContext -> TextBuilder.Builder
concatRender x c = mconcat $ map (\y -> render y c) x
instance Render Char where render c _ = TextBuilder.singleton c
instance (Render a, Render b) => Render (a, b) where
render (x, y) = render x ++ render y
renderCodeblock :: String -> [(ArgKind, LaTeX)] -> LaTeX -> RenderContext -> TextBuilder.Builder
renderCodeblock env args code ctx =
(case (env, args) of
("codeblocktu", [(FixArg, title)]) -> (("<p>" ++ render title ctx ++ ":") ++)
("indexedcodeblock", [(FixArg, indices)]) ->
let
link = anchor
{ aClass = "itemDeclLink"
, aHref = "#" ++ urlChars (indexPathId3 ctx indices)
, aText = "🔗" }
in renderIndexed ctx "span" indices .
(xml "div" [("class", "marginalizedparent")] (render link ctx) ++)
_ -> id) $
xml "span" [("class", "codeblock")] (
highlightLines ctx{rawTilde=True, rawHyphens=True, rawSpace=True, inCodeBlock=True} $
concatRaws $ expandTcode (dropInitialNewline code))
where
dropInitialNewline :: LaTeX -> LaTeX
dropInitialNewline (TeXRaw (Text.uncons -> Just ('\n', rest)) : more) = TeXRaw rest : more
dropInitialNewline x = x
expandTcode :: LaTeX -> LaTeX
expandTcode [] = []
expandTcode (TeXComm "tcode" _ [(FixArg, x)] : y) = expandTcode (x ++ y)
expandTcode (x : y) = x : expandTcode y
renderOutputblock :: LaTeX -> RenderContext -> TextBuilder.Builder
renderOutputblock code ctx = xml "pre" [("class", "outputblock")] $
render code ctx{rawTilde=True, rawHyphens=True, rawSpace=True}
sameIdNamespace :: Maybe IndexKind -> Maybe IndexKind -> Bool
sameIdNamespace Nothing (Just IndexOpen) = True
sameIdNamespace (Just IndexOpen) Nothing = True
sameIdNamespace x y = x == y
isFullPage :: Page -> Bool
isFullPage FullPage = True
isFullPage _ = False
abbrIsOnPage :: Abbreviation -> Page -> Bool
abbrIsOnPage _ FullPage = True
abbrIsOnPage abbr TablesPage = "tab:" `isPrefixOf` abbr
abbrIsOnPage abbr FiguresPage = "fig:" `isPrefixOf` abbr
abbrIsOnPage abbr (FigurePage Figure{..}) = abbr == figureAbbr
abbrIsOnPage abbr (TablePage Table{..}) = abbr == tableAbbr
abbrIsOnPage abbr (SectionPage sec)
| "fig:" `isPrefixOf` abbr = abbr `elem` (figureAbbr . snd . figures sec)
| "eq:" `isPrefixOf` abbr = abbr `elem` (formulaAbbr . snd . formulas sec)
| "tab:" `isPrefixOf` abbr = abbr `elem` (tableAbbr . snd . tables sec)
| otherwise = abbr `elem` (abbreviation . sections sec)
abbrIsOnPage _ _ = False
pageIndexEntries :: RenderContext -> IntMap.IntMap IndexEntry
pageIndexEntries c
| SectionPage s <- page c = secIndexEntries s
| otherwise = indexEntryMap (draft c)
indexOccurrenceSuffix :: RenderContext -> Int -> Text
-- Returns the _ that distinguishes expr#def:object_expression from
-- expr#def:object_expression_ ([expr] has two definitions of 'object expression',
-- one for E1.E2 and one for E1.*E2.)
indexOccurrenceSuffix c indexNum = underscores
where
Just theEntry = IntMap.lookup indexNum (pageIndexEntries c)
ies
| SectionPage s <- page c = secIndexEntriesByPath s
| otherwise = indexEntriesByPath (draft c)
underscores = Text.pack
[ '_' | (i, e) <- fromJust (Map.lookup (indexPath theEntry) ies)
, indexCategory e == indexCategory theEntry
, sameIdNamespace (indexEntryKind e) (indexEntryKind theEntry)
, i < indexNum ]
instance Render LaTeX where
render (TeXComm "textbackslash" _ [] : y)
| (TeXRaw s : rest) <- y = \sec -> "\\" ++ render (TeXRaw $ if rawSpace sec then s else unspace s) sec ++ render rest sec
where
unspace s
| Just (c, cc) <- Text.uncons s, isSpace c = cc
| otherwise = s
render (TeXComm "itshape" _ [] : x) = ("<i>" ++) . (++ "</i>") . render x
render (x : y) = render x ++ render y
render [] = return ""
keywords :: [Text]
keywords = map Text.pack $ words $
"char8_t char16_t char32_t namespace struct void operator friend template typedef long short class double public extern " ++
"using char new union unsigned sizeof alignas typename virtual this return const_cast delete noexcept static_cast " ++
"reinterpret_cast mutable bool private protected inline constexpr consteval final volatile default explicit enum export asm " ++
"typeid dynamic_cast throw if else for do while goto auto concept requires decltype try catch static_assert wchar_t " ++
"case switch alignof break continue signed audit axiom override const register thread_local int float static module import " ++
"co_return co_await co_yield constinit contract_assert"
-- todo: read the real keyword table instead
highlightLines :: RenderContext -> LaTeX -> TextBuilder.Builder
highlightLines ctx x
| (spaces, x') <- texSpan (== ' ') x, spaces /= "" = TextBuilder.fromText spaces ++ highlightLines ctx x'
| Just (directive, x') <- parseCppDirective x = spanTag "preprocessordirective" (render directive ctx) ++ highlight ctx x'
| TeXComm (Text.pack -> c) _ [(FixArg, y)] : more <- x, c `elem` ["terminal"] = spanTag c (highlightLines ctx y) ++ highlight ctx more
| i@(TeXComm cmd _ _) : more <- x, cmd `elem` ["index", "obeyspaces"] = render i ctx ++ highlightLines ctx more
| otherwise = highlight ctx x
highlightUnit :: RenderContext -> LaTeXUnit -> TextBuilder.Builder
highlightUnit ctx x = case x of
TeXComm "rlap" _ [(FixArg, text)] ->
spanTag "rlap" (highlight ctx text)
TeXComm "indexedspan" _ [(FixArg, text), (FixArg, indices)] ->
renderIndexed ctx "span" indices (highlight ctx text)
TeXComm "terminal" _ [(FixArg, y)] ->
spanTag "terminal" (highlight ctx y)
TeXComm c _ []
| c `elem` ["%", "&", "caret", "~"] -> spanTag "operator" (render x ctx)
| c == "#" -> spanTag "preprocessordirective" (render x ctx)
| c `elem` ["{", "}"] -> spanTag "curlybracket" (render x ctx)
TeXBraces y -> highlight ctx y
_ -> render x ctx
highlight :: RenderContext -> LaTeX -> TextBuilder.Builder
highlight ctx (TeXRaw kwd : TeXComm "-" "" [] : x)
| kwd `elem` keywords = TextBuilder.fromText kwd ++ highlight ctx x
highlight ctx (TeXRaw kwd : rest@(TeXComm "textit" _ [(FixArg, (TeXRaw (Text.uncons -> Just (isAlphaNum -> True, _)) : _))] : _))
| kwd `elem` keywords = TextBuilder.fromText kwd ++ highlight ctx rest
highlight ctx x
| Just x' <- texStripPrefix "\n" x = "\n" ++ highlightLines ctx x'
| (TeXRaw "" : t) <- x = highlight ctx t
| Just (lit, x') <- parseLiteral x = spanTag "literal" (render lit ctx) ++ highlight ctx x'
| Just (comment, x') <- parseComment x = spanTag "comment" (render comment ctx{inComment=True, rawTilde=False}) ++ highlightLines ctx x'
| Just x' <- texStripPrefix "<new>" x = spanTag "operator""<" ++ "new" ++ spanTag "operator" ">" ++ highlight ctx x'
-- keywords
| (a, x') <- texSpan p x, a /= "" = (case () of
_ | a `elem` keywords -> spanTag "keyword"
_ | a `elem` ["defined", "__has_include", "__has_embed", "__has_cpp_attribute", "_Pragma"] -> spanTag "preprocessordirective"
_ | a `elem` ["nullptr", "true", "false"] -> spanTag "literal"
_ | otherwise -> id) (render (TeXRaw a) ctx) ++ highlight ctx x'
where p c = isAlphaNum c || c == '_'
highlight ctx (TeXRaw x : more)
| Text.head x `elem` ("'\"" :: String) = render (TeXRaw $ Text.take 1 x) ctx ++ highlight ctx (TeXRaw (Text.tail x) : more)
| Text.head x `elem` ("()"::String) = spanTag "parenthesis" (render (TeXRaw $ Text.take 1 x) ctx) ++ highlight ctx (TeXRaw (Text.tail x) : more)
| Text.head x `elem` ("{}"::String) = spanTag "curlybracket" (render (TeXRaw $ Text.take 1 x) ctx) ++ highlight ctx (TeXRaw (Text.tail x) : more)
| Text.head x `elem` ("[]"::String) = spanTag "squarebracket" (render (TeXRaw $ Text.take 1 x) ctx) ++ highlight ctx (TeXRaw (Text.tail x) : more)
| Text.head x `elem` ("<>"::String) = spanTag "anglebracket" (render (TeXRaw $ Text.take 1 x) ctx) ++ highlight ctx (TeXRaw (Text.tail x) : more)
| Text.head x == '#' = spanTag "preprocessordirective" "#" ++ highlight ctx (TeXRaw (Text.tail x) : more)
| Text.take 2 x == "::"
= spanTag "operator" (render (TeXRaw "::") ctx) ++ highlight ctx (TeXRaw (Text.drop 2 x) : more)
| Text.head x `elem` ("*&^.-+/!=|:?%~#"::String)
= spanTag "operator" (render (TeXRaw (Text.take 1 x)) ctx) ++ highlight ctx (TeXRaw (Text.tail x) : more)
| (a, x') <- Text.span (\c -> not (isAlphaNum c || c `elem` ("#%_(){}[]<>.*:?'\"+=-/|&!^~\n" :: String))) x, a /= ""
= render (TeXRaw a) ctx ++ highlight ctx (TeXRaw x' : more)
| otherwise = error ("shit: " ++ show x)
highlight ctx (x : more) = highlightUnit ctx x ++ highlight ctx more
highlight _ [] = ""
indexPaths :: LaTeX -> [(Text, IndexPath, Int, Maybe IndexKind)]
indexPaths indices =
[ (cat, path, entryNr, kind)
| [ (FixArg, [TeXRaw (Text.unpack -> read -> entryNr)])
, (OptArg, [TeXRaw cat])
, (FixArg, (parseIndex -> (path, kind))) ] <- lookForCommand "index" indices]
renderIndexed :: RenderContext -> Text -> LaTeX -> TextBuilder.Builder -> TextBuilder.Builder
renderIndexed ctx thing indices body = foldl f body (indexPaths indices)
where f t (cat, path, entryNr, kind) = xml thing [("id", indexPathId2 ctx entryNr cat path kind)] t
commasAnd :: [TextBuilder.Builder] -> TextBuilder.Builder
commasAnd [] = undefined
commasAnd [x] = x
commasAnd [x, y] = x ++ " and " ++ y
commasAnd [x, y, z] = x ++ ", " ++ y ++ ", and " ++ z
commasAnd (x : y) = x ++ ", " ++ commasAnd y
abbrTitle :: Text -> Bool -> RenderContext -> Text
abbrTitle "bibliography" _ _ = "Bibliography"
abbrTitle abbr includeAbbr ctx
| "tab:" `isPrefixOf` abbr
, Just Table{..} <- tableByAbbr (draft ctx) abbr =
"Table " ++ Text.pack (show tableNumber) ++ ": " ++
LazyText.toStrict (TextBuilder.toLazyText $ render tableCaption ctx{noTags=True})
| Just sec@Section{..} <- sectionByAbbr (draft ctx) abbr =
LazyText.toStrict $ TextBuilder.toLazyText $
secnumText sec ++ " " ++
render sectionName ctx{noTags=True} ++
TextBuilder.fromText (if includeAbbr then " [" ++ abbr ++ "]" else "")
| otherwise = ""
renderBreak :: RenderContext -> TextBuilder.Builder
renderBreak ctx = if noTags ctx then "\n" else "<br>"
renderIndexLink :: String -> [(ArgKind, [LaTeXUnit])] -> RenderContext -> TextBuilder.Builder
renderIndexLink cmd [(FixArg, txt), (FixArg, [TeXRaw cat]), (FixArg, rawIndexPath), (FixArg, abbr_arg)] ctx
| not (noTags ctx)
, Just abbr <- mabbr = render anchor
{ aText = render txt ctx{inLink=True}
, aHref = (if abbrIsOnPage abbr (page ctx) then "" else linkToSectionHref SectionToSection abbr)
++ indexPathHref cat kind p
, aTitle = abbrTitle abbr True ctx
, aClass = if cmd == "hiddenindexlink" then "hidden_link" else ""
} ctx
| otherwise = render txt ctx
where
(p, kind) = parseIndex rawIndexPath
resolved :: Maybe Text
resolved = case Map.lookup p $ indexEntriesByPath (draft ctx) of
Just entries
| (hd:_) <- [ abbreviation
| (_, IndexEntry{indexEntrySection=abbreviation, indexEntryKind}) <- entries
, indexEntryKind == kind
, not ("gram." `isPrefixOf` abbreviation) ] -> Just hd
_ -> Nothing
traceIfBad
| resolved == Nothing, cat /= "grammarindex", cat /= "bibliography" =
trace $ "\nbad index link: " ++ show (cat, rawIndexPath)
++ "\nlookup result: " ++ show (Map.lookup p $ indexEntriesByPath (draft ctx))
| otherwise = id
mabbr = traceIfBad $ case abbr_arg of
[] -> resolved
[TeXRaw x] -> Just x
y -> error $ "bad indexlink arg: " ++ show y
renderIndexLink _ _ _ = error "bad indexlink"
instance Render LaTeXUnit where
render (TeXRaw x ) = \RenderContext{..} -> TextBuilder.fromText
$ (if rawHyphens then id else replace "--" "–" . replace "---" "—")
$ (if not inCodeBlock then replace "''" "”" else id)
$ (if rawTilde then id else replace "~" " ")
$ (if not insertBreaks then id else
replace "::" (zwsp ++ "::" ++ zwsp) .
replace "\1" "__" .
replace "_" (if noTags then "_­" else "_<span class='shy'></span>") .
replace "__" "\1")
$ (if replXmlChars then replaceXmlChars else id)
$ x
render (TeXComm "br" _ _ ) = renderBreak
render TeXLineBreak = renderBreak
render (TeXComm "break" _ [] ) = renderBreak
render (TeXBraces t ) = render t
render m@(TeXMath _ _ ) = renderMath [m]
render (TeXComm "commentellip" _ []) = const $ spanTag "comment" "/* ... */"
render (TeXComm "ensuremath" _ [(FixArg, x)]) = renderMath x
render (TeXComm "hyperref" _ [_, (FixArg, x)]) = render x
render (TeXComm "label" _ [(FixArg, [TeXRaw x])]) = render anchor{aId = x, aClass = "index"}
render (TeXComm "ref*" x y) = render (TeXComm "ref" x y)
render (TeXComm "ref" _ [(FixArg, concatRaws -> [TeXRaw abbr])]) = \ctx@RenderContext{..} ->
let
linkText :: TextBuilder.Builder
linkText
| "tab:" `isPrefixOf` abbr
, Just Table{..} <- tableByAbbr draft abbr = TextBuilder.fromString $ show tableNumber
| "fig:" `isPrefixOf` abbr
, Figure{..} <- figureByAbbr draft abbr = TextBuilder.fromString $ show figureNumber
| "eq:" `isPrefixOf` abbr
, f@Formula{..} <- formulaByAbbr draft abbr = TextBuilder.fromText $ fullFormulaNumber f
| otherwise = squareAbbr (not noTags) abbr
renderLabelRef sec =
simpleRender2 anchor{
aHref = abbrHref (abbreviation sec) ctx ++ "#" ++ abbr,
aText = squareAbbr (not noTags) (abbreviation sec),
aTitle = abbrTitle (abbreviation sec) False ctx }
renderSectionRef =
simpleRender2 anchor{
aHref = abbrHref abbr ctx,
aText = linkText,
aTitle = abbrTitle abbr False ctx }
in if noTags then linkText else
case Map.lookup abbr (labels draft) of
Just sec -> renderLabelRef sec
Nothing
| SectionPage pageSec <- page, abbreviation pageSec == abbr -> linkText
| otherwise -> renderSectionRef
render (TeXComm "iref" _ [(FixArg, [TeXRaw abbrs])]) = \ctx ->
let renderAbbr abbr = render (TeXComm "ref" "" [(FixArg, [TeXRaw abbr])]) ctx
in " (" ++ mconcat (intersperse ", " $ map (renderAbbr . Text.strip) $ Text.splitOn "," abbrs) ++ ")"
render (TeXComm "nopnumdiffref" _ [(FixArg, [TeXRaw (Text.splitOn "," -> abbrs)])]) = \ctx ->
let f abbr = simpleRender2 anchor{aHref = abbrHref abbr ctx, aText = squareAbbr True abbr}
in "<b>Affected " ++ (if length abbrs == 1 then "subclause" else "subclauses") ++ ":</b> "
++ commasAnd (map f abbrs)
render (TeXComm "weblink" _ [(FixArg, text), (FixArg, href)])
= render anchor
{ aText = simpleRender2 text
, aHref = simpleRender href}
render (TeXComm "url" _ [(FixArg, u)])
= render anchor
{ aText = simpleRender2 u
, aHref = simpleRender u }
render (TeXComm "link" _ [(FixArg, txt), (FixArg, [TeXRaw abbr])])
= \ctx -> if noTags ctx then render txt ctx else render anchor{
aHref = abbrHref abbr ctx,
aText = render txt ctx{inLink=True},
aTitle = abbrTitle abbr True ctx} ctx
render (TeXComm c _ l) | c `elem` ["indexlink", "hiddenindexlink"] = renderIndexLink c l
render (TeXComm "color" _ _) = const ""
render (TeXComm "textcolor" _ [_, (FixArg, x)]) = render x
render (TeXComm "textsmaller" _ [_, (FixArg, x)]) = render x
render (TeXComm "terminal" _ [(FixArg, x)]) = spanTag "terminal" . flip highlightLines x
render (TeXComm "texttt" _ [(FixArg, x)]) = \ctx ->
(if noTags ctx then id else spanTag "texttt") $ render x ctx{rawHyphens = True, insertBreaks = True}
render (TeXComm "literaltcode" _ [(FixArg, x)]) = spanTag "literal" . spanTag "texttt" . render x
render (TeXComm cmd _ [(FixArg, x)])
| cmd `elem` ["tcode"] = \ctx ->
if noTags ctx then render x ctx{rawHyphens=True, insertBreaks=True}
else spanTag (if inCodeBlock ctx then "tcode_in_codeblock" else "texttt") $
if not (inComment ctx) && not (inLink ctx) && not (inSectionTitle ctx)
then highlightLines ctx{rawHyphens=True, insertBreaks=True} x
else render x ctx{rawHyphens=True, insertBreaks=True}
render (TeXComm "noncxxtcode" _ [(FixArg, x)]) = \ctx ->
spanTag (if inCodeBlock ctx then "tcode_in_codeblock" else "texttt") $
render x ctx{rawHyphens=True, insertBreaks=True}
render (TeXComm "textbf" _ [(FixArg, x)]) = ("<b>" ++) . (++ "</b>") . render x
render (TeXComm "index" _
[ (FixArg, [TeXRaw (Text.unpack -> read -> entryNr)])
, (OptArg, [TeXRaw category])
, (FixArg, (parseIndex -> (p, kind)))
])
= \ctx -> if noTags ctx then "" else case kind of
Just IndexClose -> ""
Just (See _ _) -> ""
_ -> render anchor
{ aId = indexPathId2 ctx entryNr category p kind
, aClass = "index"} ctx
render (TeXComm "indexedspan" _ [(FixArg, text), (FixArg, indices)]) =
\ctx -> (if noTags ctx then id else renderIndexed ctx "span" indices) $ render text ctx
render (TeXEnv "indexeditemdecl" [(FixArg, indices)] t) = \ctx ->
let
link = anchor
{ aClass = "itemDeclLink"
, aHref = "#" ++ urlChars (indexPathId3 ctx indices)
, aText = "🔗" }
in
renderIndexed ctx "div" indices $
xml "div" [("class", "itemdecl")] $
xml "div" [("class", "marginalizedparent")] (render link ctx) ++
xml "code" [("class", "itemdeclcode")] (TextBuilder.fromText $ Text.dropWhile (== '\n') $ LazyText.toStrict $ TextBuilder.toLazyText $ highlightLines ctx{rawTilde=True, rawHyphens=True} t)
render (TeXComm "discretionary" _ _) = const (TextBuilder.fromText zwsp)
render (TeXComm "ifthenelse" _ [_, _, (FixArg, x)]) = render x
render (TeXComm "multicolumn" _ [(FixArg, [TeXRaw n]), _, (FixArg, content)]) = xml "td" [("colspan", n)] . render content
render (TeXComm "leftshift" _ [(FixArg, content)]) =
(spanTag "mathsf" "lshift" ++) . xml "sub" [("class", "math")] . render content
render (TeXComm "verb" _ [(FixArg, a)]) = \c -> xml "code" [] $ render a c{rawTilde=True, rawHyphens=True}
render (TeXComm "footnoteref" _ [(FixArg, [TeXRaw n])]) = \ctx -> flip render ctx $ anchor
{ aClass = "footnoteref"
, aText = TextBuilder.fromText n
, aId = "footnoteref-" ++ n
, aTitle = (!! 3) $ iterate (Text.replace " " " ")
$ Text.replace "\n" " "
$ Text.replace "'" "'"
$ LazyText.toStrict $ TextBuilder.toLazyText
$ mconcat $ map (flip render ctx{noTags = True})
$ footnoteContent $ snd
$ footnotes (draft ctx) !! ((read (Text.unpack n) :: Int) - 1)
, aHref =
(if isFullPage (page ctx) || isSectionPage (page ctx) then "" else "SectionToSection/" ++ paraUrl ctx)
++ "#footnote-" ++ n }
render (TeXComm "raisebox" _ args)
| (FixArg, concatRaws -> [TeXRaw d]) <- head args
, (FixArg, content) <- Prelude.last args =
let neg s
| Text.head s == '-' = Text.tail s
| otherwise = "-" ++ s
in xml "span" [("style", "position: relative; top: " ++ neg d)] . render content
render (TeXComm "parbox" _ [_, (FixArg, x)]) = render x
render (TeXComm "term" _ [(FixArg, x)]) =
\sec ->
let
i = "def:" ++ asId x
-- It's tempting to use 'term:' instead of 'def:' here, but if we do that,
-- URLs break when upstream promotes a \term to a \defn.
in render anchor
{ aText = "<i>" ++ render x sec ++ "</i>"
, aId = i
, aHref = "#" ++ urlChars i
, aClass = "hidden_link" } sec
render (TeXComm "texorpdfstring" _ [_, (FixArg, x)]) = render x
render (TeXComm " " _ []) = return " "
render (TeXComm "\n" _ []) = return "\n"
render (TeXComm "textit" _ [(FixArg, x)]) = \c -> (if noTags c then id else xml "i" []) $ render x c{rawTilde = False}
render (TeXComm "c" _ [(FixArg, [TeXRaw "t"])]) = return "ţ"
render (TeXComm s _ [])
| s == "caret" = return "^"
| s `elem` literal = return $ TextBuilder.fromString s
| Just x <-
lookup s simpleMacros = return $ TextBuilder.fromText x
| s `elem` kill = return ""
| otherwise = return $ spanTag (Text.pack s) ""
render (TeXComm "class" _ [(FixArg, [TeXRaw cls]), (FixArg, [TeXComm "href" _ [(FixArg, [TeXRaw href]), (FixArg, text)]])])
= \ctx -> render anchor{aHref=href, aText=render text ctx, aClass=cls} ctx
render (TeXComm "class" _ [(FixArg, [TeXRaw cls]), (FixArg, x)])
= \ctx -> spanTag cls $ render x ctx
render (TeXComm "href" _ [(FixArg, [TeXRaw href]), (FixArg, text)])
= \ctx -> render anchor{aHref=href, aText=render text ctx} ctx
render (TeXComm "ucode" _ [(FixArg, code)]) = spanTag "ucode" . render (TeXRaw "U+" : code)
render (TeXComm x _ s)
| x `elem` kill = return ""
| null s, Just y <-
lookup x simpleMacros = return $ TextBuilder.fromText y
| [(FixArg, z)] <- s, Just y <-
lookup x simpleMacros = (TextBuilder.fromText y ++) . render z
| otherwise = \ctx -> (if noTags ctx then id else spanTag (Text.pack x)) $ render (s >>= snd) ctx
render (TeXEnv "itemdecl" [(FixArg, [TeXRaw num])] t) = \c ->
let
i = case [(icat, ipath) | (icat, ipath, _inum, Just DefinitionIndexEntry) <- indexPaths t] of
[(icat, ipath)] -> indexPathId icat (Just DefinitionIndexEntry) ipath
_ -> mconcat (idPrefixes c) ++ "itemdecl:" ++ num
link = anchor{aClass="itemDeclLink", aHref="#" ++ urlChars i, aText="🔗"}
in
xml "div" [("class", "itemdecl"), ("id", i)] $
xml "div" [("class", "marginalizedparent")] (render link c) ++
xml "code" [("class", "itemdeclcode")] (TextBuilder.fromText $ Text.dropWhile (== '\n') $ LazyText.toStrict $ TextBuilder.toLazyText $ highlightLines c{rawTilde=True, rawHyphens=True} t)
render env@(TeXEnv e args t)
| e `elem` makeSpan = \ctx -> (if noTags ctx then id else spanTag (Text.pack e)) (render t ctx)
| e `elem` makeDiv = xml "div" [("class", Text.pack e)] . render t
| isMath env && hasComplexMath True [env] = renderComplexMath [env]
| isCodeblock env = renderCodeblock e args t
| e == "minipage", [e2@(TeXEnv _ _ cb)] <- trim t, isCodeblock e2 =
xml "div" [("class", "minipage")] . renderCodeblock "codeblock" [] cb
| e == "outputblock" = renderOutputblock t
| e == "itemdescr" = render t
| e == "thebibliography" = render t
| otherwise = error $ "render: unexpected " ++ show env
instance Render Int where render = return . TextBuilder.fromString . show
instance Render IndexComponent where
render IndexComponent{..} = render indexKey
instance Render IndexEntry where
render IndexEntry{indexEntryKind=Just (See also x), ..} = \ctx ->
"<i>" ++ (if also then "see also" else "see") ++ "</i> " ++
render (anchor
{ aHref = "#:" ++
(urlChars $
replace " " "_" $
replace ", " "," $
indexKeyContent x)
, aText = render x ctx}) ctx
render IndexEntry{indexEntryKind=Just IndexClose} = return ""
render IndexEntry{..} =
return $ simpleRender2 anchor
{ aHref = "SectionToSection/" ++ urlChars indexEntrySection
++ indexPathHref indexCategory indexEntryKind indexPath
, aText = (if indexEntryKind == Just DefinitionIndexEntry then xml "b" [] else id) $ squareAbbr True indexEntrySection }
indexDisplayOrder :: IndexComponent -> (([(Int, Int)], Int), ([(Int, Int)], Int))
indexDisplayOrder y = (f (indexSortKey y), f (indexKey y))
where
g :: Char -> (Int, Int)
g c
| isDigit c = (1, ord c)
| isAlpha c = (2, ord (toLower c))
| otherwise = (0, ord c)
he :: String -> ([(Int, Int)], Int)
he x = (map g x, if isUpper (head x) then 0 else 1)
f = he . Text.unpack . indexKeyContent
instance Render [(IndexComponent, IndexNode)] where
render tree ctx = go [] tree
where
IndexPage cat = page ctx
go :: IndexPath -> [(IndexComponent, IndexNode)] -> TextBuilder.Builder
go up x = mconcat $ f up . (sortOn (indexDisplayOrder . fst) x)
f :: IndexPath -> (IndexComponent, IndexNode) -> TextBuilder.Builder
f up (comp, IndexNode{..}) =
let
up' = up ++ [comp]
in
xml "div" [("id", indexPathId cat Nothing up')] $
xml "div" [("class", "indexitems")] $
TextBuilder.fromText (
Text.intercalate ", " (nub $ filter (/= "") $ map (LazyText.toStrict . TextBuilder.toLazyText) $ render comp ctx : flip render ctx . indexEntries)) ++
go up' (Map.toList indexSubnodes)
data IndexHeading = Symbols | Numbers | Letter Char
deriving (Eq, Ord)
instance Show IndexHeading where
show Symbols = "Symbols"
show Numbers = "Numbers"
show (Letter c) = [c]
indexHeading :: IndexComponent -> IndexHeading
indexHeading (indexSortKey -> indexKeyContent -> Text.head -> c)
| isDigit c = Numbers
| isAlpha c = Letter (toUpper c)
| otherwise = Symbols
indexSortKey :: IndexComponent -> LaTeX
indexSortKey IndexComponent{..}
| distinctIndexSortKey /= [] = distinctIndexSortKey
| otherwise = indexKey
renderIndex :: RenderContext -> IndexTree -> TextBuilder.Builder
renderIndex ctx tree
| name `elem` ["generalindex", "libraryindex"] = mconcat $ ["<hr>"] ++ linklines ++ ["<hr>"] ++ map sub p
| otherwise = render (Map.toList tree) ctx
where
IndexPage name = page ctx
p = partitionBy (indexHeading . fst) $ Map.toList tree
sub (n, ii) = h 2 (render anchor{aText=TextBuilder.fromText $ Text.pack (show n), aId=Text.pack (show n)} ctx) ++ render ii ctx
(symnum, rest) = splitAt 2 p
linklines = map (h 2 . mconcat . intersperse " " . map (li . fst)) [symnum, rest]
li n = render anchor{aText = TextBuilder.fromText $ Text.pack (show n), aHref = "#" ++ Text.pack (show n)} ctx
renderTab :: Bool -> Table -> Text -> Bool -> Bool -> RenderContext -> TextBuilder.Builder
renderTab stripTab Table{..} href boldCaption linkifyTableNum ctx =
xml "div" [("class", "numberedTable"), ("id", id_)] $ -- todo: multiple abbrs?
(if boldCaption then "<b>" else "") ++
"Table " ++ tableNumF (render tableNumber ctx) ++ " — " ++
render tableCaption ctx ++
" " ++
render anchor{aText="[" ++ TextBuilder.fromText tableAbbr ++ "]", aHref=href} ctx ++
(if boldCaption then "</b>" else "") ++
"<br>" ++
renderTable columnSpec tableBody ctx
where
tableNumF = if linkifyTableNum then linkify anchor{aHref = "#" ++ id_} ctx else id
id_ = (if stripTab then replace "tab:" "" else id) tableAbbr
linkify :: Anchor -> RenderContext -> TextBuilder.Builder -> TextBuilder.Builder
linkify a ctx txt = render a{aText=txt} ctx
renderFig :: Bool -> Figure -> Text -> Bool -> Bool -> RenderContext -> TextBuilder.Builder
renderFig stripFig Figure{..} href boldCaption linkifyFigureNum ctx =
xml "div" [("class", "figure"), ("id", id_)] $
TextBuilder.fromText figureSvg ++ "<br>" ++
(if boldCaption then "<b>" else "") ++
"Figure " ++ figureNumF (render figureNumber ctx) ++ " — " ++
render figureName ctx ++ "  " ++
render anchor{aText=squareAbbr False figureAbbr, aHref=href} ctx ++
(if boldCaption then "</b>" else "")
where
figureNumF = if linkifyFigureNum then linkify anchor{aHref="#" ++ id_} ctx else id
id_ = (if stripFig then replace "fig:" "" else id) figureAbbr
data RenderItem = RenderItem { listOrdered :: Bool, item :: Item }
spacedJoin :: TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder
spacedJoin x y
| TextBuilder.toLazyText x == "" = y
| TextBuilder.toLazyText y == "" = x
| otherwise = x ++ " " ++ y
instance Render RenderItem where
render RenderItem{item=Item Nothing mlabel elems paras} ctx
= xml "li" attrs $ render elems ctx ++ renderLatexParas paras ctx
where
attrs
| Just [TeXRaw l] <- mlabel = [("id", l)]
| otherwise = []
render RenderItem{item=Item (Just nn) mlabel elems paras, ..} ctx
| listOrdered =
xml "tr" [("id", thisId)] $
(xml "td" [] (case mlabel of
Nothing -> render link ctx'
Just label -> render anchor{aHref = linkHref, aText=simpleRender2 label} ctx' ++ " ")) ++
(xml "td" [] content)
| otherwise =
xml "li" [("id", thisId)] $ case mlabel of
Nothing -> xml "div" [("class", "marginalizedparent"), ("style", "left:" ++ left)] (render link ctx') ++ content
Just label ->
render anchor{aHref = linkHref, aText=simpleRender2 label} ctx'
++ " " ++ content
where
content = spacedJoin (render elems ctx') (renderLatexParas paras ctx')
left
| listOrdered = "-4.5em"
| otherwise = simpleRender (-marginalizedParentLeft - ulPaddingLeft * (length nn - 1) - extraIndentation ctx) ++ "mm"
ulPaddingLeft = 9
marginalizedParentLeft = 18
thisId = mconcat (idPrefixes ctx) ++ Text.pack (Prelude.last nn)
ctx' = ctx{ idPrefixes = idPrefixes ctx ++ [Text.pack (Prelude.last nn) ++ "."] }
dottedNumber = Text.intercalate "." (Text.pack . nn)
linkText
| listOrdered =
let
s = Prelude.last nn
punct
| isAlpha (head s) = ")"
| otherwise = "."
in
Text.pack $ s ++ punct
| otherwise = "(" ++ dottedNumber ++ ")"
linkClass
| listOrdered = "enumerated_item_num"
| otherwise = "marginalized"
linkHref = "#" ++ thisId
link = anchor{aClass=linkClass, aHref=linkHref, aText=TextBuilder.fromText linkText}
paraUrl :: RenderContext -> Text
paraUrl RenderContext{..} = urlChars $ abbreviation $ case nearestEnclosing of
Left p -> paraSection p
Right s -> s
prependSentence :: Sentence -> TeXPara -> TeXPara
prependSentence s (TeXPara ss) = TeXPara (s : ss)
instance Render Footnote where
render (Footnote n content) ctx =
xml "div" [("class", "footnote"), ("id", i)] $
renderParas (mapHead (prependSentence footnoteNum) content)
where
footnoteNum = Sentence Nothing [HtmlElement (LazyText.toStrict $ TextBuilder.toLazyText $ render (link, backlink) ctx)]
ctx' = ctx{idPrefixes = [i ++ "."]}
backlink = anchor{aText = linkText, aHref = "#footnoteref-" ++ num, aClass = "footnoteBacklink"}
renderParas [] = ""
renderParas (p:pp) = xml "div" [("class", "texpara")] (render p ctx') ++ renderParas pp
num = Text.pack $ show n
i = "footnote-" ++ num
footnoteIsOnPage = isFullPage (page ctx) || isSectionPage (page ctx)
linkText = TextBuilder.fromText $ num ++ ")"
link = anchor
{ aText = linkText
, aClass = "footnotenum"
, aHref =
(if footnoteIsOnPage then "" else "SectionToSection/" ++ paraUrl ctx)
++ "#" ++ i }
noWrapSpace :: TextBuilder.Builder
noWrapSpace = " "
instance Render Note where
render Note{..} ctx = xml "div" [("id", i), ("class", "note")] (renderParas True noteContent)
where
prefix = "[<i>" ++ TextBuilder.fromText noteLabel ++ " " ++ render link ctx ++ "</i>: "
suffix = " —" ++ noWrapSpace ++ "<i>end note</i>]"
renderParas _ [] = ""
renderParas isFirst (p:pp) = xml "div" [("class", "texpara")] ((if isFirst then prefix else "") ++ render p ctx ++ (if null pp then suffix else "")) ++ renderParas False pp
i = mconcat (dropWhileEnd (isDigit . Text.head) (idPrefixes ctx)) ++ "note-" ++ noteNum
noteNum = Text.pack $ show noteNumber
link = anchor{aHref = "#" ++ i, aText = TextBuilder.fromText noteNum }
instance Render Example where
render Example{..} ctx
| noTags ctx =
"[Example: "
++ renderLatexParas exampleContent ctx
++ " — end example] "
| otherwise = xml "div" [("id", i), ("class", "example")] (renderParas True exampleContent)
where
prefix = "[<i>Example " ++ render link ctx ++ "</i>: "
suffix = " —" ++ noWrapSpace ++ "<i>end example</i>]"
renderParas _ [] = ""
renderParas isFirst (p:pp) = xml "div" [("class", "texpara")] ((if isFirst then prefix else "") ++ render p ctx ++ (if null pp then suffix else "")) ++ renderParas False pp
i = mconcat (dropWhileEnd (isDigit . Text.head) (idPrefixes ctx)) ++ "example-" ++ exNum
exNum = Text.pack $ show exampleNumber
link = anchor{aHref = "#" ++ i, aText = TextBuilder.fromText exNum }
instance Render Formula where
render f@Formula{..} ctx =
xml "div" [("class", "formula"), ("id", formulaAbbr)] $
doRenderComplexMath [TeXMath Square ([tag] ++ formulaContent)] ctx
where tag = TeXComm "tag" "" [(FixArg, [TeXRaw (fullFormulaNumber f)])]
fullFormulaNumber :: Formula -> Text
fullFormulaNumber Formula{..} = Text.pack $ show chapterNum ++ "." ++ show formulaNumber
where chapterNum = sectionNumber $ chapterOfSection formulaSection
nontermDef :: LaTeX -> Maybe Text
nontermDef t
| [n] <- [n | ("grammarindex", [IndexComponent{distinctIndexSortKey=[TeXRaw n]}], _inum, Just DefinitionIndexEntry) <- indexPaths t] = Just n
| otherwise = Nothing
instance Render Element where
render (HtmlElement html) = const $ TextBuilder.fromText html
render (LatexElement x) = render x
render (Codeblock x) = render x
render (Itemdescr x) = xml "div" [("class", "itemdescr")] . renderLatexParas x
render (NoteElement x) = render x
render (ExampleElement x) = render x
render (Bnf e t) = xml "div" ([("class", Text.pack e)] ++ idattr) . render t
where
idattr
| Just nt <- nontermDef t = [("id", "nt:" ++ nt)]
| otherwise = []
render (TableElement t) = \ctx ->
renderTab False t ("./SectionToSection/" ++ tableAbbr t) False True ctx{idPrefixes=[tableAbbr t++"-"]}
render (FigureElement f) = renderFig False f ("./SectionToSection/" ++ figureAbbr f) False True
render (FormulaElement f) = render f
render (Tabbing t) =
xml "pre" [] . TextBuilder.fromText . htmlTabs . LazyText.toStrict . TextBuilder.toLazyText . render (preprocessPre t) -- todo: this is horrible
render Enumerated{..} = xml t [("class", Text.pack enumCmd)] .
concatRender (RenderItem (enumCmd == "enumerate") . enumItems)
where
t = case enumCmd of
"enumerate" -> "table"
"itemize" -> "ul"
"description" -> "ul"
"thebibliography" -> "ul"
_ -> undefined
class HasComplexMath a where
hasComplexMath :: Bool -> a -> Bool
instance HasComplexMath LaTeXUnit where
hasComplexMath mathMode (TeXRaw x) = mathMode && Text.any (`elem` ("+-*/^_=' " :: String)) (Text.strip x)
hasComplexMath m (TeXComm c _ args)
| c `elem` words "frac sum binom int sqrt lfloor rfloor lceil rceil log mathscr mapsto cdot bmod" = True
| c `elem` words "tcode" = hasComplexMath False (map snd args)
| otherwise = hasComplexMath m (map snd args)
hasComplexMath _ (TeXMath _ x) = hasComplexMath True x
hasComplexMath m (TeXBraces x) = hasComplexMath m x
hasComplexMath m (TeXEnv e _ args)
| e `elem` ["array", "eqnarray"] = True
| otherwise = hasComplexMath m args
hasComplexMath _ TeXLineBreak = False
instance HasComplexMath a => HasComplexMath [a] where
hasComplexMath m = any (hasComplexMath m)
data Page
= SectionPage Section
| TablePage Table
| FigurePage Figure
| FullPage
| IndexPage Text {- category -}
| XrefDeltaPage
| FootnotesPage
| TablesPage
| FiguresPage
| TocPage
| ExpandedTocPage
isSectionPage :: Page -> Bool
isSectionPage (SectionPage _) = True
isSectionPage _ = False
data RenderContext = RenderContext
{ page :: Page
, draft :: Draft
, nearestEnclosing :: Either Paragraph Section
, rawHyphens :: Bool -- in real code envs /and/ in \texttt
, rawTilde :: Bool -- in real code envs but not in \texttt
, rawSpace :: Bool
, insertBreaks :: Bool
, inLink :: Bool -- so as not to linkify grammarterms that appear as part of a defined/linkified term/phrase
, inCodeBlock :: Bool -- in codeblocks, some commands like \tcode have a different meaning
, inComment :: Bool -- in comments, \tcode should not be highlighted
, inSectionTitle :: Bool -- in section titles, there should be no highlighting
, replXmlChars :: Bool -- replace < with <, etc
, noTags :: Bool -- means we're rendering the contents of e.g. a "title" attribute which cannot contain tags/elements
, extraIndentation :: Int -- in em
, idPrefixes :: [Text] }
defaultRenderContext :: RenderContext
defaultRenderContext = RenderContext
{ page = error "no page"
, draft = error "no draft"
, nearestEnclosing = error "no para/sec"
, rawHyphens = False
, rawTilde = False
, rawSpace = False
, insertBreaks = False
, inLink = False
, inCodeBlock = False
, inComment = False
, inSectionTitle = False
, replXmlChars = True
, noTags = False
, extraIndentation = 0
, idPrefixes = [] }
squareAbbr :: Bool -> Abbreviation -> TextBuilder.Builder
squareAbbr softHyphens =
("[" ++) . (++ "]") .
TextBuilder.fromText .
(if softHyphens then Text.replace "." ".<span class='shy'></span>" else id)
parentLink :: Section -> Abbreviation -> Text
parentLink parent child
| Just sub <- Text.stripPrefix (abbreviation parent ++ ".") child = sub
| otherwise = child
abbrHref :: Abbreviation -> RenderContext -> Text
abbrHref abbr RenderContext{..}
| SectionPage sec <- page, abbreviation sec == abbr = "#"
| abbrIsOnPage abbr page = "#" ++ case page of
SectionPage sec -> urlChars (parentLink sec abbr)
TablesPage | Just abbr' <- Text.stripPrefix "tab:" abbr -> urlChars abbr'
_ -> urlChars abbr
| "fig:" `isPrefixOf` abbr =
let Figure{figureSection=Section{..}, ..} = figureByAbbr draft abbr
in "SectionToSection/" ++ urlChars abbr ++ "#" ++ urlChars figureAbbr
| "eq:" `isPrefixOf` abbr =
let Formula{formulaSection=Section{..}, ..} = formulaByAbbr draft abbr
in "SectionToSection/" ++ urlChars abbr ++ "#" ++ urlChars formulaAbbr
| "tab:" `isPrefixOf` abbr =
case tableByAbbr draft abbr of
Just Table{tableSection=Section{..}, ..} -> "SectionToSection/" ++ urlChars abbreviation ++ "#" ++ urlChars tableAbbr
_ -> "#" ++ urlChars abbr
| otherwise = linkToSectionHref SectionToSection abbr
prepMath :: LaTeX -> String
prepMath = Text.unpack . renderLaTeX . (>>= cleanup) . replaceTcode
where
replaceTcode = mapCommandName (\x -> if x == "tcode" then "texttt" else x)
cleanupText :: LaTeX -> LaTeX -- MathJax does not support \, in \text
cleanupText [] = []
cleanupText (TeXComm "," _ [] : x) = TeXRaw " " : cleanupText x
cleanupText (x : y) = cleanup x ++ cleanupText y
cleanup :: LaTeXUnit -> LaTeX
cleanup (TeXComm "texttt" _ [(FixArg, TeXComm "texttt" "" [(FixArg, x)] : y)]) =
cleanup (TeXComm "texttt" "" [(FixArg, x ++ y)])
cleanup (TeXComm "texttt" _ [(FixArg, TeXRaw x : y)]) =
TeXComm "texttt" "" [(FixArg, [TeXRaw x])] : cleanup (TeXComm "texttt" "" [(FixArg, y)])
cleanup (TeXComm "texttt" _ [(FixArg, TeXComm "textit" "" x : y)]) =
[TeXComm "class" "" [(FixArg, [TeXRaw "textit"]), (FixArg, [TeXComm "texttt" "" x])]]
++ cleanup (TeXComm "texttt" "" [(FixArg, y)])
-- \texttt{\textit{x}y} -> \class{textit}{\texttt{x}}\texttt{y}
-- MathJax does not support \textit inside \texttt
cleanup (TeXComm "nontcode" _ x) = [TeXComm "texttt" "" (map (second (>>= cleanup)) x)]
cleanup (TeXComm "ensuremath" _ [(FixArg, x)]) = x >>= cleanup
cleanup (TeXComm "discretionary" _ _) = []
cleanup (TeXComm "hfill" _ []) = []
cleanup (TeXComm "text" ws [(FixArg, x)]) = [TeXComm "text" ws [(FixArg, cleanupText x)]]
cleanup (TeXComm "break" _ []) = []
cleanup (TeXComm "br" _ []) = []
cleanup (TeXComm "-" _ []) = []
cleanup (TeXComm "quad" _ []) = [TeXRaw " "] -- because MathJax does not support \quad
cleanup (TeXComm x ws y) = [TeXComm x ws (map (second (>>= cleanup)) y)]
cleanup x@(TeXRaw _) = [x]
cleanup (TeXBraces x) = [TeXBraces (x >>= cleanup)]
cleanup (TeXEnv x y z) = [TeXEnv x (map (second (>>= cleanup)) y) (z >>= cleanup)]
cleanup (TeXMath Dollar [c@(TeXComm "text" _ _)]) = cleanup c
-- because the draft sources have \bigoh{$\text{bla}$}, which MathJax doesn't support
cleanup (TeXMath x y) = [TeXMath x (y >>= cleanup)]
cleanup x@TeXLineBreak = [x]
renderMath :: LaTeX -> RenderContext -> TextBuilder.Builder
renderMath [TeXMath Dollar (c@(TeXComm "noncxxtcode" _ _) : more)] ctx =
render c ctx ++ renderMath [TeXMath Dollar more] ctx
renderMath m ctx
| noTags ctx = renderSimpleMath m ctx
| hasComplexMath True m = renderComplexMath (mapTeX replaceNonCxxTcode m) ctx
| otherwise = spanTag (mathKind m) $ renderSimpleMath m ctx
where
mathKind [TeXMath Square _] = "mathblock"
mathKind _ = "math"
replaceNonCxxTcode :: LaTeXUnit -> Maybe LaTeX
replaceNonCxxTcode (TeXComm "noncxxtcode" _ args) = Just [TeXComm "tcode" "" args]
replaceNonCxxTcode _ = Nothing
renderSimpleMath :: LaTeX -> RenderContext -> TextBuilder.Builder
renderSimpleMath [] _ = ""
renderSimpleMath (TeXRaw s : rest) sec
| tlast `elem` ["^", "_"] = if noTags sec then "�" else
renderSimpleMathUnit (TeXRaw $ Text.reverse $ Text.drop 1 s') sec
++ xml tag [] (renderSimpleMath content sec)
++ renderSimpleMath rest' sec
| otherwise = renderSimpleMathUnit (TeXRaw s) sec ++ renderSimpleMath rest sec
where
s' = Text.reverse s
tlast = Text.take 1 s'
tag = case tlast of
"^" -> "sup"
"_" -> "sub"
_ -> error ""
(content, rest') = case rest of
(a : b) -> ([a], b)
other -> (other, [])
renderSimpleMath (TeXComm "frac" _ [(FixArg, num)] : rest) sec =
"[" ++ renderSimpleMath num sec ++ "] / [" ++ renderSimpleMath den sec ++ "]" ++ renderSimpleMath rest' sec
where
(den, rest') = findDenum rest
findDenum (TeXBraces d : r) = (d, r)
findDenum (_ : r) = findDenum r
findDenum r = (r, [])
renderSimpleMath (x : y) ctx = renderSimpleMathUnit x ctx ++ renderSimpleMath y ctx
renderSimpleMathUnit :: LaTeXUnit -> RenderContext -> TextBuilder.Builder
renderSimpleMathUnit (TeXRaw s) sec =
case suffix of
Just ('^', rest) -> if noTags sec then "�" else italicise prefix ++ output "sup" rest
Just ('_', rest) -> if noTags sec then "�" else italicise prefix ++ output "sub" rest
_ -> italicise s
where
(prefix, suffix') = Text.break (`elem` ['^', '_']) s
suffix = Text.uncons suffix'
output :: Text -> Text -> TextBuilder.Builder
output tag rest =
case Text.uncons rest of
Just (c, rest') -> xml tag [] (italicise $ Text.singleton c) ++ (renderSimpleMathUnit (TeXRaw rest') sec)
Nothing -> error "Malformed math"
italicise :: Text -> TextBuilder.Builder
italicise t = if noTags sec then TextBuilder.fromText t else
case Text.span isAlpha t of
("", "") -> TextBuilder.fromString ""
("", rest) ->
case Text.uncons rest of
Just (c, rest') -> entities c ++ italicise rest'
Nothing -> error ""
(alpha, rest) -> spanTag "mathalpha" (TextBuilder.fromText alpha) ++ italicise rest
entities :: Char -> TextBuilder.Builder
entities '<' = "<"
entities '>' = ">"
entities c = TextBuilder.singleton c
renderSimpleMathUnit (TeXComm "mathtt" _ [(FixArg, x)]) ctx = spanTag "mathtt" (highlight ctx x)
renderSimpleMathUnit (TeXBraces x) sec = renderSimpleMath x sec
renderSimpleMathUnit (TeXMath Dollar m) sec = renderSimpleMath (trim m) sec
renderSimpleMathUnit (TeXMath _ m) sec = renderSimpleMath m sec
renderSimpleMathUnit other sec = render other sec
mathKey :: LaTeX -> (String, Bool)
mathKey m = case m of
[TeXMath kind t] -> (prepMath t, kind == Dollar)
[TeXEnv "eqnarray*" [] _] -> (prepMath m, False)
[TeXEnv "equation*" [] _] -> (prepMath m, False)
_ -> (prepMath m, True)
highlightCodeInMath :: RenderContext -> [Soup.Tag Text] -> TextBuilder.Builder
highlightCodeInMath ctx
( open@(Soup.TagOpen "span" (("class", cls) : _))
: Soup.TagText code
: close@(Soup.TagClose "span")
: more )
| cls `elem` ["mjx-char MJXc-TeX-type-R", "mjx-charbox MJXc-TeX-type-R"]
= TextBuilder.fromText (Soup.renderTags [open])
++ highlight ctx [TeXRaw code]
++ TextBuilder.fromText (Soup.renderTags [close])
++ highlightCodeInMath ctx more
highlightCodeInMath ctx (a:b) = TextBuilder.fromText (Soup.renderTags [a]) ++ highlightCodeInMath ctx b
highlightCodeInMath _ [] = ""
{- Unfortunately, for: \class{hidden_link}{\href{url}{bla}}
MathJax generates: <a href="url"><span class="yada hidden_link">bla</span></a>
But CSS does not let you say "apply the following style to 'a' elements that have a 'span' child with class 'hidden_link'".
So fixHiddenLinks moves the "hidden_link" class from the span to the a... -}
fixHiddenLinks :: [Soup.Tag Text] -> [Soup.Tag Text]
fixHiddenLinks (Soup.TagOpen "a" attrs : Soup.TagOpen "span" [("class", Text.words -> cls)] : rest)
| "hidden_link" `elem` cls
= Soup.TagOpen "a" (("class", "hidden_link") : attrs) :
Soup.TagOpen "span" [("class", Text.unwords $ cls \\ ["hidden_link"])] : rest
fixHiddenLinks (x:y) = x : fixHiddenLinks y
fixHiddenLinks [] = []
removeAriaLabel :: Soup.Tag Text -> Soup.Tag Text
removeAriaLabel (Soup.TagOpen x attrs) = Soup.TagOpen x (filter ((/= "aria-label") . fst) attrs)
removeAriaLabel x = x
doRenderComplexMath :: LaTeX -> RenderContext -> TextBuilder.Builder
doRenderComplexMath math ctx =
(if inComment ctx then TextBuilder.fromText . Soup.renderTags else highlightCodeInMath ctx) $
fixHiddenLinks $ map removeAriaLabel $ Soup.parseTags $ MathJax.render formula inline
where (formula, inline) = mathKey math
renderComplexMath :: LaTeX -> RenderContext -> TextBuilder.Builder
renderComplexMath math ctx = (if inline then "" else "<br>") ++ spanTag "math" (doRenderComplexMath math ctx)
where (_, inline) = mathKey math
cssClasses :: ColumnSpec -> Text
cssClasses (ColumnSpec alignment border _) =
(if border then "border " else "") ++ Text.pack (show alignment)
cssStyle :: ColumnSpec -> Maybe Text
cssStyle (ColumnSpec _ _ (Just w)) = Just $ "width:" ++ w
cssStyle _ = Nothing
renderTable :: [ColumnSpec] -> [Row [TeXPara]] -> RenderContext -> TextBuilder.Builder
renderTable colspec a = \ctx -> xml "table" [] $ mconcat (renderRow ctx . zip [1..] a)
where
combine (ColumnSpec x False w) (ColumnSpec _ True _) = ColumnSpec x True w
combine x _ = x
renderRow :: RenderContext -> (Integer, Row [TeXPara]) -> TextBuilder.Builder
renderRow ctx (rowNum, Row{..}) =
xml "tr" ([("id", rowId)] ++ cls) (renderCols ctx' rowId colspec 1 clines cells)
where
rowId = mconcat (idPrefixes ctx) ++ "row-" ++ Text.pack (show rowNum)
ctx' = ctx{idPrefixes = idPrefixes ctx ++ ["row-" ++ Text.pack (show rowNum) ++ "-"]}
cls | RowSep <- rowSep = [("class", "rowsep")]
| CapSep <- rowSep = [("class", "capsep")]
| otherwise = []
clines
| Clines clns <- rowSep = clns
| otherwise = []
renderCols :: RenderContext -> Text -> [ColumnSpec] -> Int -> [(Int, Int)] -> [Cell [TeXPara]] -> TextBuilder.Builder
renderCols _ _ _ _ _ [] = ""
renderCols ctx rowId (c : cs) colnum clines (Cell{..} : rest)
| length cs < length rest = undefined
| Multicolumn w cs' <- cellSpan =
let
cs'' = combine cs' c
colspan
| null rest = length cs + 1
| otherwise = w
in
renderCell colspan (cssClasses cs'' ++ clineClass colnum clines) (cssStyle cs'') cnt
++ renderCols ctx rowId (drop (colspan - 1) cs) (colnum + colspan) clines rest
| otherwise =
renderCell 1 (cssClasses c ++ clineClass colnum clines) (cssStyle c) cnt
++ renderCols ctx rowId cs (colnum + 1) clines rest
where
marginalizedLink = xml "div" [("class", "marginalizedparent")] (render link ctx)
link = anchor{aClass="itemDeclLink", aHref="#" ++ urlChars rowId, aText="🔗"}
cnt = (if colnum == 1 then marginalizedLink else "") ++ renderLatexParas content ctx'
ctx' = ctx{idPrefixes = idPrefixes ctx ++ ["column-" ++ Text.pack (show colnum) ++ "-"]}
renderCols _ _ [] _ _ (_ : _) = error "Too many columns"
clineClass n clines
| isJust $ find (\(begin, end) -> begin <= n && n <= end) clines =
" cline"
| otherwise = ""
renderCell :: Int -> Text -> Maybe Text -> TextBuilder.Builder -> TextBuilder.Builder
renderCell colspan classes style content = xml "td" attrs content
where
classes' = if TextBuilder.toLazyText content == "" then "empty " ++ classes else classes
attrs = [("colspan", Text.pack $ show colspan) | colspan /= 1]
++ [("class", classes')]
++ [("style", s) | Just s <- [style]]
instance Render TeXPara where
render = (mconcat .) . (intersperse " " .) . mapM render . sentences
instance Render [Element] where
render l@(LatexElement _ : _) ctx = render (spanJust l p) ctx
where
p (LatexElement e) = Just e
p _ = Nothing
render (x : y) ctx = render x ctx ++ render y ctx
render [] _ = ""
instance Render Sentence where
render Sentence{..} ctx
| (Enumerated _ _ : _) <- sentenceElems = render sentenceElems ctx -- not a real sentence
| not (noTags ctx), Just v <- i
= xml "div" [("id", v), ("class", "sentence")] $
render (case linkifyFullStop link sentenceElems of Just x -> x; Nothing -> sentenceElems) ctx
| otherwise = render sentenceElems ctx
where
i = case sentenceNumber of
Just v -> Just $ mconcat (idPrefixes ctx) ++ "sentence-" ++ Text.pack (show v)
Nothing -> Nothing
link = TeXComm "class" ""
[ (FixArg, [TeXRaw "hidden_link"])
, (FixArg, [TeXComm "href" "" [(FixArg, [TeXRaw ("#" ++ fromJust i)]), (FixArg, [TeXRaw "."])]])
] -- in math, \class and \href are recognized by mathjax
renderLatexParas :: [TeXPara] -> RenderContext -> TextBuilder.Builder
renderLatexParas pp ctx = mconcat $ map (xml "div" [("class", "texpara")] . flip render ctx) pp
-- Explicit <br/>'s are redundant in <pre>, so strip them.
preprocessPre :: LaTeX -> LaTeX
preprocessPre = concatMap f
where
f TeXLineBreak = []
f (TeXComm "br" _ []) = []
f (TeXEnv e a c) = [TeXEnv e a (preprocessPre c)]
f x = [x]
htmlTabs :: Text -> Text
htmlTabs = replace "\t" "	" -- todo: still necessary?
linkToSectionHref :: Link -> Abbreviation -> Text
linkToSectionHref link abbr = Text.pack (show link) ++ "/" ++ urlChars abbr
linkToSection :: Link -> Abbreviation -> Anchor
linkToSection link abbr = anchor{ aHref = linkToSectionHref link abbr, aText = squareAbbr True abbr }
--url :: Text -> Text
--url = urlChars . LazyText.toStrict . TextBuilder.toLazyText . flip render defaultRenderContext{replXmlChars = False}
simpleRender :: Render a => a -> Text
simpleRender = LazyText.toStrict . TextBuilder.toLazyText . simpleRender2
simpleRender2 :: Render a => a -> TextBuilder.Builder
simpleRender2 = flip render defaultRenderContext
secnum :: Int -> Text -> Section -> TextBuilder.Builder
secnum reduceIndent href se@Section{..} =
simpleRender2 (anchor{aClass=c, aHref=href, aText=secnumText se, aStyle=Text.pack style})
where
style = "min-width:" ++ show (50 + (length parents - reduceIndent) * 15) ++ "pt"
c | chapter /= NormalChapter, null parents = "annexnum"
| otherwise = "secnum"
secnumText :: Section -> TextBuilder.Builder
secnumText Section{sectionNumber=n,..}
| chapter == InformativeAnnex, null parents = "Annex " ++ chap ++ " (informative)"
| chapter == NormativeAnnex, null parents = "Annex " ++ chap ++ " (normative)"
| otherwise = intercalateBuilders "." (chap : simpleRender2 . tail ns)
where
ns = reverse $ n : sectionNumber . parents
chap :: TextBuilder.Builder
chap
| chapter == NormalChapter = simpleRender2 (head ns)
| otherwise = TextBuilder.singleton $ ['A'..] !! head ns
================================================
FILE: SectionPages.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE OverloadedStrings, RecordWildCards, TupleSections, ViewPatterns, NamedFieldPuns #-}
module SectionPages
( writeSectionFiles
, writeSingleSectionFile
, writeFiguresFile
, writeFigureFiles
, writeTablesFile
, writeTableFiles
, writeIndexFiles
, writeFootnotesFile
, writeCssFile
, writeXrefDeltaFiles
) where
import Prelude hiding ((++), (.), writeFile)
import System.Directory (createDirectoryIfMissing)
import Control.Monad (when, forM_)
import Control.Arrow (first)
import Data.Maybe (fromJust)
import System.Process (readProcess)
import qualified Data.Map as Map
import qualified Data.Text as Text
import qualified Data.Text.Lazy.Builder as TextBuilder
import LaTeXBase (LaTeXUnit(..))
import Pages (writePage, pageContent, pagePath, PageStyle(..), fileContent, outputDir, Link(..))
import Render (render, concatRender, simpleRender2, renderFig, abbrHref,
defaultRenderContext, renderTab, RenderContext(..), Page(..),linkToSection, squareAbbr,
secnum, renderLatexParas, isSectionPage, parentLink, renderIndex)
import Document
import Util (urlChars, (++), (.), h, anchor, xml, Anchor(..), Text, writeFile, intercalateBuilders)
renderParagraph :: RenderContext -> TextBuilder.Builder
renderParagraph ctx@RenderContext{nearestEnclosing=Left Paragraph{..}, draft=Draft{..}} =
(case paraNumber of
Just i -> renderNumbered (Text.pack $ show i)
Nothing -> id)
$ (if paraInItemdescr then xml "div" [("class", "itemdescr")] else id)
$ (sourceLink
++ renderLatexParas paraElems ctx'{extraIndentation=if paraInItemdescr then 12 else 0})
-- the 12 here must match div.itemdescr's margin-left value in mm
where
urlBase = Text.replace "/commit/" "/tree/" commitUrl ++ "/source/"
sourceLink :: TextBuilder.Builder
sourceLink
| Just SourceLocation{..} <- paraSourceLoc =
xml "div" [("class", "sourceLinkParent")]
$ simpleRender2 $ anchor
{ aClass = "sourceLink"
, aText = "#"
, aHref = urlBase ++ Text.pack (sourceFile ++ "#L" ++ show sourceLine) }
| otherwise = ""
renderNumbered :: Text -> TextBuilder.Builder -> TextBuilder.Builder
renderNumbered n =
let
idTag = if isSectionPage (page ctx) then [("id", mconcat (idPrefixes ctx) ++ n)] else []
a = anchor
{ aClass = "marginalized"
, aHref =
if isSectionPage (page ctx)
then "#" ++ urlChars (mconcat (idPrefixes ctx)) ++ n
else "SectionToSection/" ++ urlChars (abbreviation paraSection) ++ "#" ++ n
, aText = TextBuilder.fromText n }
classes = "para" ++
(if all (not . normative) (paraElems >>= sentences >>= sentenceElems)
then " nonNormativeOnly"
else "")
in
xml "div" (("class", classes) : idTag) .
(xml "div" [("class", "marginalizedparent")] (render a ctx') ++)
ctx' = case paraNumber of
Just n -> ctx{ idPrefixes = idPrefixes ctx ++ [Text.pack (show n) ++ "."] }
Nothing -> ctx
renderParagraph _ = undefined
tocSection :: RenderContext -> Section -> TextBuilder.Builder
tocSection _ Section{sectionKind=DefinitionSection _} = ""
tocSection ctx s@Section{..} = header ++ mconcat (tocSection ctx . subsections)
where
header = h (min 4 $ 1 + length parents) $
secnum 0 "" s ++ " "
++ render ( sectionName ++ [TeXRaw " "]
, anchor{ aHref = abbrHref abbreviation ctx, aText = squareAbbr True abbreviation, aClass="abbr_ref" })
ctx{ inSectionTitle = True }
++ "<div style='clear:right'></div>"
renderSection :: RenderContext -> Maybe Section -> Bool -> Section -> (TextBuilder.Builder, Bool)
renderSection context specific parasEmitted s@Section{abbreviation, subsections, sectionFootnotes, paragraphs}
| full = (, True) $
idDiv header ++
(if specific == Just s && any (not . isDefinitionSection . sectionKind) subsections then toc else "") ++
mconcat (map
(\p -> renderParagraph (context{nearestEnclosing=Left p,idPrefixes=if parasEmitted then [secOnPage ++ "-"] else []}))
paragraphs) ++
(if null sectionFootnotes then "" else "<div class='footnoteSeparator'></div>") ++
concatRender sectionFootnotes context{nearestEnclosing=Right s} ++
mconcat (fst . renderSection context Nothing True . subsections)
| not anysubcontent = ("", False)
| otherwise =
( header ++
mconcat (fst . renderSection context specific False . subsections)
, anysubcontent )
where
idDiv
| specific == Just s = id
| otherwise = xml "div" [("id", secOnPage), ("class", "section")]
secOnPage :: Text
secOnPage = case page context of
SectionPage parent -> parentLink parent abbreviation
_ -> abbreviation
full = specific == Nothing || specific == Just s
reduceHeaderIndent = case page context of
SectionPage p | specific == Nothing -> length (parents p) + 1
_ -> 0
header = sectionHeader reduceHeaderIndent (min 4 $ 1 + length (parents s)) s
(if specific == Nothing && isSectionPage (page context) then "#" ++ urlChars secOnPage else "")
abbr context
toc = "<hr>" ++ mconcat (tocSection context . subsections) ++ "<hr>"
abbr
| specific == Just s && not (null (parents s))
= anchor
| Just sp <- specific, sp /= s, not (null (parents s))
= anchor{aHref = "SectionToSection/" ++ urlChars abbreviation ++ "#" ++ parentLink s (Document.abbreviation sp)}
| otherwise = linkToSection
(if null (parents s) then SectionToToc else SectionToSection)
abbreviation
anysubcontent =
or $ map (snd . renderSection context specific True)
$ subsections
sectionFileContent :: PageStyle -> TextBuilder.Builder -> TextBuilder.Builder -> Text
sectionFileContent sfs title body = pageContent sfs $ fileContent pathHome title sectionPageCss body
where
pathHome = if sfs == InSubdir then "../" else ""
sectionPageCss =
"<link rel='stylesheet' type='text/css' href='" ++ pathHome ++ "expanded.css' title='Normal'>" ++
"<link rel='alternate stylesheet' type='text/css' href='" ++ pathHome ++ "colored.css' title='Notes and examples colored'>" ++
"<link rel='alternate stylesheet' type='text/css' href='" ++ pathHome ++ "normative-only.css' title='Notes and examples hidden'>"
writeSectionFile :: FilePath -> PageStyle -> TextBuilder.Builder -> TextBuilder.Builder -> IO ()
writeSectionFile n sfs title body = writePage n sfs (sectionFileContent sfs title body)
sectionHeader :: Int -> Int -> Section -> Text -> Anchor -> RenderContext -> TextBuilder.Builder
sectionHeader reduceIndent hLevel s@Section{..} secnumHref abbr_ref ctx
| isDef = xml "h4" [("style", "margin-bottom:3pt")] $ num ++ abbrR ++ name
| abbreviation == "bibliography" = h hLevel name
| otherwise = h hLevel $ num ++ " " ++ name ++ " " ++ abbrR
where
num = secnum reduceIndent secnumHref s
abbrR = simpleRender2 abbr_ref{aClass = "abbr_ref", aText = squareAbbr False abbreviation}
name = render sectionName ctx{inSectionTitle=True}
isDef = isDefinitionSection sectionKind
writeFiguresFile :: PageStyle -> Draft -> IO ()
writeFiguresFile sfs draft = writeSectionFile "fig" sfs "14882: Figures" $
"<h1>Figures <a href='SectionToToc/fig' class='abbr_ref'>[fig]</a></h1>"
++ mconcat (uncurry r . figures draft)
where
r :: Paragraph -> Figure -> TextBuilder.Builder
r p f@Figure{..} =
renderFig True f ("./SectionToSection/" ++ urlChars figureAbbr) False True ctx
where ctx = defaultRenderContext{draft=draft, nearestEnclosing=Left p, page=FiguresPage}
writeTablesFile :: PageStyle -> Draft -> IO ()
writeTablesFile sfs draft = writeSectionFile "tab" sfs "14882: Tables" $
"<h1>Tables <a href='SectionToToc/tab' class='abbr_ref'>[tab]</a></h1>"
++ mconcat (uncurry r . tables draft)
where
r :: Paragraph -> Table -> TextBuilder.Builder
r p t@Table{tableSection=Section{..}, ..} =
renderTab True t ("./SectionToSection/" ++ urlChars tableAbbr) False True ctx
where ctx = defaultRenderContext{
draft = draft,
nearestEnclosing = Left p,
page = TablesPage,
idPrefixes = [fromJust (Text.stripPrefix "tab:" tableAbbr) ++ "-"]}
writeFootnotesFile :: PageStyle -> Draft -> IO ()
writeFootnotesFile sfs draft = writeSectionFile "footnotes" sfs "14882: Footnotes" $
"<h1>List of Footnotes</h1>"
++ mconcat (uncurry r . footnotes draft)
where
r :: Section -> Footnote -> TextBuilder.Builder
r s fn = render fn defaultRenderContext{draft=draft, nearestEnclosing = Right s, page=FootnotesPage}
writeSingleSectionFile :: PageStyle -> Draft -> String -> IO ()
writeSingleSectionFile sfs draft abbr = do
let Just section@Section{..} = Document.sectionByAbbr draft (Text.pack abbr)
let baseFilename = Text.unpack abbreviation
writeSectionFile baseFilename sfs (squareAbbr False abbreviation) $ mconcat $ fst . renderSection (defaultRenderContext{draft=draft,page=SectionPage section}) (Just section) False . chapters draft
putStrLn $ " " ++ baseFilename
writeTableFiles :: PageStyle -> Draft -> IO ()
writeTableFiles sfs draft =
forM_ (snd . tables draft) $ \tab@Table{..} -> do
let
context = defaultRenderContext{draft=draft, page=TablePage tab, nearestEnclosing=Right tableSection}
header :: Section -> TextBuilder.Builder
header sec = sectionHeader 0 (min 4 $ 1 + length (parents sec)) sec "" anchor{aHref=href} context
where href="SectionToSection/" ++ urlChars (abbreviation sec) ++ "#" ++ urlChars tableAbbr
headers = mconcat $ map header $ reverse $ tableSection : parents tableSection
writeSectionFile (Text.unpack tableAbbr) sfs (TextBuilder.fromText $ "[" ++ tableAbbr ++ "]") $
headers ++ renderTab True tab "" True False context
writeFigureFiles :: PageStyle -> Draft -> IO ()
writeFigureFiles sfs draft =
forM_ (snd . figures draft) $ \fig@Figure{..} -> do
let
context = defaultRenderContext{draft=draft, page=FigurePage fig, nearestEnclosing=Right figureSection}
header :: Section -> TextBuilder.Builder
header sec = sectionHeader 0 (min 4 $ 1 + length (parents sec)) sec "" anchor{aHref=href} context
where href="SectionToSection/" ++ urlChars (abbreviation sec) ++ "#" ++ urlChars figureAbbr
headers = mconcat $ map header $ reverse $ figureSection : parents figureSection
writeSectionFile (Text.unpack figureAbbr) sfs (TextBuilder.fromText $ "[" ++ figureAbbr ++ "]") $
headers ++ renderFig True fig "" True False context
writeSectionFiles :: PageStyle -> Draft -> [IO ()]
writeSectionFiles sfs draft = flip map (zip names contents) $ \(n, content) -> do
when (sfs == InSubdir) $ createDirectoryIfMissing True (outputDir ++ n)
writeFile (pagePath n sfs) content
where
secs = Document.sections draft
renSec section@Section{..} = (Text.unpack abbreviation, sectionFileContent sfs title body)
where
title = squareAbbr False abbreviation
body = mconcat $ fst . renderSection (defaultRenderContext{draft=draft,page=SectionPage section}) (Just section) False . chapters draft
fullbody = mconcat $ fst . renderSection defaultRenderContext{draft=draft, page=FullPage} Nothing True . chapters draft
fullfile = ("full", sectionFileContent sfs "14882" fullbody)
files = fullfile : map renSec secs
names = fst . files
contents = snd . files
writeIndexFile :: PageStyle -> Draft -> String -> IndexTree -> IO ()
writeIndexFile sfs draft cat index =
writeSectionFile cat sfs ("14882: " ++ indexCatName cat) $
h 1 (indexCatName cat) ++ renderIndex defaultRenderContext{page=IndexPage (Text.pack cat), draft=draft} index
writeIndexFiles :: PageStyle -> Draft -> Index -> [IO ()]
writeIndexFiles sfs draft index = flip map (Map.toList index) $ uncurry (writeIndexFile sfs draft) . first Text.unpack
writeCssFile :: IO ()
writeCssFile = do
base <- Text.pack . readFile "14882.css"
let
replaceFonts =
Text.replace
".MJXc-TeX-sans-R {font-family: MJXc-TeX-sans-R,MJXc-TeX-sans-Rw}"
".MJXc-TeX-sans-R {font-family: 'Noto Sans'; font-size: 10pt; }" .
Text.replace
".MJXc-TeX-type-R {font-family: MJXc-TeX-type-R,MJXc-TeX-type-Rw}"
".MJXc-TeX-type-R {font-family: 'Noto Sans Mono'; font-size: 10pt; }" .
Text.replace
".MJXc-TeX-main-R {font-family: MJXc-TeX-main-R,MJXc-TeX-main-Rw}"
".MJXc-TeX-main-R {}" .
Text.replace
".MJXc-TeX-math-I {font-family: MJXc-TeX-math-I,MJXc-TeX-math-Ix,MJXc-TeX-math-Iw}"
".MJXc-TeX-math-I {font-style: italic}" .
Text.replace
".MJXc-TeX-main-I {font-family: MJXc-TeX-main-I,MJXc-TeX-main-Ix,MJXc-TeX-main-Iw}"
".MJXc-TeX-main-I {font-style: italic}"
-- Replace fonts to make sure code in formulas matches code in code blocks, etc.
mjx <- Text.replace "display: block" "display: block;background:inherit" . replaceFonts . Text.pack .
readProcess "tex2html" ["--css", ""] ""
writeFile (outputDir ++ "/14882.css") (base ++ mjx)
writeXrefDeltaFiles :: PageStyle -> Draft -> [IO ()]
writeXrefDeltaFiles sfs draft = flip map (xrefDelta draft) $ \(from, to) ->
writeSectionFile (Text.unpack from) sfs (squareAbbr False from) $
if to == []
then "Subclause " ++ squareAbbr False from ++ " was removed."
else "See " ++ intercalateBuilders ", " (flip render ctx . to) ++ "."
where ctx = defaultRenderContext{draft=draft, page=XrefDeltaPage}
================================================
FILE: Sentences.hs
================================================
{-# LANGUAGE OverloadedStrings, ViewPatterns, LambdaCase, TypeSynonymInstances, FlexibleInstances #-}
module Sentences (splitIntoSentences, isActualSentence, linkifyFullStop, breakSentence) where
import LaTeXBase (LaTeXUnit(..), triml, ArgKind(FixArg))
import Data.Text (isPrefixOf, isSuffixOf, stripPrefix, Text)
import qualified Data.Text as Text
import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
import Data.Char (isSpace, isDigit, isAlphaNum, isUpper, isLower)
import Control.Arrow (first)
import Data.Maybe (isNothing)
import Util ((++), textStripInfix, dropTrailingWs, (.))
import RawDocument
import Document
startsSentence :: RawElement -> Bool
startsSentence (RawLatexElement e) | [TeXRaw x] <- triml [e], x /= "" = isUpper (Text.head x)
startsSentence _ = False
unitContinuesSentence :: LaTeXUnit -> Bool
unitContinuesSentence (TeXComm " " _ []) = True
unitContinuesSentence (TeXRaw txt) = "," `isPrefixOf` txt
unitContinuesSentence _ = False
elemContinuesSentence :: RawElement -> Bool
elemContinuesSentence (RawLatexElement u) = unitContinuesSentence u
elemContinuesSentence _ = False
elemsContinueSentence :: [RawElement] -> Bool
elemsContinueSentence (RawLatexElement (TeXRaw "") : more) = elemsContinueSentence more
elemsContinueSentence (x : _) = elemContinuesSentence x
elemsContinueSentence _ = False
simpleHead :: [RawElement] -> Maybe Char
simpleHead [] = Nothing
simpleHead (RawLatexElement (TeXRaw x) : more)
| x == "" = simpleHead more
| otherwise = Just (Text.head x)
simpleHead (RawLatexElement (TeXComm " " "" []) : _) = Just ' '
simpleHead (RawLatexElement (TeXComm "tcode" _ [(_, x)]) : more) = simpleHead (map RawLatexElement x ++ more)
simpleHead (RawLatexElement (TeXComm "index" _ _) : more) = simpleHead more
simpleHead (RawLatexElement (TeXComm "footnoteref" _ _) : _) = Nothing -- hmm
simpleHead (RawLatexElement TeXLineBreak : _) = Nothing
simpleHead (RawLatexElement (TeXComm "br" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "linebreak" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "newline" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "par" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "nolinebreak" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "iref" _ _) : _) = Nothing
simpleHead (RawLatexElement (TeXComm "," _ _) : _) = Just ','
simpleHead x = error $ "simpleHead: " ++ show x
splitIntoSentences :: [RawElement] -> [[RawElement]]
splitIntoSentences = go []
where
go [] [] = []
go [] (RawLatexElement (TeXRaw "\n") : y) = go [] y
go [] (x@(RawExample _) : y) = [x] : go [] y
go [] (x@(RawNote _ _) : y) = [x] : go [] y
go partial (x@(RawCodeblock _) : y) | z : _ <- rmIndices y, startsSentence z = (partial ++ [x]) : go [] y
go x [] = [x]
go x z@(e : y)
| Just (s, rest) <- breakSentence z = (x ++ s) : go [] rest
| otherwise = go (x ++ [e]) y
rmIndices (RawLatexElement (TeXRaw "\n") : RawLatexElement (TeXComm "index" _ _) : x) = rmIndices x
rmIndices x = x
breakSentence :: [RawElement] -> Maybe ([RawElement] {- sentence -}, [RawElement] {- remainder -})
breakSentence (e@(RawLatexElement (TeXMath _ math)) : more)
| f (reverse math) = Just ([e], more)
| otherwise = first (e :) . breakSentence more
where
f :: LaTeX -> Bool
f (TeXRaw y : z) | all isSpace (Text.unpack y) = f z
f (TeXComm "text" _ [(FixArg, a)] : _) = f (reverse a)
f (TeXComm "mbox" _ [(FixArg, a)] : _) = f (reverse a)
f (TeXRaw ".\n" : TeXComm "right" "" [] : y) = f y
f (TeXRaw y : _) = "." `isSuffixOf` (Text.pack $ dropTrailingWs $ Text.unpack y)
f _ = False
breakSentence (b@(RawLatexElement TeXLineBreak) : more) = Just ([b], more)
breakSentence (RawLatexElement (TeXBraces x) : more) = breakSentence (map RawLatexElement x ++ more)
breakSentence (e@(RawLatexElement (TeXEnv "eqnarray*" _ _)) : more) = first (e :) . breakSentence more
breakSentence (b@(RawLatexElement (TeXComm cmd _ _)) : more) =
if cmd `elem` ["break"]
then Just ([b], more)
else (first (b :)) . breakSentence more
breakSentence (e@(RawLatexElement (TeXRaw (textStripInfix "." -> (Just ((++ ".") -> pr, po))))) : more)
= f pr po
where
f :: Text -> Text -> Maybe ([RawElement], [RawElement])
f pre post
| "''" `isPrefixOf` post = f (pre ++ "''") (Text.drop 2 post)
| not (("(." `isSuffixOf` pre) && (")" `isPrefixOf` post))
, not ("" == post && maybe False (\c -> isLower c || isDigit c) (simpleHead more))
, not ("" == post && length more /= 0 && head more == RawLatexElement (TeXComm " " "" []))
, not (Text.length post > 0 && ((Text.head post == '.')
|| isLower (Text.head post)
|| isDigit (Text.head post)))
, not (Text.length pre > 1 && Text.length post > 0 && isAlphaNum (Text.last $ Text.init pre) && isDigit (Text.head post))
, not (elemsContinueSentence (RawLatexElement (TeXRaw post) : more))
, not (Text.length pre >= 2 && ("." `isSuffixOf` pre) && isUpper (Text.last $ Text.init pre))
, not ("e.g." `isSuffixOf` pre)
, not ("i.e." `isSuffixOf` pre) =
let
post' = Text.stripStart post
(pre', post'') = case stripPrefix ")" post' of
Just z -> (pre ++ ")" , Text.stripStart z)
Nothing -> (pre, post')
more' = if post'' == "" then more else RawLatexElement (TeXRaw post'') : more
(maybefootnote, more'') = case more' of
fn@(RawLatexElement (TeXComm "footnoteref" _ _)) : z -> ([fn], z)
_ -> ([], more')
sentence = [RawLatexElement (TeXRaw pre')] ++ maybefootnote
in
Just (sentence, more'')
| Just ((++ ".") -> pre', post') <- textStripInfix "." post = f (pre ++ pre') post'
| otherwise = first (e :) . breakSentence more
breakSentence (e@(RawLatexElement (TeXRaw _)) : more) = first (e :) . breakSentence more
breakSentence (enum@(RawEnumerated _ (last -> rawItemContent -> (_ : _ : _))) : more)
= Just ([enum], more)
breakSentence (enum@(RawEnumerated _ (last -> rawItemContent -> [RawTexPara y])) : more)
| Just _ <- breakSentence y = Just ([enum], more)
breakSentence _ = Nothing
isActualSentence :: [RawElement] -> Bool
isActualSentence (RawEnumerated _ _ : _) = False
isActualSentence l = any p l
where
yes = words $
"link tcode noncxxtcode textit ref grammarterm indexedspan " ++
"defnx textbf textrm textsl textsc indexlink hiddenindexlink"
q :: LaTeXUnit -> Bool
q (TeXRaw s) = not $ all isSpace $ Text.unpack s
q (TeXComm c _ _) | c `elem` yes = True
q (TeXEnv c _ _) | c `elem` yes = True
q (TeXEnv "indexed" _ body) = any q body
q (TeXBraces body) = any q body
q _ = False
p :: RawElement -> Bool
p (RawLatexElement u) = q u
p RawEnumerated{} = True
p _ = False
class LinkifyFullStop a where
linkifyFullStop :: LaTeXUnit -> a -> Maybe a
instance LinkifyFullStop LaTeX where
linkifyFullStop link l = reverse . f (reverse l)
where
f [] = Nothing
f (x@(TeXRaw ".\n") : y@(TeXComm "right" _ _) : more) = ([x, y] ++) . f more
f (u : uu)
| Just u' <- inUnit u = Just (reverse u' ++ uu)
| otherwise = (u :) . f uu
inUnit :: LaTeXUnit -> Maybe LaTeX -- returns content in regular order
inUnit (TeXEnv "array" args body)
| Just body' <- linkifyFullStop link body = Just [TeXEnv "array" args body']
inUnit (TeXEnv "indented" [] body)
| Just body' <- linkifyFullStop link body = Just [TeXEnv "indented" [] body']
inUnit (TeXComm "text" ws [(FixArg, x)])
| Just x' <- linkifyFullStop link x = Just (moveStuffOutsideText (TeXComm "text" ws [(FixArg, x')]))
| otherwise = Nothing
inUnit (TeXComm "mbox" ws [(FixArg, x)])
| Just x' <- linkifyFullStop link x = Just (moveStuffOutsideText (TeXComm "mbox" ws [(FixArg, x')]))
| otherwise = Nothing
inUnit (TeXMath kind m)
| Just m' <- linkifyFullStop link m = Just [TeXMath kind m']
inUnit (TeXRaw (Text.dropWhileEnd (=='\n') -> Text.stripSuffix "." -> Just s)) = Just [TeXRaw s, link]
inUnit (TeXRaw (Text.stripSuffix ".)" -> Just s)) = Just [TeXRaw s, link, TeXRaw ")"]
inUnit (TeXRaw (Text.stripSuffix ".''" -> Just s)) = Just [TeXRaw s, link, TeXRaw "''"]
inUnit _ = Nothing
instance LinkifyFullStop Item where
linkifyFullStop link it@Item{itemInlineContent=e}
| Just y <- linkifyFullStop link e
= Just it{itemInlineContent=y}
linkifyFullStop _ _ = Nothing
instance LinkifyFullStop [Element] where
linkifyFullStop link = (reverse .) . f . reverse
where
f :: [Element] -> Maybe [Element]
f (Enumerated cmd (reverse -> (lastItem : moreItems)) : more)
| all (isNothing . linkifyFullStop link) moreItems
, Just lastItem' <- linkifyFullStop link lastItem
= Just $ Enumerated cmd (reverse (lastItem' : moreItems)) : more
f (LatexElement u : more)
| Just u' <- linkifyFullStop link [u] = Just $ map LatexElement (reverse u') ++ more
| otherwise = (LatexElement u :) . f more
f _ = Nothing
moveStuffOutsideText :: LaTeXUnit -> LaTeX
-- Turns \text{ \class{bla} } into \text{ }\class{\text{bla}}\text{ }, and similar for \href,
-- because MathJax does not support \class and \href in \text.
moveStuffOutsideText (TeXComm parent pws [(FixArg, [TeXComm nested nws [x, y]])])
| parent `elem` ["text", "mbox"]
, nested `elem` ["class", "href"] = [TeXComm nested nws [x, (FixArg, moveStuffOutsideText (TeXComm parent pws [y]))]]
moveStuffOutsideText (TeXComm parent pws [(FixArg, t)])
| parent `elem` ["text", "mbox"]
, length t >= 2 = concatMap (\u -> moveStuffOutsideText $ TeXComm parent pws [(FixArg, [u])]) t
moveStuffOutsideText u = [u]
================================================
FILE: Setup.hs
================================================
import Distribution.Simple
main = defaultMain
================================================
FILE: Toc.hs
================================================
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# LANGUAGE RecordWildCards, OverloadedStrings, ViewPatterns, NamedFieldPuns #-}
module Toc (writeTocFiles) where
import qualified Data.Text as Text
import qualified Data.Text.Lazy as LazyText
import qualified Data.Text.Lazy.Builder as TextBuilder
import Data.Time.Format (formatTime, defaultTimeLocale)
import Data.Time.Clock (getCurrentTime, UTCTime)
import Prelude hiding ((.), (++), writeFile)
import LaTeXBase (LaTeXUnit(..))
import Pages (Link(..), fileContent, applyPageStyle, PageStyle(..), outputDir, writePage)
import Render (secnum, linkToSection, simpleRender2, RenderContext(..), render, defaultRenderContext, Page(..))
import Util
import Document (Section(..), Draft(..), SectionKind(..), indexCatName, isDefinitionSection)
tocSection :: Draft -> Bool -> Section -> TextBuilder.Builder
tocSection _ _ Section{sectionKind=DefinitionSection _} = ""
tocSection draft expanded s@Section{..} =
xml "div" [("id", abbreviation)] $ header ++ mconcat (tocSection draft expanded . subsections)
where
header = h (min 4 $ 2 + length parents) $
secnum 0 (if expanded then "#" ++ urlChars abbreviation else "") s ++ " "
++ render ( sectionName ++ [TeXRaw " "]
, (linkToSection (if expanded then SectionToSection else TocToSection) abbreviation){aClass="abbr_ref"})
defaultRenderContext{page=if expand
gitextract_qdd768we/ ├── .gitignore ├── 14882.css ├── CxxParser.hs ├── Document.hs ├── LICENSE ├── LaTeXBase.hs ├── LaTeXParser.hs ├── Load14882.hs ├── MathJax.hs ├── Pages.hs ├── README ├── RawDocument.hs ├── Render.hs ├── SectionPages.hs ├── Sentences.hs ├── Setup.hs ├── Toc.hs ├── Util.hs ├── colored.css ├── cxxdraft-htmlgen.cabal ├── expanded.css ├── fulltoc.css ├── genhtml.hs ├── macros.tex ├── mathjax-batch ├── normative-only.css ├── stack.yaml └── toc.css
Condensed preview — 28 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (235K chars).
[
{
"path": ".gitignore",
"chars": 42,
"preview": "14882\ndist\ntags\nnode_modules\n.stack-work/\n"
},
{
"path": "14882.css",
"chars": 12343,
"preview": "body {\n\tfont-family: 'Noto Serif';\n\thyphens: auto;\n\tline-height: 1.5;\n\tmargin-left: 20mm;\n\tmargin-right: 16mm;\n\tmargin-t"
},
{
"path": "CxxParser.hs",
"chars": 7397,
"preview": "{-# OPTIONS_GHC -fno-warn-tabs #-}\n{-# LANGUAGE\n\tOverloadedStrings,\n\tRecordWildCards,\n\tTupleSections,\n\tViewPatterns,\n\tLa"
},
{
"path": "Document.hs",
"chars": 13276,
"preview": "{-# OPTIONS_GHC -fno-warn-tabs #-}\n{-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-}\n\nmodule Document (\n"
},
{
"path": "LICENSE",
"chars": 147,
"preview": "All authors involved in the creation of the contents of this package have agreed to release their respective contributio"
},
{
"path": "LaTeXBase.hs",
"chars": 6842,
"preview": "{-# LANGUAGE ViewPatterns, OverloadedStrings #-}\n\nmodule LaTeXBase\n ( MathType(..), LaTeXUnit(..), LaTeX, TeXArg, ArgKin"
},
{
"path": "LaTeXParser.hs",
"chars": 18921,
"preview": "{-# OPTIONS_GHC -fno-warn-tabs #-}\n{-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns, TupleSections #-}\n\nmod"
},
{
"path": "Load14882.hs",
"chars": 23036,
"preview": "{-# OPTIONS_GHC -fno-warn-tabs #-}\n{-# LANGUAGE\n\tOverloadedStrings,\n\tScopedTypeVariables,\n\tRecordWildCards,\n\tViewPattern"
},
{
"path": "MathJax.hs",
"chars": 2342,
"preview": "{-# LANGUAGE OverloadedStrings, ViewPatterns #-}\n\nmodule MathJax (render) where\n\nimport Control.Concurrent.MVar (takeMVa"
},
{
"path": "Pages.hs",
"chars": 3300,
"preview": "{-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-}\n\nmodule Pages (fileContent, pageContent, pagePath, wri"
},
{
"path": "README",
"chars": 3159,
"preview": "Introduction\n\n cxxdraft-htmlgen parses the LaTeX sources of the draft,\n and generates static HTML pages from them.\n\nPr"
},
{
"path": "RawDocument.hs",
"chars": 23062,
"preview": "{-# LANGUAGE\n\tOverloadedStrings,\n\tRecordWildCards,\n\tViewPatterns,\n\tLambdaCase,\n\tTupleSections,\n\tNamedFieldPuns,\n\tFlexibl"
},
{
"path": "Render.hs",
"chars": 59581,
"preview": "{-# OPTIONS_GHC -fno-warn-tabs #-}\n{-# LANGUAGE\n\tOverloadedStrings,\n\tRecordWildCards,\n\tTupleSections,\n\tViewPatterns,\n\tNa"
},
{
"path": "SectionPages.hs",
"chars": 13167,
"preview": "{-# OPTIONS_GHC -fno-warn-tabs #-}\n{-# LANGUAGE OverloadedStrings, RecordWildCards, TupleSections, ViewPatterns, NamedFi"
},
{
"path": "Sentences.hs",
"chars": 9974,
"preview": "{-# LANGUAGE OverloadedStrings, ViewPatterns, LambdaCase, TypeSynonymInstances, FlexibleInstances #-}\n\nmodule Sentences "
},
{
"path": "Setup.hs",
"chars": 46,
"preview": "import Distribution.Simple\nmain = defaultMain\n"
},
{
"path": "Toc.hs",
"chars": 5197,
"preview": "{-# OPTIONS_GHC -fno-warn-tabs #-}\n{-# LANGUAGE RecordWildCards, OverloadedStrings, ViewPatterns, NamedFieldPuns #-}\n\nmo"
},
{
"path": "Util.hs",
"chars": 4675,
"preview": "{-# OPTIONS_GHC -fno-warn-tabs #-}\n{-# LANGUAGE TupleSections, OverloadedStrings, ViewPatterns #-}\n\nmodule Util (\n\tmconc"
},
{
"path": "colored.css",
"chars": 212,
"preview": "div.example {\n\tdisplay: block;\n\tmargin-top: 5pt;\n\tmargin-bottom: 5pt;\n\tfont-size: 11pt;\n\tcolor: #bb00bb;\n}\n\ndiv.note {\n\t"
},
{
"path": "cxxdraft-htmlgen.cabal",
"chars": 1210,
"preview": "name: cxxdraft-htmlgen\nversion: 0\nsynopsis: Converts C++ Standard draft documents "
},
{
"path": "expanded.css",
"chars": 458,
"preview": "div.example {\n\tdisplay: block;\n\tmargin-top: 5pt;\n\tmargin-bottom: 5pt;\n\tfont-size: 9pt;\n}\n\ndiv.note {\n\tdisplay: block;\n\tm"
},
{
"path": "fulltoc.css",
"chars": 640,
"preview": "h1 { margin: 0.2em 5pt 0.2em 5pt; line-height: 1.5; }\nh2 { margin: 0.2em 5pt 0.2em 5pt; border-bottom: 1px dashed rgba(0"
},
{
"path": "genhtml.hs",
"chars": 2292,
"preview": "{-# OPTIONS_GHC -fno-warn-tabs #-}\n{-# LANGUAGE LambdaCase, ViewPatterns, RecordWildCards, OverloadedStrings #-}\n\nimport"
},
{
"path": "macros.tex",
"chars": 4939,
"preview": "%% cxxdraft-htmlgen builtins:\n%\n% \\link\n% Link to section.\n% arg 0: link text\n% arg 1: section abbreviation\n%\n% \\w"
},
{
"path": "mathjax-batch",
"chars": 832,
"preview": "#! /usr/bin/env node\n\nvar mjAPI = require(\"mathjax-node\");\nvar split = require(\"split\");\n\nmjAPI.config(\n { extensions: "
},
{
"path": "normative-only.css",
"chars": 239,
"preview": "div.example { display: none; }\ndiv.note { display: none; }\n\na.footnotenum { display: none; }\ndiv.footnote { display: non"
},
{
"path": "stack.yaml",
"chars": 84,
"preview": "resolver: lts-12.17\npackages:\n- .\n\nextra-deps: []\n\nflags: {}\n\nextra-package-dbs: []\n"
},
{
"path": "toc.css",
"chars": 674,
"preview": "h1 { margin: 0.2em 5pt 0.2em 5pt; line-height: 1.5; }\nh2 { margin: 0.2em 5pt 0.2em 5pt; border-bottom: 1px dashed rgba(0"
}
]
About this extraction
This page contains the full source code of the Eelis/cxxdraft-htmlgen GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 28 files (213.0 KB), approximately 67.5k tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.