[SVG Image: {html.escape(alt)}]
Original: {html.escape(src)}
"""
logger.debug(f"Replaced SVG image: {src}")
return placeholder
# =============================================================================
# Markdown Processing
# =============================================================================
def process_mermaid_blocks(
    md_content: str, book: epub.EpubBook, state: BuildState, logger: logging.Logger
) -> str:
    """Find mermaid code blocks and replace them with image references.

    Every ```mermaid fence is looked up in ``state.mermaid_cache`` (all
    diagrams are pre-rendered before this runs — strict mode), the PNG is
    added to the book once, and the fence is replaced by a markdown image
    reference pointing at that PNG.

    Args:
        md_content: Raw markdown text possibly containing mermaid fences.
        book: The EPUB book the rendered PNGs are attached to.
        state: Build state holding the diagram cache and the set of images
            already added to the book.
        logger: Logger for error reporting.

    Returns:
        The markdown with every mermaid fence replaced by an image reference.

    Raises:
        MermaidRenderError: If a diagram is missing from the cache.
    """
    pattern = r"```mermaid\n(.*?)```"

    def replace_mermaid(match: re.Match[str]) -> str:
        mermaid_code = sanitize_mermaid(match.group(1))
        cache_key = mermaid_code.strip()
        if cache_key not in state.mermaid_cache:
            # Should not happen in strict mode since all diagrams are
            # pre-fetched before chapter processing begins.
            logger.error("Mermaid diagram not found in cache")
            raise MermaidRenderError("Mermaid diagram not found in cache")
        img_data, img_name = state.mermaid_cache[cache_key]
        # Only add the image item to the book once, even if the same
        # diagram appears in multiple chapters.
        if img_name not in state.mermaid_added_to_book:
            img_item = epub.EpubItem(
                uid=img_name.replace(".", "_"),
                file_name=f"images/{img_name}",
                media_type="image/png",
                content=img_data,
            )
            book.add_item(img_item)
            state.mermaid_added_to_book.add(img_name)
        # BUG FIX: the original returned f"\n\n" (an f-string with no
        # placeholders), silently dropping the diagram from the output.
        # Emit a markdown image reference to the PNG added above.
        return f"\n\n![Mermaid diagram](images/{img_name})\n\n"

    return re.sub(pattern, replace_mermaid, md_content, flags=re.DOTALL)
def convert_internal_links(
    html_content: str, current_file: Path, root_path: Path, state: BuildState
) -> str:
    """Convert markdown links to internal EPUB chapter links.

    Relative links that resolve to a known chapter (per
    ``state.path_to_chapter``) are rewritten to that chapter's file name;
    external, mailto, pure-anchor, and unresolvable links are left alone.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    external_prefixes = ("http://", "https://", "mailto:", "#")

    for tag in soup.find_all("a"):
        target = tag.get("href", "")
        if not target or target.startswith(external_prefixes):
            continue

        # Split off the fragment so the path portion can be resolved alone.
        fragment = ""
        if "#" in target:
            target, frag = target.split("#", 1)
            fragment = "#" + frag
        if not target:
            continue

        # Resolve relative to the current file's directory.
        resolved = (current_file.parent / target).resolve()
        try:
            rel_to_root = resolved.relative_to(root_path)
        except ValueError:
            # Link points outside the repo; leave it untouched.
            continue

        lookup = str(rel_to_root)
        # Candidate forms, tried in order: as-is, trailing slash stripped,
        # and directory links mapped to their README.md.
        candidates = [lookup, lookup.rstrip("/")]
        candidates.append(lookup if lookup.endswith(".md") else lookup + "/README.md")

        for candidate in candidates:
            if candidate in state.path_to_chapter:
                tag["href"] = state.path_to_chapter[candidate] + fragment
                break

    return str(soup)
def md_to_html(
    md_content: str,
    current_file: Path,
    root_path: Path,
    book: epub.EpubBook,
    state: BuildState,
    logger: logging.Logger,
) -> str:
    """Convert markdown to HTML with proper styling.

    Handles:
    - Mermaid diagrams (rendered as PNG images)
    - SVG images (replaced with styled placeholders)
    - Internal links (converted to EPUB chapter references)
    - Standard markdown features
    """
    # Mermaid fences must be replaced before the markdown pass so they
    # arrive at the converter as plain image references.
    with_diagrams = process_mermaid_blocks(md_content, book, state, logger)

    rendered = markdown.markdown(
        with_diagrams,
        extensions=["tables", "fenced_code", "codehilite", "toc"],
    )

    # SVG references won't render in EPUB; swap each for a styled placeholder.
    soup = BeautifulSoup(rendered, "html.parser")
    for image in soup.find_all("img"):
        source = image.get("src", "")
        if not source.endswith(".svg"):
            continue
        label = image.get("alt", "Image")
        replacement = handle_svg_image(source, label, logger)
        image.replace_with(BeautifulSoup(replacement, "html.parser"))

    # Finally rewrite internal links to EPUB chapter references.
    return convert_internal_links(str(soup), current_file, root_path, state)
# =============================================================================
# EPUB Generation
# =============================================================================
def create_stylesheet() -> epub.EpubItem:
    """Create the EPUB stylesheet item shared by all chapters."""
    # CSS is embedded verbatim; it is attached to chapters by the caller.
    css = """
    body { font-family: Georgia, serif; line-height: 1.6; padding: 1em; }
    h1 { color: #333; border-bottom: 2px solid #e67e22; padding-bottom: 0.3em; }
    h2 { color: #444; margin-top: 1.5em; }
    h3 { color: #555; }
    code { background: #f4f4f4; padding: 0.2em 0.4em; border-radius: 3px; font-family: monospace; }
    pre { background: #f4f4f4; padding: 1em; overflow-x: auto; border-radius: 5px; }
    pre code { background: none; padding: 0; }
    table { border-collapse: collapse; width: 100%; margin: 1em 0; }
    th, td { border: 1px solid #ddd; padding: 0.5em; text-align: left; }
    th { background: #f4f4f4; }
    blockquote { border-left: 4px solid #e67e22; margin: 1em 0; padding-left: 1em; color: #666; }
    a { color: #e67e22; }
    img { max-width: 100%; height: auto; display: block; margin: 1em auto; }
    .diagram { text-align: center; margin: 1.5em 0; }
    .svg-placeholder { border: 1px dashed #ccc; padding: 1em; text-align: center; background: #f9f9f9; border-radius: 4px; margin: 1em 0; }
    """
    stylesheet = epub.EpubItem(
        uid="style_nav",
        file_name="style/nav.css",
        media_type="text/css",
        content=css,
    )
    return stylesheet
async def build_epub_async(
    config: EPUBConfig,
    logger: logging.Logger,
    state: BuildState | None = None,
) -> Path:
    """Build EPUB asynchronously with concurrent diagram fetching.

    Pipeline: validate inputs -> set book metadata/cover/CSS -> collect
    chapters -> pre-render all Mermaid diagrams -> convert each markdown
    file into an EPUB chapter -> assemble TOC and spine -> write the file.

    Args:
        config: Build configuration (root path, output path, metadata).
        logger: Logger used for progress and error reporting.
        state: Optional shared build state; a fresh one is created when
            omitted, and it is reset in either case.

    Returns:
        The path of the written EPUB (``config.output_path``).

    Raises:
        ValidationError: If a chapter file cannot be decoded as UTF-8.
            (``validate_inputs`` may raise on bad configuration as well —
            see its own documentation.)
    """
    state = state or BuildState()
    state.reset()  # Ensure clean state even when the caller passes a reused one
    # Validate inputs
    validate_inputs(config, logger)
    # Initialize book
    book = epub.EpubBook()
    book.set_identifier(config.identifier)
    book.set_title(config.title)
    book.set_language(config.language)
    book.add_author(config.author)
    # Add cover
    logger.info("Generating cover image...")
    cover_data = create_cover_image(config, logger)
    book.set_cover("cover.png", cover_data)
    # Add CSS; the same stylesheet item is attached to every chapter below
    nav_css = create_stylesheet()
    book.add_item(nav_css)
    # Collect all chapters in single pass
    logger.info("Collecting chapters...")
    collector = ChapterCollector(config.root_path, state)
    chapter_infos = collector.collect_all_chapters(get_chapter_order())
    # Extract and pre-fetch all Mermaid diagrams so that chapter processing
    # can rely on every diagram already being in state.mermaid_cache
    logger.info("Extracting Mermaid diagrams...")
    md_files = [(ch.file_path, ch.file_title) for ch in chapter_infos]
    all_diagrams = extract_all_mermaid_blocks(md_files, logger)
    if all_diagrams:
        renderer = MermaidRenderer(config, state, logger)
        await renderer.render_all(all_diagrams)
    # Process chapters
    logger.info("Processing chapters...")
    chapters: list[epub.EpubHtml] = []
    # TOC entries are either a standalone chapter or a (Section, chapters)
    # pair grouping a folder's chapters under one heading
    toc: list[epub.EpubHtml | tuple[epub.Section, list[epub.EpubHtml]]] = []
    # Running state for the folder-grouping pass: consecutive chapters that
    # share folder_name are accumulated, then flushed as one Section when
    # the folder changes (or at the end of the loop)
    current_folder: str | None = None
    current_folder_chapters: list[epub.EpubHtml] = []
    for chapter_info in chapter_infos:
        try:
            content = chapter_info.file_path.read_text(encoding="utf-8")
        except UnicodeDecodeError as e:
            logger.error(f"Failed to read {chapter_info.file_path}: {e}")
            raise ValidationError(
                f"Failed to read {chapter_info.file_path}: {e}"
            ) from e
        logger.debug(
            f"Processing: {chapter_info.file_path.relative_to(config.root_path)}"
        )
        html_content = md_to_html(
            content, chapter_info.file_path, config.root_path, book, state, logger
        )
        chapter = epub.EpubHtml(
            title=chapter_info.file_title,
            file_name=chapter_info.chapter_filename,
            lang="en",
        )
        # Standalone files and folder overviews get the overview layout
        chapter.content = create_chapter_html(
            chapter_info.display_name,
            chapter_info.file_title,
            html_content,
            is_overview=chapter_info.is_folder_overview
            or chapter_info.folder_name is None,
        )
        chapter.add_item(nav_css)
        book.add_item(chapter)
        chapters.append(chapter)
        # Build TOC structure
        if chapter_info.folder_name is None:
            # Single file chapter
            if current_folder is not None:
                # Finish previous folder before appending the standalone entry
                toc.append(
                    (epub.Section(current_folder), current_folder_chapters.copy())
                )
                # .copy() above matters: the list is cleared and reused next
                current_folder_chapters.clear()
                current_folder = None
            toc.append(chapter)
        else:
            # Part of a folder
            if current_folder != chapter_info.folder_name:
                if current_folder is not None:
                    # Finish previous folder
                    toc.append(
                        (epub.Section(current_folder), current_folder_chapters.copy())
                    )
                    current_folder_chapters.clear()
                current_folder = chapter_info.folder_name
            current_folder_chapters.append(chapter)
    # Handle last folder (the loop only flushes on a folder change)
    if current_folder is not None and current_folder_chapters:
        toc.append((epub.Section(current_folder), current_folder_chapters))
    # Set table of contents
    book.toc = toc
    # Add navigation files
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    # Set spine: navigation page first, then chapters in collection order
    book.spine = ["nav"] + chapters
    # Write EPUB
    logger.info(f"Writing EPUB to {config.output_path}...")
    epub.write_epub(str(config.output_path), book, {})
    logger.info(f"EPUB created successfully: {config.output_path}")
    return config.output_path
def create_epub(root_path: Path, output_path: Path, verbose: bool = False) -> Path:
    """Synchronous wrapper for backward compatibility."""
    # Logging is configured first, then the async build is driven to
    # completion on a fresh event loop.
    log = setup_logging(verbose)
    cfg = EPUBConfig(root_path=root_path, output_path=output_path)
    return asyncio.run(build_epub_async(cfg, log))
# =============================================================================
# CLI
# =============================================================================
def main() -> int:
"""Main entry point with CLI argument parsing."""
parser = argparse.ArgumentParser(
description="Build an EPUB from Claude How-To markdown files."
)
parser.add_argument(
"--root",
"-r",
type=Path,
default=None,
help="Root directory containing markdown files (default: repo root)",
)
parser.add_argument(
"--output",
"-o",
type=Path,
default=None,
help="Output EPUB file path (default: