Repository: Spenhouet/confluence-markdown-exporter
Branch: main
Commit: 303989bb4f0e
Files: 91
Total size: 617.7 KB

Directory structure:
gitextract_yof8yoxc/

├── .dockerignore
├── .github/
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE/
│   │   ├── 1_bug_report.yaml
│   │   ├── 2_feature_request.yaml
│   │   ├── 3_question.yaml
│   │   └── config.yml
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── dependabot.yml
│   └── workflows/
│       ├── docker-build.yml
│       ├── docker-publish.yml
│       ├── docs.yml
│       ├── python-build.yml
│       ├── python-publish.yml
│       └── release.yml
├── .gitignore
├── .python-version
├── .vscode/
│   ├── extensions.json
│   ├── launch.json
│   ├── settings.json
│   └── tasks.json
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── README.md
├── confluence_markdown_exporter/
│   ├── __init__.py
│   ├── api_clients.py
│   ├── config.py
│   ├── confluence.py
│   ├── main.py
│   └── utils/
│       ├── __init__.py
│       ├── app_data_store.py
│       ├── config_interactive.py
│       ├── drawio_converter.py
│       ├── export.py
│       ├── lockfile.py
│       ├── measure_time.py
│       ├── page_registry.py
│       ├── rich_console.py
│       ├── table_converter.py
│       └── type_converter.py
├── docs/
│   ├── compatibility.md
│   ├── configuration/
│   │   ├── authentication.md
│   │   ├── ci.md
│   │   ├── index.md
│   │   ├── options.md
│   │   └── target-systems.md
│   ├── contributing.md
│   ├── docker.md
│   ├── features.md
│   ├── installation.md
│   ├── intro.md
│   ├── troubleshooting.md
│   └── usage.md
├── docusaurus.config.ts
├── package.json
├── pyproject.toml
├── scripts/
│   ├── build-versions.mjs
│   └── bump-docs-version.sh
├── sidebars.ts
├── src/
│   ├── components/
│   │   ├── HomepageFeatures/
│   │   │   ├── index.tsx
│   │   │   └── styles.module.css
│   │   └── quickstart/
│   │       └── index.tsx
│   ├── css/
│   │   └── custom.css
│   └── pages/
│       ├── index.module.css
│       └── index.tsx
├── tests/
│   ├── __init__.py
│   ├── conftest.py
│   ├── integration/
│   │   ├── __init__.py
│   │   └── test_cli_integration.py
│   └── unit/
│       ├── __init__.py
│       ├── test_alert_conversion.py
│       ├── test_api_clients.py
│       ├── test_confluence.py
│       ├── test_emoticon_conversion.py
│       ├── test_include_macro_conversion.py
│       ├── test_main.py
│       ├── test_nbsp_fix.py
│       ├── test_plantuml_code_block_detection.py
│       ├── test_plantuml_conversion.py
│       ├── test_template_placeholders.py
│       └── utils/
│           ├── __init__.py
│           ├── test_app_data_store_env.py
│           ├── test_drawio_converter.py
│           ├── test_export.py
│           ├── test_lockfile.py
│           ├── test_measure_time.py
│           ├── test_page_registry.py
│           ├── test_rich_console.py
│           ├── test_table_converter.py
│           └── test_type_converter.py
└── tsconfig.json

================================================
FILE CONTENTS
================================================

================================================
FILE: .dockerignore
================================================
.git
.github
.claude
.venv
dist
build
*.egg-info
__pycache__
.pytest_cache
.ruff_cache
.mypy_cache
node_modules
tests
scratch
AIRAscore
.vscode
.idea
*.log
.DS_Store


================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms

github: Spenhouet


================================================
FILE: .github/ISSUE_TEMPLATE/1_bug_report.yaml
================================================
name: Bug report
description: Report an error or unexpected behavior
labels: ["bug"]
body:
  - type: markdown
    attributes:
      value: |
        Thank you for taking the time to report an issue! We're glad to have you involved with confluence-markdown-exporter.

        **Before reporting, please make sure to search through [existing issues](https://github.com/Spenhouet/confluence-markdown-exporter/issues?q=is:issue+is:open+label:bug) (including [closed](https://github.com/Spenhouet/confluence-markdown-exporter/issues?q=is:issue%20state:closed%20label:bug)).**

  - type: markdown
    attributes:
      value: |
        ### Diagnostic info
        Run `cme bugreport` and paste the full output in the **Diagnostic info** field below.
        This command prints your version, system details, and configuration — with all secrets automatically redacted.

  - type: textarea
    attributes:
      label: Description
      description: |
        A clear and concise description of the bug, including a minimal reproducible example.

        Be sure to include the command you invoked (e.g., `cme pages https://company.atlassian.net/wiki/spaces/KEY/pages/123/Title`).
    validations:
      required: true

  - type: textarea
    attributes:
      label: Diagnostic info
      description: |
        Paste the output of `cme bugreport` here.
        This includes your version, Python/OS info, and configuration with secrets redacted.
      placeholder: |
        ## Bug Report Diagnostic Info

        ### Version
        confluence-markdown-exporter x.y.z

        ### System
        Python: ...
        Platform: ...
        Architecture: ...

        ### Config
        Config file: ...
        ```yaml
        ...
        ```
      render: markdown
    validations:
      required: false

  - type: input
    attributes:
      label: Version
      description: |
        What version of confluence-markdown-exporter are you using?
        (Already included in `cme bugreport` output — fill in here only if you didn't run that command.)
      placeholder: e.g., confluence-markdown-exporter 4.0.3
    validations:
      required: false

  - type: input
    attributes:
      label: Confluence Version
      description: |
        What Confluence version are you using? Include whether it's Cloud or Server/Data Center.
        Example: `Confluence Cloud` or `Confluence Server 7.19.2`
      placeholder: e.g., Confluence Cloud or Confluence Server 7.19.2
    validations:
      required: false

  - type: input
    attributes:
      label: Jira Version
      description: |
        What Jira version are you using (or not)? Include whether it's Cloud or Server/Data Center.
        Example: `Jira Cloud` or `Jira Server 8.20.5`
      placeholder: e.g., Jira Cloud or Jira Server 8.20.5
    validations:
      required: false


================================================
FILE: .github/ISSUE_TEMPLATE/2_feature_request.yaml
================================================
name: Feature request
description: Suggest a new feature or enhancement
labels: ["enhancement"]
body:
  - type: markdown
    attributes:
      value: |
        Thank you for taking the time to suggest a feature! We're glad to have you involved with confluence-markdown-exporter.

        **Before submitting, please make sure to search through [existing feature requests](https://github.com/Spenhouet/confluence-markdown-exporter/issues?q=is:issue+is:open+label:enhancement) (including [closed](https://github.com/Spenhouet/confluence-markdown-exporter/issues?q=is:issue%20state:closed%20label:enhancement)).**

  - type: textarea
    attributes:
      label: Problem Description
      description: |
        A clear and concise description of the problem or limitation you're experiencing.

        What is the use case? What workflow or task would this feature enable or improve?
    validations:
      required: true

  - type: textarea
    attributes:
      label: Proposed Solution
      description: |
        A clear and concise description of what you want to happen.

        How do you envision this feature working? What would the ideal implementation look like?

        If you have ideas about commands, options, or configuration, please include examples:
        ```bash
        # Example command or usage
        confluence-markdown-exporter <your-suggested-command>
        ```
    validations:
      required: true

  - type: textarea
    attributes:
      label: Alternatives Considered
      description: |
        A clear and concise description of any alternative solutions or features you've considered.

        Are there workarounds you're currently using? What other tools or approaches have you tried?
    validations:
      required: false

  - type: textarea
    attributes:
      label: Use Cases
      description: |
        Describe specific scenarios where this feature would be helpful.

        Please provide concrete examples of how you (or others) would use this feature in practice.
    validations:
      required: false


================================================
FILE: .github/ISSUE_TEMPLATE/3_question.yaml
================================================
name: Question
description: Ask a question about confluence-markdown-exporter
labels: ["question"]
body:
  - type: textarea
    attributes:
      label: Question
      description: Describe your question in detail.
    validations:
      required: true

  - type: input
    attributes:
      label: Version
      description: What version of confluence-markdown-exporter are you using? (see `confluence-markdown-exporter version`)
      placeholder: e.g., confluence-markdown-exporter 3.0.3
    validations:
      required: false

  - type: input
    attributes:
      label: Confluence Version
      description: |
        What Confluence version are you using? Include whether it's Cloud or Server/Data Center.
        Example: `Confluence Cloud` or `Confluence Server 7.19.2`
      placeholder: e.g., Confluence Cloud or Confluence Server 7.19.2
    validations:
      required: false

  - type: input
    attributes:
      label: Jira Version
      description: |
        What Jira version are you using (or not)? Include whether it's Cloud or Server/Data Center.
        Example: `Jira Cloud` or `Jira Server 8.20.5`
      placeholder: e.g., Jira Cloud or Jira Server 8.20.5
    validations:
      required: false


================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links:
  - name: Documentation
    url: https://github.com/Spenhouet/confluence-markdown-exporter#readme
    about: Read the project documentation and README


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
<!--
Thank you for contributing to confluence-markdown-exporter! To help us out with reviewing, please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

<!-- What's the purpose of the change? What does it do, and why? -->

## Test Plan

<!-- How was it tested? -->


================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "daily"
    groups:
      actions:
        patterns:
          - "*"


================================================
FILE: .github/workflows/docker-build.yml
================================================
name: Build Docker image

on:
  pull_request:
    branches: [main]
    paths:
      - Dockerfile
      - .dockerignore
      - .github/workflows/docker-build.yml
      - pyproject.toml
      - uv.lock
      - confluence_markdown_exporter/**
  # Also build on push to main so the GHA cache is primed on the default
  # branch. Tag-triggered publish runs fall back to the default branch's
  # cache, which would otherwise stay cold until the first release.
  push:
    branches: [main]
    paths:
      - Dockerfile
      - .dockerignore
      - .github/workflows/docker-build.yml
      - pyproject.toml
      - uv.lock
      - confluence_markdown_exporter/**

permissions:
  contents: read

jobs:
  build:
    name: Build image (PR verification)
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v4

      - name: Build (no push)
        uses: docker/build-push-action@v7
        with:
          context: .
          file: ./Dockerfile
          platforms: linux/amd64,linux/arm64
          push: false
          cache-from: type=gha
          cache-to: type=gha,mode=max,ignore-error=true


================================================
FILE: .github/workflows/docker-publish.yml
================================================
name: Publish Docker image

on:
  workflow_call:
    inputs:
      version:
        description: "Release version to publish (e.g. 5.1.0)"
        required: true
        type: string
  workflow_dispatch:
    inputs:
      version:
        description: "Release version to publish (e.g. 5.1.0). Must match an existing git tag."
        required: true
        type: string

permissions:
  contents: read

jobs:
  publish:
    name: Publish image to Docker Hub
    runs-on: ubuntu-latest
    environment:
      name: dockerhub
      url: https://hub.docker.com/r/${{ vars.DOCKERHUB_IMAGE || 'spenhouet/confluence-markdown-exporter' }}
    env:
      IMAGE_NAME: ${{ vars.DOCKERHUB_IMAGE || 'spenhouet/confluence-markdown-exporter' }}
    steps:
      - name: Checkout release tag
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.version }}

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v4

      - name: Log in to Docker Hub
        uses: docker/login-action@v4
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v6
        with:
          images: ${{ env.IMAGE_NAME }}
          tags: |
            type=semver,pattern={{version}},value=${{ inputs.version }}
            type=semver,pattern={{major}}.{{minor}},value=${{ inputs.version }}
            type=semver,pattern={{major}},value=${{ inputs.version }}
            type=raw,value=latest
          labels: |
            org.opencontainers.image.title=confluence-markdown-exporter
            org.opencontainers.image.description=Export Confluence pages to Markdown
            org.opencontainers.image.url=https://github.com/${{ github.repository }}
            org.opencontainers.image.source=https://github.com/${{ github.repository }}
            org.opencontainers.image.version=${{ inputs.version }}
            org.opencontainers.image.licenses=MIT

      - name: Build and push
        uses: docker/build-push-action@v7
        with:
          context: .
          file: ./Dockerfile
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max,ignore-error=true
          provenance: true

      - name: Update Docker Hub description
        uses: peter-evans/dockerhub-description@v5
        continue-on-error: true
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
          repository: ${{ env.IMAGE_NAME }}
          short-description: Export Confluence pages to Markdown (CLI)
          readme-filepath: ./README.md


================================================
FILE: .github/workflows/docs.yml
================================================
name: Deploy docs

on:
  push:
    branches: [main]
    paths:
      - "docs/**"
      - "versioned_docs/**"
      - "versioned_sidebars/**"
      - "versions.json"
      - "src/**"
      - "static/**"
      - "docusaurus.config.ts"
      - "sidebars.ts"
      - "tsconfig.json"
      - "package.json"
      - "package-lock.json"
      - ".github/workflows/docs.yml"
  workflow_dispatch:

permissions:
  contents: read
  pages: write
  id-token: write

concurrency:
  group: pages
  cancel-in-progress: false

jobs:
  build:
    name: Build docs
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Setup Node
        uses: actions/setup-node@v6
        with:
          node-version: 20
          cache: npm

      - name: Install dependencies
        run: npm ci

      - name: Build site (with versioned docs from git tags)
        run: npm run build:versioned

      - name: Upload artifact
        uses: actions/upload-pages-artifact@v5
        with:
          path: build

  deploy:
    name: Deploy to GitHub Pages
    needs: build
    runs-on: ubuntu-latest
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v5


================================================
FILE: .github/workflows/python-build.yml
================================================
name: Build Python package

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  test:
    name: Test, lint and build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true

      - name: Install dependencies
        run: uv sync --locked --all-groups

      - name: Run linting with ruff
        run: uv run ruff check

      - name: Run tests with pytest
        run: uv run pytest

      - name: Test build (with sources for development)
        run: uv build

      - name: Test build (without sources for publication)
        run: |
          rm -rf dist/
          uv build --no-sources

      - name: Test package installation and import
        run: |
          uv run --with dist/*.whl --no-project -- python -c "import confluence_markdown_exporter; print('Package imports successfully')"

      - name: Test CLI commands
        run: |
          uv run --with dist/*.whl --no-project confluence-markdown-exporter --help
          uv run --with dist/*.whl --no-project cme --help

      - name: Upload build artifacts for inspection
        uses: actions/upload-artifact@v7
        with:
          name: build-artifacts
          path: dist/
          retention-days: 5


================================================
FILE: .github/workflows/python-publish.yml
================================================
name: Publish Python package

on:
  workflow_call:
    inputs:
      version:
        description: "Release version to publish (e.g. 5.1.0)"
        required: true
        type: string
  workflow_dispatch:
    inputs:
      version:
        description: "Release version to publish (e.g. 5.1.0). Must match an existing git tag."
        required: true
        type: string

permissions:
  contents: write
  id-token: write
  attestations: write

jobs:
  publish:
    name: Publish to PyPI
    runs-on: ubuntu-latest
    environment:
      name: release
      url: https://pypi.org/p/confluence-markdown-exporter
    steps:
      - name: Checkout release tag
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true

      - name: Install dependencies
        run: uv sync --locked --all-groups

      - name: Build distributions
        run: uv build --no-sources

      - name: Generate artifact attestations
        uses: actions/attest-build-provenance@v4.1.0
        with:
          subject-path: "dist/*"

      - name: Publish to PyPI
        run: uv publish

      - name: Sign the distributions with Sigstore
        uses: sigstore/gh-action-sigstore-python@v3.3.0
        with:
          inputs: >-
            ./dist/*.tar.gz
            ./dist/*.whl

      - name: Upload signed artifacts to GitHub Release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh release upload "${{ inputs.version }}" dist/** \
            --repo "$GITHUB_REPOSITORY"


================================================
FILE: .github/workflows/release.yml
================================================
name: Release

on:
  workflow_dispatch:
    inputs:
      version_bump:
        description: "Version bump type"
        required: true
        default: "patch"
        type: choice
        options:
          - patch
          - minor
          - major
          - alpha
          - beta
          - rc
      custom_version:
        description: "Custom version (leave empty to use bump type)"
        required: false
        type: string

permissions:
  contents: write
  id-token: write
  attestations: write

jobs:
  release:
    name: Bump version and create release
    runs-on: ubuntu-latest
    permissions:
      contents: write
    outputs:
      version: ${{ steps.export-version.outputs.value }}
    steps:
      - uses: actions/checkout@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true

      - name: Install dependencies
        run: uv sync --locked --all-groups

      - name: Update version (custom)
        if: ${{ github.event.inputs.custom_version != '' }}
        env:
          # Hand the free-text workflow input to the script via the environment
          # rather than interpolating it into the shell source, so that its
          # content is treated as data and cannot inject shell commands.
          CUSTOM_VERSION: ${{ github.event.inputs.custom_version }}
        run: |
          uv version "$CUSTOM_VERSION"
          echo "NEW_VERSION=${CUSTOM_VERSION}" >> "$GITHUB_ENV"

      - name: Update version (bump)
        if: ${{ github.event.inputs.custom_version == '' }}
        run: |
          NEW_VERSION=$(uv version --bump ${{ github.event.inputs.version_bump }} | awk '{print $NF}')
          echo "NEW_VERSION=${NEW_VERSION}" >> $GITHUB_ENV

      - name: Export version as job output
        id: export-version
        run: echo "value=${NEW_VERSION}" >> "$GITHUB_OUTPUT"

      - name: Test build with new version
        run: |
          uv build --no-sources
          uv run --with dist/*.whl --no-project -- python -c "import confluence_markdown_exporter; print('Package imports successfully')"

      - name: Update version references in README and docs
        run: scripts/bump-docs-version.sh "${{ env.NEW_VERSION }}"

      - name: Commit version update
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"
          # -u: stage modifications to tracked files only; never add untracked files.
          git add -u pyproject.toml uv.lock README.md docs src
          git diff --cached --quiet || git commit -m "Bump version to ${{ env.NEW_VERSION }}"
          git push

      - name: Create release tag
        run: |
          git tag "${{ env.NEW_VERSION }}"
          git push origin "${{ env.NEW_VERSION }}"

      - name: Create and publish GitHub Release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh release create "${{ env.NEW_VERSION }}" \
            --title "Release ${{ env.NEW_VERSION }}" \
            --generate-notes

  publish-python:
    name: Publish Python package
    needs: release
    uses: ./.github/workflows/python-publish.yml
    with:
      version: ${{ needs.release.outputs.version }}
    secrets: inherit

  publish-docker:
    name: Publish Docker image
    needs: release
    uses: ./.github/workflows/docker-publish.yml
    with:
      version: ${{ needs.release.outputs.version }}
    secrets: inherit


================================================
FILE: .gitignore
================================================
### Custom ###

**/*.env
scratch/
log/
.ssh/

_tmp/*
*.tar.gz
*.sh~

*.zip
*.jpg

### LLM Agents ###
# The source stays vendor agnostic
.claude/

### Virtual Environments ###
.venv/
.venv-*/

# Created by https://www.gitignore.io/api/code,python
# Edit at https://www.gitignore.io/?templates=code,python

### Code ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json

### Docusaurus ###
node_modules/
.docusaurus/
.docusaurus-faster/
docs-build/
# Versioned docs are generated at build time from git tags by scripts/build-versions.mjs
versioned_docs/
versioned_sidebars/
versions.json

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# Mr Developer
.mr.developer.cfg
.project
.pydevproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# End of https://www.gitignore.io/api/code,python


# Beads / Dolt files (added by bd init)
.dolt/
*.db
.beads-credential-key


================================================
FILE: .python-version
================================================
3.10.12


================================================
FILE: .vscode/extensions.json
================================================
{
  // See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations.
  // Extension identifier format: ${publisher}.${name}. Example: vscode.csharp
  // List of extensions which should be recommended for users of this workspace.
  "recommendations": [
    "astral-sh.ty",
    "charliermarsh.ruff",
    "github.vscode-github-actions",
    "ms-python.python",
    "njpwerner.autodocstring",
  ],
  // List of extensions recommended by VS Code that should not be recommended for users of this workspace.
  "unwantedRecommendations": []
}

================================================
FILE: .vscode/launch.json
================================================
{
  // Use IntelliSense to learn about possible attributes.
  // Hover to view descriptions of existing attributes.
  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
  "version": "0.2.0",
  "configurations": [
    {
      "name": "Python: Current File",
      "type": "debugpy",
      "request": "launch",
      "program": "${file}",
      "justMyCode": false,
      "console": "integratedTerminal",
      "cwd": "${workspaceFolder}",
      "env": {
        "PYTHONPATH": "${workspaceRoot}"
      }
    },
    {
      "name": "Python: Export Page(s)",
      "type": "debugpy",
      "request": "launch",
      "program": "${workspaceFolder}/confluence_markdown_exporter/main.py",
      "justMyCode": false,
      "args": [
        "pages",
        "<page-url>"
      ],
      "console": "integratedTerminal",
      "cwd": "${workspaceFolder}",
      "env": {
        "PYTHONPATH": "${workspaceRoot}",
        "CME_CONFIG_PATH": "scratch/cme_config.json",
        "CME_EXPORT__LOG_LEVEL": "DEBUG",
        "CME_EXPORT__OUTPUT_PATH": "scratch"
      }
    },
    {
      "name": "Python: Export Page(s) with Descendants",
      "type": "debugpy",
      "request": "launch",
      "program": "${workspaceFolder}/confluence_markdown_exporter/main.py",
      "justMyCode": false,
      "args": [
        "pages-with-descendants",
        "<page-url>"
      ],
      "console": "integratedTerminal",
      "cwd": "${workspaceFolder}",
      "env": {
        "PYTHONPATH": "${workspaceRoot}",
        "CME_CONFIG_PATH": "scratch/cme_config.json",
        "CME_EXPORT__LOG_LEVEL": "DEBUG",
        "CME_EXPORT__OUTPUT_PATH": "scratch"
      }
    },
    {
      "name": "Python: Export Space(s)",
      "type": "debugpy",
      "request": "launch",
      "program": "${workspaceFolder}/confluence_markdown_exporter/main.py",
      "justMyCode": false,
      "args": [
        "spaces",
        "<space-url>"
      ],
      "console": "integratedTerminal",
      "cwd": "${workspaceFolder}",
      "env": {
        "PYTHONPATH": "${workspaceRoot}",
        "CME_CONFIG_PATH": "scratch/cme_config.json",
        "CME_EXPORT__LOG_LEVEL": "DEBUG",
        "CME_EXPORT__OUTPUT_PATH": "scratch"
      }
    },
    {
      "name": "Python: Export Org(s)",
      "type": "debugpy",
      "request": "launch",
      "program": "${workspaceFolder}/confluence_markdown_exporter/main.py",
      "justMyCode": false,
      "args": [
        "orgs",
        "<base-url>"
      ],
      "console": "integratedTerminal",
      "cwd": "${workspaceFolder}",
      "env": {
        "PYTHONPATH": "${workspaceRoot}",
        "CME_CONFIG_PATH": "scratch/cme_config.json",
        "CME_EXPORT__LOG_LEVEL": "DEBUG",
        "CME_EXPORT__OUTPUT_PATH": "scratch"
      }
    },
    {
      "name": "Python: Config (Interactive)",
      "type": "debugpy",
      "request": "launch",
      "program": "${workspaceFolder}/confluence_markdown_exporter/main.py",
      "justMyCode": false,
      "args": [
        "config"
      ],
      "console": "integratedTerminal",
      "cwd": "${workspaceFolder}",
      "env": {
        "PYTHONPATH": "${workspaceRoot}",
        "CME_CONFIG_PATH": "scratch/cme_config.json",
        "CME_EXPORT__LOG_LEVEL": "DEBUG"
      }
    }
  ]
}

================================================
FILE: .vscode/settings.json
================================================
{
  "files.eol": "\n",
  "editor.formatOnSave": true,
  "autoDocstring.docstringFormat": "google",
  "autoDocstring.startOnNewLine": true,
  "python.testing.unittestEnabled": false,
  "python.testing.pytestEnabled": true,
  "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
  "jupyter.notebookFileRoot": "${workspaceFolder}",
  "task.autoDetect": "off",
  "[python]": {
    "editor.defaultFormatter": "charliermarsh.ruff",
    "editor.codeActionsOnSave": {
      "source.fixAll": "explicit",
      "source.organizeImports": "explicit"
    }
  },
  "[json]": {
    "editor.defaultFormatter": "vscode.json-language-features"
  },
  "jupyter.debugJustMyCode": false,
  "debugpy.debugJustMyCode": false,
  "[markdown]": {
    "diffEditor.ignoreTrimWhitespace": false,
    "editor.unicodeHighlight.ambiguousCharacters": false,
    "editor.unicodeHighlight.invisibleCharacters": false,
    "editor.wordWrap": "on",
    "editor.quickSuggestions": {
      "comments": "off",
      "strings": "off",
      "other": "on"
    },
    "editor.fontLigatures": true,
    "editor.glyphMargin": false,
    "editor.minimap.enabled": false,
    "editor.wrappingIndent": "indent",
    "editor.overviewRulerBorder": false,
    "editor.lineHeight": 24,
    "editor.renderWhitespace": "none",
    "editor.suggest.showSnippets": false,
    "editor.tabSize": 2,
    "editor.wordBasedSuggestions": "off",
    "files.autoSave": "onFocusChange",
    "files.insertFinalNewline": true,
  },
  "markdown.updateLinksOnFileMove.enabled": "prompt",
  "markdown.validate.enabled": true,
}

================================================
FILE: .vscode/tasks.json
================================================
{
  "version": "2.0.0",
  "tasks": []
}

================================================
FILE: CONTRIBUTING.md
================================================
# Contributing

Any contribution is welcome! This document provides guidelines for contributing to the confluence-markdown-exporter project.

## Table of Contents

- [Getting Started](#getting-started)
- [Development Workflow](#development-workflow)
- [Testing](#testing)
- [Code Quality](#code-quality)
- [Release Process](#release-process)
- [Pull Request Guidelines](#pull-request-guidelines)

## Getting Started

### Prerequisites

- Python 3.10 or higher
- Git
- `uv` (Python package manager)
- `jq` (for JSON processing)

### Install jq

```bash
sudo apt-get install jq
```

### Install `uv`

Following the [uv installation guide](https://docs.astral.sh/uv/getting-started/installation):

```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```

Add shell completion (optional):

```bash
echo 'eval "$(uv generate-shell-completion bash)"' >> ~/.bashrc
```

### Project Setup

1. **Fork and Clone the Repository**

   ```bash
   git clone https://github.com/Spenhouet/confluence-markdown-exporter.git
   cd confluence-markdown-exporter
   ```

2. **Install Dependencies**

   ```bash
   uv sync --all-groups
   ```

   This will:

   - Create a virtual environment
   - Install all dependencies (including development dependencies via dependency groups)
   - Install the project in editable mode

3. **Verify Installation**

   ```bash
   uv run confluence-markdown-exporter --help
   uv run cme --help
   ```

## Development Workflow

### Running the Application

```bash
# Run with uv (recommended)
uv run confluence-markdown-exporter [commands]
uv run cme [commands]

# Or activate the virtual environment
source .venv/bin/activate
confluence-markdown-exporter [commands]
```

### Adding Dependencies

```bash
# Add runtime dependency
uv add package-name

# Add development dependency (to dev group)
uv add --group dev package-name

# Add to custom dependency group
uv add --group group-name package-name
```

### Updating Dependencies

```bash
# Update all dependencies
uv sync --upgrade

# Update specific dependency
uv sync --upgrade-package package-name
```

## Testing

We use `pytest` for testing. Tests are located in the `tests/` directory.

### Running Tests

```bash
# Run all tests
uv run pytest

# Run tests with verbose output
uv run pytest -v

# Run specific test file
uv run pytest tests/unit/test_main.py

# Run specific test
uv run pytest tests/unit/test_main.py::test_function_name
```

### Writing Tests

1. **Create test files** in `tests/unit/` (or `tests/integration/` for CLI integration tests) with the prefix `test_`
2. **Follow naming conventions**: `test_*.py` files, `test_*` functions
3. **Use descriptive test names** that explain what is being tested
4. **Add docstrings** to explain complex test scenarios

Example test structure:

```python
def test_feature_description() -> None:
    """Test that the feature works as expected."""
    # Arrange
    input_data = "test input"

    # Act
    result = function_under_test(input_data)

    # Assert
    assert result == expected_output
```

## Code Quality

### Linting with Ruff

We use `ruff` for Python linting and code formatting.

```bash
# Check code quality
uv run ruff check

# Auto-fix issues where possible
uv run ruff check --fix

# Check specific files or directories
uv run ruff check confluence_markdown_exporter/
uv run ruff check tests/
```

### Code Style Guidelines

- **Line length**: Maximum 100 characters
- **Docstring style**: Google docstring convention
- **Import formatting**: One import per line (enforced by ruff)
- **Type hints**: Use type annotations for new code

### Pre-commit Workflow

Before committing:

1. **Run linting**: `uv run ruff check`
2. **Run tests**: `uv run pytest`
3. **Fix any issues** before committing

## Release Process

> [!NOTE]
> Only relevant for maintainers.

### Automated Release

We use GitHub Actions for automated releases:

1. **Trigger Release Workflow**

   - Go to GitHub Actions tab
   - Run "Release" workflow
   - Choose version bump type (patch/minor/major) or specify custom version

2. **Automated Steps**
   - Updates version in `pyproject.toml`
   - Runs tests and builds
   - Creates Git tag
   - Publishes to PyPI
   - Creates GitHub release with auto-generated notes
   - Publishes the multi-arch Docker image to Docker Hub

## Pull Request Guidelines

### Before Submitting

1. **Create a feature branch**

   ```bash
   git checkout -b feature/your-feature-name
   ```

2. **Run the full test suite**

   ```bash
   uv run ruff check
   uv run pytest
   uv build --no-sources  # Test build
   ```

3. **Update documentation** if needed

### PR Requirements

- ✅ **All tests pass** (verified by CI)
- ✅ **Code passes linting** (ruff check)
- ✅ **Descriptive PR title** and description
- ✅ **Reference related issues** if applicable
- ✅ **Update tests** for new functionality
- ✅ **Update documentation** for user-facing changes

## Development Environment

### Recommended Tools

- **IDE**: VS Code with Python extension
- **Git client**: Command line or your preferred GUI
- **Terminal**: Any modern terminal with shell completion

### VS Code Extensions

Recommended extensions for development:

- Python (Microsoft)
- Ruff (Astral Software)
- GitLens (GitKraken)
- markdownlint (David Anson)

### Project Structure

```text
confluence-markdown-exporter/
├── .github/workflows/      # CI/CD workflows
├── confluence_markdown_exporter/  # Main package
│   ├── __init__.py
│   ├── main.py            # CLI entry point
│   ├── confluence.py      # Core functionality
│   ├── api_clients.py     # API integrations
│   └── utils/             # Utility modules
├── tests/                 # Test suite
├── .ruff.toml            # Ruff configuration
├── pyproject.toml        # Project configuration
├── uv.lock              # Dependency lock file
└── CONTRIBUTING.md       # This file
```

## Getting Help

- **GitHub Issues**: For bug reports and feature requests
- **GitHub Discussions**: For questions and general discussion
- **Documentation**: Check the README and code comments

Thank you for contributing to confluence-markdown-exporter! 🚀


================================================
FILE: Dockerfile
================================================
# syntax=docker/dockerfile:1.7

# Two-stage build: the `builder` stage installs the project and its runtime
# dependencies into /app/.venv with uv; the `runtime` stage copies only that
# virtual environment and runs the CLI as a non-root user.

# ---- builder ---------------------------------------------------------------
FROM python:3.12-slim AS builder

# Architecture of the image being built; used below to give each architecture
# its own uv cache mount id.
ARG TARGETARCH

# Take the uv / uvx binaries from the uv 0.8 image instead of installing them.
COPY --from=ghcr.io/astral-sh/uv:0.8 /uv /uvx /usr/local/bin/

# UV_LINK_MODE=copy         copy packages out of the cache mount instead of linking to it
# UV_COMPILE_BYTECODE=1     compile bytecode at install time
# UV_PYTHON_DOWNLOADS=never use the interpreter of the base image, never download one
ENV UV_LINK_MODE=copy \
    UV_COMPILE_BYTECODE=1 \
    UV_PYTHON_DOWNLOADS=never

WORKDIR /app

# Install runtime dependencies only. This layer is cached unless uv.lock or
# pyproject.toml change. Metadata is bind-mounted so it does not get baked
# into the layer and invalidate it on unrelated edits.
RUN --mount=type=cache,target=/root/.cache/uv,id=uv-$TARGETARCH \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=README.md,target=README.md \
    uv sync --locked --no-install-project --no-editable --no-dev

# Install the project itself into the venv. Invalidates on source edits.
COPY pyproject.toml uv.lock README.md ./
COPY confluence_markdown_exporter ./confluence_markdown_exporter
RUN --mount=type=cache,target=/root/.cache/uv,id=uv-$TARGETARCH \
    uv sync --locked --no-editable --no-dev

# ---- runtime ---------------------------------------------------------------
FROM python:3.12-slim AS runtime

# PATH puts the copied venv first so the `confluence-markdown-exporter`
# entrypoint resolves to the installed console script.
# HOME, XDG_CONFIG_HOME and CME_CONFIG_PATH all point below /data/config so the
# application config lives in a single mountable directory.
# CME_EXPORT__OUTPUT_PATH overrides the `export.output_path` setting
# (`CME_` prefix, `__` as nesting delimiter) so exports land in /data/output.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PATH="/app/.venv/bin:$PATH" \
    HOME=/data/config \
    XDG_CONFIG_HOME=/data/config \
    CME_CONFIG_PATH=/data/config/app_data.json \
    CME_EXPORT__OUTPUT_PATH=/data/output

# Dedicated non-root user/group (UID/GID 1000) without a login shell; it owns
# the config and output directories under /data.
RUN groupadd --system --gid 1000 cme \
    && useradd  --system --uid 1000 --gid cme --home-dir /data/config --shell /usr/sbin/nologin cme \
    && mkdir -p /data/output /data/config \
    && chown -R cme:cme /data

# Copy only the venv, not the source. `--no-editable` made the install
# self-contained so the source tree is not needed at runtime.
COPY --from=builder /app/.venv /app/.venv

USER cme
WORKDIR /data/output

# Arguments passed to `docker run` replace CMD; without any, the CLI help is shown.
ENTRYPOINT ["confluence-markdown-exporter"]
CMD ["--help"]


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2025 Sebastian Penhouet

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
<p align="center">
  <a href="https://github.com/Spenhouet/confluence-markdown-exporter"><img src="https://raw.githubusercontent.com/Spenhouet/confluence-markdown-exporter/b8caaba935eea7e7017b887c86a740cb7bf99708/logo.png" alt="confluence-markdown-exporter"></a>
</p>
<p align="center">
    <em>The confluence-markdown-exporter exports Confluence pages in Markdown format. This exporter helps migrate content from Confluence to platforms that support Markdown, e.g. Obsidian, Gollum, Azure DevOps (ADO), Foam, Dendron and more.</em>
</p>
<p align="center">
  <a href="https://github.com/Spenhouet/confluence-markdown-exporter/actions/workflows/python-build.yml"><img src="https://github.com/Spenhouet/confluence-markdown-exporter/actions/workflows/python-build.yml/badge.svg" alt="Build Python package"></a>
  <a href="https://github.com/Spenhouet/confluence-markdown-exporter/actions/workflows/release.yml"><img src="https://github.com/Spenhouet/confluence-markdown-exporter/actions/workflows/release.yml/badge.svg" alt="Build and publish to PyPI"></a>
  <a href="https://pypi.org/project/confluence-markdown-exporter" target="_blank">
    <img src="https://img.shields.io/pypi/v/confluence-markdown-exporter?color=%2334D058&label=PyPI%20package" alt="PyPI version">
   </a>
  <a href="https://hub.docker.com/r/spenhouet/confluence-markdown-exporter" target="_blank">
    <img src="https://img.shields.io/docker/v/spenhouet/confluence-markdown-exporter?sort=semver&label=Docker%20Hub&color=2496ED&logo=docker&logoColor=white" alt="Docker Hub version">
   </a>
  <a href="https://spenhouet.github.io/confluence-markdown-exporter/" target="_blank">
    <img src="https://img.shields.io/badge/docs-online-blue" alt="Documentation">
   </a>
</p>

## What it does

Exports individual pages, pages with descendants, or entire Confluence spaces via the Atlassian API into clean Markdown. Skips unchanged pages by default, re-exporting only what has changed since the last run.

Supported targets include Obsidian, Gollum, Azure DevOps (ADO) wikis, Foam, Dendron, and anything else that consumes Markdown.

Full feature list, configuration reference, and target-system presets live in the **[documentation site](https://spenhouet.github.io/confluence-markdown-exporter/)**.

## Quickstart

### 1. Install

**macOS and Linux**

```bash
curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh
```

**Windows**

```powershell
powershell -ExecutionPolicy ByPass -c "irm https://uvx.sh/confluence-markdown-exporter/install.ps1 | iex"
```

Installing a specific version:

```bash
curl -LsSf uvx.sh/confluence-markdown-exporter/5.1.1/install.sh | sh
```

Alternative install methods (PyPI via `pip` / `uv`, prebuilt Docker image) are covered in the [installation docs](https://spenhouet.github.io/confluence-markdown-exporter/installation) and the [Docker page](https://spenhouet.github.io/confluence-markdown-exporter/docker).

> **Using the Docker image?** Steps 2 and 3 below use the local `cme` CLI. Inside the Docker image there is no interactive `cme config` menu; you supply a pre-defined config (mounted JSON file or `CME_*` environment variables) and run a single export command per container invocation. See the [Docker page](https://spenhouet.github.io/confluence-markdown-exporter/docker) for the non-interactive flow.

### 2. Authenticate

Set Confluence credentials interactively (URL, username, API token / PAT):

```sh
cme config edit auth.confluence
```

See [Authentication](https://spenhouet.github.io/confluence-markdown-exporter/configuration/authentication) for token scopes and Jira setup.

### 3. Export

```sh
# A single page
cme pages <page-url>

# A page and all its descendants
cme pages-with-descendants <page-url>

# An entire space
cme spaces <space-url>

# Every space of an organisation
cme orgs <base-url>
```

Output goes to the configured `export.output_path` (current directory by default).

## Documentation

The full documentation lives at **<https://spenhouet.github.io/confluence-markdown-exporter/>** and includes:

- [Installation](https://spenhouet.github.io/confluence-markdown-exporter/installation) (curl / PowerShell / pip / uv)
- [Usage guide](https://spenhouet.github.io/confluence-markdown-exporter/usage): pages, descendants, spaces, orgs, output layout
- [Feature list](https://spenhouet.github.io/confluence-markdown-exporter/features): supported Confluence content, macros, and add-ons
- [Configuration](https://spenhouet.github.io/confluence-markdown-exporter/configuration): config commands, ENV vars, full option reference
- [Target-system presets](https://spenhouet.github.io/confluence-markdown-exporter/configuration/target-systems): Obsidian, Azure DevOps, …
- [Docker](https://spenhouet.github.io/confluence-markdown-exporter/docker): prebuilt images for non-interactive / CI use
- [CI / non-interactive use](https://spenhouet.github.io/confluence-markdown-exporter/configuration/ci)
- [Compatibility](https://spenhouet.github.io/confluence-markdown-exporter/compatibility) and [Troubleshooting](https://spenhouet.github.io/confluence-markdown-exporter/troubleshooting)

## Contributing

If you would like to contribute, please read [our contribution guidelines](CONTRIBUTING.md).

## License

This tool is an open source project released under the [MIT License](LICENSE).


================================================
FILE: confluence_markdown_exporter/__init__.py
================================================
"""Confluence Markdown Exporter package."""

try:
    from importlib.metadata import version

    # Read the version from the metadata of the installed distribution.
    __version__ = version("confluence-markdown-exporter")
except Exception:  # noqa: BLE001
    # fallback if package not installed or metadata not available
    __version__ = "unknown"


================================================
FILE: confluence_markdown_exporter/api_clients.py
================================================
import logging
import re
import urllib.parse
from threading import Lock
from threading import local
from typing import Annotated

import requests
from atlassian import Confluence as ConfluenceApiSdk
from atlassian import Jira as JiraApiSdk
from pydantic import AfterValidator
from pydantic import BaseModel

from confluence_markdown_exporter.utils.app_data_store import ApiDetails
from confluence_markdown_exporter.utils.app_data_store import AtlassianSdkConnectionConfig
from confluence_markdown_exporter.utils.app_data_store import get_settings
from confluence_markdown_exporter.utils.app_data_store import normalize_instance_url
from confluence_markdown_exporter.utils.app_data_store import set_setting_with_keys

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# URL-keyed caches for API clients. Keys are the normalized instance URLs computed by
# get_confluence_instance() / get_jira_instance(); reads and writes hold _clients_lock.
_confluence_clients: dict[str, ConfluenceApiSdk] = {}
_jira_clients: dict[str, JiraApiSdk] = {}
_clients_lock = Lock()

# Thread-local storage for per-URL Confluence clients (one per worker thread per URL)
_thread_local = local()

# Hostname suffix used to recognise a standard Atlassian Cloud instance.
_CLOUD_DOMAIN = ".atlassian.net"
# Base of the Atlassian API gateway; "/{service}/{cloud_id}" is appended to it.
_GATEWAY_PREFIX = "https://api.atlassian.com/ex"


def parse_gateway_url(url: str) -> tuple[str, str] | None:
    m = re.search(r"https://api\.atlassian\.com/ex/(confluence|jira)/([^/?#]+)", url)
    return (m.group(1).lower(), m.group(2)) if m else None


def build_gateway_url(service: str, cloud_id: str) -> str:
    """Build the API gateway URL for *service* (lower-cased) and *cloud_id*."""
    service_segment = service.lower()
    return f"{_GATEWAY_PREFIX}/{service_segment}/{cloud_id}"


def ensure_service_gateway_url(url: str, service: str | None = None) -> str:
    """Rewrite a gateway URL so that it targets *service*.

    For example ``https://api.atlassian.com/ex/confluence/{cloudId}`` turns into
    ``https://api.atlassian.com/ex/jira/{cloudId}`` when *service* is ``"jira"``.
    When *service* is omitted the service already present in the URL is kept.
    The result is rebuilt from the service and Cloud ID only; URLs that are not
    gateway URLs are handed back unchanged.
    """
    parsed = parse_gateway_url(url)
    if not parsed:
        return url

    current_service, cloud_id = parsed
    return build_gateway_url(service or current_service, cloud_id)


def _is_standard_atlassian_cloud_url(url: str) -> bool:
    """Tell whether the hostname of *url* ends with the Atlassian Cloud domain.

    Any error raised while parsing or checking the URL results in ``False``.
    """
    try:
        parsed = urllib.parse.urlparse(url)
        host = parsed.hostname or ""
        is_cloud = host.endswith(_CLOUD_DOMAIN)
    except Exception:  # noqa: BLE001
        is_cloud = False
    return is_cloud


def _try_fetch_cloud_id(base_url: str) -> str | None:
    """Look up the Atlassian Cloud ID via ``{base_url}/_edge/tenant_info``.

    The request uses a 5 second timeout. Failures are logged at debug level and
    never propagate.

    Returns:
        The ``cloudId`` value of the JSON response, or ``None`` when the request
        fails, the response is not OK, or the body cannot be read (e.g. for
        Server instances).
    """
    endpoint = f"{base_url}/_edge/tenant_info"
    try:
        response = requests.get(endpoint, timeout=5)
        if not response.ok:
            return None
        return response.json().get("cloudId")
    except Exception as e:  # noqa: BLE001
        logger.debug("Could not fetch Cloud ID from %s/_edge/tenant_info: %s", base_url, e)
        return None


def _get_confluence_sdk_url(base_url: str, auth: ApiDetails) -> str:
    """Pick the URL handed to the Confluence SDK.

    Returns the Confluence API gateway URL when *auth* carries a Cloud ID,
    otherwise *base_url* unchanged.
    """
    if not auth.cloud_id:
        return base_url
    return build_gateway_url("confluence", auth.cloud_id)


def _get_jira_sdk_url(base_url: str, auth: ApiDetails) -> str:
    """Pick the URL handed to the Jira SDK.

    Returns the Jira API gateway URL when *auth* carries a Cloud ID,
    otherwise *base_url* unchanged.
    """
    if not auth.cloud_id:
        return base_url
    return build_gateway_url("jira", auth.cloud_id)


def _decode_url_part(v: str | None) -> None | str:
    if v is None or v == "":
        return None
    return urllib.parse.unquote_plus(v)


class ConfluenceRef(BaseModel):
    """Space/page reference extracted from the path of a Confluence URL.

    The string fields are passed through ``_decode_url_part`` after validation,
    i.e. they are URL-decoded and an empty string becomes ``None``.
    """

    # Key of the space, URL-decoded.
    space_key: Annotated[str, AfterValidator(_decode_url_part)]
    # Numeric page ID; None when the path does not carry one.
    page_id: int | None = None
    # URL-decoded page title; None when the path does not carry one.
    page_title: Annotated[str | None, AfterValidator(_decode_url_part)] = None


# 1) Cloud [/wiki]/spaces/{space_key}[/pages/{page_id}[/{page_title}]]
#    - may be preceded by the gateway path prefix /ex/confluence/{cloudId}
#    - page_id is digits only; page_title is a single path segment
#    - one extra trailing segment that does not start with "pages/" is tolerated
#      and not captured
#    - an optional trailing slash is accepted
_CLOUD_URL_RE = re.compile(
    r"^(?:/ex/confluence/[^/]+)?(?:/wiki)?/spaces/"
    r"(?P<space_key>[A-Za-z0-9_~-]+)"
    r"(?:/pages/(?P<page_id>\d+)(?:/(?P<page_title>[^/?#]+))?)?"
    r"(?:/(?!pages/)[^/?#]+)?/?$"
)

# 2) Server [/display]/{space_key}[/{page_title}]
#    - has no page_id group, so only space_key and page_title are captured
#    - an optional trailing slash is accepted
_SERVER_URL_RE = re.compile(
    r"^(?:/display)?"
    r"/(?P<space_key>[A-Za-z0-9._-]+)"
    r"(?:/(?P<page_title>[^/?#]+))?/?$"
)


def parse_confluence_path(path: str) -> ConfluenceRef | None:
    """Turn the path portion of a Confluence URL into a ``ConfluenceRef``.

    A missing leading slash is added and trailing slashes are stripped before
    matching. The patterns are tried in this order, first match wins:
      1) Cloud [/wiki]/spaces/{space_key}[/pages/{page_id}[/{page_title}]]
      2) Server [/display]/{space_key}[/{page_title}]

    Returns:
        The parsed reference, or ``None`` when *path* is empty or matches
        neither pattern.
    """
    if not path:
        return None

    normalized = path if path.startswith("/") else "/" + path
    normalized = normalized.rstrip("/")

    for pattern in (_CLOUD_URL_RE, _SERVER_URL_RE):
        found = pattern.match(normalized)
        if found:
            return ConfluenceRef.model_validate(found.groupdict())

    return None


class AuthNotConfiguredError(BaseException):
    """Signals that no valid authentication is configured for an instance URL.

    The class derives from BaseException rather than Exception on purpose: broad
    ``except Exception`` handlers in export loops must not swallow it, so that it
    propagates to the app boundary.

    Attributes:
        url: Instance URL the connection was attempted for.
        service: Name of the service, e.g. ``"Confluence"`` or ``"Jira"``.
    """

    def __init__(self, url: str, service: str = "Confluence") -> None:
        message = f"No valid authentication configured for {service} at {url}"
        super().__init__(message)
        self.url = url
        self.service = service


class JiraAuthenticationError(Exception):
    """Raised when a Jira API response indicates an authentication failure.

    ``_jira_auth_failure_hook`` raises it when a response carries the header
    ``X-Seraph-Loginreason: AUTHENTICATED_FAILED``.
    """


def _jira_auth_failure_hook(
    response: requests.Response, *_args: object, **_kwargs: object
) -> requests.Response:
    """Response hook that turns a Jira authentication failure into an exception.

    Returns:
        The unmodified *response* when no failure is signalled.

    Raises:
        JiraAuthenticationError: If the ``X-Seraph-Loginreason`` response header
            equals ``AUTHENTICATED_FAILED``.
    """
    login_reason = response.headers.get("X-Seraph-Loginreason")
    if login_reason != "AUTHENTICATED_FAILED":
        return response

    msg = f"Jira authentication failed for request to {response.url}"
    raise JiraAuthenticationError(msg)


def response_hook(
    response: requests.Response, *_args: object, **_kwargs: object
) -> requests.Response:
    """Response hook that logs URL, status and headers of failed requests.

    The response is always returned unchanged; only non-OK responses are logged
    (at warning level).
    """
    if response.ok:
        return response

    header_snapshot = dict(response.headers)
    logger.warning(
        "Request to %s failed with status %s. Response headers: %s",
        response.url,
        response.status_code,
        header_snapshot,
    )
    return response


class ApiClientFactory:
    """Builds authenticated Confluence and Jira SDK clients.

    Every client is created with the stored connection settings and verified
    with one probe request before it is returned.
    """

    def __init__(self, connection_config: AtlassianSdkConnectionConfig) -> None:
        # Round-trip through the base SDK model so that model_dump() later yields
        # SDK-compatible fields only, even if a ConnectionConfig subclass came in.
        dumped = connection_config.model_dump()
        self.connection_config = AtlassianSdkConnectionConfig.model_validate(dumped)

    @staticmethod
    def _credential_kwargs(auth: ApiDetails) -> dict[str, str | None]:
        """Return the ``username`` / ``password`` / ``token`` SDK keyword arguments.

        Username and password are only filled when an API token is set; the token
        is only filled when a PAT is set.
        """
        return {
            "username": auth.username.get_secret_value() if auth.api_token else None,
            "password": auth.api_token.get_secret_value() if auth.api_token else None,
            "token": auth.pat.get_secret_value() if auth.pat else None,
        }

    def create_confluence(self, url: str, auth: ApiDetails) -> ConfluenceApiSdk:
        """Create a Confluence client for *url* and probe it by listing one space.

        Raises:
            ConnectionError: If building the client or the probe request fails.
        """
        try:
            client = ConfluenceApiSdk(
                url=url,
                **self._credential_kwargs(auth),
                **self.connection_config.model_dump(),
            )
            client.get_all_spaces(limit=1)
        except Exception as e:
            msg = f"Confluence connection failed: {e}"
            raise ConnectionError(msg) from e
        else:
            return client

    def create_jira(self, url: str, auth: ApiDetails) -> JiraApiSdk:
        """Create a Jira client for *url* and probe it by listing the projects.

        Raises:
            ConnectionError: If building the client or the probe request fails.
        """
        try:
            client = JiraApiSdk(
                url=url,
                **self._credential_kwargs(auth),
                **self.connection_config.model_dump(),
            )
            client.get_all_projects()
        except Exception as e:
            msg = f"Jira connection failed: {e}"
            raise ConnectionError(msg) from e
        else:
            return client


def get_confluence_instance(url: str) -> ConfluenceApiSdk:
    """Get authenticated Confluence API client for *url*.

    Creates a new client if one doesn't exist for that URL yet and caches it.

    When the configured auth for *url* includes a Cloud ID, API calls are routed through
    the Atlassian API gateway (``https://api.atlassian.com/ex/confluence/{cloud_id}``),
    which enables the use of scoped API tokens.  For standard Atlassian Cloud instances
    (``.atlassian.net``) the Cloud ID is fetched and stored automatically on first connection.

    Args:
        url: Instance URL or API gateway URL. A gateway URL is first rewritten to the
            Confluence service, then the URL is normalized; the result is the cache key.

    Returns:
        The cached or newly created Confluence client.

    Raises:
        AuthNotConfiguredError: If no auth is configured for *url*, or if connecting
            with the configured auth fails.
    """
    url = normalize_instance_url(ensure_service_gateway_url(url, "confluence"))
    with _clients_lock:
        if url in _confluence_clients:
            logger.debug("Confluence client cache hit for %s", url)
            return _confluence_clients[url]

    settings = get_settings()

    auth = settings.auth.get_instance(url)
    if auth is None:
        raise AuthNotConfiguredError(url, "Confluence")

    logger.debug("Creating new Confluence client for %s", url)

    # Auto-fetch and store the Cloud ID for standard Atlassian Cloud instances
    if not auth.cloud_id and _is_standard_atlassian_cloud_url(url):
        cloud_id = _try_fetch_cloud_id(url)
        if cloud_id:
            logger.info("Auto-fetched Atlassian Cloud ID for %s — storing in config", url)
            set_setting_with_keys(["auth", "confluence", url, "cloud_id"], cloud_id)
            settings = get_settings()

    # Re-read the auth entry so a freshly stored Cloud ID is picked up. Fall back to the
    # credentials resolved above (as get_jira_instance does) instead of empty details.
    auth = settings.auth.get_instance(url) or auth
    sdk_url = _get_confluence_sdk_url(url, auth)
    try:
        client = ApiClientFactory(settings.connection_config).create_confluence(sdk_url, auth)
        logger.info("Connected to Confluence at %s", sdk_url)
    except ConnectionError as e:
        logger.exception("[red bold]Confluence authentication failed for %s.[/red bold]", url)
        raise AuthNotConfiguredError(url, "Confluence") from e

    if settings.export.log_level == "DEBUG":
        # Append rather than replace so hooks already registered on the session survive.
        client.session.hooks["response"].append(response_hook)

    with _clients_lock:
        _confluence_clients[url] = client
    return client


def get_thread_confluence(base_url: str) -> ConfluenceApiSdk:
    """Return the calling thread's Confluence client for *base_url*.

    The atlassian-python-api Confluence client uses requests.Session, which is
    NOT thread-safe.  Every worker thread therefore keeps its own dict of
    clients, keyed by normalized base URL, so multi-instance exports work too.

    NOTE(review): the client stored here comes from get_confluence_instance(),
    which returns the entry of the module-level cache on a hit — so threads
    asking for the same URL appear to receive the same client object. Confirm
    whether that is intended.
    """
    key = normalize_instance_url(base_url)
    try:
        clients = _thread_local.clients
    except AttributeError:
        # First call on this thread: set up its private client dict.
        clients = _thread_local.clients = {}

    if key not in clients:
        logger.debug("Initializing thread-local Confluence client for %s", key)
        clients[key] = get_confluence_instance(key)
    return clients[key]


def get_jira_instance(url: str) -> JiraApiSdk:
    """Get authenticated Jira API client for *url*.

    Creates a new client if one doesn't exist for that URL yet and caches it.

    When the input is a Confluence gateway URL (``/ex/confluence/{cloudId}``), it is
    automatically converted to the Jira gateway URL (``/ex/jira/{cloudId}``) before
    auth lookup and SDK connection.  This handles the common case where the caller
    derives the Jira URL from a Confluence page's ``base_url``.

    When the configured auth for *url* includes a Cloud ID, API calls are routed through
    the Atlassian API gateway (``https://api.atlassian.com/ex/jira/{cloud_id}``).
    For standard Atlassian Cloud instances the Cloud ID is fetched and stored automatically.

    Args:
        url: Jira instance URL or an API gateway URL of either service.

    Returns:
        The cached or newly created Jira client.

    Raises:
        RuntimeWarning: If Jira enrichment is disabled in the export settings.
        AuthNotConfiguredError: If no Jira auth is configured for *url*, or if connecting
            with the configured auth fails.
    """
    # Always work with the Jira gateway URL, even if the caller passed the Confluence one.
    url = normalize_instance_url(ensure_service_gateway_url(url, "jira"))
    settings = get_settings()

    if not settings.export.enable_jira_enrichment:
        msg = "Jira API client was requested even though Jira enrichment is disabled."
        raise RuntimeWarning(msg)

    with _clients_lock:
        if url in _jira_clients:
            logger.debug("Jira client cache hit for %s", url)
            return _jira_clients[url]

    auth = settings.auth.get_jira_instance(url)
    if auth is None:
        raise AuthNotConfiguredError(url, "Jira")

    logger.debug("Creating new Jira client for %s", url)

    # Auto-fetch and store the Cloud ID for standard Atlassian Cloud instances
    if not auth.cloud_id and _is_standard_atlassian_cloud_url(url):
        cloud_id = _try_fetch_cloud_id(url)
        if cloud_id:
            logger.info("Auto-fetched Atlassian Cloud ID for %s — storing in config", url)
            set_setting_with_keys(["auth", "jira", url, "cloud_id"], cloud_id)
            settings = get_settings()

    # Re-read the auth entry so a freshly stored Cloud ID is picked up; keep the
    # credentials resolved above if the lookup yields nothing.
    auth = settings.auth.get_jira_instance(url) or auth
    sdk_url = _get_jira_sdk_url(url, auth)
    try:
        client = ApiClientFactory(settings.connection_config).create_jira(sdk_url, auth)
        logger.info("Connected to Jira at %s", sdk_url)
    except ConnectionError as e:
        logger.exception("[red bold]Jira authentication failed for %s.[/red bold]", url)
        raise AuthNotConfiguredError(url, "Jira") from e

    # Surface authentication failures signalled via response headers as exceptions.
    client.session.hooks["response"].append(_jira_auth_failure_hook)

    if settings.export.log_level == "DEBUG":
        client.session.hooks["response"].append(response_hook)

    with _clients_lock:
        _jira_clients[url] = client
    return client


def invalidate_confluence_client(url: str) -> None:
    """Remove a cached Confluence client so the next call creates a fresh one.

    The cache key is derived exactly as in ``get_confluence_instance`` (gateway URLs are
    rewritten to the Confluence service before normalization), so a gateway URL of the
    other service still hits the right entry. Unknown URLs are ignored.
    """
    key = normalize_instance_url(ensure_service_gateway_url(url, "confluence"))
    with _clients_lock:
        _confluence_clients.pop(key, None)


def invalidate_jira_client(url: str) -> None:
    """Remove a cached Jira client so the next call creates a fresh one.

    The cache key is derived exactly as in ``get_jira_instance`` (gateway URLs are
    rewritten to the Jira service before normalization), so passing the Confluence
    gateway URL of the same Cloud ID still hits the right entry. Unknown URLs are ignored.
    """
    key = normalize_instance_url(ensure_service_gateway_url(url, "jira"))
    with _clients_lock:
        _jira_clients.pop(key, None)


def handle_jira_auth_failure(url: str) -> None:
    """Handle a Jira authentication failure by invalidating the cached client and raising.

    Args:
        url: URL of the Jira instance whose cached client is dropped.

    Raises:
        AuthNotConfiguredError: Always; this function never returns normally.
    """
    # Drop the cached client first so a later get_jira_instance() call builds a new one.
    invalidate_jira_client(url)
    raise AuthNotConfiguredError(url, "Jira")


================================================
FILE: confluence_markdown_exporter/config.py
================================================
"""Config sub-app for the cme CLI."""

import json
import logging
from typing import Annotated

import jmespath
import typer
import yaml

from confluence_markdown_exporter.utils.app_data_store import APP_CONFIG_PATH
from confluence_markdown_exporter.utils.app_data_store import get_settings
from confluence_markdown_exporter.utils.app_data_store import reset_to_defaults
from confluence_markdown_exporter.utils.app_data_store import set_setting

logger = logging.getLogger(__name__)

# Markdown reference of the available config keys, meant for use as a typer epilog.
# Each table row must be its own \n\n-separated block so typer's epilog
# renderer keeps single \n between rows, forming valid markdown table syntax.
# Rows that are too long for one source line are split across adjacent string
# literals; only the final literal of a row ends with "\n\n".
_CONFIG_KEYS_EPILOG = (
    "---\n\n"
    "**Available config keys** (run `cme config list` to see all current values):\n\n"
    "| Key | Description |\n\n"
    "| --- | ----------- |\n\n"
    "| `export.output_path` | Directory where exported files are saved |\n\n"
    "| `export.log_level` | Verbosity: `DEBUG`, `INFO`, `WARNING`, `ERROR` |\n\n"
    "| `export.save_log_to_file` | Also write logs to `cme.log` next to the config file |\n\n"
    "| `export.skip_unchanged` | Skip pages unchanged since last export |\n\n"
    "| `export.cleanup_stale` | Delete local files for removed pages |\n\n"
    "| `export.page_path` | File path template for exported pages |\n\n"
    "| `export.attachment_path` | File path template for exported attachments |\n\n"
    "| `export.page_href` | Link style for pages: `relative` or `absolute` |\n\n"
    "| `export.attachment_href` | Link style for attachments: `relative` or `absolute` |\n\n"
    "| `export.include_document_title` | Prepend H1 title to each page |\n\n"
    "| `export.include_toc` | Export Table of Contents macro (`true`/`false`) |\n\n"
    "| `export.include_macro` | How to render `include`/`excerpt-include` macros:"
    " `inline` (default) or `transclusion` (Obsidian `![[Page Title]]` embed) |\n\n"
    "| `export.page_breadcrumbs` | Include breadcrumb links at top of page |\n\n"
    "| `export.confluence_url_in_frontmatter` | Include Confluence page URL in YAML "
    "front matter: `none`, `webui`, `tinyui`, `both` |\n\n"
    "| `export.page_metadata_in_frontmatter` | Add Confluence page metadata "
    "fields (page_id, space_key, type, created, created_by, last_modified, "
    "last_modified_by, version) to YAML front matter (`true`/`false`) |\n\n"
    "| `export.enable_jira_enrichment` | Fetch Jira data for enriched links |\n\n"
    "| `export.attachments_export` | Which attachments to download:"
    " `referenced` (default), `all`, `disabled` |\n\n"
    "| `export.image_captions` | Use image captions as markdown alt text (`true`/`false`) |\n\n"
    "| `export.comments_export` | Which comments to export to sidecar "
    "`.comments.md` files: `none` (default), `inline`, `footer`, `all` |\n\n"
    "| `export.convert_status_badges` | Convert Confluence status badges to `<mark>` elements |\n\n"
    "| `export.convert_text_highlights` | Convert background-color spans to `<mark>` elements |\n\n"
    "| `export.convert_font_colors` | Convert font-color spans to `<font>` elements |\n\n"
    "| `export.filename_length` | Maximum filename length (default: 255) |\n\n"
    "| `connection_config.max_workers` | Parallel export workers (default: 20) |\n\n"
    "| `connection_config.use_v2_api` | Use Confluence REST API v2 (`true`/`false`) |\n\n"
    "| `connection_config.verify_ssl` | Verify SSL certificates (`true`/`false`) |\n\n"
    "| `connection_config.timeout` | API request timeout in seconds |\n\n"
    "| `auth.confluence` | Credentials keyed by instance URL — use `cme config edit` |\n\n"
    "| `auth.jira` | Jira credentials keyed by instance URL — use `cme config edit` |\n\n"
    "---\n\n"
    "Env var override: prefix with `CME_` and `__` as delimiter. "
    "Examples: `CME_EXPORT__OUTPUT_PATH=/tmp/export`, `CME_CONNECTION_CONFIG__MAX_WORKERS=5`.\n\n"
)

# Typer sub-app behind `cme config`. Help and epilog are written in markdown
# (rich_markup_mode="markdown"); epilog entries are separated by blank lines so
# that each one renders as its own list item. invoke_without_command=True lets
# the callback below run when no subcommand is given.
app = typer.Typer(
    rich_markup_mode="markdown",
    invoke_without_command=True,
    help=(
        "Manage configuration interactively or via subcommands.\n\n"
        "Running `cme config` without a subcommand opens the **interactive menu**, "
        "which lets you browse and change all settings including authentication credentials.\n\n"
        "For scripting or automation, use the subcommands below."
    ),
    epilog=(
        "**Subcommands at a glance:**\n\n"
        "- `cme config` — interactive menu\n\n"
        "- `cme config list` — print full config as YAML\n\n"
        "- `cme config list -o json` — print full config as JSON\n\n"
        "- `cme config get export.log_level` — print a single value\n\n"
        "- `cme config set export.log_level=DEBUG` — set a value\n\n"
        "- `cme config edit auth.confluence` — edit credentials interactively\n\n"
        "- `cme config path` — show config file path\n\n"
        "- `cme config reset` — reset all settings to defaults\n\n"
        "- `cme config reset export.log_level` — reset a single key to its default\n\n"
    ),
)


@app.callback(invoke_without_command=True)
def callback(ctx: typer.Context) -> None:
    """Open the interactive configuration menu if no subcommand is given."""
    if ctx.invoked_subcommand is not None:
        # A subcommand handles this invocation; nothing to do here.
        return

    # Imported only when the interactive menu is actually opened.
    from confluence_markdown_exporter.utils.config_interactive import main_config_menu_loop

    main_config_menu_loop(None)


@app.command(
    help=(
        "Reset configuration to defaults.\n\n"
        "Without a `KEY` argument, resets the **entire configuration** to factory defaults. "
        "Pass a dot-notation key to reset only that key or section.\n\n"
        "Use `--yes` / `-y` to skip the confirmation prompt (useful in scripts)."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme config reset` — reset everything (prompts for confirmation)\n\n"
        "- `cme config reset --yes` — skip confirmation prompt\n\n"
        "- `cme config reset export.log_level` — reset a single key to its default\n\n"
        "- `cme config reset connection_config` — reset a whole section to defaults\n\n"
    ),
)
def reset(
    key: Annotated[
        str | None,
        typer.Argument(
            help=(
                "Dot-notation config key or section to reset to its default. "
                "If omitted, the entire configuration is reset. "
                "Examples: `export.log_level`, `connection_config`, `export`."
            ),
            metavar="KEY",
        ),
    ] = None,
    yes: Annotated[  # noqa: FBT002
        bool,
        typer.Option("--yes", "-y", help="Skip the confirmation prompt."),
    ] = False,
) -> None:
    """Reset *key* (or everything when *key* is omitted) to defaults.

    Unless ``yes`` is set, the user is asked to confirm first; declining
    raises ``typer.Abort`` before anything is changed.
    """
    needs_confirmation = not yes
    if needs_confirmation:
        prompt_target = f"'{key}'" if key else "all configuration"
        if not typer.confirm(f"Reset {prompt_target} to defaults?", default=False):
            raise typer.Abort

    reset_to_defaults(key)

    # The success message words the "everything" case differently from the prompt.
    done_target = f"'{key}'" if key else "Configuration"
    typer.echo(f"{done_target} reset to defaults.")


@app.command(
    help=(
        "Print the path to the configuration file.\n\n"
        "Override the config file location by setting the `CME_CONFIG_PATH` environment variable."
    ),
    epilog=(
        "**Example:**\n\n"
        "- `cme config path`\n\n"
        "- `CME_CONFIG_PATH=/custom/path.json cme config path` — custom config file\n\n"
    ),
)
def path() -> None:
    """Print the location of the configuration file."""
    config_file = str(APP_CONFIG_PATH)
    typer.echo(config_file)


@app.command(
    name="list",
    help=(
        "Print the current configuration as YAML (default) or JSON.\n\n"
        "Shows all settings and their current effective values. "
        "Use this to discover available config keys for `cme config get` and "
        "`cme config set`.\n\n"
        "> **Note:** Secret values (API tokens, passwords) are printed in plaintext."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme config list` — YAML output (default)\n\n"
        "- `cme config list -o json` — JSON output\n\n"
        "- `cme config list -o yaml` — explicit YAML\n\n"
    ),
)
def list_config(
    output: Annotated[
        str,
        typer.Option(
            "--output",
            "-o",
            help="Output format. Accepted values: `yaml` (default) or `json`.",
            metavar="FORMAT",
        ),
    ] = "yaml",
) -> None:
    """Print the whole configuration in the requested format.

    The format name is case-insensitive; ``yml`` is accepted as an alias for
    ``yaml``. Any other value prints an error to stderr and exits with code 1.
    """
    # Round-trip through JSON so the dumped data contains only plain JSON types.
    data = json.loads(get_settings().model_dump_json())
    fmt = output.lower()

    if fmt not in ("json", "yaml", "yml"):
        typer.echo(f"Unknown format '{output}': expected 'yaml' or 'json'.", err=True)
        raise typer.Exit(code=1)

    if fmt == "json":
        typer.echo(json.dumps(data, indent=2))
        return

    # yaml.dump already ends with a newline, so suppress the one echo would add.
    typer.echo(yaml.dump(data, default_flow_style=False, allow_unicode=True), nl=False)


@app.command(
    help=(
        "Print the current value of a single config key.\n\n"
        "Keys use dot notation to address nested settings "
        "(e.g. `export.log_level`, `connection_config.max_workers`). "
        "Nested sections are printed as YAML. "
        "Run `cme config list` to see all available keys."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme config get export.log_level`\n\n"
        "- `cme config get export.output_path`\n\n"
        "- `cme config get connection_config.max_workers`\n\n"
        "- `cme config get connection_config` — prints the whole section as YAML\n\n"
        "- `cme config get export` — prints all export settings\n\n"
        + _CONFIG_KEYS_EPILOG
    ),
)
def get(
    key: Annotated[
        str,
        typer.Argument(
            help=(
                "Config key in dot notation. "
                "Examples: `export.log_level`, `connection_config.max_workers`, `export`."
            ),
            metavar="KEY",
        ),
    ],
) -> None:
    """Print the value stored under *key*.

    The key is evaluated as a JMESPath expression against the dumped settings.
    A result of ``None`` is reported as "not found" (exit code 1). Dicts and
    lists are printed as YAML; anything else is printed via ``str()``.
    """
    settings_data = json.loads(get_settings().model_dump_json())
    value = jmespath.search(key, settings_data)

    if value is None:
        typer.echo(f"Key '{key}' not found.", err=True)
        raise typer.Exit(code=1)

    is_container = isinstance(value, dict | list)
    if not is_container:
        typer.echo(str(value))
        return

    typer.echo(yaml.dump(value, default_flow_style=False, allow_unicode=True), nl=False)


@app.command(
    name="set",
    help=(
        "Set one or more configuration values.\n\n"
        "Each argument must be a `key=value` pair using dot notation for the key. "
        "Values are parsed as JSON where possible "
        "(so `true`, `false`, numbers, and JSON arrays work), "
        "falling back to a plain string.\n\n"
        "> **Note:** For auth keys that contain a URL "
        "(e.g. `auth.confluence.https://...`), use `cme config edit auth.confluence` "
        "instead — the interactive editor handles URL-based keys correctly."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme config set export.log_level=DEBUG`\n\n"
        "- `cme config set export.output_path=/tmp/export`\n\n"
        "- `cme config set export.skip_unchanged=false`\n\n"
        "- `cme config set connection_config.max_workers=5`\n\n"
        "- `cme config set connection_config.verify_ssl=false`\n\n"
        "- `cme config set export.log_level=INFO export.output_path=./out`"
        " — multiple keys at once\n\n"
        + _CONFIG_KEYS_EPILOG
    ),
)
def set_config(
    key_values: Annotated[
        list[str],
        typer.Argument(
            help=(
                "One or more `key=value` pairs. "
                "Keys use dot notation (e.g. `export.log_level=DEBUG`). "
                "Values are parsed as JSON first, then as plain strings. "
                "For auth keys containing URLs, use `cme config edit` instead."
            ),
            metavar="KEY=VALUE",
        ),
    ],
) -> None:
    """Apply each ``key=value`` pair in order.

    Pairs are processed one at a time, so pairs before a failing one have
    already been passed to ``set_setting``. A pair without ``=`` or a
    ``ValueError``/``KeyError`` from ``set_setting`` prints an error to stderr
    and exits with code 1.
    """
    for pair in key_values:
        # partition() returns an empty separator when "=" is absent; only the
        # first "=" splits, so the value itself may contain "=".
        raw_key, separator, raw_value = pair.partition("=")
        if not separator:
            typer.echo(f"Invalid format '{pair}': expected key=value.", err=True)
            raise typer.Exit(code=1)

        key = raw_key.strip()
        value = _parse_value(raw_value)
        try:
            set_setting(key, value)
        except (ValueError, KeyError) as e:
            typer.echo(f"Failed to set '{key}': {e}", err=True)
            raise typer.Exit(code=1) from e

    typer.echo("Configuration updated.")


@app.command(
    help=(
        "Open the interactive editor for a specific config key.\n\n"
        "Launches the interactive configuration menu pre-navigated to the given key. "
        "Especially useful for editing authentication credentials, "
        "where the instance URL is part of the key and cannot be set via `cme config set`."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme config edit auth.confluence` — add or update Confluence credentials\n\n"
        "- `cme config edit auth.jira` — edit Jira credentials\n\n"
        "- `cme config edit export.log_level` — edit a setting interactively\n\n"
        "- `cme config edit export.output_path` — set output path interactively\n\n"
    ),
)
def edit(
    key: Annotated[
        str,
        typer.Argument(
            help=(
                "Config key to open in the interactive editor, using dot notation. "
                "Examples: `auth.confluence`, `auth.jira`, `export.log_level`."
            ),
            metavar="KEY",
        ),
    ],
) -> None:
    """Start the interactive configuration menu with *key* as its starting point."""
    # Imported inside the function rather than at module import time.
    from confluence_markdown_exporter.utils import config_interactive

    config_interactive.main_config_menu_loop(key)


def _parse_value(value_str: str) -> object:
    """Parse a CLI value string, trying JSON first then falling back to raw string.

    Handles JSON scalars (true/false, numbers, null), arrays, and objects.
    Also accepts Python-style True/False for convenience.
    """
    try:
        return json.loads(value_str)
    except json.JSONDecodeError:
        pass
    lower = value_str.lower()
    if lower == "true":
        return True
    if lower == "false":
        return False
    return value_str


================================================
FILE: confluence_markdown_exporter/confluence.py
================================================
"""Confluence API documentation.

https://developer.atlassian.com/cloud/confluence/rest/v1/intro
"""

import functools
import json
import logging
import mimetypes
import os
import re
import urllib.parse
from collections.abc import Set
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
from os import PathLike
from pathlib import Path
from string import Template
from typing import Any
from typing import ClassVar
from typing import Literal
from typing import TypeAlias
from typing import cast
from urllib.parse import unquote
from urllib.parse import urlparse

import yaml
from atlassian.errors import ApiError
from atlassian.errors import ApiNotFoundError
from bs4 import BeautifulSoup
from bs4 import Tag
from markdownify import ATX
from markdownify import MarkdownConverter
from pydantic import BaseModel
from pydantic import Field
from requests import HTTPError
from requests import RequestException
from rich.progress import BarColumn
from rich.progress import MofNCompleteColumn
from rich.progress import Progress
from rich.progress import SpinnerColumn
from rich.progress import TaskProgressColumn
from rich.progress import TextColumn
from rich.progress import TimeElapsedColumn
from rich.progress import TimeRemainingColumn
from tabulate import tabulate

from confluence_markdown_exporter.api_clients import JiraAuthenticationError
from confluence_markdown_exporter.api_clients import build_gateway_url
from confluence_markdown_exporter.api_clients import get_confluence_instance
from confluence_markdown_exporter.api_clients import get_jira_instance
from confluence_markdown_exporter.api_clients import get_thread_confluence
from confluence_markdown_exporter.api_clients import handle_jira_auth_failure
from confluence_markdown_exporter.api_clients import parse_confluence_path
from confluence_markdown_exporter.api_clients import parse_gateway_url
from confluence_markdown_exporter.utils.app_data_store import get_settings
from confluence_markdown_exporter.utils.app_data_store import normalize_instance_url
from confluence_markdown_exporter.utils.drawio_converter import load_and_parse_drawio
from confluence_markdown_exporter.utils.export import github_heading_slug
from confluence_markdown_exporter.utils.export import sanitize_filename
from confluence_markdown_exporter.utils.export import sanitize_key
from confluence_markdown_exporter.utils.export import save_file
from confluence_markdown_exporter.utils.lockfile import AttachmentEntry
from confluence_markdown_exporter.utils.lockfile import LockfileManager
from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry
from confluence_markdown_exporter.utils.rich_console import ExportStats
from confluence_markdown_exporter.utils.rich_console import console
from confluence_markdown_exporter.utils.rich_console import get_stats
from confluence_markdown_exporter.utils.rich_console import reset_stats
from confluence_markdown_exporter.utils.table_converter import TableConverter

# Alias used for decoded JSON objects handled in this module.
JsonResponse: TypeAlias = dict
# A filesystem path given either as a string or as an os.PathLike of str.
StrPath: TypeAlias = str | PathLike[str]

logger = logging.getLogger(__name__)
# Highest valid Unicode code point (U+10FFFF).
_MAX_UNICODE_CODEPOINT = 0x10FFFF

# `background-color: rgb(r, g, b)`; the three groups capture the decimal channels.
_RE_RGB_BG = re.compile(r"background-color:\s*rgb\((\d+),\s*(\d+),\s*(\d+)\)")
# `color: rgb(r, g, b)`; the lookbehind rejects a preceding lowercase letter or
# hyphen, so the `color:` inside `background-color:` does not match.
_RE_RGB_COLOR = re.compile(r"(?<![a-z-])color:\s*rgb\((\d+),\s*(\d+),\s*(\d+)\)")
# `[data-colorid=ID]{color:#hex}`; captures the id and the hex colour. Not
# matched when directly preceded by `>` or a word character.
_RE_COLORID_CSS = re.compile(r"(?<![>\w])\[data-colorid=(\w+)\]\{color:(#[0-9a-fA-F]+)\}")
# A complete `#rgb` or `#rrggbb` hex colour.
_RE_HEX_COLOR = re.compile(r"^#(?:[0-9a-fA-F]{3}|[0-9a-fA-F]{6})$")

# Confluence default header backgrounds — applied automatically to <th> cells, and
# (in matrix-style tables) to row-label <td>s. Treated as "no user-chosen colour".
_DEFAULT_HEADER_BGS = frozenset({"#f4f5f7", "#f2f2f2"})


def _rgb_to_hex(r: int, g: int, b: int) -> str:
    return f"#{r:02x}{g:02x}{b:02x}"


def _extract_cell_highlight_hex(el: Tag) -> str | None:
    """Return Confluence cell background hex from data-highlight-colour, or None.

    Confluence Cloud sets `data-highlight-colour="#rrggbb"` (or `"transparent"`)
    on `<td>` / `<th>` when a cell background colour is applied.
    """
    val = el.get("data-highlight-colour")
    if not isinstance(val, str):
        return None
    val = val.strip().lower()
    if not val or val == "transparent" or val in _DEFAULT_HEADER_BGS:
        return None
    if _RE_HEX_COLOR.match(val):
        return val
    return None


# Background colours for Confluence status-badge lozenges (Atlassian design token pastels).
# Keys are `aui-lozenge-*` names (presumably the CSS classes found on lozenge
# elements — verify against the converter that reads this map); values are
# `#rrggbb` hex colours.
_LOZENGE_COLORS: dict[str, str] = {
    "aui-lozenge-complete": "#cce0ff",  # blue
    "aui-lozenge-success": "#baf3db",  # green
    "aui-lozenge-current": "#f8e6a0",  # yellow / orange
    "aui-lozenge-error": "#ffd5d2",  # red
    "aui-lozenge-progress": "#dfd8fd",  # purple / violet
}


def _require_dict(response: object, context: str) -> JsonResponse:
    """Validate that an API response is a dict, not an HTML redirect or error string.

    SAML SSO redirects and session-expiry responses are returned as raw HTML strings
    by the atlassian-python-api client instead of raising an exception.  Calling
    .get() on such a string produces a confusing AttributeError; this helper surfaces
    a clear message instead.
    """
    if isinstance(response, dict):
        return response
    preview = str(response)[:120].replace("\n", " ")
    if "SAMLRequest" in str(response) or "SAMLResponse" in str(response):
        msg = (
            f"Authentication failed for {context}: received a SAML SSO redirect instead of JSON. "
            "Check that your Confluence token/credentials are correct and not expired."
        )
    else:
        msg = f"Unexpected non-dict response for {context}: {preview!r}"
    raise ValueError(msg)


def _extract_base_url(url: str) -> str:
    """Extract the base URL from a Confluence or Jira URL.

    For Atlassian Cloud URLs (``*.atlassian.net``) returns ``{scheme}://{hostname}``.
    For Atlassian API gateway URLs of the form
    ``https://api.atlassian.com/ex/{service}/{cloudId}/...``
    returns ``https://api.atlassian.com/ex/{service}/{cloudId}`` so that
    the Cloud ID is preserved as part of the base URL used for auth lookup
    and SDK initialisation.
    For Server/Data Center instances with a context path (e.g.
    ``https://host/confluence/spaces/KEY``), the context path is preserved
    so the SDK client hits the correct REST endpoints.
    """
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme is None or parsed.hostname is None:
        msg = (
            "Invalid URL: a scheme (http:// or https://) and hostname are required. "
            "Expected format: 'https://<hostname>[:port]/...'."
        )
        raise ValueError(msg)

    if gateway := parse_gateway_url(url):
        return normalize_instance_url(build_gateway_url(*gateway))

    # For Server/DC instances the Confluence webapp may be deployed under a
    # context path (e.g. ``/confluence``).  Preserve everything before the
    # first path segment that belongs to Confluence's own routing.
    _confluence_route_segments = {
        "wiki",
        "display",
        "spaces",
        "rest",
        "pages",
        "plugins",
        "dosearchsite.action",
    }
    segments = [s for s in parsed.path.split("/") if s]
    context_parts: list[str] = []
    for segment in segments:
        if segment.lower() in _confluence_route_segments:
            break
        context_parts.append(segment)

    base = f"{parsed.scheme}://{parsed.hostname}"
    if parsed.port and parsed.port not in (80, 443):
        base = f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
    if context_parts:
        base = f"{base}/{'/'.join(context_parts)}"
    return normalize_instance_url(base)


def _join_confluence_link(data: JsonResponse, key: str) -> str:
    links = data.get("_links", {})
    if not isinstance(links, dict):
        return ""
    base = links.get("base")
    rel = links.get(key)
    if not isinstance(base, str) or not isinstance(rel, str) or not base or not rel:
        return ""
    return f"{base.rstrip('/')}/{rel.lstrip('/')}"


def _get_web_url(data: JsonResponse) -> str:
    """Return the URL built from the ``webui`` link of *data*, or ``""``."""
    return _join_confluence_link(data, key="webui")


def _get_tiny_url(data: JsonResponse) -> str:
    """Return the URL built from the ``tinyui`` link of *data*, or ``""``."""
    return _join_confluence_link(data, key="tinyui")


# Path segments treated as Jira's own routing. `_extract_jira_base_url` keeps
# the path segments before the first of these (compared in lowercase) as the
# instance's context path.
_JIRA_ROUTE_SEGMENTS = {
    "agile",
    "backlog",
    "board",
    "browse",
    "issues",
    "plugins",
    "projects",
    "rest",
    "secure",
    "servicedesk",
    "software",
}

_HTML_ELEMENTS = frozenset(
    {
        "a",
        "abbr",
        "acronym",
        "address",
        "area",
        "article",
        "aside",
        "audio",
        "b",
        "base",
        "bdi",
        "bdo",
        "blockquote",
        "body",
        "br",
        "button",
        "canvas",
        "caption",
        "cite",
        "code",
        "col",
        "colgroup",
        "data",
        "datalist",
        "dd",
        "del",
        "details",
        "dfn",
        "dialog",
        "div",
        "dl",
        "dt",
        "em",
        "embed",
        "fieldset",
        "figcaption",
        "figure",
        "font",
        "footer",
        "form",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "head",
        "header",
        "hgroup",
        "hr",
        "html",
        "i",
        "iframe",
        "img",
        "input",
        "ins",
        "kbd",
        "keygen",
        "label",
        "legend",
        "li",
        "link",
        "main",
        "map",
        "mark",
        "menu",
        "menuitem",
        "meta",
        "meter",
        "nav",
        "noscript",
        "object",
        "ol",
        "optgroup",
        "option",
        "output",
        "p",
        "picture",
        "pre",
        "progress",
        "q",
        "rp",
        "rt",
        "ruby",
        "s",
        "samp",
        "script",
        "section",
        "select",
        "small",
        "source",
        "span",
        "strong",
        "style",
        "sub",
        "summary",
        "sup",
        "table",
        "tbody",
        "td",
        "template",
        "textarea",
        "tfoot",
        "th",
        "thead",
        "time",
        "title",
        "tr",
        "track",
        "u",
        "ul",
        "var",
        "video",
        "wbr",
    }
)

# Text between one `<` and the next `>` with no angle bracket or newline in
# between; group 1 is the enclosed text.
_ANGLE_BRACKET_RE = re.compile(r"<([^<>\n]*)>")
# Three or more backticks, or three or more tildes, at the start of the
# matched string; group 1 is the fence run.
_CODE_FENCE_RE = re.compile(r"^(`{3,}|~{3,})")
# A single-backtick span containing no backtick or newline.
_INLINE_CODE_RE = re.compile(r"`[^`\n]*`")
# A scheme (a letter followed by 1-31 letters, digits, `+`, `.` or `-`), a colon,
# then any run without whitespace or angle brackets.
# NOTE(review): looks like the CommonMark URI-autolink shape — confirm.
_AUTOLINK_URI_RE = re.compile(r"^[A-Za-z][A-Za-z0-9+.\-]{1,31}:[^\s<>]*$")
# An email-address shape: local part, `@`, then dot-separated labels of at
# most 63 characters that start and end with a letter or digit.
# NOTE(review): looks like the CommonMark email-autolink shape — confirm.
_AUTOLINK_EMAIL_RE = re.compile(
    r"^[A-Za-z0-9.!#$%&'*+/=?^_`{|}~\-]+@[A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?"
    r"(?:\.[A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?)*$"
)


def _extract_jira_base_url(url: str) -> str | None:
    """Extract the Jira instance base URL from a Jira issue URL.

    Strips Jira-specific routing segments (e.g. ``browse``) so that the context
    path is preserved for Server/DC deployments (e.g. ``https://host/jira``),
    matching the key format used in ``auth.jira`` configuration.
    Returns ``None`` when *url* is not an absolute URL.
    """
    parsed = urllib.parse.urlparse(url)
    if not parsed.scheme or not parsed.hostname:
        return None

    if gateway := parse_gateway_url(url):
        return normalize_instance_url(build_gateway_url(*gateway))

    segments = [s for s in parsed.path.split("/") if s]
    context_parts: list[str] = []
    for segment in segments:
        if segment.lower() in _JIRA_ROUTE_SEGMENTS:
            break
        context_parts.append(segment)

    base = f"{parsed.scheme}://{parsed.hostname}"
    if parsed.port and parsed.port not in (80, 443):
        base = f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
    if context_parts:
        base = f"{base}/{'/'.join(context_parts)}"
    return normalize_instance_url(base)


# Settings loaded once, when this module is imported.
settings = get_settings()


class JiraIssue(BaseModel):
    """The key, summary, description and status name of a Jira issue."""

    key: str
    summary: str
    description: str | None
    status: str

    @classmethod
    def from_json(cls, data: JsonResponse) -> "JiraIssue":
        """Build an issue from a Jira issue payload, defaulting absent values to ``""``."""
        issue_fields = data.get("fields", {})
        status_name = issue_fields.get("status", {}).get("name", "")
        return cls(
            key=data.get("key", ""),
            summary=issue_fields.get("summary", ""),
            description=issue_fields.get("description", ""),
            status=status_name,
        )

    @classmethod
    def from_key(cls, issue_key: str, jira_url: str) -> "JiraIssue | None":
        """Fetch a Jira issue by key.

        Returns ``None`` when Jira enrichment is disabled in the settings, or
        when authentication fails (after ``handle_jira_auth_failure`` has been
        called for *jira_url*).
        """
        if not get_settings().export.enable_jira_enrichment:
            return None

        try:
            issue = cls._fetch_cached(issue_key, jira_url)
        except JiraAuthenticationError:
            handle_jira_auth_failure(jira_url)
            return None
        return issue

    @classmethod
    @functools.lru_cache(maxsize=100)
    def _fetch_cached(cls, issue_key: str, jira_url: str) -> "JiraIssue":
        """Fetch and parse one issue; results are memoised (up to 100 entries)."""
        raw_issue = get_jira_instance(jira_url).get_issue(issue_key)
        return cls.from_json(cast("JsonResponse", raw_issue))


class User(BaseModel):
    """Identity fields of a Confluence user; absent values are stored as ``""``."""

    account_id: str
    username: str
    display_name: str
    public_name: str
    email: str

    @classmethod
    def from_json(cls, data: JsonResponse) -> "User":
        """Build a user from a Confluence user payload."""
        # Model field -> key in the JSON payload.
        json_keys = {
            "account_id": "accountId",
            "username": "username",
            "display_name": "displayName",
            "public_name": "publicName",
            "email": "email",
        }
        return cls(**{field: data.get(json_key, "") for field, json_key in json_keys.items()})

    @classmethod
    @functools.lru_cache(maxsize=100)
    def from_username(cls, username: str, base_url: str = "") -> "User":
        """Look a user up by username; results are memoised (up to 100 entries)."""
        client = get_thread_confluence(base_url)
        details = client.get_user_details_by_username(username)
        return cls.from_json(cast("JsonResponse", details))

    @classmethod
    @functools.lru_cache(maxsize=100)
    def from_userkey(cls, userkey: str, base_url: str = "") -> "User":
        """Look a user up by user key; results are memoised (up to 100 entries)."""
        client = get_thread_confluence(base_url)
        details = client.get_user_details_by_userkey(userkey)
        return cls.from_json(cast("JsonResponse", details))

    @classmethod
    @functools.lru_cache(maxsize=100)
    def from_accountid(cls, accountid: str, base_url: str = "") -> "User":
        """Look a user up by account id; results are memoised (up to 100 entries)."""
        client = get_thread_confluence(base_url)
        details = client.get_user_details_by_accountid(accountid)
        return cls.from_json(cast("JsonResponse", details))


class Version(BaseModel):
    """A content version: its number, its author and when it was made."""

    number: int
    by: User
    when: str
    friendly_when: str

    @classmethod
    def from_json(cls, data: JsonResponse) -> "Version":
        """Build a version from a payload; absent values default to ``0`` / ``""``."""
        author = User.from_json(data.get("by", {}))
        return cls(
            number=data.get("number", 0),
            by=author,
            when=data.get("when", ""),
            friendly_when=data.get("friendlyWhen", ""),
        )


class History(BaseModel):
    """Creation metadata of a piece of content: the date and the creating user."""

    created: str
    created_by: User

    @classmethod
    def from_json(cls, data: JsonResponse) -> "History":
        """Build a history from a payload, reading ``createdDate`` and ``createdBy``."""
        creator = User.from_json(data.get("createdBy", {}))
        created_on = data.get("createdDate", "")
        return cls(created=created_on, created_by=creator)


class Organization(BaseModel):
    """A Confluence instance, identified by ``base_url``, together with its spaces."""

    base_url: str
    spaces: list["Space"]

    @property
    def pages(self) -> list["Page | Descendant"]:
        """Return the pages of every space, concatenated in space order."""
        collected: list[Page | Descendant] = []
        for space in self.spaces:
            collected.extend(space.pages)
        return collected

    def export(self) -> None:
        """Export all pages across all spaces, showing per-space discovery progress."""
        total = len(self.spaces)
        logger.info("Exporting %d space(s) from %s", total, self.base_url)

        discovered: list[Page | Descendant] = []
        with console.status("", spinner="dots") as spinner:
            for position, space in enumerate(self.spaces, start=1):
                message = (
                    f"[dim]Fetching pages for space [highlight]{space.name}[/highlight]"
                    f" ({position}/{total})…[/dim]"
                )
                spinner.update(message)
                discovered.extend(space.pages)

        logger.info("Discovered %d page(s) across %d space(s)", len(discovered), total)
        export_pages(discovered)

    @classmethod
    def from_json(cls, data: JsonResponse, base_url: str) -> "Organization":
        """Build an organization from a space-list payload (its ``results`` entries)."""
        parsed_spaces = [Space.from_json(entry, base_url) for entry in data.get("results", [])]
        return cls(base_url=base_url, spaces=parsed_spaces)

    @classmethod
    @functools.lru_cache(maxsize=100)
    def from_url(cls, base_url: str) -> "Organization":
        """Fetch the current global spaces of *base_url*; results are memoised."""
        logger.debug("Fetching space list from %s", base_url)
        status_text = f"[dim]Fetching space list from [highlight]{base_url}[/highlight]…[/dim]"
        with console.status(status_text):
            response = get_thread_confluence(base_url).get_all_spaces(
                space_type="global", space_status="current", expand="homepage"
            )
            org = cls.from_json(cast("JsonResponse", response), base_url)
        logger.info("Found %d space(s) in %s", len(org.spaces), base_url)
        return org


class Space(BaseModel):
    """A Confluence space and the id of its homepage, if it has one."""

    base_url: str
    key: str
    name: str
    description: str
    homepage: int | None

    @property
    def pages(self) -> list["Page | Descendant"]:
        """Return the homepage followed by its descendants.

        A space without a homepage logs a warning and yields an empty list.
        """
        if self.homepage is not None:
            root = Page.from_id(self.homepage, self.base_url)
            return [root, *root.descendants]

        logger.warning(
            f"Space '{self.name}' (key: {self.key}) has no homepage. No pages will be exported."
        )
        return []

    def export(self) -> None:
        """Export all pages in this space to Markdown."""
        logger.debug("Fetching pages for space '%s' (%s)", self.name, self.key)
        status_text = f"[dim]Fetching pages for space [highlight]{self.name}[/highlight]…[/dim]"
        with console.status(status_text):
            space_pages = self.pages
        logger.info("Found %d page(s) in space '%s'", len(space_pages), self.name)
        export_pages(space_pages)

    @classmethod
    def from_json(cls, data: JsonResponse, base_url: str) -> "Space":
        """Build a space from a space payload; absent text values default to ``""``."""
        plain_description = data.get("description", {}).get("plain", {}).get("value", "")
        homepage_id = data.get("homepage", {}).get("id")
        return cls(
            base_url=base_url,
            key=data.get("key", ""),
            name=data.get("name", ""),
            description=plain_description,
            homepage=homepage_id,
        )

    @classmethod
    @functools.lru_cache(maxsize=100)
    def from_key(cls, space_key: str, base_url: str) -> "Space":
        """Fetch a space by key with its homepage expanded; results are memoised."""
        response = get_thread_confluence(base_url).get_space(space_key, expand="homepage")
        return cls.from_json(cast("JsonResponse", response), base_url)

    @classmethod
    def from_url(cls, space_url: str) -> "Space":
        """Retrieve a Space object given a Confluence space URL.

        The Confluence instance is selected automatically by matching the URL's
        hostname against configured instances.  If no match is found, a new
        entry is registered in the auth config so the user can fill in
        credentials via the interactive config menu.

        Supports standard instance URLs (``https://company.atlassian.net/wiki/spaces/KEY``)
        and Atlassian API gateway URLs
        (``https://api.atlassian.com/ex/confluence/{cloudId}/wiki/spaces/KEY``).

        Raises:
            ValueError: If no space key can be parsed from *space_url*.
        """
        base_url = _extract_base_url(space_url)

        # Ensure a client exists (creates/prompts if first time for this host)
        get_confluence_instance(base_url)

        # Strip the instance's own path prefix so only the Confluence route remains.
        url_path = urllib.parse.urlparse(space_url).path
        context_path = urllib.parse.urlparse(base_url).path.rstrip("/")
        relative_path = url_path[len(context_path) :]

        match = parse_confluence_path(relative_path)
        if match and match.space_key:
            logger.debug("Resolved space key '%s' from URL %s", match.space_key, space_url)
            return cls.from_key(match.space_key, base_url)

        msg = f"Could not parse space URL {space_url}."
        raise ValueError(msg)


class Label(BaseModel):
    """A content label: its id, name and prefix."""

    id: str
    name: str
    prefix: str

    @classmethod
    def from_json(cls, data: JsonResponse) -> "Label":
        """Build a label from a payload; absent values default to ``""``."""
        return cls(**{field: data.get(field, "") for field in ("id", "name", "prefix")})


class Document(BaseModel):
    """Base model for content that has a title, a space, ancestors and a version."""

    base_url: str
    title: str
    space: Space
    ancestors: list["Ancestor"]
    version: Version

    @property
    def _template_vars(self) -> dict[str, str]:
        """Return the placeholder values available to export path templates.

        Names and titles are passed through ``sanitize_filename``. The homepage
        values are empty strings when the space has no homepage.
        """
        homepage = self.space.homepage
        if homepage:
            homepage_id = str(homepage)
            # The homepage title is looked up from the page itself.
            homepage_title = sanitize_filename(Page.from_id(homepage, self.base_url).title)
        else:
            homepage_id = ""
            homepage_title = ""

        ancestor_ids = "/".join(str(a.id) for a in self.ancestors)
        ancestor_titles = "/".join(sanitize_filename(a.title) for a in self.ancestors)

        return {
            "space_key": sanitize_filename(self.space.key),
            "space_name": sanitize_filename(self.space.name),
            "homepage_id": homepage_id,
            "homepage_title": homepage_title,
            "ancestor_ids": ancestor_ids,
            "ancestor_titles": ancestor_titles,
        }


class Attachment(Document):
    """A file attached to a Confluence page.

    Holds the attachment metadata taken from the REST API payload and knows how to
    derive its export path and download its content.
    """

    id: str
    file_size: int
    media_type: str
    media_type_description: str
    file_id: str
    collection_name: str
    download_link: str
    comment: str

    @property
    def extension(self) -> str:
        """Return the file extension including the leading dot, or an empty string.

        draw.io diagram sources and their PNG previews are recognized by the attachment
        comment together with the media type; everything else is guessed from the media
        type alone.
        """
        if self.comment == "draw.io diagram" and self.media_type == "application/vnd.jgraph.mxfile":
            return ".drawio"
        if self.comment == "draw.io preview" and self.media_type == "image/png":
            return ".drawio.png"

        return mimetypes.guess_extension(self.media_type) or ""

    @property
    def filename(self) -> str:
        """Return ``file_id`` followed by the extension."""
        return f"{self.file_id}{self.extension}"

    @property
    def _template_vars(self) -> dict[str, str]:
        """Extend the shared template variables with the attachment placeholders."""
        ext = self.extension
        title = self.title
        # Strip the known extension when the title ends with it (this also covers the
        # two-part ".drawio.png"); otherwise drop whatever suffix the title carries.
        title_without_ext = title[: -len(ext)] if ext and title.endswith(ext) else Path(title).stem
        return {
            **super()._template_vars,
            "attachment_id": str(self.id),
            "attachment_title": sanitize_filename(title_without_ext),
            # file_id is a GUID and does not need sanitization. On
            # Confluence Data Center / Server the API does not populate
            # fileId, so fall back to the content id which is always
            # present and unique.
            "attachment_file_id": self.file_id or str(self.id),
            "attachment_extension": ext,
        }

    @property
    def export_path(self) -> Path:
        """Return the attachment's export path relative to the output directory.

        ``{name}`` placeholders of the configured template are rewritten to ``${name}``
        before substitution.
        """
        filepath_template = Template(settings.export.attachment_path.replace("{", "${"))
        return Path(filepath_template.safe_substitute(self._template_vars))

    @classmethod
    def from_json(cls, data: JsonResponse, base_url: str) -> "Attachment":
        """Build an attachment from an API payload.

        The ancestor chain consists of the container's ancestors followed by the
        container itself, with the first element dropped.
        """
        extensions = data.get("extensions", {})
        container = data.get("container", {})
        return cls(
            base_url=base_url,
            id=data.get("id", ""),
            title=data.get("title", ""),
            space=Space.from_key(
                data.get("_expandable", {}).get("space", "").split("/")[-1], base_url
            ),
            file_size=extensions.get("fileSize", 0),
            media_type=extensions.get("mediaType", ""),
            media_type_description=extensions.get("mediaTypeDescription", ""),
            file_id=extensions.get("fileId", ""),
            collection_name=extensions.get("collectionName", ""),
            download_link=data.get("_links", {}).get("download", ""),
            comment=extensions.get("comment", ""),
            ancestors=[
                *[
                    Ancestor.from_json(ancestor, base_url)
                    for ancestor in container.get("ancestors", [])
                ],
                Ancestor.from_json(container, base_url),
            ][1:],
            version=Version.from_json(data.get("version", {})),
        )

    @classmethod
    def from_page_id(cls, page_id: int, base_url: str) -> list["Attachment"]:
        """Return all attachments of the page with the given id.

        Results are requested 50 at a time; paging stops as soon as a response reports
        fewer than 50 results.
        """
        attachments = []
        start = 0
        paging_limit = 50
        size = paging_limit  # Initialize to limit to enter the loop

        while size >= paging_limit:
            response = cast(
                "JsonResponse",
                get_thread_confluence(base_url).get_attachments_from_content(
                    page_id,
                    start=start,
                    limit=paging_limit,
                    expand="container.ancestors,version",
                ),
            )

            attachments.extend(
                [cls.from_json(att, base_url) for att in response.get("results", [])]
            )

            size = response.get("size", 0)
            start += size

        logger.debug("Found %d attachment(s) for page id=%s", len(attachments), page_id)
        return attachments

    def export(self) -> None:
        """Download the attachment into the output directory.

        Nothing is downloaded when the target file already exists. Download failures are
        logged and counted in the export statistics instead of being raised.
        """
        stats = get_stats()
        filepath = settings.export.output_path / self.export_path
        if filepath.exists():
            logger.debug("Skipping attachment '%s' — already exists at %s", self.title, filepath)
            return

        logger.debug("Downloading attachment '%s' to %s", self.title, filepath)
        client = get_thread_confluence(self.base_url)
        try:
            response = client.request(
                method="GET",
                path=client.url + self.download_link,
                absolute=True,
                advanced_mode=True,
            )
            response.raise_for_status()  # Raise error if request fails
        except HTTPError as e:
            # Only a 404 means the attachment is missing; other HTTP failures (for
            # example 403 or 5xx) are reported with the actual error.
            if getattr(e.response, "status_code", None) == 404:  # noqa: PLR2004
                logger.warning(
                    "There is no attachment with title '%s'. Skipping export.", self.title
                )
            else:
                logger.warning(
                    "Failed to download attachment '%s': %s. Skipping.", self.title, e
                )
            stats.inc_attachments_failed()
            return
        except RequestException as e:
            logger.warning("Failed to download attachment '%s': %s. Skipping.", self.title, e)
            stats.inc_attachments_failed()
            return

        save_file(filepath, response.content)
        logger.debug("Saved attachment '%s' (%d bytes)", self.title, len(response.content))
        stats.inc_attachments_exported()


class Ancestor(Document):
    """A page located above another document in the page tree."""

    id: int

    @classmethod
    def from_json(cls, data: JsonResponse, base_url: str) -> "Ancestor":
        """Build an ancestor from an API payload.

        The ancestors and the version of an ancestor are not needed for now, so they
        are left empty.
        """
        # The space key is the last path segment of the expandable space reference.
        space_ref = data.get("_expandable", {}).get("space", "")
        space_key = space_ref.split("/")[-1]
        return cls(
            base_url=base_url,
            id=data.get("id", 0),
            title=data.get("title", ""),
            space=Space.from_key(space_key, base_url),
            ancestors=[],
            version=Version.from_json({}),
        )


class Descendant(Document):
    """A page located below another page in the page tree."""

    id: int

    @property
    def _template_vars(self) -> dict[str, str]:
        """Extend the shared template variables with the page id and title."""
        variables = dict(super()._template_vars)
        variables["page_id"] = str(self.id)
        variables["page_title"] = sanitize_filename(self.title)
        return variables

    @property
    def export_path(self) -> Path:
        """Return the page's export path relative to the output directory.

        ``{name}`` placeholders of the configured template are rewritten to ``${name}``
        before substitution.
        """
        pattern = settings.export.page_path.replace("{", "${")
        resolved = Template(pattern).safe_substitute(self._template_vars)
        return Path(resolved)

    @classmethod
    def from_json(cls, data: JsonResponse, base_url: str) -> "Descendant":
        """Build a descendant from an API payload; the first listed ancestor is dropped."""
        # The space key is the last path segment of the expandable space reference.
        space_ref = data.get("_expandable", {}).get("space", "")
        all_ancestors = [
            Ancestor.from_json(ancestor, base_url) for ancestor in data.get("ancestors", [])
        ]
        return cls(
            base_url=base_url,
            id=data.get("id", 0),
            title=data.get("title", ""),
            space=Space.from_key(space_ref.split("/")[-1], base_url),
            ancestors=all_ancestors[1:],
            version=Version.from_json(data.get("version", {})),
        )


def _parse_image_captions(storage_xml: str) -> dict[str, str]:
    """Return {filename: caption} parsed from Confluence storage-format XML."""
    captions: dict[str, str] = {}
    if not storage_xml:
        return captions
    for block in re.findall(r"<ac:image[^>]*>.*?</ac:image>", storage_xml, re.DOTALL):
        filename_m = re.search(r'ri:filename="([^"]+)"', block)
        if not filename_m:
            continue
        caption_m = re.search(r"<ac:caption[^>]*>(.*?)</ac:caption>", block, re.DOTALL)
        if not caption_m:
            continue
        caption_content = caption_m.group(1)
        # CDATA in ac:plain-text-body (older format)
        cdata_m = re.search(
            r"<ac:plain-text-body>\s*<!\[CDATA\[(.*?)\]\]>\s*</ac:plain-text-body>",
            caption_content,
            re.DOTALL,
        )
        if cdata_m:
            caption = cdata_m.group(1).strip()
        else:
            # HTML elements in caption (e.g. <p>text</p>) — strip tags
            caption = BeautifulSoup(caption_content, "html.parser").get_text().strip()
        if caption:
            captions[filename_m.group(1)] = caption
    return captions


class Page(Document):
    # A Confluence page together with its rendered bodies, labels and attachments.
    id: int
    type: str = ""
    # Filled from the API payload via _get_web_url / _get_tiny_url in from_json.
    web_url: str = ""
    tiny_url: str = ""
    # Value of ``body.view`` in the API payload.
    body: str
    # Value of ``body.export_view`` in the API payload.
    body_export: str
    # Value of ``body.editor2`` in the API payload.
    editor2: str
    # Value of ``body.storage`` in the API payload.
    body_storage: str = ""
    labels: list["Label"]
    attachments: list["Attachment"]
    # Defaults to a history with an empty creation date and a user built from an empty payload.
    history: History = Field(
        default_factory=lambda: History(created="", created_by=User.from_json({}))
    )

    @property
    def descendants(self) -> list["Descendant"]:
        """Return every page that has this page as an ancestor.

        Runs a CQL content search and follows the ``_links.next`` pagination links
        until none is left. Any failure is logged and results in an empty list instead
        of an exception.
        """
        url = "rest/api/content/search"
        params = {
            "cql": f"type=page AND ancestor={self.id}",
            "expand": "metadata.properties,ancestors,version",
            "limit": 250,
        }
        results = []
        client = get_thread_confluence(self.base_url)

        try:
            response = cast("dict", client.get(url, params=params))
            while True:
                results.extend(response.get("results", []))
                next_path = response.get("_links", {}).get("next")
                if not next_path:
                    break
                response = cast("dict", client.get(next_path))

        except HTTPError as e:
            # The error may carry no response object, so read the status defensively.
            status_code = getattr(e.response, "status_code", None)
            if status_code == 404:  # noqa: PLR2004
                logger.warning(
                    "Content with ID %s not found (404) when fetching descendants.", self.id
                )
            else:
                # Other HTTP failures used to be dropped silently; make them visible.
                logger.warning(
                    "HTTP error (status %s) when fetching descendants for content ID %s: %s",
                    status_code,
                    self.id,
                    e,
                )
            return []
        except Exception:
            logger.exception(
                "Unexpected error when fetching descendants for content ID %s.", self.id
            )
            return []
        return [Descendant.from_json(result, self.base_url) for result in results]

    @property
    def _template_vars(self) -> dict[str, str]:
        """Extend the shared template variables with this page's id and title."""
        variables = dict(super()._template_vars)
        variables["page_id"] = str(self.id)
        variables["page_title"] = sanitize_filename(self.title)
        return variables

    @property
    def export_path(self) -> Path:
        """Return the page's export path relative to the output directory.

        ``{name}`` placeholders of the configured template are rewritten to ``${name}``
        before substitution.
        """
        pattern = settings.export.page_path.replace("{", "${")
        resolved = Template(pattern).safe_substitute(self._template_vars)
        return Path(resolved)

    @property
    def html(self) -> str:
        """Return the page body HTML, optionally preceded by the title heading.

        When ``settings.export.include_document_title`` is enabled the body is prefixed
        with an ``<h1>`` holding the page title. The title is HTML-escaped so that
        characters such as ``<`` or ``&`` stay text instead of being parsed as markup.
        """
        if not settings.export.include_document_title:
            return self.body

        from html import escape  # local import: the property itself is named ``html``

        return f"<h1>{escape(self.title, quote=False)}</h1>{self.body}"

    @property
    def markdown(self) -> str:
        """Return the page as Markdown, produced by a fresh ``Converter``."""
        converter = self.Converter(self)
        return converter.markdown

    def export(self) -> dict[str, AttachmentEntry]:
        """Export the page with its attachments and, if enabled, its comments.

        An inaccessible placeholder page is skipped and yields an empty mapping;
        otherwise the entries returned by ``export_attachments`` are passed on.
        """
        page_id = self.id
        if self.title == "Page not accessible":
            logger.warning("Skipping export for inaccessible page id=%s", page_id)
            return {}

        logger.debug("Exporting page id=%s '%s'", page_id, self.title)

        # The raw bodies are only dumped when running at DEBUG log level.
        if settings.export.log_level == "DEBUG":
            self.export_body()

        # Export attachments first so the files can be utilized during markdown conversion
        logger.debug("Exporting attachments for page id=%s", page_id)
        entries = self.export_attachments()

        logger.debug("Converting to Markdown for page id=%s", page_id)
        self.export_markdown()

        comments_enabled = settings.export.comments_export != "none"
        if comments_enabled:
            logger.debug("Exporting comments for page id=%s", page_id)
            self.export_comments_sidecar()

        target = settings.export.output_path / self.export_path
        logger.info("Exported '%s' -> %s", self.title, target)
        return entries

    def export_with_descendants(self) -> None:
        """Export this page together with all of its descendant pages.

        A console status line is shown while the descendants are being fetched.
        """
        status_text = f"[dim]Fetching descendants of [highlight]{self.title}[/highlight]…[/dim]"
        with console.status(status_text):
            pages = [self]
            pages.extend(self.descendants)
        export_pages(pages)

    def export_body(self) -> None:
        """Write the raw page bodies next to the exported page.

        Three files are produced, all named after the stem of the export path: the
        prettified ``html`` of the page, the prettified ``body_export`` and the
        unmodified ``editor2`` content.
        """
        relative = self.export_path
        target_dir = settings.export.output_path / relative.parent
        stem = relative.stem

        view_html = BeautifulSoup(self.html, "html.parser").prettify()
        save_file(target_dir / f"{stem}_body_view.html", str(view_html))

        export_view_html = BeautifulSoup(self.body_export, "html.parser").prettify()
        save_file(target_dir / f"{stem}_body_export_view.html", str(export_view_html))

        save_file(target_dir / f"{stem}_body_editor2.xml", str(self.editor2))

    def export_markdown(self) -> None:
        """Convert the page to Markdown and write it to its export path.

        The converter's ``_marked_texts`` mapping is kept on the page afterwards; the
        inline comment rendering reads it.
        """
        converter = self.Converter(self)
        destination = settings.export.output_path / self.export_path
        markdown_text = converter.markdown
        save_file(destination, markdown_text)
        self._marked_texts: dict[str, str] = converter._marked_texts

    # Maximum number of characters of comment text used as a section heading; longer
    # text is cut at this length and suffixed with an ellipsis.
    _COMMENT_TITLE_MAX_LEN = 60

    def _fetch_inline_comments(self) -> list[dict]:
        """Return the open inline comments of this page.

        All result pages are followed through ``_links.next``. A comment without a
        resolution status counts as open. On any error a warning is logged and the
        comments collected so far are returned.
        """
        client = get_thread_confluence(self.base_url)
        open_comments: list[dict] = []

        def is_open(comment: dict) -> bool:
            resolution = comment.get("extensions", {}).get("resolution", {})
            return resolution.get("status", "open") == "open"

        try:
            batch = cast(
                "dict",
                client.get_page_comments(
                    self.id,
                    location="inline",
                    expand=(
                        "extensions.inlineProperties,extensions.resolution,"
                        "body.view,history.createdBy"
                    ),
                    limit=50,
                ),
            )
            while True:
                open_comments.extend(filter(is_open, batch.get("results", [])))
                next_path = batch.get("_links", {}).get("next")
                if not next_path:
                    break
                batch = cast("dict", client.get(next_path))
        except Exception:  # noqa: BLE001
            logger.warning("Failed to fetch inline comments for page id=%s", self.id)
        return open_comments

    def _fetch_page_comments(self) -> list[dict]:
        """Return the open footer comments of this page.

        All result pages are followed through ``_links.next``. A comment without a
        resolution status counts as open. On any error a warning is logged and the
        comments collected so far are returned.
        """
        client = get_thread_confluence(self.base_url)
        open_comments: list[dict] = []

        def is_open(comment: dict) -> bool:
            resolution = comment.get("extensions", {}).get("resolution", {})
            return resolution.get("status", "open") == "open"

        try:
            batch = cast(
                "dict",
                client.get_page_comments(
                    self.id,
                    location="footer",
                    expand="extensions.resolution,body.view,history.createdBy",
                    limit=50,
                ),
            )
            while True:
                open_comments.extend(filter(is_open, batch.get("results", [])))
                next_path = batch.get("_links", {}).get("next")
                if not next_path:
                    break
                batch = cast("dict", client.get(next_path))
        except Exception:  # noqa: BLE001
            logger.warning("Failed to fetch page comments for page id=%s", self.id)
        return open_comments

    def _fetch_comment_replies(self, comment_id: str) -> list[dict]:
        """Return the replies (child comments) of the comment with the given id.

        Result pages are followed through ``_links.next`` in the same way as for the
        top-level comments, so replies beyond the first 50 are included. Fetching is
        best effort: on any error a warning is logged and the replies collected so far
        are returned.
        """
        client = get_thread_confluence(self.base_url)
        replies: list[dict] = []
        try:
            resp = cast(
                "dict",
                client.get(
                    f"rest/api/content/{comment_id}/child/comment",
                    params={"expand": "body.view,history.createdBy", "limit": 50},
                ),
            )
            replies.extend(resp.get("results", []))
            next_path = resp.get("_links", {}).get("next")
            while next_path:
                resp = cast("dict", client.get(next_path))
                replies.extend(resp.get("results", []))
                next_path = resp.get("_links", {}).get("next")
        except Exception as e:  # noqa: BLE001
            logger.warning("Failed to fetch replies for comment id=%s: %s", comment_id, e)
        return replies

    def export_comments_sidecar(self) -> None:
        """Write the page's open comments to a ``<page stem>.comments.md`` sidecar file.

        Depending on ``settings.export.comments_export`` the inline comments
        (``"inline"``), the footer comments (``"footer"``) or both (``"all"``) are
        included. No file is written when there is nothing to report.
        """
        mode = settings.export.comments_export
        inline = self._fetch_inline_comments() if mode in ("inline", "all") else []
        page = self._fetch_page_comments() if mode in ("footer", "all") else []
        if not inline and not page:
            return

        source_url = f"{self.base_url}/wiki/spaces/{self.space.key}/pages/{self.id}"

        # The title goes into a YAML double-quoted scalar, where backslashes and double
        # quotes must be escaped to keep the front matter parseable.
        yaml_title = self.title.replace("\\", "\\\\").replace('"', '\\"')

        lines: list[str] = [
            "---",
            f"confluence_page_id: '{self.id}'",
            f'confluence_page_title: "{yaml_title}"',
            f'confluence_webui_url: "{source_url}"',
            "---",
            "",
        ]

        if inline:
            lines.append("## Inline comments")
            lines.append("")
            self._render_inline_comments(lines, inline)

        if page:
            lines.append("## Page comments")
            lines.append("")
            self._render_page_comments(lines, page)

        save_file(
            settings.export.output_path
            / self.export_path.parent
            / f"{self.export_path.stem}.comments.md",
            "\n".join(lines),
        )

    def _render_inline_comments(self, lines: list[str], comments: list[dict]) -> None:
        for comment in comments:
            ref = comment.get("extensions", {}).get("inlineProperties", {}).get("markerRef", "")
            marked_md = self._marked_texts.get(ref, "")

            plain = re.sub(r"\s+", " ", marked_md).strip()
            n = self._COMMENT_TITLE_MAX_LEN
            short_title = plain[:n] + "…" if len(plain) > n else plain
            if not short_title:
                short_title = f"Comment {ref[:8]}"
            lines.append(f"### {short_title}")
            lines.append("")

            if marked_md:
                lines.extend(
                    f"> {line}" if line.strip() else ">" for line in marked_md.splitlines()
                )
                lines.append("")

            author = comment.get("history", {}).get("createdBy", {}).get("displayName", "Unknown")
            created = comment.get("history", {}).get("createdDate", "")[:10]
            body_md = (
                MarkdownConverter()
                .convert(comment.get("body", {}).get("view", {}).get("value", ""))
                .strip()
            )

            lines.append(f"**{author}** · {created}")
            lines.append("")
            if body_md:
                lines.append(body_md)
                lines.append("")

            for reply in self._fetch_comment_replies(comment["id"]):
                r_author = (
                    reply.get("history", {}).get("createdBy", {}).get("displayName", "Unknown")
                )
                r_created = reply.get("history", {}).get("createdDate", "")[:10]
                r_body_md = (
                    MarkdownConverter()
                    .convert(reply.get("body", {}).get("view", {}).get("value", ""))
                    .strip()
                )
                lines.append(f"**{r_author}** · {r_created}")
                lines.append("")
                if r_body_md:
                    lines.append(r_body_md)
                    lines.append("")

    def _render_page_comments(self, lines: list[str], comments: list[dict]) -> None:
        for comment in comments:
            body_md = (
                MarkdownConverter()
                .convert(comment.get("body", {}).get("view", {}).get("value", ""))
                .strip()
            )

            plain = re.sub(r"\s+", " ", body_md).strip()
            n = self._COMMENT_TITLE_MAX_LEN
            short_title = plain[:n] + "…" if len(plain) > n else plain
            if not short_title:
                short_title = f"Comment {str(comment.get('id', ''))[:8]}"
            lines.append(f"### {short_title}")
            lines.append("")

            author = comment.get("history", {}).get("createdBy", {}).get("displayName", "Unknown")
            created = comment.get("history", {}).get("createdDate", "")[:10]
            lines.append(f"**{author}** · {created}")
            lines.append("")
            if body_md:
                lines.append(body_md)
                lines.append("")

            for reply in self._fetch_comment_replies(comment["id"]):
                r_author = (
                    reply.get("history", {}).get("createdBy", {}).get("displayName", "Unknown")
                )
                r_created = reply.get("history", {}).get("createdDate", "")[:10]
                r_body_md = (
                    MarkdownConverter()
                    .convert(reply.get("body", {}).get("view", {}).get("value", ""))
                    .strip()
                )
                lines.append(f"**{r_author}** · {r_created}")
                lines.append("")
                if r_body_md:
                    lines.append(r_body_md)
                    lines.append("")

    def _attachments_for_export(self) -> list["Attachment"]:
        """Return the subset of attachments that should be exported for this page.

        With ``attachments_export == "all"`` every attachment is returned. Otherwise an
        attachment is kept only when the page bodies refer to it: as a draw.io diagram
        name, or through its file id, its id or its title (plain or with spaces encoded
        as ``%20``).
        """
        if settings.export.attachments_export == "all":
            return list(self.attachments)
        bodies = self.body + self.body_export

        def mentioned(token: str) -> bool:
            # An empty string is contained in every string, so an attachment without a
            # file id (Confluence Data Center / Server) would otherwise always match.
            return bool(token) and token in bodies

        return [
            a
            for a in self.attachments
            if (a.filename.endswith(".drawio") and f"diagramName={a.title}" in self.body)
            or (
                a.filename.endswith((".drawio.png", ".drawio"))
                and a.title.replace(" ", "%20") in self.body_export
            )
            or mentioned(a.file_id)
            or mentioned(a.id)
            or mentioned(a.title)
            or mentioned(a.title.replace(" ", "%20"))
        ]

    def export_attachments(self) -> dict[str, AttachmentEntry]:
        """Download the page's attachments and return their lockfile entries.

        An attachment whose tracked version is unchanged and whose file still exists is
        not downloaded again. Afterwards, files of tracked attachments whose export path
        changed are deleted. Returns an empty mapping when attachment export is disabled.
        """
        if settings.export.attachments_export == "disabled":
            logger.debug("Attachment download disabled for page id=%s", self.id)
            return {}

        previous_entries = LockfileManager.get_page_attachment_entries(str(self.id))
        current_entries: dict[str, AttachmentEntry] = {}
        root = settings.export.output_path
        stats = get_stats()

        for attachment in self._attachments_for_export():
            key = attachment.id
            version_no = attachment.version.number if attachment.version else 0

            # Reuse the tracked entry when version and file on disk are both still valid.
            if key in previous_entries:
                known = previous_entries[key]
                if known.version == version_no and (root / known.path).exists():
                    current_entries[key] = known
                    logger.debug(
                        "Skipping unchanged attachment '%s' (v%d)", attachment.title, version_no
                    )
                    stats.inc_attachments_skipped()
                    continue

            attachment.export()
            # Attachments without a version number are exported but not tracked.
            if version_no:
                current_entries[key] = AttachmentEntry(
                    version=version_no, path=str(attachment.export_path)
                )

        # Clean up orphaned attachment files when an attachment was re-versioned
        for key, known in previous_entries.items():
            replacement = current_entries.get(key)
            if replacement is None or known.path == replacement.path:
                continue
            (root / known.path).unlink(missing_ok=True)
            logger.info("Deleted old attachment file: %s", known.path)
            stats.inc_attachments_removed()

        return current_entries

    def get_attachment_by_id(self, attachment_id: str) -> Attachment | None:
        """Get the Attachment object by its ID.

        Confluence Server sometimes stores attachments without a file_id.
        Fall back to the plain attachment.id and return None if nothing matches.
        """
        for a in self.attachments:
            if attachment_id in a.id:
                return a
            if a.file_id and attachment_id in a.file_id:
                return a
        return None

    def get_attachment_by_file_id(self, file_id: str) -> Attachment | None:
        for a in self.attachments:
            if a.file_id and file_id in a.file_id:
                return a
        return None

    def get_attachments_by_title(self, title: str) -> list[Attachment]:
        return [attachment for attachment in self.attachments if attachment.title == title]

    @classmethod
    def from_json(cls, data: JsonResponse, base_url: str) -> "Page":
        """Build a page from an API payload.

        Missing keys fall back to empty values. The attachments are fetched separately
        through ``Attachment.from_page_id`` and the first listed ancestor is dropped.
        """
        bodies = data.get("body", {})
        # The space key is the last path segment of the expandable space reference.
        space_ref = data.get("_expandable", {}).get("space", "")
        label_items = data.get("metadata", {}).get("labels", {}).get("results", [])
        ancestor_items = data.get("ancestors", [])
        return cls(
            base_url=base_url,
            id=data.get("id", 0),
            type=data.get("type", ""),
            web_url=_get_web_url(data),
            tiny_url=_get_tiny_url(data),
            title=data.get("title", ""),
            space=Space.from_key(space_ref.split("/")[-1], base_url),
            body=bodies.get("view", {}).get("value", ""),
            body_export=bodies.get("export_view", {}).get("value", ""),
            editor2=bodies.get("editor2", {}).get("value", ""),
            body_storage=bodies.get("storage", {}).get("value", ""),
            labels=[Label.from_json(item) for item in label_items],
            attachments=Attachment.from_page_id(data.get("id", 0), base_url),
            ancestors=[Ancestor.from_json(item, base_url) for item in ancestor_items][1:],
            version=Version.from_json(data.get("version", {})),
            history=History.from_json(data.get("history", {})),
        )

    @classmethod
    @functools.lru_cache(maxsize=1000)
    def from_id(cls, page_id: int, base_url: str) -> "Page":
        """Fetch the page with the given id from the Confluence instance at ``base_url``.

        Results are memoized per ``(page_id, base_url)`` for up to 1000 entries. When
        ``page_id`` is ``None`` or the page cannot be accessed (``ApiError`` or
        ``HTTPError``), a placeholder page titled "Page not accessible" is returned
        instead of raising.
        """

        def inaccessible(placeholder_id: int) -> "Page":
            # Placeholder for pages that cannot be loaded. ``version`` is a required
            # field, so it has to be supplied for the placeholder as well.
            return cls(
                base_url=base_url,
                id=placeholder_id,
                title="Page not accessible",
                space=Space(base_url=base_url, key="", name="", description="", homepage=0),
                body="",
                body_export="",
                editor2="",
                labels=[],
                attachments=[],
                ancestors=[],
                version=Version.from_json({}),
            )

        if page_id is None:
            logger.warning("Page ID is None, returning empty page")
            return inaccessible(0)

        logger.debug("Fetching page id=%s from %s", page_id, base_url)
        expand = (
            "body.view,body.export_view,body.editor2,body.storage,metadata.labels,"
            "metadata.properties,ancestors,version,history,history.createdBy"
        )
        try:
            return cls.from_json(
                _require_dict(
                    get_thread_confluence(base_url).get_page_by_id(
                        page_id,
                        expand=expand,
                    ),
                    f"page id={page_id} at {base_url}",
                ),
                base_url,
            )
        except (ApiError, HTTPError):
            logger.warning("Could not access page id=%s — treating as inaccessible", page_id)
            return inaccessible(page_id)

    @classmethod
    def from_url(cls, page_url: str) -> "Page":
        """Retrieve a Page object given a Confluence page URL.

        The Confluence instance is selected automatically by matching the URL's
        hostname against configured instances.  If no match is found, a new
        entry is registered in the auth config so the user can fill in
        credentials via the interactive config menu.

        The page is identified, in this order, by a ``pageId`` query parameter (matched
        case-insensitively), by a page id in the URL path, or by space key plus page
        title in the URL path.

        Supports standard instance URLs and Atlassian API gateway URLs of the form
        ``https://api.atlassian.com/ex/confluence/{cloudId}/wiki/spaces/KEY/pages/123``.

        Raises:
            ValueError: If no page reference can be derived from the URL.
        """
        base_url = _extract_base_url(page_url)

        # Ensure a client exists (creates/prompts if first time for this host)
        get_confluence_instance(base_url)

        parsed = urllib.parse.urlparse(page_url)

        # First choice: an explicit, non-empty page id in the query string.
        page_id_param = None
        for key, values in urllib.parse.parse_qs(parsed.query).items():
            if key.lower() == "pageid" and values and values[0]:
                page_id_param = values[0]
                break
        if page_id_param and page_id_param.isdigit():
            page_id = int(page_id_param)
            logger.debug(
                "Resolved page id=%s from Confluence query string in URL %s", page_id, page_url
            )
            return Page.from_id(page_id, base_url)

        # Otherwise interpret the part of the path that follows the base URL's own path.
        base_path = urllib.parse.urlparse(base_url).path.rstrip("/")
        relative_path = parsed.path[len(base_path) :]
        match = parse_confluence_path(relative_path)
        if match:
            if match.page_id:
                logger.debug("Resolved page id=%s from Confluence URL %s", match.page_id, page_url)
                return Page.from_id(match.page_id, base_url)

            if match.space_key and match.page_title:
                logger.debug(
                    "Resolving page '%s' in space '%s' from Confluence URL %s",
                    match.page_title,
                    match.space_key,
                    page_url,
                )
                client = get_thread_confluence(base_url)
                page_data = _require_dict(
                    client.get_page_by_title(
                        space=match.space_key, title=match.page_title, expand="version"
                    ),
                    f"page title={match.page_title!r} space={match.space_key!r} at {base_url}",
                )
                return Page.from_id(page_data["id"], base_url)

        msg = f"Could not parse page URL {page_url}."
        raise ValueError(msg)

    class Converter(TableConverter, MarkdownConverter):
        """Create a custom MarkdownConverter for Confluence HTML to Markdown conversion."""

        class Options(MarkdownConverter.DefaultOptions):  # type: ignore[assignment]
            # Converter options layered on top of ``MarkdownConverter.DefaultOptions``.
            bullets = "-"
            heading_style = ATX
            # NOTE(review): presumably the macro names skipped during conversion — confirm
            # where this option is read.
            macros_to_ignore: Set[str] = frozenset(["qc-read-and-understood-signature-box"])
            # NOTE(review): looks like the indentation width used for front matter —
            # confirm where this option is read.
            front_matter_indent = 2

        def __init__(self, page: "Page", **options) -> None:  # noqa: ANN003
            """Create a converter bound to ``page``.

            ``options`` are forwarded unchanged to the parent converter classes.
            """
            super().__init__(**options)
            self.page = page

            # State that is filled in while the page is being converted.
            self.page_properties = {}
            self._marked_texts: dict[str, str] = {}
            self._plantuml_index: int = 0

            # Lazily computed lookups; ``None`` means "not computed yet".
            self._colorid_map_cache: dict[str, str] | None = None
            self._image_captions_cache: dict[str, str] | None = None
            self._panel_icon_map_cache: dict[str, str] | None = None
            self._storage_plantuml_macros_cache: list[Tag] | None = None

        @property
        def _colorid_map(self) -> dict[str, str]:
            """Return the color-id mapping found in the ``<style>`` tags of the page HTML.

            The mapping is built on first access from every ``_RE_COLORID_CSS`` match
            (group 1 is the key, group 2 the value) and cached; the first value seen
            for an id wins.
            """
            cached = self._colorid_map_cache
            if cached is not None:
                return cached

            mapping: dict[str, str] = {}
            for style_tag in BeautifulSoup(self.page.html, "html.parser").find_all("style"):
                for found in _RE_COLORID_CSS.finditer(style_tag.get_text()):
                    mapping.setdefault(found.group(1), found.group(2))

            self._colorid_map_cache = mapping
            return mapping

        @property
        def _storage_plantuml_macros(self) -> list[Tag]:
            """Return all PlantUML structured-macros found in the page's body.storage.

            The list is computed on first access and cached; it is empty when the page
            has no storage body.
            """
            cached = self._storage_plantuml_macros_cache
            if cached is not None:
                return cached

            found: list[Tag] = []
            storage = self.page.body_storage
            if storage:
                # Wrap the fragment in a single root element before parsing it as XML.
                soup = BeautifulSoup(f"<root>{storage}</root>", "xml")
                for macro in soup.find_all("structured-macro"):
                    if isinstance(macro, Tag) and macro.get("name") == "plantuml":
                        found.append(macro)

            self._storage_plantuml_macros_cache = found
            return found

        @property
        def _image_captions(self) -> dict[str, str]:
            """Return the captions parsed from ``body_storage``, computed on first access."""
            captions = self._image_captions_cache
            if captions is None:
                captions = _parse_image_captions(self.page.body_storage)
                self._image_captions_cache = captions
            return captions

        @property
        def _panel_icon_map(self) -> dict[str, str]:
            """Map panel ``macro-id`` values to their custom icon emoji.

            The icons are read from the page's editor2 XML, wrapped in a
            synthetic ``<root>`` element. Only macros named ``panel``, ``info``,
            ``note``, ``tip`` or ``warning`` that carry a ``macro-id`` and yield
            an emoji are included. The result is memoized.
            """
            if self._panel_icon_map_cache is not None:
                return self._panel_icon_map_cache

            icons: dict[str, str] = {}
            editor_xml = self.page.editor2
            if editor_xml:
                panel_names = {"panel", "info", "note", "tip", "warning"}
                tree = BeautifulSoup(f"<root>{editor_xml}</root>", "xml")
                for node in tree.find_all("structured-macro"):
                    if not isinstance(node, Tag) or node.get("name") not in panel_names:
                        continue
                    macro_id = node.get("macro-id")
                    # Macros without an id cannot be matched later, so skip them.
                    emoji = self._extract_panel_emoji(node) if macro_id else None
                    if emoji:
                        icons[str(macro_id)] = emoji
            self._panel_icon_map_cache = icons
            return icons

        @staticmethod
        def _extract_panel_emoji(macro: Tag) -> str | None:
            """Return the custom icon of a panel macro as text, or ``None``.

            Only the macro's direct ``parameter`` children are read. A non-empty
            ``panelIconText`` is returned verbatim; otherwise ``panelIconId`` is
            decoded as dash-separated hexadecimal code points. Undecodable or
            out-of-range ids yield ``None``.
            """
            params: dict[str, str] = {
                str(child.get("name")): child.get_text(strip=True)
                for child in macro.find_all("parameter", recursive=False)
                if isinstance(child, Tag) and child.get("name")
            }

            icon_text = params.get("panelIconText")
            if icon_text:
                return icon_text

            icon_id = params.get("panelIconId")
            if not icon_id:
                return None
            try:
                codepoints = [int(part, 16) for part in icon_id.split("-")]
                if all(0 <= cp <= _MAX_UNICODE_CODEPOINT for cp in codepoints):
                    return "".join(map(chr, codepoints))
            except (OverflowError, ValueError):
                # Not a hexadecimal code point sequence: no usable icon.
                return None
            return None

        @property
        def markdown(self) -> str:
            """Return the complete Markdown document for the page.

            The body is converted before the front matter is read, so
            properties collected during conversion are part of the front
            matter. Breadcrumbs are inserted between front matter and body when
            ``settings.export.page_breadcrumbs`` is enabled.
            """
            cleaned_html = self._strip_excerpt_include_panel_titles(self.page.html)
            body = self._escape_template_placeholders(self.convert(cleaned_html))

            sections = [f"{self.front_matter}\n"]
            if settings.export.page_breadcrumbs:
                sections.append(f"{self.breadcrumbs}\n")
            sections.append(f"{body}\n")
            return "".join(sections)

        @property
        def front_matter(self) -> str:
            """Return the YAML front matter block, or ``""`` when there are no properties.

            Labels are stored under ``tags`` and the optional URL and metadata
            properties are added before the collected ``page_properties`` are
            dumped as YAML between ``---`` fences.
            """
            width = self.options["front_matter_indent"]
            self.set_page_properties(tags=self.labels)
            self._add_confluence_url_properties()
            self._add_page_metadata_properties()

            properties = self.page_properties
            if not properties:
                return ""

            dumped = yaml.dump(properties, indent=width).strip()
            # Indent the root level list items
            padding = " " * width
            dumped = re.sub(r"^( *)(- )", r"\1" + padding + r"\2", dumped, flags=re.MULTILINE)
            return f"---\n{dumped}\n---\n"

        def _add_confluence_url_properties(self) -> None:
            """Add the page's Confluence URLs to ``page_properties``.

            ``settings.export.confluence_url_in_frontmatter`` selects which URLs
            are added (``"webui"``, ``"tinyui"``, ``"both"`` or ``"none"``).
            Empty URLs are skipped and existing properties are never overwritten.
            """
            mode = settings.export.confluence_url_in_frontmatter
            if mode == "none":
                return

            page = self.page
            properties = self.page_properties

            if mode in ("webui", "both"):
                web_url = page.web_url
                if web_url:
                    properties.setdefault(sanitize_key("confluence_webui_url"), web_url)

            if mode in ("tinyui", "both"):
                tiny_url = page.tiny_url
                if tiny_url:
                    properties.setdefault(sanitize_key("confluence_tinyui_url"), tiny_url)

        def _add_page_metadata_properties(self) -> None:
            """Add page metadata to ``page_properties`` when enabled in the settings.

            Values equal to ``None``, ``""`` or ``0`` are skipped, and a
            property that already exists is never overwritten.
            """
            if not settings.export.page_metadata_in_frontmatter:
                return

            page = self.page
            version, history = page.version, page.history
            candidates = (
                # Stored as str to stay JS-safe-integer compatible: Confluence
                # Cloud page IDs can exceed 2^53, which JS-based SSGs (Hugo,
                # Astro, ...) parsing the front matter would silently truncate.
                ("confluence_page_id", str(page.id)),
                ("confluence_space_key", page.space.key),
                ("confluence_type", page.type),
                ("confluence_created", history.created),
                ("confluence_created_by", history.created_by.display_name),
                ("confluence_last_modified", version.when),
                ("confluence_last_modified_by", version.by.display_name),
                ("confluence_version", version.number),
            )
            for raw_key, value in candidates:
                if value in (None, "", 0):
                    continue
                self.page_properties.setdefault(sanitize_key(raw_key), value)

        @property
        def breadcrumbs(self) -> str:
            """Return the ancestor page links joined by ``" > "`` plus a trailing newline."""
            links = (self.convert_page_link(ancestor.id) for ancestor in self.page.ancestors)
            trail = " > ".join(links)
            return f"{trail}\n"

        @property
        def labels(self) -> list[str]:
            """Return the names of the page's labels, in page order."""
            page_labels = self.page.labels
            return [entry.name for entry in page_labels]

        def set_page_properties(self, **props: list[str] | str | None) -> None:
            for key, value in props.items():
                if value:
                    self.page_properties[sanitize_key(key)] = value

        def convert_page_properties(
            self, el: BeautifulSoup, text: str, parent_tags: list[str]
        ) -> str | None:
            """Convert a page-properties macro according to the configured format.

            ``settings.export.page_properties_format`` decides the output:
            ``"table"`` returns ``text`` untouched; the front-matter formats
            store the key/value rows via ``set_page_properties``;
            ``"dataview-inline-field"`` emits ``key:: value`` lines; any other
            value (meta-bind-view-fields) emits a two-column table of VIEW
            fields. Only rows with exactly two cells are used, and repeated
            keys get a numeric suffix. Returns ``None`` when the macro has no
            rows or when the format is ``"frontmatter"``.
            """
            fmt = settings.export.page_properties_format
            if fmt == "table":
                return text

            rows = [
                cast("list[Tag]", tr.find_all(["th", "td"]))
                for tr in cast("list[Tag]", el.find_all("tr"))
                if tr
            ]
            if not rows:
                return None

            props: dict[str, str] = {}
            occurrences: dict[str, int] = {}
            for cells in rows:
                if len(cells) != 2:  # noqa: PLR2004
                    continue
                label = cells[0].get_text(strip=True)
                seen = occurrences.get(label, 0) + 1
                occurrences[label] = seen
                # The first occurrence keeps the plain label; repeats get " <n>".
                key = label if seen == 1 else f"{label} {seen}"
                props[key] = self.convert(str(cells[1])).strip()

            if fmt in ("frontmatter", "frontmatter_and_table", "meta-bind-view-fields"):
                self.set_page_properties(**props)

            if fmt == "frontmatter":
                return None
            if fmt == "frontmatter_and_table":
                return text
            if fmt == "dataview-inline-field":
                inline_fields = "\n".join(f"{k}:: {v}" for k, v in props.items())
                return f"\n{inline_fields}\n"

            # meta-bind-view-fields: two-column table with VIEW fields in value column
            view_rows = [
                (f"**{k}**", f"`VIEW[{{{sanitize_key(k)}}}][text(renderMarkdown)]`") for k in props
            ]
            return "\n\n" + tabulate(view_rows, headers=["", ""], tablefmt="pipe") + "\n"

        def convert_alert(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Render a Confluence panel-style macro as a GitHub style alert.

            GitHub specific alert types: https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts

            Alerts nested in ``td``/``th`` cells are emitted as a leading emoji
            followed by the plain text, because GitHub alerts don't render
            inside table cells in most viewers (Obsidian, etc.). A custom panel
            icon, when known for the macro id, replaces the default emoji.
            """
            type_by_macro = {
                "info": "IMPORTANT",
                "panel": "NOTE",
                "tip": "TIP",
                "note": "WARNING",
                "warning": "CAUTION",
            }
            emoji_by_type = {
                "NOTE": "\U0001f4dd",
                "TIP": "\U0001f4a1",
                "IMPORTANT": "❗",
                "WARNING": "⚠️",
                "CAUTION": "\U0001f6d1",
            }

            alert_type = type_by_macro.get(str(el["data-macro-name"]), "NOTE")

            emoji = emoji_by_type[alert_type]
            macro_id = el.get("data-macro-id")
            if macro_id:
                emoji = self._panel_icon_map.get(str(macro_id)) or emoji

            inside_cell = isinstance(parent_tags, list | set) and (
                "td" in parent_tags or "th" in parent_tags
            )
            if inside_cell:
                return f"{emoji} {text.strip()}"

            quoted = super().convert_blockquote(el, text, parent_tags)
            return f"\n> [!{alert_type}]{quoted}"

        def convert_div(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Dispatch a ``div`` to the matching macro or layout converter.

            Macros named in the ``macros_to_ignore`` option produce no output.
            Known ``data-macro-name`` values are routed to their handler; after
            that, divs whose class attribute contains ``expand-container`` or
            ``columnLayout`` get their dedicated conversion. Everything else is
            left to the parent converter.
            """
            # Handle Confluence macros
            if el.has_attr("data-macro-name"):
                macro_name = str(el["data-macro-name"])
                if macro_name in self.options["macros_to_ignore"]:
                    return ""

                handlers_by_macro = {
                    **dict.fromkeys(
                        ("panel", "info", "note", "tip", "warning"), self.convert_alert
                    ),
                    "details": self.convert_page_properties,
                    "drawio": self.convert_drawio,
                    "plantuml": self.convert_plantuml,
                    "scroll-ignore": self.convert_hidden_content,
                    "toc": self.convert_toc,
                    "jira": self.convert_jira_table,
                    "attachments": self.convert_attachments,
                    **dict.fromkeys(("markdown", "mohamicorp-markdown"), self.convert_markdown),
                    **dict.fromkeys(("include", "excerpt-include"), self.convert_include),
                }
                macro_handler = handlers_by_macro.get(macro_name)
                if macro_handler is not None:
                    return macro_handler(el, text, parent_tags)

            # Substring match against the stringified class attribute.
            css_classes = str(el.get("class", ""))
            if "expand-container" in css_classes:
                return self.convert_expand_container(el, text, parent_tags)
            if "columnLayout" in css_classes:
                return self.convert_column_layout(el, text, parent_tags)

            return super().convert_div(el, text, parent_tags)

        def convert_expand_container(
            self, el: BeautifulSoup, text: str, parent_tags: list[str]
        ) -> str:
            """Render an expand-container div as an HTML ``<details>`` element.

            The summary comes from the ``expand-control-text`` span (with a
            default label when it is missing) and the body from the recursively
            converted ``expand-content`` div.
            """
            summary_node = el.find("span", class_="expand-control-text")
            if summary_node:
                summary = summary_node.get_text().strip()
            else:
                summary = "Click here to expand..."

            body = ""
            content_node = el.find("div", class_="expand-content")
            if content_node:
                # Recursively convert the content
                body = self.process_tag(content_node, parent_tags).strip()

            return f"\n<details>\n<summary>{summary}</summary>\n\n{body}\n\n</details>\n\n"

        def _span_highlight(self, style: str, text: str) -> str | None:
            """Wrap ``text`` in a ``<mark>`` using the RGB background found in ``style``.

            Returns ``None`` when ``style`` has no matching background color.
            """
            found = _RE_RGB_BG.search(style)
            if found is None:
                return None
            red, green, blue = (int(found.group(channel)) for channel in (1, 2, 3))
            background = _rgb_to_hex(red, green, blue)
            return f'<mark style="background: {background};">{text}</mark>'

        def _wrap_cell_highlight(self, el: BeautifulSoup, text: str) -> str:
            """Wrap a table cell's text in a ``<mark>`` when the cell has a highlight color.

            Does nothing when highlight conversion is disabled or no color is
            found. Empty cells get a ``&nbsp;`` placeholder inside the mark.
            """
            if settings.export.convert_text_highlights:
                background = _extract_cell_highlight_hex(el)
                if background is not None:
                    content = text if text else "&nbsp;"
                    return f'<mark style="background: {background};">{content}</mark>'
            return text

        def convert_td(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Convert a ``td`` with the parent converter, then apply the cell highlight."""
            cell_md = super().convert_td(el, text, parent_tags)
            highlighted = self._wrap_cell_highlight(el, cell_md)
            return highlighted

        def convert_th(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Convert a ``th`` with the parent converter, then apply the cell highlight."""
            cell_md = super().convert_th(el, text, parent_tags)
            highlighted = self._wrap_cell_highlight(el, cell_md)
            return highlighted

        def _span_font_color(self, el: BeautifulSoup, style: str, text: str) -> str | None:
            """Wrap ``text`` in a colored ``<font>`` tag, or return ``None``.

            An RGB color in ``style`` takes precedence; otherwise the element's
            ``data-colorid`` is looked up in the page's color-id map.
            """
            rgb = _RE_RGB_COLOR.search(style)
            if rgb:
                hex_color = _rgb_to_hex(
                    int(rgb.group(1)), int(rgb.group(2)), int(rgb.group(3))
                )
            else:
                color_id = el.get("data-colorid")
                if not isinstance(color_id, str):
                    return None
                hex_color = self._colorid_map.get(color_id)
                if not hex_color:
                    return None
            return f'<font style="color: {hex_color};">{text}</font>'

        def _span_status_badge(self, el: BeautifulSoup, text: str) -> str | None:
            """Render a status lozenge as a ``<mark>`` with its background color.

            Returns ``None`` when badge conversion is disabled or the element
            does not carry the ``status-macro`` class. The first lozenge class
            present on the element picks the color; gray is the default.
            """
            if not settings.export.convert_status_badges:
                return None
            classes = el.get("class") or []
            if not isinstance(classes, list) or "status-macro" not in classes:
                return None
            background = next(
                (color for cls, color in _LOZENGE_COLORS.items() if cls in classes),
                "#dfe1e6",  # default gray
            )
            return f'<mark style="background: {background};">{text.strip()}</mark>'

        def convert_span(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:  # noqa: C901, PLR0911
            """Convert a ``span``: inline macros, comment markers, highlights and colors.

            Jira, status and PlantUML macros are handled first, then inline
            comment markers. After that, background highlights and font colors
            are applied when enabled in the settings. A status macro that
            yields no badge falls through to the later checks.
            """
            if el.has_attr("data-macro-name"):
                macro_name = el["data-macro-name"]
                if macro_name == "jira":
                    return self.convert_jira_issue(el, text, parent_tags)
                if macro_name == "status":
                    badge = self._span_status_badge(el, text)
                    if badge is not None:
                        return badge
                if macro_name == "plantuml":
                    return self.convert_plantuml(el, text, parent_tags)

            if el.has_attr("class") and "inline-comment-marker" in el["class"]:
                return self.convert_inline_comment_marker(el, text, parent_tags)

            style_attr = el.get("style", "")
            style = style_attr if isinstance(style_attr, str) else ""

            if (
                settings.export.convert_text_highlights
                and (highlighted := self._span_highlight(style, text)) is not None
            ):
                return highlighted

            if (
                settings.export.convert_font_colors
                and (colored := self._span_font_color(el, style, text)) is not None
            ):
                return colored

            return text

        def convert_inline_comment_marker(
            self, el: BeautifulSoup, text: str, _parent_tags: list[str]
        ) -> str:
            """Record the text marked by an inline comment and return it unchanged.

            The text is remembered under the marker's ``data-ref`` only when
            comment export is ``"inline"`` or ``"all"``; the first text seen
            for a reference is kept.
            """
            if settings.export.comments_export not in ("inline", "all"):
                return text
            marker_ref = el.get("data-ref", "")
            if isinstance(marker_ref, str) and marker_ref:
                self._marked_texts.setdefault(marker_ref, text)
            return text

        def convert_attachments(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Render the attachments macro as a two-column Markdown table.

            Column titles are taken from the macro's header cells when present
            (defaulting to "File" and "Modified"). One row is produced per page
            attachment: a link to the file, and when and by whom it was last
            modified. The table is assembled as HTML and passed through
            ``convert_table``.
            """
            file_header = el.find("th", {"class": "filename-column"})
            file_header_text = file_header.text.strip() if file_header else "File"

            modified_header = el.find("th", {"class": "modified-column"})
            modified_header_text = modified_header.text.strip() if modified_header else "Modified"

            def _link_for(att: Attachment) -> str:
                # Wiki links reference the exported file name; regular links use
                # the configured href style with spaces percent-encoded.
                if settings.export.attachment_href == "wiki":
                    return f"[[{att.export_path.name}|{att.title}]]"
                href = self._get_path_for_href(att.export_path, settings.export.attachment_href)
                return f"[{att.title}]({href.replace(' ', '%20')})"

            body_rows = "".join(
                f"<tr><td>{_link_for(att)}</td>"
                f"<td>{att.version.friendly_when} by {self.convert_user(att.version.by)}</td></tr>"
                for att in self.page.attachments
            )

            html = f"""<table>
            <tr><th>{file_header_text}</th><th>{modified_header_text}</th></tr>
            {body_rows}
            </table>"""

            table_md = self.convert_table(BeautifulSoup(html, "html.parser"), text, parent_tags)
            return f"\n\n{table_md}\n"

        def convert_column_layout(
            self, el: BeautifulSoup, text: str, parent_tags: list[str]
        ) -> str:
            """Render a multi-column layout as a single-row table.

            Layouts with fewer than two ``cell`` divs are converted like a
            regular div by the parent converter.
            """
            cells = el.find_all("div", {"class": "cell"})
            if len(cells) >= 2:  # noqa: PLR2004
                columns = "".join(f"<td>{cell!s}</td>" for cell in cells)
                html = f"<table><tr>{columns}</tr></table>"
                return self.convert_table(BeautifulSoup(html, "html.parser"), text, parent_tags)
            return super().convert_div(el, text, parent_tags)

        def convert_jira_table(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Replace a Jira macro with the Jira table rendered in ``body_export``.

            The export body is searched for ``div.jira-table`` elements. Exactly
            one match is converted; with zero or several matches the problem is
            logged and ``text`` is returned unchanged.
            """
            jira_tables = BeautifulSoup(self.page.body_export, "html.parser").find_all(
                "div", {"class": "jira-table"}
            )

            if not jira_tables:
                logger.warning("No Jira table found. Ignoring.")
                return text

            if len(jira_tables) > 1:
                # No exception is being handled here, so log at ERROR level
                # directly instead of logger.exception(), which would append a
                # meaningless "NoneType: None" traceback.
                logger.error("Multiple Jira tables are not supported. Ignoring.")
                return text

            return self.process_tag(jira_tables[0], parent_tags)

        def convert_toc(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Replace a TOC macro with the table of contents rendered in ``body_export``.

            Returns ``""`` when ``settings.export.include_toc`` is disabled.
            Otherwise the export body is searched for ``div.toc-macro``
            elements. Exactly one match is converted; with zero or several
            matches the problem is logged and ``text`` is returned unchanged.
            """
            if not settings.export.include_toc:
                return ""

            tocs = BeautifulSoup(self.page.body_export, "html.parser").find_all(
                "div", {"class": "toc-macro"}
            )

            if not tocs:
                logger.warning("Could not find TOC macro. Ignoring.")
                return text

            if len(tocs) > 1:
                # No exception is being handled here, so log at ERROR level
                # directly instead of logger.exception(), which would append a
                # meaningless "NoneType: None" traceback.
                logger.error("Multiple TOC macros are not supported. Ignoring.")
                return text

            return self.process_tag(tocs[0], parent_tags)

        def convert_hidden_content(
            self, el: BeautifulSoup, text: str, parent_tags: list[str]
        ) -> str:
            """Emit the macro's content as an HTML comment, or ``""`` when it is blank."""
            hidden = super().convert_p(el, text, parent_tags)
            if hidden.strip():
                return f"\n<!--{hidden}-->\n"
            return ""

        def convert_jira_issue(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Render an inline Jira issue macro as a Markdown link.

            Without a ``jira-issue-key`` anchor, ``text`` is returned; without a
            ``data-jira-key``, the anchor is converted as-is. Otherwise the
            issue is fetched and the link label includes its summary. When the
            lookup raises ``HTTPError`` or yields nothing, the label is just
            the issue key.
            """
            link = cast("BeautifulSoup", el.find("a", {"class": "jira-issue-key"}))
            if not link:
                return text

            issue_key = el.get("data-jira-key")
            if not issue_key:
                return self.process_tag(link, parent_tags)

            key_only_link = f"[[{issue_key}]]({link.get('href')})"
            try:
                jira_url = _extract_jira_base_url(str(link.get("href", ""))) or self.page.base_url
                issue = JiraIssue.from_key(str(issue_key), jira_url)
            except HTTPError:
                return key_only_link

            if issue:
                return f"[[{issue.key}] {issue.summary}]({link.get('href')})"
            return key_only_link

        def convert_pre(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:  # type: ignore[override]
            """Render a ``pre`` element as a fenced code block.

            The language is taken from the ``brush`` entry of the
            ``data-syntaxhighlighter-params`` attribute when present. Text
            containing ``@startuml`` is always labelled ``plantuml``. Empty
            text yields ``""``.
            """
            if not text:
                return ""

            highlighter_params = ""
            if el.has_attr("data-syntaxhighlighter-params"):
                highlighter_params = str(el["data-syntaxhighlighter-params"])

            brush = re.search(r"brush:\s*([^;]+)", highlighter_params)
            language = brush.group(1) if brush else ""
            if "@startuml" in text:
                language = "plantuml"

            return f"\n\n```{language}\n{text}\n```\n\n"

        def convert_sub(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Keep subscript as an inline HTML ``<sub>`` element."""
            subscript = f"<sub>{text}</sub>"
            return subscript

        def convert_sup(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Convert superscript to Markdown footnotes.

            A ``sup`` without a previous sibling is emitted as a footnote
            definition (``[^x]:``); any other one becomes a footnote reference
            (``[^x]``).
            """
            reference = f"[^{text}]"
            is_definition = el.previous_sibling is None
            return f"{reference}:" if is_definition else reference

        def convert_a(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:  # noqa: PLR0911, PLR0912, C901
            """Convert an anchor element, resolving Confluence-internal targets.

            Checks are applied in this order: user mentions, broken "create
            page" links (with a fallback lookup in the editor2 body), links to
            pages and attachments identified by ``data-linked-resource-*``
            attributes, links without a host or on the page's own host that
            carry a page id in the query string or path, and in-page ``#``
            anchors. Anything else is handled by the parent converter.
            """
            if "user-mention" in str(el.get("class")):
                return self.convert_user_mention(el, text, parent_tags)
            if "createpage.action" in str(el.get("href")) or "createlink" in str(el.get("class")):
                logger.warning(
                    f"Broken link detected: '{text}' on page '{self.page.title}' "
                    f"(ID: {self.page.id}). This is likely a Confluence bug. "
                    f"Please report this issue to Atlassian Support."
                )
                # Find fallback link without using string= parameter to avoid
                # BeautifulSoup recursion bug. The string= parameter triggers
                # recursive .string property access which fails on Fabric
                # Editor v2 HTML with fab:media tags
                try:
                    soup = BeautifulSoup(self.page.editor2, "html.parser")
                    for link in soup.find_all("a"):
                        # Use get_text() instead of .string to avoid recursion issues
                        link_text = link.get_text(strip=True)
                        if link_text == text:
                            # Prevent infinite recursion if fallback is the same element
                            if isinstance(link, Tag) and link.get("href") != el.get("href"):
                                return self.convert_a(link, text, parent_tags)  # type: ignore[arg-type]
                except RecursionError:
                    # editor2 HTML contains problematic tags (e.g., fab:media)
                    # that cause BS4 recursion. Skip fallback and return
                    # wiki-style link
                    pass
                # If no matching link found, return wiki-style link
                return f"[[{text}]]"
            if "page" in str(el.get("data-linked-resource-type")):
                page_id = str(el.get("data-linked-resource-id", ""))
                # Only a numeric id can be resolved. Anything else ("null",
                # empty, malformed) falls through to the generic href handling
                # below instead of raising ValueError from int().
                if page_id.isdigit():
                    return self.convert_page_link(int(page_id))
            if "attachment" in str(el.get("data-linked-resource-type")):
                link = self.convert_attachment_link(el, text, parent_tags)
                # convert_attachment_link may return None if the attachment meta is incomplete
                return link or f"[{text}]({el.get('href')})"
            href_str = str(el.get("href", ""))
            if href_str:
                parsed_href = urlparse(href_str)
                base_host = urlparse(getattr(self.page, "base_url", "") or "").hostname
                # Only relative links and links to the page's own host can
                # point at another page of this Confluence instance.
                if not parsed_href.hostname or parsed_href.hostname == base_host:
                    query_params = urllib.parse.parse_qs(parsed_href.query)
                    page_id_param = next(
                        (
                            values[0]
                            for key, values in query_params.items()
                            if key.lower() == "pageid" and values and values[0]
                        ),
                        None,
                    )
                    if page_id_param and page_id_param.isdigit():
                        return self.convert_page_link(int(page_id_param))
                    if match := parse_confluence_path(parsed_href.path):
                        if match.page_id:
                            return self.convert_page_link(match.page_id)
            if href_str.startswith("#"):
                if settings.export.page_href == "wiki":
                    return f"[[#{text}]]"
                return f"[{text}](#{github_heading_slug(href_str[1:])})"

            return super().convert_a(el, text, parent_tags)

        def convert_page_link(self, page_id: int) -> str:
            """Return a Markdown or wiki link to the Confluence page ``page_id``.

            Inaccessible pages yield a plain placeholder and a logged warning.
            In wiki mode, a title registered as ambiguous is disambiguated with
            the export path.

            Raises:
                ValueError: If ``page_id`` is falsy.
            """
            if not page_id:
                msg = "Page link does not have valid page_id."
                raise ValueError(msg)

            target = Page.from_id(page_id, self.page.base_url)
            title = target.title

            if title == "Page not accessible":
                logger.warning(
                    f"Confluence page link (ID: {page_id}) is not accessible, "
                    f"referenced from page '{self.page.title}' (ID: {self.page.id})"
                )
                return f"[Page not accessible (ID: {page_id})]"

            PageTitleRegistry.register(int(target.id), title)

            if settings.export.page_href != "wiki":
                href = self._get_path_for_href(target.export_path, settings.export.page_href)
                return f"[{title}]({href.replace(' ', '%20')})"

            if PageTitleRegistry.is_ambiguous(title):
                vault_path = target.export_path.with_suffix("").as_posix()
                return f"[[{vault_path}|{title}]]"
            return f"[[{title}]]"

        def convert_attachment_link(
            self, el: BeautifulSoup, text: str, parent_tags: list[str]
        ) -> str:
            """Build a Markdown link for an attachment.

            The attachment is looked up by ``data-linked-resource-file-id``,
            then ``data-media-id``, then ``data-linked-resource-id``. If none
            of them resolves, the original href (or the link text) is used as
            the target instead of crashing.
            """
            page = self.page
            lookups = (
                ("data-linked-resource-file-id", page.get_attachment_by_file_id),
                ("data-media-id", page.get_attachment_by_file_id),
                ("data-linked-resource-id", page.get_attachment_by_id),
            )
            attachment = None
            for attribute, lookup in lookups:
                reference = el.get(attribute)
                if reference:
                    attachment = lookup(str(reference))
                if attachment:
                    break

            if attachment is None:
                fallback_href = el.get("href") or text
                return f"[{text}]({fallback_href})"

            href_style = settings.export.attachment_href
            if href_style == "wiki":
                return f"[[{attachment.export_path.name}|{attachment.title}]]"

            path = self._get_path_for_href(attachment.export_path, settings.export.attachment_href)
            return f"[{attachment.title}]({path.replace(' ', '%20')})"

        def convert_time(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Return the element's ``datetime`` attribute, or its text when it has none."""
            if not el.has_attr("datetime"):
                return f"{text}"
            return f"{el['datetime']}"

        def convert_user_mention(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Render a user mention as the user's cleaned display name.

            The user is looked up by ``data-account-id`` when present. If the
            lookup raises ``ApiNotFoundError`` or there is no account id, the
            link text is used instead.
            """
            account_id = el.get("data-account-id")
            if not account_id:
                return self.convert_user_name(text)
            try:
                return self.convert_user(User.from_accountid(str(account_id), self.page.base_url))
            except ApiNotFoundError:
                logger.warning(f"User {account_id} not found. Using text instead.")
            return self.convert_user_name(text)

        def convert_user(self, user: User) -> str:
            """Return the user's display name passed through ``convert_user_name``."""
            display_name = user.display_name
            return self.convert_user_name(display_name)

        def convert_user_name(self, name: str) -> str:
            """Return ``name`` without the ``(Unlicensed)``/``(Deactivated)`` suffixes.

            Trailing markers are removed in any order and regardless of
            surrounding whitespace, so stacked markers such as
            ``"Jane (Unlicensed) (Deactivated)"`` and names followed by
            whitespace are cleaned as well. The result is whitespace-stripped.
            """
            suffixes = ("(Unlicensed)", "(Deactivated)")
            cleaned = name.strip()
            # Each pass removes at least one trailing marker, so the loop ends
            # once the name no longer ends with any of them.
            while cleaned.endswith(suffixes):
                for suffix in suffixes:
                    cleaned = cleaned.removesuffix(suffix)
                cleaned = cleaned.rstrip()
            return cleaned

        def convert_li(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Convert a list item, turning Confluence tasks into GitHub task list items.

            Items carrying ``data-inline-task-id`` get a ``[x]`` or ``[ ]``
            checkbox inserted after the first occurrence of the first
            configured bullet; other items are returned as converted by the
            parent.
            """
            item_md = super().convert_li(el, text, parent_tags)
            bullet = self.options["bullets"][0]

            if not el.has_attr("data-inline-task-id"):
                return item_md

            # Convert Confluence task lists to GitHub task lists
            checked = el.has_attr("class") and "checked" in el["class"]
            checkbox = "[x]" if checked else "[ ]"
            return item_md.replace(f"{bullet} ", f"{bullet} {checkbox} ", 1)

        # Emoji replacements for Atlassian emoticon ids. ``_convert_emoticon``
        # consults this table by ``data-emoji-id`` when the id could not be
        # decoded as hexadecimal Unicode code points.
        _ATLASSIAN_EMOTICONS: ClassVar[dict[str, str]] = {
            "atlassian-check_mark": "✅",
            "atlassian-cross_mark": "❌",
            "atlassian-yes": "👍",
            "atlassian-no": "👎",
            "atlassian-information": "\u2139\ufe0f",
            "atlassian-warning": "⚠️",
            "atlassian-forbidden": "🚫",
            "atlassian-plus": "\u2795",
            "atlassian-minus": "\u2796",
            "atlassian-question": "❓",
            "atlassian-exclamation": "❗",
            "atlassian-light_on": "💡",
            "atlassian-light_off": "💡",
            "atlassian-star_yellow": "⭐",
            "atlassian-blue_star": "🔵",
            "atlassian-smile": "😊",
            "atlassian-sad": "😞",
            "atlassian-tongue": "😛",
            "atlassian-biggrin": "😁",
            "atlassian-wink": "😉",
        }

        def _convert_emoticon(self, el: BeautifulSoup) -> str | None:
            """Return the emoji text for an emoticon image, or ``None`` for other images.

            Resolution order: a ``data-emoji-fallback`` that is not a
            ``:shortname:``; ``data-emoji-id`` decoded as dash-separated
            hexadecimal code points; the Atlassian emoticon table; and finally
            the shortname, the fallback or the ``alt`` text.
            """
            css_classes = el.get("class") or []
            if "emoticon" not in css_classes:
                return None

            emoji_id = str(el.get("data-emoji-id", ""))
            fallback = str(el.get("data-emoji-fallback", ""))
            if fallback and not fallback.startswith(":"):
                return fallback

            if emoji_id:
                try:
                    codepoints = [int(part, 16) for part in emoji_id.split("-")]
                    if all(0 <= cp <= _MAX_UNICODE_CODEPOINT for cp in codepoints):
                        return "".join(map(chr, codepoints))
                except (OverflowError, ValueError):
                    # Not a code point sequence; try the named emoticons next.
                    pass
                named = self._ATLASSIAN_EMOTICONS.get(emoji_id)
                if named is not None:
                    return named

            shortname = str(el.get("data-emoji-shortname", ""))
            return shortname or fallback or str(el.get("alt", "")) or None

        def convert_img(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:  # noqa: C901, PLR0911, PLR0912
            """Convert an image element to Markdown.

            Emoticon images become their emoji text. Otherwise the matching
            page attachment is looked up by ``data-media-id``,
            ``data-linked-resource-file-id``, ``data-linked-resource-id`` and
            finally the ``ri:filename`` inside ``data-encoded-xml``. Sources
            containing ``.drawio.png`` are first offered to the embedded
            Mermaid conversion and fall back to the PNG attachment. Images
            without a matching attachment keep their original target. A
            caption line is appended when image captions are enabled and one
            is known for the attachment title.
            """
            if emoticon := self._convert_emoticon(el):
                return emoticon

            attachment = None
            if fid := el.get("data-media-id"):
                attachment = self.page.get_attachment_by_file_id(str(fid))
            if not attachment and (fid := el.get("data-linked-resource-file-id")):
                attachment = self.page.get_attachment_by_file_id(str(fid))
            if not attachment and (aid := el.get("data-linked-resource-id")):
                attachment = self.page.get_attachment_by_id(str(aid))
            if not attachment and (encoded_xml := el.get("data-encoded-xml")):
                decoded = unquote(str(encoded_xml))
                if m := re.search(r'ri:filename="([^"]+)"', decoded):
                    matches = self.page.get_attachments_by_title(m.group(1))
                    if matches:
                        attachment = matches[0]

            url_src = str(el.get("src", ""))

            if ".drawio.png" in url_src:
                filename = unquote(urlparse(url_src).path.split("/")[-1])
                drawio_result = self._convert_drawio_embedded_mermaid(filename)
                if drawio_result:
                    return drawio_result
                # If no mermaid diagram extracted, use PNG as attachment fallback
                if attachment is None:
                    drawio_images = self.page.get_attachments_by_title(filename)
                    if drawio_images:
                        attachment = drawio_images[0]

            if attachment is None:
                href = el.get("href") or text
                if href:
                    return f"![{text}]({href})"
                if url_src:
                    return f"![{text}]({url_src})"
                return text

            caption = (
                self._image_captions.get(attachment.title, "")
                if settings.export.image_captions
                else ""
            )

            if settings.export.attachment_href == "wiki":
                img_md = f"![[{attachment.export_path.name}]]"
                return f"{img_md}\n*{caption}*" if caption else img_md

            path = self._get_path_for_href(attachment.export_path, settings.export.attachment_href)
            # Point the element at the exported file before the parent
            # converter renders it.
            el["src"] = path.replace(" ", "%20")
            tags = parent_tags if isinstance(parent_tags, list | set) else set()
            if "_inline" in tags:
                tags = set(tags)
                tags.discard("_inline")  # Always show images.
            img_md = super().convert_img(el, text, tags)  # type: ignore[union-attr]
            return f"{img_md}\n*{caption}*" if caption else img_md

        def _normalize_unicode_whitespace(self, text: str) -> str:
            r"""Normalize Unicode whitespace to regular spaces.

            This fixes an issue where markdownify's chomp() function strips Unicode
            whitespace characters (like \xa0 from &nbsp;) entirely, causing missing
            spaces in markdown output.

            Confluence often uses &nbsp; (non-breaking space, \xa0) inside inline
            formatting tags like <em>&nbsp;text</em>. BeautifulSoup correctly converts
            this to \xa0, but markdownify's chomp() doesn't preserve it, resulting in
            output like "word*text*" instead of "word *text*".

            This method normalizes all Unicode whitespace characters to regular ASCII
            spaces so they are preserved by markdownify's chomp() function.

            Args:
                text: Text string to normalize

            Returns:
                Text with Unicode whitespace replaced by regular spaces
            """
            # Normalize all Unicode whitespace to regular space
            # This includes: \xa0 (nbsp), \u2000-\u200a (various spaces),
            # \u2028 (line separator), \u2029 (paragraph separator), etc.
            # Keep \n, \r, \t as-is since they have semantic meaning
            normalized = text
            for char in text:
                if char.isspace() and char not in " \n\r\t":
                    # Replace Unicode whitespace with regular space
                    normalized = normalized.replace(char, " ")
            return normalized

        def escape(self, text: str, parent_tags: list[str]) -> str:
            """Apply the base ``MarkdownConverter`` escaping, then backslash-escape ``[`` and ``]``."""
            base_escape = cast("Any", MarkdownConverter).escape
            result: str = base_escape(self, text, parent_tags)
            for bracket in "[]":
                result = result.replace(bracket, "\\" + bracket)
            return result

        def _escape_template_placeholders(self, text: str) -> str:
            r"""Escape <placeholder> patterns that Obsidian misparses as HTML tags.

            Confluence templates mark values to be replaced with
            <placeholder text>. Obsidian renders those as HTML, which breaks the
            page layout, so they are rewritten to \<placeholder text\> and show
            up as literal angle-bracket text.

            Autolinks, known HTML elements (e.g. <br/>) and constructs starting
            with ``!`` are left alone, as is everything inside fenced code blocks
            and inline code spans.
            """

            def _escape_if_placeholder(m: re.Match) -> str:
                original = m.group(0)
                inner = m.group(1)
                if _AUTOLINK_URI_RE.match(inner) or _AUTOLINK_EMAIL_RE.match(inner):
                    return original
                # Drop a leading slash (closing tag) and take the first token as tag name
                candidate = inner.strip().lstrip("/")
                tag_name = ""
                if candidate:
                    tag_name = re.split(r"[\s/]", candidate)[0].lower()
                if tag_name in _HTML_ELEMENTS or inner.startswith("!"):
                    return original
                return f"\\<{inner}\\>"

            def _rewrite_outside_inline_code(line: str) -> str:
                # Alternate plain segments with the inline-code spans between them;
                # only the plain segments are escaped.
                segments = _INLINE_CODE_RE.split(line)
                spans = _INLINE_CODE_RE.findall(line)
                pieces = []
                for idx, segment in enumerate(segments):
                    pieces.append(_ANGLE_BRACKET_RE.sub(_escape_if_placeholder, segment))
                    if idx < len(spans):
                        pieces.append(spans[idx])
                return "".join(pieces)

            inside_fence = False
            output = []
            for line in text.split("\n"):
                is_fence_marker = bool(_CODE_FENCE_RE.match(line))
                if is_fence_marker:
                    inside_fence = not inside_fence
                if is_fence_marker or inside_fence:
                    # Fence delimiters and fenced content pass through verbatim
                    output.append(line)
                else:
                    output.append(_rewrite_outside_inline_code(line))
            return "\n".join(output)

        def convert_em(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Render <em> after turning Unicode whitespace in its text into plain spaces."""
            return super().convert_em(el, self._normalize_unicode_whitespace(text), parent_tags)

        def convert_strong(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Render <strong> after turning Unicode whitespace in its text into plain spaces."""
            return super().convert_strong(
                el, self._normalize_unicode_whitespace(text), parent_tags
            )

        def convert_code(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Render <code> after turning Unicode whitespace in its text into plain spaces."""
            return super().convert_code(el, self._normalize_unicode_whitespace(text), parent_tags)

        def convert_i(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Render <i> after turning Unicode whitespace in its text into plain spaces."""
            return super().convert_i(el, self._normalize_unicode_whitespace(text), parent_tags)

        def convert_b(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Render <b> after turning Unicode whitespace in its text into plain spaces."""
            return super().convert_b(el, self._normalize_unicode_whitespace(text), parent_tags)

        def _convert_drawio_embedded_mermaid(self, filename: str) -> str | None:
            """Try to obtain a mermaid diagram for a DrawIO PNG preview image.

            The DrawIO source attachment is looked up by the preview filename
            with its ``.png`` suffix removed; the exported source file is then
            handed to ``load_and_parse_drawio``.

            Args:
                filename: The filename of the drawio diagram image.

            Returns:
                The result of ``load_and_parse_drawio``, or None when no matching
                attachment exists or its exported file is missing on disk.
            """
            source_title = filename.removesuffix(".png")
            candidates = self.page.get_attachments_by_title(source_title)
            if not candidates:
                return None

            source_file = settings.export.output_path / candidates[0].export_path
            if source_file.exists():
                return load_and_parse_drawio(str(source_file))
            return None

        def convert_drawio(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Render a drawio macro as a preview image linking to the diagram file.

            The diagram name is taken from the ``|diagramName=...|`` fragment of
            the element's markup. Both the diagram attachment and its
            ``<name>.png`` preview must exist; otherwise an HTML comment is
            emitted. Without a diagram name an empty string is returned.
            """
            found = re.search(r"\|diagramName=(.+?)\|", str(el))
            if not found:
                return ""

            diagram_name = found.group(1)
            diagrams = self.page.get_attachments_by_title(diagram_name)
            previews = self.page.get_attachments_by_title(f"{diagram_name}.png")
            if not diagrams or not previews:
                return f"\n<!-- Drawio diagram `{diagram_name}` not found -->\n\n"

            href_style = settings.export.attachment_href
            if href_style == "wiki":
                preview_file = previews[0].export_path.name
                diagram_file = diagrams[0].export_path.name
                embedded = f"![[{preview_file}|{diagram_name}]]"
                link = f"[[{diagram_file}|{embedded}]]"
            else:
                diagram_href = self._get_path_for_href(
                    diagrams[0].export_path, settings.export.attachment_href
                ).replace(" ", "%20")
                preview_href = self._get_path_for_href(
                    previews[0].export_path, settings.export.attachment_href
                ).replace(" ", "%20")
                embedded = f"![{diagram_name}]({preview_href})"
                link = f"[{embedded}]({diagram_href})"
            return f"\n{link}\n\n"

        def _extract_uml_from_editor2(self, macro_id: str) -> str | None:
            """Extract PlantUML source from editor2 XML by macro-id (Cloud format).

            Scans the ``structured-macro`` elements of ``self.page.editor2`` for a
            ``plantuml`` macro whose ``macro-id`` equals *macro_id* and reads the
            ``umlDefinition`` entry of the JSON stored in its ``plain-text-body``.

            Args:
                macro_id: Value of the macro's ``macro-id`` attribute to look for.

            Returns:
                The UML definition, or None when editor2 is empty, no matching
                macro with a body exists, the body is not valid JSON, the JSON
                is not an object, or the definition is empty.
            """
            if not self.page.editor2:
                return None
            wrapped = f"<root>{self.page.editor2}</root>"
            soup = BeautifulSoup(wrapped, "xml")
            for macro in soup.find_all("structured-macro"):
                if not isinstance(macro, Tag):
                    continue
                if macro.get("name") != "plantuml" or macro.get("macro-id") != macro_id:
                    continue
                plain_text_body = macro.find("plain-text-body")
                if not isinstance(plain_text_body, Tag):
                    continue
                cdata = plain_text_body.get_text(strip=True)
                if not cdata:
                    continue
                try:
                    payload = json.loads(cdata)
                except json.JSONDecodeError:
                    return None
                # Valid JSON that is not an object (list, string, number, ...) has
                # no ``umlDefinition``; treat it like unparseable content instead
                # of raising AttributeError on ``.get``.
                if not isinstance(payload, dict):
                    return None
                return payload.get("umlDefinition") or None
            return None

        def _extract_uml_from_storage(self) -> str | None:
            """Extract PlantUML source from body.storage by position (Server format).

            Consumes one position of ``self._plantuml_index`` on every call, even
            when no macro exists at that position.
            """
            position = self._plantuml_index
            self._plantuml_index = position + 1
            macros = self._storage_plantuml_macros
            if position >= len(macros):
                return None
            body = macros[position].find("plain-text-body")
            if isinstance(body, Tag):
                return body.get_text(strip=True) or None
            return None

        def convert_plantuml(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Convert PlantUML diagrams to Markdown code blocks.

            Two Confluence formats are handled:

            1. **Cloud / editor2**: the view HTML carries a ``data-macro-id`` that
               identifies the structured macro in the editor2 XML, whose JSON
               CDATA holds the source (``{"umlDefinition": "..."}``).

            2. **Server / Data Center**: ``editor2`` is often empty and the view
               HTML has no ``macro-id``. The raw ``@startuml`` text lives in
               ``<plain-text-body>`` sections of ``body.storage`` and diagrams
               are paired by position (Nth storage macro with the Nth ``<span>``).

            If neither strategy yields a source, a warning is logged and an HTML
            comment placeholder is returned.
            """

            def _as_code_block(source: str) -> str:
                return f"\n```plantuml\n{source}\n```\n\n"

            # Cloud: resolve through editor2 using the macro id
            macro_id = el.get("data-macro-id")
            if macro_id and (uml := self._extract_uml_from_editor2(str(macro_id))):
                return _as_code_block(uml)

            # Server / Data Center: positional lookup in body.storage
            if uml := self._extract_uml_from_storage():
                return _as_code_block(uml)

            logger.warning("PlantUML macro could not be resolved from editor2 or body.storage")
            return "\n<!-- PlantUML diagram (source not found) -->\n\n"

        def convert_include(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Convert Confluence `include` / `excerpt-include` macro.

            With `include_macro = transclusion` and a resolvable target title, an
            Obsidian-style embed (`![[Page Title]]`) is emitted so the referenced
            page renders inline in Obsidian. The target page has to be exported
            as well for the link to resolve. If the title cannot be resolved a
            warning is logged and the inline rendering is used instead.

            Otherwise the already expanded body is rendered through the normal
            div conversion. `excerpt-include` content is additionally wrapped in
            `excerpt start` / `excerpt end` HTML comments.
            """
            macro_name = str(el.get("data-macro-name", ""))
            macro_id = el.get("data-macro-id")
            target_title: str | None = (
                self._extract_include_target_title(macro_id)
                if macro_id and isinstance(macro_id, str)
                else None
            )

            if settings.export.include_macro == "transclusion":
                if target_title:
                    return f"\n![[{target_title}]]\n\n"
                logger.warning(
                    f"{macro_name} macro found but target page title could not be resolved; "
                    f"falling back to inline content"
                )

            inline = super().convert_div(el, text, parent_tags)  # type: ignore[misc]
            if macro_name != "excerpt-include":
                return inline

            title_note = f" from page '{target_title}'" if target_title else ""
            return (
                f"\n<!-- excerpt start{title_note} -->\n{inline}"
                f"\n<!-- excerpt end{title_note} -->\n\n"
            )

        def _strip_excerpt_include_panel_titles(self, html: str) -> str:
            """Remove the source-page-title panel around `excerpt-include` bodies.

            Parses *html*, hands every element marked with
            `data-macro-name="excerpt-include"` to `_unwrap_excerpt_include_panel`
            and returns the re-serialized document.
            """
            document = BeautifulSoup(html, "html.parser")
            excerpt_nodes = document.find_all(attrs={"data-macro-name": "excerpt-include"})
            for node in excerpt_nodes:
                self._unwrap_excerpt_include_panel(node)
            return str(document)

        def _unwrap_excerpt_include_panel(self, el: Tag) -> None:
            """Drop the `panelHeader` and unwrap the `panelContent` of a panel element.

            Elements whose class list does not contain `panel` are left untouched.
            """
            css_classes = el.get("class") or []
            if isinstance(css_classes, list) and "panel" in css_classes:
                panel_header = el.find("div", class_="panelHeader")
                if isinstance(panel_header, Tag):
                    panel_header.decompose()
                panel_body = el.find("div", class_="panelContent")
                if isinstance(panel_body, Tag):
                    panel_body.unwrap()

        def _extract_include_target_title(self, macro_id: str) -> str | None:
            """Resolve the target page title for an `include` / `excerpt-include` macro.

            BeautifulSoup with `xml` parser strips namespace prefixes, so
            `ac:structured-macro` becomes `structured-macro`, `ri:page` becomes
            `page`, and `ri:content-title` becomes `content-title`.

            Args:
                macro_id: Value of the macro's `macro-id` attribute to look for.

            Returns:
                The non-empty `content-title` of the matching macro's page
                reference, or None when editor2 is empty or nothing matches.
            """
            # Nothing to search; skip the XML parse (same guard as the PlantUML
            # editor2 lookup).
            if not self.page.editor2:
                return None
            wrapped_editor2 = f"<root>{self.page.editor2}</root>"
            soup_editor2 = BeautifulSoup(wrapped_editor2, "xml")
            for macro in soup_editor2.find_all("structured-macro"):
                if not isinstance(macro, Tag):
                    continue
                if macro.get("name") not in ("include", "excerpt-include"):
                    continue
                if macro.get("macro-id") != macro_id:
                    continue
                ri_page = macro.find("page")
                if isinstance(ri_page, Tag):
                    title = ri_page.get("content-title")
                    if isinstance(title, str) and title:
                        return title
            return None

        def _find_element_with_namespace(self, parent: BeautifulSoup, tag_name: str) -> Tag | None:
            """Look up *tag_name* under *parent*, preferring the `ac:`-prefixed form."""
            match = parent.find(f"ac:{tag_name}")
            if not match:
                match = parent.find(tag_name)
            if isinstance(match, Tag):
                return match
            return None

        def _find_structured_macro(self, el: BeautifulSoup) -> Tag | None:
            """Return the `structured-macro` element under *el* (prefixed or not), if any."""
            macro = self._find_element_with_namespace(el, "structured-macro")
            return macro

        def _extract_plain_text_body(self, el: BeautifulSoup | Tag) -> str | None:
            """Return the text of the `plain-text-body` element under *el*, or None if absent."""
            body = self._find_element_with_namespace(el, "plain-text-body")  # type: ignore[arg-type]
            return body.get_text() if body else None

        def _extract_markdown_parameter(self, el: BeautifulSoup | Tag) -> str | None:
            """Return the text of the `markdown` parameter element under *el*, if present.

            The namespaced form (`ac:parameter` / `ac:name`) is tried before the
            prefix-less one.
            """
            lookups = (
                ("ac:parameter", {"ac:name": "markdown"}),
                ("parameter", {"name": "markdown"}),
            )
            node = None
            for tag_name, attrs in lookups:
                node = el.find(tag_name, attrs)
                if node is not None:
                    break
            return node.get_text() if isinstance(node, Tag) else None

        def _extract_markdown_from_body(self, el: BeautifulSoup) -> str | None:
            """Extract markdown content from body HTML.

            Sources are tried in this order and the first non-empty one wins:
            plain-text-body of *el*, plain-text-body of its structured-macro
            child, `markdown` parameter of *el*, `markdown` parameter of the
            structured-macro child.
            """
            # Standard markdown macro: plain-text-body directly on the element
            found = self._extract_plain_text_body(el)
            if found:
                return found

            macro = self._find_structured_macro(el)

            # ... or on the nested structured-macro
            if macro and (found := self._extract_plain_text_body(macro)):
                return found

            # mohamicorp-markdown keeps the content in a parameter
            if found := self._extract_markdown_parameter(el):
                return found

            if macro and (found := self._extract_markdown_parameter(macro)):
                return found

            return None

        def _extract_markdown_from_editor2(self, macro_id: str) -> str | None:
            """Extract markdown content from editor2 XML.

            Looks for a `markdown` / `mohamicorp-markdown` structured macro whose
            `macro-id` equals *macro_id* and returns the stripped text of its
            `plain-text-body`, or failing that of its `markdown` parameter.

            Args:
                macro_id: Value of the macro's `macro-id` attribute to look for.

            Returns:
                The extracted text, or None when editor2 is empty or no matching
                macro carries content.
            """
            # Nothing to search; skip the XML parse (same guard as the PlantUML
            # editor2 lookup).
            if not self.page.editor2:
                return None
            wrapped_editor2 = f"<root>{self.page.editor2}</root>"
            soup_editor2 = BeautifulSoup(wrapped_editor2, "xml")

            # BeautifulSoup strips namespace prefixes, so ac:structured-macro
            # becomes structured-macro
            for macro in soup_editor2.find_all("structured-macro"):
                if not isinstance(macro, Tag):
                    continue
                if macro.get("name") not in ("markdown", "mohamicorp-markdown"):
                    continue
                if macro.get("macro-id") != macro_id:
                    continue

                # Try plain-text-body first
                plain_text_body = macro.find("plain-text-body")
                if isinstance(plain_text_body, Tag):
                    return plain_text_body.get_text(strip=True)

                # Try parameter for mohamicorp-markdown
                param = macro.find("parameter", {"name": "markdown"})
                if isinstance(param, Tag):
                    return param.get_text(strip=True)

            return None

        def convert_markdown(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Convert Markdown macro fragments to Markdown.

            Handles the standard 'markdown' macro as well as 'mohamicorp-markdown'.
            Their content already is Markdown, so it is extracted (from the body
            HTML first, then from the editor2 XML via the macro id) and returned
            surrounded by newlines. If nothing can be extracted, a warning is
            logged and an HTML comment placeholder is returned.
            """
            macro_name = el.get("data-macro-name", "")

            content = self._extract_markdown_from_body(el)
            if not content:
                # Fall back to the editor2 XML (similar to plantuml)
                macro_id = el.get("data-macro-id")
                if macro_id and isinstance(macro_id, str):
                    content = self._extract_markdown_from_editor2(macro_id)

            if content:
                # Already Markdown: pass through, padded with newlines for spacing
                return f"\n{content}\n\n"

            logger.warning(
                f"Markdown macro ({macro_name}) found but no content could be extracted"
            )
            return f"\n<!-- Markdown macro ({macro_name}) content not found -->\n\n"

        def convert_table(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
            """Convert a table, routing `metadata-summary-macro` tables to the report converter."""
            is_properties_report = el.has_attr("class") and "metadata-summary-macro" in el["class"]
            if not is_properties_report:
                return super().convert_table(el, text, parent_tags)
            return self.convert_page_properties_report(el, text, parent_tags)

        def convert_page_properties_report(
            self, el: BeautifulSoup, text: str, parent_tags: list[str]
        ) -> str:
            """Render a page-properties-report table.

            Returns an empty string when the element has no `data-cql`. In
            `dataview` format a translated Dataview query block is emitted when
            the CQL can be translated. Otherwise the table with the same
            `data-cql` is taken from `self.page.body_export` and converted as a
            regular table (empty string if it is not found there).
            """
            cql = el.get("data-cql")
            if not cql:
                return ""

            if settings.export.page_properties_report_format == "dataview":
                query = self._cql_to_dataview(el, str(cql))
                if query is not None:
                    return f"\n```dataview\n{query}\n```\n"

            exported = BeautifulSoup(self.page.body_export, "html.parser")
            report_table = exported.find("table", {"data-cql": cql})
            if report_table:
                return super().convert_table(report_table, "", parent_tags)  # type: ignore
            return ""

        def _cql_to_dataview(self, el: BeautifulSoup, cql: str) -> str | None:
            """Translate a Confluence CQL query to an Obsidian Dataview DQL query.

            The query is built from ``label = "..."`` conditions (as ``#label``
            sources) and, when the CQL is scoped to the current page via
            ``currentContent()`` or a ``parent`` id equal to the element's
            ``data-current-content-id``, the folder of the current page's export
            path. Columns and sorting come from the element's ``data-*``
            attributes.

            Returns None if the CQL cannot be meaningfully translated, i.e. it
            yields neither a folder source nor any label.
            """
            content_id = str(el.get("data-current-content-id", ""))
            raw_headings = str(el.get("data-headings", ""))
            title_column = str(el.get("data-first-column-heading", "Title"))
            sort_key = str(el.get("data-sort-by", title_column))
            descending = str(el.get("data-reverse-sort", "false")).lower() == "true"

            labels = re.findall(r'label\s*=\s*"([^"]+)"', cql, re.IGNORECASE)

            parent = re.search(r'parent\s*=\s*"?(\d+)"?', cql, re.IGNORECASE)
            uses_current_content = re.search(
                r'(?:ancestor|parent)\s*=\s*currentContent\s*\(\s*\)', cql, re.IGNORECASE
            )
            scoped_to_page = bool(uses_current_content) or (
                parent is not None and parent.group(1) == content_id
            )

            sources: list[str] = []
            if scoped_to_page:
                folder = str(self.page.export_path.parent).replace("\\", "/")
                sources.append(f'"{folder}"')
            elif not labels:
                # Neither a folder nor a label to select from
                return None
            sources.extend(f"#{label}" for label in labels)

            if raw_headings:
                headings = [part.strip() for part in raw_headings.split(",") if part.strip()]
                columns = ", ".join(f'{sanitize_key(h)} AS "{h}"' for h in headings)
                table_line = f"TABLE {columns}"
            else:
                table_line = "TABLE"

            query = [table_line]
            if sources:
                query.append("FROM " + " AND ".join(sources))
            direction = "DESC" if descending else "ASC"
            query.append(f"SORT {sanitize_key(sort_key)} {direction}")

            return "\n".join(query)

        def _get_path_for_href(
            self, path: Path, style: Literal["absolute", "relative", "wiki"]
        ) -> str:
            """Get the path to use in href attributes based on settings."""
            if style == "absolute":
                # Note that usually absolute would be
                # something like this: (settings.export.output_path / path).absolute()
                # In this case the URL will be "absolute" to the export path.
                # This is useful for local file links.
                result = "/" + str(path).lstrip("/")
            elif style == "wiki":
                result = path.name
            else:
                result = os.path.relpath(path, self.page.export_path.parent)
            return result


# Upper bound on the number of page IDs sent in one v1 CQL ``id in (...)``
# existence-check request; the configured batch size is capped to this value
# when the v2 API is not used.
_CQL_MAX_BATCH_SIZE: int = 25


def _fetch_page_ids_v2_batch(batch: list[str], base_url: str) -> set[str]:
    """Ask the v2 API which of the page IDs in *batch* exist (one request).

    Issues GET /api/v2/pages?id=X&id=Y&...  (Atlassian Cloud). Because the SDK
    only takes a dict for ``params`` and the v2 API expects repeated ``id``
    params, the query string is encoded into the request path directly.

    Returns the IDs found in the response's ``results``; an empty set when the
    response is empty.
    """
    query_pairs = [("id", page_id) for page_id in batch]
    query_pairs.append(("limit", len(batch)))
    query = urllib.parse.urlencode(query_pairs)

    client = get_thread_confluence(base_url)
    response = cast("dict", client.get(f"api/v2/pages?{query}"))
    if response:
        return {str(entry["id"]) for entry in response.get("results", [])}
    return set()


def _fetch_page_ids_cql_batch(batch: list[str], base_url: str) -> set[str]:
    """Ask the v1 CQL search which of the page IDs in *batch* exist (one request).

    Issues GET /rest/api/content/search with ``id in (...)`` (self-hosted /
    fallback).

    Returns the IDs found in the response's ``results``; an empty set when the
    response is empty.
    """
    id_list = ",".join(batch)
    request_params = {"cql": f"id in ({id_list})", "limit": len(batch), "fields": "id"}

    client = get_thread_confluence(base_url)
    response = cast("dict", client.get("rest/api/content/search", params=request_params))
    if response:
        return {str(entry["id"]) for entry in response.get("results", [])}
    return set()


def fetch_deleted_page_ids(page_ids: list[str], base_url: str) -> set[str]:
    """Return the subset of *page_ids* that no longer exist in Confluence.

    With ``connection_config.use_v2_api`` enabled the v2 REST API is queried
    (multiple ``id`` query params, ``export.existence_check_batch_size`` IDs
    per request); otherwise the v1 CQL content search is used, with the batch
    size capped at :data:`_CQL_MAX_BATCH_SIZE`.

    A failing batch never causes deletions: all of its IDs are treated as
    still existing.
    """
    if not page_ids:
        return set()

    use_v2 = settings.connection_config.use_v2_api
    configured_size = settings.export.existence_check_batch_size
    if use_v2:
        chunk_size = configured_size
    else:
        chunk_size = min(configured_size, _CQL_MAX_BATCH_SIZE)

    total = len(page_ids)
    n_batches = -(-total // chunk_size)  # ceil division
    logger.debug(
        "Checking existence of %d page(s) in %d batch(es) via %s API",
        total,
        n_batches,
        "v2" if use_v2 else "v1 CQL",
    )

    still_present: set[str] = set()
    for start in range(0, total, chunk_size):
        chunk = page_ids[start : start + chunk_size]
        try:
            if use_v2:
                found = _fetch_page_ids_v2_batch(chunk, base_url)
            else:
                found = _fetch_page_ids_cql_batch(chunk, base_url)
            still_present.update(found)
        except Exception:  # noqa: BLE001
            logger.warning(
                "Failed to check page existence for batch (%d IDs). "
                "Skipping deletion for these pages.",
                len(chunk),
            )
            # Be conservative: pretend every page of the failed batch exists
            still_present.update(chunk)

    return set(page_ids).difference(still_present)


def sync_removed_pages(base_url: str) -> None:
    """Remove locally exported pages that were deleted in Confluence.

    Does nothing when ``export.cleanup_stale`` is off or the lockfile reports
    no unseen pages. Otherwise the unseen IDs are checked against the API and
    the ones found to be deleted are passed to ``LockfileManager.remove_pages``.
    """
    if not settings.export.cleanup_stale:
        logger.debug("Stale page cleanup disabled — skipping.")
        return

    unseen_ids = LockfileManager.unseen_ids()
    if not unseen_ids:
        logger.debug("No unseen pages in lockfile — nothing to clean up.")
        return

    status_text = f"[dim]Checking {len(unseen_ids)} unseen page(s) for removal…[/dim]"
    with console.status(status_text):
        stale_ids = fetch_deleted_page_ids(sorted(unseen_ids), base_url)

    if stale_ids:
        logger.info("Removing %d stale page(s) from local export.", len(stale_ids))
    LockfileManager.remove_pages(stale_ids)


def _make_progress() -> Progress:
    """Create the non-transient rich Progress display used for page export."""
    columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TaskProgressColumn(),
        TimeElapsedColumn(),
        TimeRemainingColumn(),
    )
    return Progress(*columns, console=console, transient=False)


def _export_page_worker(page: "Page | Descendant", stats: ExportStats | None = None) -> None:
    """Export one Confluence page to Markdown and record it in the lockfile.

    The page is re-loaded by id using its own ``base_url``, so the matching
    thread-local client is picked up without touching any global state.

    Args:
        page: The page to export.
        stats: Optional stats tracker; its exported counter is incremented
            once the page has been exported and recorded.
    """
    loaded_page = Page.from_id(page.id, page.base_url)
    LockfileManager.record_page(loaded_page, loaded_page.export())
    if stats is None:
        return
    stats.inc_exported()


def export_pages(pages: list["Page | Descendant"]) -> None:
    """Export the given Confluence pages to Markdown.

    All pages are first marked as seen in the lockfile and registered in the
    page title registry. Pages for which ``LockfileManager.should_export``
    returns false are counted as skipped. The remaining pages are exported
    either one by one (log level ``DEBUG`` or ``max_workers <= 1``) or through
    a ``ThreadPoolExecutor`` sized by ``settings.connection_config.max_workers``.
    A failure of a single page is logged and counted; it does not abort the run.

    Args:
        pages: List of pages to export.
    """
    total_pages = len(pages)

    # Mark every page as seen before filtering, so unchanged pages are not
    # treated as unseen by the stale-page cleanup.
    LockfileManager.mark_seen([candidate.id for candidate in pages])
    for candidate in pages:
        PageTitleRegistry.register(int(candidate.id), candidate.title)

    pending = []
    for candidate in pages:
        if LockfileManager.should_export(candidate):
            pending.append(candidate)
    pending_count = len(pending)
    unchanged_count = total_pages - pending_count

    stats = reset_stats(total=total_pages)
    for _ in range(unchanged_count):
        stats.inc_skipped()

    if unchanged_count:
        logger.info("Skipping %d unchanged page(s).", unchanged_count)

    if not pending:
        logger.info("All %d page(s) unchanged — nothing to export.", total_pages)
        return

    # Worker count comes from the connection config.
    worker_limit = settings.connection_config.max_workers
    run_serially = settings.export.log_level == "DEBUG" or worker_limit <= 1

    if run_serially:
        mode_label = "serial"
    else:
        mode_label = f"parallel ({worker_limit} workers)"
    logger.debug("Export mode: %s, pages to export: %d", mode_label, pending_count)

    with _make_progress() as progress:
        task = progress.add_task(
            f"[cyan]Exporting {pending_count} page(s)[/cyan]",
            total=pending_count,
        )

        if run_serially:
            for current in pending:
                progress.update(task, description=f"[cyan]Page {current.id}[/cyan]")
                try:
                    _export_page_worker(current, stats)
                except Exception:
                    logger.exception("Failed to export page %s", current.id)
                    stats.inc_failed()
                finally:
                    progress.advance(task)
            return

        with ThreadPoolExecutor(max_workers=worker_limit) as pool:
            # Map each future back to its page so failures can be attributed.
            page_by_future = {}
            for current in pending:
                page_by_future[pool.submit(_export_page_worker, current, stats)] = current

            for finished in as_completed(page_by_future):
                finished_page = page_by_future[finished]
                try:
                    finished.result()
                except Exception:
                    logger.exception("Failed to export page %s", finished_page.id)
                    stats.inc_failed()
                finally:
                    progress.advance(task)


================================================
FILE: confluence_markdown_exporter/main.py
================================================
import json
import logging
import platform
import sys
import urllib.parse
from typing import Annotated

import typer
import typer.rich_utils
import yaml
from rich.panel import Panel
from rich.table import Table

from confluence_markdown_exporter import __version__
from confluence_markdown_exporter import config as config_module
from confluence_markdown_exporter.utils.app_data_store import APP_CONFIG_PATH
from confluence_markdown_exporter.utils.app_data_store import get_settings
from confluence_markdown_exporter.utils.lockfile import LockfileManager
from confluence_markdown_exporter.utils.measure_time import measure
from confluence_markdown_exporter.utils.rich_console import console
from confluence_markdown_exporter.utils.rich_console import get_rich_console
from confluence_markdown_exporter.utils.rich_console import get_stats
from confluence_markdown_exporter.utils.rich_console import reset_stats
from confluence_markdown_exporter.utils.rich_console import setup_logging

# Replace typer's private console factory with the project's own
# ``get_rich_console`` (presumably so help output and application output share
# one console configuration — confirm in utils/rich_console.py).
typer.rich_utils._get_rich_console = get_rich_console

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class _CmeTyper(typer.Typer):
    """Typer application that turns selected exceptions into friendly exits.

    ``AuthNotConfiguredError`` raised by a command is caught here, at the app
    boundary: a hint is printed, the interactive config menu is opened for the
    failing service/URL, and the process exits with status 1. A ``ValueError``
    is printed in red together with a pointer to the help, and also exits
    with status 1. Neither case shows a traceback.
    """

    def __call__(self, *args: object, **kwargs: object) -> None:
        """Run the Typer app, handling missing-auth and value errors."""
        from confluence_markdown_exporter.api_clients import AuthNotConfiguredError

        try:
            super().__call__(*args, **kwargs)
        except AuthNotConfiguredError as auth_error:
            from confluence_markdown_exporter.utils.config_interactive import main_config_menu_loop

            service = auth_error.service
            instance_url = auth_error.url
            hint = f"Please configure {service} credentials for {instance_url} and re-run the export."
            console.print(hint)
            # Open the menu directly at the auth section of the failing service.
            main_config_menu_loop(f"auth.{service.lower()}", new_instance_url=instance_url)
            sys.exit(1)
        except ValueError as value_error:
            message = (
                f"[red bold]{value_error}[/red bold]\n"
                "See [code]--help[/code] or [code]README.md[/code] for more information."
            )
            console.print(message)
            sys.exit(1)


# Markdown epilog shown for the top-level app (passed as ``epilog`` to the app).
# Each list item must be its own \n\n-separated block so typer's epilog renderer
# keeps single \n between items, forming a valid markdown bullet list.
_QUICKSTART_EPILOG = (
    "**Quick start:**\n\n"
    "- Configure credentials: `cme config edit auth.confluence`\n\n"
    "- Set output path: `cme config set export.output_path=./output`\n\n"
    "- Export a page: `cme pages https://company.atlassian.net/wiki/spaces/KEY/pages/123/Title`\n\n"
    "- Export a space: `cme spaces https://company.atlassian.net/wiki/spaces/MYSPACE`\n\n"
    "- Export everything: `cme orgs https://company.atlassian.net`\n\n"
    "- Each command also has a singular alias"
    " (`page`, `space`, `org`) that behaves identically.\n\n"
)

# Shared markdown snippet listing accepted page URL shapes; appended to the
# epilogs of the page-based commands.
_PAGE_URL_FORMATS = (
    "**Supported URL formats:**\n\n"
    "- **Cloud**: `https://company.atlassian.net/wiki/spaces/KEY/pages/123/Title`\n\n"
    "- **Server (long)**: `https://confluence.company.com/display/KEY/Title`\n\n"
    "- **Server (short)**: `https://confluence.company.com/KEY/Title`\n\n"
)

# Shared markdown snippet listing accepted space URL shapes; appended to the
# epilog of the ``spaces`` command.
_SPACE_URL_FORMATS = (
    "**Supported URL formats:**\n\n"
    "- **Cloud**: `https://company.atlassian.net/wiki/spaces/SPACEKEY`\n\n"
    "- **Server (long)**: `https://confluence.company.com/display/SPACEKEY`\n\n"
    "- **Server (short)**: `https://confluence.company.com/SPACEKEY`\n\n"
)

# Root CLI application. Help texts are rendered as markdown, and invoking the
# CLI without arguments prints the help instead of failing.
app = _CmeTyper(
    rich_markup_mode="markdown",
    no_args_is_help=True,
    help=(
        "Export Confluence pages, spaces, or entire organizations to Markdown files.\n\n"
        "Authentication and settings are managed via `cme config`. "
        "Run `cme config` to open the interactive menu, or use "
        "`cme config set <key=value>` to set values directly.\n\n"
        "Most settings can also be overridden with environment variables using the prefix "
        "`CME_` and `__` as the nested delimiter "
        "(e.g. `CME_EXPORT__OUTPUT_PATH=/tmp/export`)."
    ),
    epilog=_QUICKSTART_EPILOG,
)
# Mount the configuration sub-application under ``cme config``.
app.add_typer(config_module.app, name="config")


def _init_logging() -> None:
    """Configure logging from the export settings.

    The log level is taken from ``export.log_level``. When
    ``export.save_log_to_file`` is set, records are additionally written to
    ``cme.log`` next to the app config file.
    """
    export_settings = get_settings().export
    if export_settings.save_log_to_file:
        log_path = APP_CONFIG_PATH.parent / "cme.log"
    else:
        log_path = None
    setup_logging(export_settings.log_level, log_file=log_path)


def _print_summary() -> None:
    """Print a panel summarising page and attachment export statistics.

    Nothing is printed when the recorded page total is zero. Optional rows
    (removed / failed) only appear when their counter is non-zero, and the
    attachment section only appears when any attachment counter is non-zero.
    """
    stats = get_stats()
    if stats.total == 0:
        return

    output_path = get_settings().export.output_path

    # Collect (label, value) pairs first; they are rendered in this order.
    rows: list[tuple[str, str]] = [
        ("Pages", ""),
        ("  Total", str(stats.total)),
        ("  [success]Exported[/success]", f"[success]{stats.exported}[/success]"),
        ("  [dim]Skipped (unchanged)[/dim]", str(stats.skipped)),
    ]
    if stats.removed:
        rows.append(("  [dim]Removed[/dim]", str(stats.removed)))
    if stats.failed:
        rows.append(("  [error]Failed[/error]", f"[error]{stats.failed}[/error]"))

    attachments_total = (
        stats.attachments_exported + stats.attachments_skipped + stats.attachments_failed
    )
    if attachments_total or stats.attachments_removed:
        rows.append(("Attachments", ""))
        if attachments_total:
            rows.append(("  Total", str(attachments_total)))
        exported_attachments = stats.attachments_exported
        rows.append(
            ("  [success]Exported[/success]", f"[success]{exported_attachments}[/success]")
        )
        rows.append(("  [dim]Skipped (unchanged)[/dim]", str(stats.attachments_skipped)))
        if stats.attachments_removed:
            rows.append(("  [dim]Removed[/dim]", str(stats.attachments_removed)))
        if stats.attachments_failed:
            rows.append(
                ("  [error]Failed[/error]", f"[error]{stats.attachments_failed}[/error]")
            )

    rows.append(("Output", str(output_path)))

    grid = Table.grid(padding=(0, 2))
    grid.add_column(style="dim", justify="right")
    grid.add_column()
    for label, value in rows:
        grid.add_row(label, value)

    # The panel title reflects page failures only.
    title = (
        "[warning]Export finished with errors[/warning]"
        if stats.failed
        else "[success]Export complete[/success]"
    )
    console.print(Panel(grid, title=title, expand=False))


@app.command(
    help=(
        "Export one or more Confluence pages by URL to Markdown.\n\n"
        "Fetches each page via the Confluence API and writes a Markdown file to the "
        "configured output directory (`export.output_path`). "
        "Pages that have not changed since the last export are skipped by default "
        "(`export.skip_unchanged=true`)."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme pages https://company.atlassian.net/wiki/spaces/KEY/pages/123/My+Page`\n\n"
        "- `cme pages https://...page1 https://...page2` — export multiple pages at once\n\n"
        "- `cme page URL` — singular alias, identical behaviour\n\n"
        "---\n\n" + _PAGE_URL_FORMATS
    ),
)
def pages(
    page_urls: Annotated[
        list[str],
        typer.Argument(
            help=(
                "One or more Confluence page URLs. "
                "Supports Cloud and Server URL formats. "
                "Example: https://company.atlassian.net/wiki/spaces/KEY/pages/123/Title"
            ),
            metavar="PAGE_URL",
        ),
    ],
) -> None:
    # Heavy project imports are deferred until the command actually runs.
    from confluence_markdown_exporter.confluence import Page
    from confluence_markdown_exporter.confluence import sync_removed_pages
    from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry

    _init_logging()
    stats = reset_stats(total=len(page_urls))
    with measure(f"Export pages {', '.join(page_urls)}"):
        LockfileManager.init()

        # Phase 1: resolve every URL and register its title before any export runs.
        resolved_pages: list[Page] = []
        for url in page_urls:
            with console.status(f"[dim]Fetching [highlight]{url}[/highlight]…[/dim]"):
                resolved = Page.from_url(url)
            PageTitleRegistry.register(int(resolved.id), resolved.title)
            resolved_pages.append(resolved)

        # Phase 2: export changed pages; a failing page is logged and counted
        # without aborting the remaining pages.
        touched_instances: set[str] = set()
        for current in resolved_pages:
            LockfileManager.mark_seen([current.id])
            if LockfileManager.should_export(current):
                try:
                    status_text = f"[dim]Exporting [highlight]{current.title}[/highlight]…[/dim]"
                    with console.status(status_text):
                        attachment_entries = current.export()
                    LockfileManager.record_page(current, attachment_entries)
                    stats.inc_exported()
                except Exception:
                    logger.exception("Failed to export page %s", current.title)
                    stats.inc_failed()
            else:
                stats.inc_skipped()
            touched_instances.add(current.base_url)

        # Phase 3: stale-page cleanup, once per instance that was touched.
        for instance_url in touched_instances:
            sync_removed_pages(instance_url)

    _print_summary()


# Register the same ``pages`` callback a second time under the singular
# command name ``page``, with its own short help and example.
app.command(
    name="page",
    help=(
        "Alias for `pages`. Export one or more Confluence pages by URL to Markdown.\n\n"
        "See `cme pages --help` for full documentation and all supported URL formats."
    ),
    epilog=(
        "**Example:**\n\n"
        "- `cme page https://company.atlassian.net/wiki/spaces/KEY/pages/123/My+Page`\n\n"
    ),
)(pages)


@app.command(
    help=(
        "Export one or more Confluence pages **and all their descendants** by URL to Markdown.\n\n"
        "Recursively fetches the given page(s) and every child page beneath them, "
        "then writes Markdown files to the configured output directory. "
        "Useful for exporting entire page trees without exporting a whole space."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme pages-with-descendants https://company.atlassian.net/wiki/spaces/KEY/pages/123/Root`\n\n"
        "- `cme pages-with-descendants https://...root1 https://...root2` — multiple trees\n\n"
        "- `cme page-with-descendants URL` — singular alias, identical behaviour\n\n"
        "---\n\n" + _PAGE_URL_FORMATS
    ),
)
def pages_with_descendants(
    page_urls: Annotated[
        list[str],
        typer.Argument(
            help=(
                "One or more Confluence page URLs. "
                "Each page and all its descendants will be exported. "
                "Example: https://company.atlassian.net/wiki/spaces/KEY/pages/123/Title"
            ),
            metavar="PAGE_URL",
        ),
    ],
) -> None:
    # Heavy project imports are deferred until the command actually runs.
    from confluence_markdown_exporter.confluence import Page
    from confluence_markdown_exporter.confluence import sync_removed_pages

    _init_logging()
    with measure(f"Export pages {', '.join(page_urls)} with descendants"):
        LockfileManager.init()

        # Export each tree, remembering which instances were involved.
        touched_instances: set[str] = set()
        for root_url in page_urls:
            root_page = Page.from_url(root_url)
            root_page.export_with_descendants()
            touched_instances.add(root_page.base_url)

        # Stale-page cleanup runs once per distinct instance, after all exports.
        for instance_url in touched_instances:
            sync_removed_pages(instance_url)

    _print_summary()


# Register the ``pages_with_descendants`` callback again under the singular
# command name ``page-with-descendants``.
app.command(
    name="page-with-descendants",
    help=(
        "Alias for `pages-with-descendants`. "
        "Export a Confluence page and all its descendants by URL to Markdown.\n\n"
        "See `cme pages-with-descendants --help` for full documentation."
    ),
    epilog=(
        "**Example:**\n\n"
        "- `cme page-with-descendants https://company.atlassian.net/wiki/spaces/KEY/pages/123/Root`\n\n"
    ),
)(pages_with_descendants)


@app.command(
    help=(
        "Export **all pages** in one or more Confluence spaces by URL to Markdown.\n\n"
        "Fetches every page in each space via the Confluence API and writes Markdown files "
        "to the configured output directory. "
        "Pages that have not changed since the last export are skipped by default."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme spaces https://company.atlassian.net/wiki/spaces/MYSPACE`\n\n"
        "- `cme spaces https://...SPACE1 https://...SPACE2` — export multiple spaces\n\n"
        "- `cme space URL` — singular alias, identical behaviour\n\n"
        "---\n\n" + _SPACE_URL_FORMATS
    ),
)
def spaces(
    space_urls: Annotated[
        list[str],
        typer.Argument(
            help=(
                "One or more Confluence space URLs. "
                "All pages within each space will be exported. "
                "Example: https://company.atlassian.net/wiki/spaces/MYSPACE"
            ),
            metavar="SPACE_URL",
        ),
    ],
) -> None:
    # Heavy project imports are deferred until the command actually runs.
    from confluence_markdown_exporter.confluence import Space
    from confluence_markdown_exporter.confluence import sync_removed_pages

    _init_logging()
    with measure(f"Export spaces {', '.join(space_urls)}"):
        LockfileManager.init()

        # Export each space, remembering which instances were involved.
        touched_instances: set[str] = set()
        for url in space_urls:
            current_space = Space.from_url(url)
            current_space.export()
            touched_instances.add(current_space.base_url)

        # Stale-page cleanup runs once per distinct instance, after all exports.
        for instance_url in touched_instances:
            sync_removed_pages(instance_url)

    _print_summary()


# Register the ``spaces`` callback again under the singular command name ``space``.
app.command(
    name="space",
    help=(
        "Alias for `spaces`. Export all pages in a Confluence space by URL to Markdown.\n\n"
        "See `cme spaces --help` for full documentation and all supported URL formats."
    ),
    epilog=("**Example:**\n\n- `cme space https://company.atlassian.net/wiki/spaces/MYSPACE`\n\n"),
)(spaces)


@app.command(
    help=(
        "Export **all spaces** of one or more Confluence organizations to Markdown.\n\n"
        "Iterates over every space in the organization and exports all pages in each. "
        "This is the broadest export scope — use `spaces` to target specific spaces, "
        "or `pages` / `pages-with-descendants` for finer-grained control.\n\n"
        "The base URL is the root of the Confluence instance, "
        "e.g. `https://company.atlassian.net`."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme orgs https://company.atlassian.net` — export everything\n\n"
        "- `cme orgs https://company1.atlassian.net https://company2.atlassian.net`"
        " — multiple orgs\n\n"
        "- `cme org URL` — singular alias, identical behaviour\n\n"
    ),
)
def orgs(
    base_urls: Annotated[
        list[str],
        typer.Argument(
            help=(
                "One or more Confluence base URLs (root of the instance). "
                "All spaces and pages within each organization will be exported. "
                "Example: https://company.atlassian.net"
            ),
            metavar="BASE_URL",
        ),
    ],
) -> None:
    # Heavy project imports are deferred until the command actually runs.
    from confluence_markdown_exporter.confluence import Organization
    from confluence_markdown_exporter.confluence import sync_removed_pages

    _init_logging()
    with measure("Export all spaces"):
        LockfileManager.init()

        # Each organization is exported and then cleaned up immediately,
        # using the URL exactly as given on the command line.
        for instance_url in base_urls:
            organization = Organization.from_url(instance_url)
            organization.export()
            sync_removed_pages(instance_url)

    _print_summary()


# Register the ``orgs`` callback again under the singular command name ``org``.
app.command(
    name="org",
    help=(
        "Alias for `orgs`. "
        "Export all spaces of a Confluence organization to Markdown.\n\n"
        "See `cme orgs --help` for full documentation."
    ),
    epilog=("**Example:**\n\n- `cme org https://company.atlassian.net`\n\n"),
)(orgs)


@app.command(
    help="Show the installed version of confluence-markdown-exporter.",
)
def version() -> None:
    """Print the package name followed by the installed version."""
    version_line = f"confluence-markdown-exporter {__version__}"
    typer.echo(version_line)


# Domain suffix that identifies Atlassian Cloud instances.
_ATLASSIAN_NET = "atlassian.net"
# Placeholder substituted for every sensitive value in bug-report output.
_REDACTED = "[redacted]"


def _redact_url(url: str) -> str:
    """Redact the instance URL.

    Atlassian Cloud URLs (``*.atlassian.net``) are kept as
    ``******.atlassian.net`` so the instance type is still visible.
    All other URLs are fully replaced with ``[redacted]``. This includes
    strings without a parseable hostname and strings that cannot be parsed
    as a URL at all.

    Args:
        url: The instance URL (or arbitrary config key) to redact.

    Returns:
        The redacted representation; this function never raises for
        malformed input.
    """
    try:
        host = urllib.parse.urlparse(url).hostname or ""
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. unbalanced IPv6
        # brackets). Redaction feeds the bug report, so it must not crash on a
        # broken config key — hide the value entirely instead.
        return _REDACTED
    if host == _ATLASSIAN_NET or host.endswith(f".{_ATLASSIAN_NET}"):
        return f"https://******.{_ATLASSIAN_NET}"
    return _REDACTED


def _redact_config(data: dict) -> dict:
    """Return a deep copy of the config dict with sensitive values redacted.

    Redacted fields: ``api_token``, ``pat``, ``username``, ``cloud_id`` (when non-empty),
    ``export.output_path``, and instance URL keys in ``auth.confluence`` / ``auth.jira``.

    Several instance URLs can redact to the same key (for example two
    self-hosted instances both become ``[redacted]``). To keep every instance
    visible in the output, the first occurrence keeps the plain redacted key
    and later ones get a numeric suffix (``"[redacted] (2)"``, ...).

    Args:
        data: Plain (JSON-like) config dict. It is not modified.

    Returns:
        A redacted deep copy. The ``auth`` section always contains both the
        ``confluence`` and ``jira`` keys afterwards.
    """
    import copy

    data = copy.deepcopy(data)
    for service in ("confluence", "jira"):
        auth_section: dict = data.get("auth", {}).get(service, {})
        redacted_section: dict = {}
        for url, details in auth_section.items():
            if isinstance(details, dict):
                for field in ("api_token", "pat", "username", "cloud_id"):
                    # Empty values stay empty so "not configured" remains visible.
                    if details.get(field):
                        details[field] = _REDACTED
            redacted_key = _redact_url(url)
            # Disambiguate colliding redacted keys instead of overwriting the
            # earlier instance's entry.
            unique_key = redacted_key
            occurrence = 2
            while unique_key in redacted_section:
                unique_key = f"{redacted_key} ({occurrence})"
                occurrence += 1
            redacted_section[unique_key] = details
        data.setdefault("auth", {})[service] = redacted_section
    if data.get("export", {}).get("output_path"):
        data["export"]["output_path"] = _REDACTED
    return data


@app.command(
    help=(
        "Print diagnostic information for filing a bug report.\n\n"
        "Outputs the app version, Python and OS details, and the current configuration "
        "with all secrets redacted (API tokens and PATs are masked; "
        "instance URL hostnames are partially hidden).\n\n"
        "Paste the full output into your GitHub issue when reporting a bug."
    ),
)
def bugreport() -> None:
    """Print version, system info, and redacted config for bug reports."""
    settings = get_settings()
    # Round-trip through JSON to obtain plain dicts/lists/strings for redaction.
    raw_config = json.loads(settings.model_dump_json())
    safe_config = _redact_config(raw_config)
    config_yaml = yaml.dump(safe_config, default_flow_style=False, allow_unicode=True).rstrip()

    version_section = [
        "### Version",
        f"confluence-markdown-exporter {__version__}",
        "",
    ]
    system_section = [
        "### System",
        f"Python: {sys.version}",
        f"Platform: {platform.platform()}",
        f"Architecture: {platform.machine()}",
        "",
    ]
    # The config file location itself is redacted as well.
    config_section = [
        "### Config",
        f"Config file: {_REDACTED}",
        "```yaml",
        config_yaml,
        "```",
    ]
    report_lines: list[str] = [
        "## Bug Report Diagnostic Info",
        "",
        *version_section,
        *system_section,
        *config_section,
    ]
    typer.echo("\n".join(report_lines))


# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    app()


================================================
FILE: confluence_markdown_exporter/utils/__init__.py
================================================


================================================
FILE: confluence_markdown_exporter/utils/app_data_store.py
================================================
"""Handles storage and retrieval of application data (auth and settings) for the exporter."""

import contextlib
import json
import os
from pathlib import Path
from typing import Any
from typing import Literal

from pydantic import BaseModel
from pydantic import Field
from pydantic import SecretStr
from pydantic import ValidationError
from pydantic import field_serializer
from pydantic import field_validator
from pydantic import model_validator
from pydantic_settings import BaseSettings
from pydantic_settings import PydanticBaseSettingsSource
from pydantic_settings import SettingsConfigDict
from typer import get_app_dir


def get_app_config_path() -> Path:
    """Resolve the location of the app config file.

    A non-empty ``CME_CONFIG_PATH`` environment variable is used as the file
    path as-is. Otherwise the file is ``app_data.json`` inside the
    application directory reported by ``get_app_dir``. The parent directory
    is created if it does not exist; the file itself is not created.

    Returns:
        Path to the config file.
    """
    override = os.environ.get("CME_CONFIG_PATH")
    if not override:
        default_dir = Path(get_app_dir("confluence-markdown-exporter"))
        config_file = default_dir / "app_data.json"
    else:
        config_file = Path(override)
    config_file.parent.mkdir(parents=True, exist_ok=True)
    return config_file


# Resolved once at import time; note that this also creates the parent
# directory of the config file as a side effect of get_app_config_path().
APP_CONFIG_PATH = get_app_config_path()


class AtlassianSdkConnectionConfig(BaseModel):
    """Connection parameters forwarded directly to the Atlassian SDK client constructors.

    Only fields that are valid constructor keyword arguments for
    atlassian.Confluence (ConfluenceApiSdk) and atlassian.Jira (JiraApiSdk)
    may be added here.
    """

    # --- Retry / backoff behaviour -------------------------------------------
    backoff_and_retry: bool = Field(
        default=True,
        title="Enable Retry",
        description="Enable or disable automatic retry with exponential backoff on network errors.",
    )
    backoff_factor: int = Field(
        default=2,
        title="Backoff Factor",
        description=(
            "Multiplier for exponential backoff between retries. "
            "For example, 2 means each retry waits twice as long as the previous."
        ),
    )
    max_backoff_seconds: int = Field(
        default=60,
        title="Max Backoff Seconds",
        description="Maximum number of seconds to wait between retries.",
    )
    max_backoff_retries: int = Field(
        default=5,
        title="Max Retries",
        description="Maximum number of retry attempts before giving up.",
    )
    # default_factory gives every model instance its own list object.
    retry_status_codes: list[int] = Field(
        default_factory=lambda: [413, 429, 502, 503, 504],
        title="Retry Status Codes",
        description="HTTP status codes that should trigger a retry.",
    )
    # --- Transport settings --------------------------------------------------
    verify_ssl: bool = Field(
        default=True,
        title="Verify SSL",
        description=(
            "Whether to verify SSL certificates for HTTPS requests. "
            "Set to False only if you are sure about the security of your connection."
        ),
    )
    timeout: int = Field(
        default=30,
        title="Request Timeout",
        description=(
            "Timeout in seconds for API requests. Prevents hanging on slow/unresponsive servers."
        ),
    )


class ConnectionConfig(AtlassianSdkConnectionConfig):
    """Full connection configuration, extending the Atlassian SDK config with app-level settings."""

    # App-level switch; per the base class docstring, only the inherited
    # fields are meant to be SDK constructor arguments.
    use_v2_api: bool = Field(
        default=False,
        title="Use Confluence v2 REST API",
        description=(
            "Enable Confluence REST API v2 endpoints where available. "
            "Supported by Atlassian Cloud and Confluence Data Center 8+. "
            "Must be disabled for older self-hosted Confluence Server instances."
        ),
    )
    # Read by the page export to size its worker pool; values <= 1 select the
    # serial export path.
    max_workers: int = Field(
        default=20,
        title="Max Workers",
        description=(
            "Maximum number of parallel workers for page export. "
            "Set to 1 for serial mode (useful for debugging). "
            "Higher values improve performance but may hit API rate limits."
        ),
    )


class ApiDetails(BaseModel):
    """API authentication credentials for a single instance.

    The instance URL is used as the dict key in AuthConfig, not stored here.
    """

    # Credentials are held as SecretStr; all three default to an empty secret.
    username: SecretStr = Field(
        default=SecretStr(""),
        title="Username (email)",
        description="Username or email for API authentication.",
    )
    api_token: SecretStr = Field(
        default=SecretStr(""),
        title="API Token",
        description=(
            "API token for authentication (if required). "
            "Create an Atlassian API token at "
            "https://id.atlassian.com/manage-profile/security/api-tokens. "
            "See Atlassian documentation for details."
        ),
    )
    pat: SecretStr = Field(
        default=SecretStr(""),
        title="Personal Access Token (PAT)",
        description=(
            "Personal Access Token for authentication. "
            "Set this if you use a PAT instead of username+API token. "
            "See your Atlassian instance documentation for how to create a PAT."
        ),
    )
    # Unlike the credentials above, the cloud ID is a plain string.
    cloud_id: str = Field(
        default="",
        title="Cloud ID",
        description=(
            "Atlassian Cloud ID for this instance. When set, API calls are routed through "
            "the Atlassian API gateway (https://api.atlassian.com/ex/confluence/{cloud_id}), "
            "which enables the use of scoped API tokens. "
            "For Atlassian Cloud instances this is fetched and stored automatically. "
            "To find your Cloud ID manually, see "
            "https://support.atlassian.com/jira/kb/retrieve-my-atlassian-sites-cloud-id/."
        ),
    )

    @field_validator("username", "api_token", "pat", mode="before")
    @classmethod
    def _single_line(cls, v: object) -> object:
        """Strip carriage returns and newlines from credential values.

        Runs before field validation. ``SecretStr`` inputs are unwrapped first;
        string values are returned with every CR/LF removed, any other value
        is returned unchanged.
        """
        raw = v.get_secret_value() if isinstance(v, SecretStr) else v
        if isinstance(raw, str):
            return raw.replace("\r", "").replace("\n", "")
        return v

    @field_serializer("username", "api_token", "pat", when_used="json")
    def dump_secret(self, v: SecretStr) -> str:
        """Serialize the secret as its plain value (applies to JSON output only)."""
        return v.get_secret_value()


class AuthConfig(BaseModel):
    """Authentication configuration for Confluence and Jira.

    Credentials are stored in dicts keyed by the instance base URL
    (e.g. ``"https://company.atlassian.net"``).  No "active" pointer is kept —
    the right instance is selected by matching the URL of the page or space
    being exported.
    """

    confluence: dict[str, ApiDetails] = Field(
        default_factory=dict,
        title="Confluence Accounts",
        description=(
            "Confluence authentication credentials keyed by instance base URL. "
            "Example key: 'https://company.atlassian.net'"
        ),
    )
    jira: dict[str, ApiDetails] = Field(
        default_factory=dict,
        title="Jira Accounts",
        description=(
            "Jira authentication credentials keyed by instance base URL. "
            "Example key: 'https://company.atlassian.net'"
        ),
    )

    @model_validator(mode="before")
    @classmethod
    def _migrate(cls, data: object) -> object:  # noqa: C901, PLR0912
        """Migrate legacy config formats to the current URL-keyed dict format.

        Also normalises all instance URL keys (strips trailing slashes) so that
        entries written with and without a trailing slash are treated as identical.

        Non-dict input is returned untouched. Dict input is modified in place
        and returned.
        """
        if not isinstance(data, dict):
            return data
        for service in ("confluence", "jira"):
            val = data.get(service)
            if not isinstance(val, dict):
                continue
            # Legacy v1: single ApiDetails with a 'url' field at the top level
            # e.g. {"url": "https://...", "username": "...", ...}
            if "url" in val and not _looks_like_url_keyed(val):
                url = val.pop("url", "") or ""
                # Remove stale active_* fields that were in the same dict
                val.pop("active_confluence", None)
                val.pop("active_jira", None)
                data[service] = {url.rstrip("/"): val} if url else {}
            # Legacy v2: named-key dict from the previous multi-instance refactor.
            # e.g. {"default": {"url": "https://...", ...}, "active_confluence": "default"}
            elif not _looks_like_url_keyed(val):
                migrated: dict = {}
                for k, v in val.items():
                    if k in ("active_confluence", "active_jira"):
                        continue
                    if isinstance(v, dict):
                        inner_url = v.pop("url", "") or ""
                        if inner_url:
                            migrated[inner_url.rstrip("/")] = v
                        elif v:
                            migrated[k] = v  # keep as-is if no URL
                if migrated:
                    data[service] = migrated
            else:
                # Current URL-keyed format: normalise any trailing slashes on existing keys
                normalised: dict = {}
                for k, v in val.items():
                    normalised[k.rstrip("/")] = v
                data[service] = normalised
        # Drop top-level active_* fields that were stored in auth
        data.pop("active_confluence", None)
        data.pop("active_jira", None)
        return data

    def get_instance(self, url: str) -> ApiDetails | None:
        """Return the Confluence ApiDetails whose key matches *url* (exact or host match)."""
        url = normalize_instance_url(url)
        return self.confluence.get(url) or self._match_by_host(self.confluence, url)

    def get_jira_instance(self, url: str) -> ApiDetails | None:
        """Return the Jira ApiDetails whose key matches *url* (exact or host match)."""
        url = normalize_instance_url(url)
        return self.jira.get(url) or self._match_by_host(self.jira, url)

    def default_confluence_url(self) -> str | None:
        """Return the URL of the only configured Confluence instance, or None if 0 or 2+."""
        return next(iter(self.confluence)) if len(self.confluence) == 1 else None

    def default_jira_url(self) -> str | None:
        """Return the URL of the only configured Jira instance, or None if 0 or 2+."""
        return next(iter(self.jira)) if len(self.jira) == 1 else None

    @staticmethod
    def _match_by_host(instances: dict[str, ApiDetails], url: str) -> ApiDetails | None:
        """Find the first stored instance whose host (and context path) covers *url*.

        Hostname and port must be equal. A key without a context path matches
        any path on that host. A key with a context path matches only when
        that path equals the lookup path or is a whole-segment prefix of it,
        so ``https://host/wiki`` does not match ``https://host/wiki-test/...``.
        ``api.atlassian.com`` gateway URLs are never matched by host.

        Args:
            instances: Stored credentials keyed by instance URL.
            url: The URL to look up.

        Returns:
            The matching credentials, or None when nothing matches.
        """
        import urllib.parse

        parsed = urllib.parse.urlparse(url)
        host = parsed.hostname or url
        # Gateway URLs must match exactly — multiple tenants share api.atlassian.com.
        if host == "api.atlassian.com":
            return None
        for key, details in instances.items():
            key_parsed = urllib.parse.urlparse(key)
            # Skip gateway-style keys when doing hostname-only matching
            if key_parsed.hostname == "api.atlassian.com":
                continue
            if key_parsed.hostname != host or key_parsed.port != parsed.port:
                continue
            # Key stored without a context path matches any context path on the same host
            # (e.g. stored as "https://host", URL is "https://host/confluence/spaces/...")
            if not key_parsed.path.strip("/"):
                return details
            # Key stored with a context path must be a prefix of the lookup URL's path
            # (e.g. stored as "https://host/confluence", URL is "https://host/confluence/spaces/...")
            # The prefix must end on a path-segment boundary; a plain startswith()
            # would let "/wiki" capture "/wiki-test/..." and return the wrong credentials.
            key_path = key_parsed.path.rstrip("/")
            if parsed.path == key_path or parsed.path.startswith(f"{key_path}/"):
                return details
        return None


def _looks_like_url_keyed(d: dict) -> bool:
    """Return True if the dict looks like it's already keyed by URLs (not by field names)."""
    return any(k.startswith(("http://", "https://")) for k in d)


def normalize_instance_url(url: str) -> str:
    """Return *url* without any trailing ``/`` characters.

    Used so that instance URLs are stored and looked up under one
    consistent key form.
    """
    trimmed = url
    while trimmed.endswith("/"):
        trimmed = trimmed[:-1]
    return trimmed


class ExportConfig(BaseModel):
    """Export settings for markdown and attachments.

    Each field carries a ``title`` and ``description`` (and sometimes
    ``examples``) in its ``Field`` declaration. Several "before" validators
    migrate legacy option names to their current replacements; they work on a
    copy of the incoming data so the caller's dict is never mutated.
    """

    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = Field(
        default="INFO",
        title="Log Level",
        description=(
            "Controls how much output the exporter prints. "
            "DEBUG shows every step, INFO shows key milestones, "
            "WARNING shows only warnings and errors, ERROR shows only errors. "
            "In CI environments (CI=true / NO_COLOR set) rich formatting is suppressed "
            "automatically."
        ),
    )
    save_log_to_file: bool = Field(
        default=False,
        title="Save Log To File",
        description=(
            "Also write log records to a file alongside the console output. "
            "The file is named 'cme.log' and lives next to the config file "
            "(see 'cme config path'). Useful for capturing long DEBUG runs."
        ),
    )
    output_path: Path = Field(
        default=Path(),
        title="Output Path",
        description=("Directory where exported pages and attachments will be saved."),
        examples=[
            "`.`: Output will be saved relative to the current working directory.",
            (
                "`./confluence_export`: Output will be saved in a folder `confluence_export` "
                "relative to the current working directory."
            ),
            "`/path/to/export`: Output will be saved in the specified absolute path.",
        ],
    )
    page_href: Literal["absolute", "relative", "wiki"] = Field(
        default="relative",
        title="Page Href Style",
        description=(
            "How to generate page href paths. Options: absolute, relative, wiki.\n"
            "  - `relative` links are relative to the page\n"
            "  - `absolute` links start from the configured output path\n"
            "  - `wiki` generates Obsidian-style [[Page Title]] wiki links"
        ),
    )
    page_path: str = Field(
        default="{space_name}/{homepage_title}/{ancestor_titles}/{page_title}.md",
        title="Page Path Template",
        description=(
            "Template for exported page file paths.\n"
            "Available variables:\n"
            "  - {space_key}: The key of the Confluence space.\n"
            "  - {space_name}: The name of the Confluence space.\n"
            "  - {homepage_id}: The ID of the homepage of the Confluence space.\n"
            "  - {homepage_title}: The title of the homepage of the Confluence space.\n"
            "  - {ancestor_ids}: A slash-separated list of ancestor page IDs.\n"
            "  - {ancestor_titles}: A slash-separated list of ancestor page titles.\n"
            "  - {page_id}: The unique ID of the Confluence page.\n"
            "  - {page_title}: The title of the Confluence page.\n"
        ),
        examples=["{space_name}/{page_title}.md"],
    )
    attachment_href: Literal["absolute", "relative", "wiki"] = Field(
        default="relative",
        title="Attachment Href Style",
        description=(
            "How to generate attachment href paths. Options: absolute, relative, wiki.\n"
            "  - `relative` links are relative to the page\n"
            "  - `absolute` links start from the configured output path\n"
            "  - `wiki` generates Obsidian-style ![[Attachment Name]] wiki links"
        ),
    )
    attachment_path: str = Field(
        default="{space_name}/attachments/{attachment_file_id}{attachment_extension}",
        title="Attachment Path Template",
        description=(
            "Template for exported attachment file paths.\n"
            "Available variables:\n"
            "  - {space_key}: The key of the Confluence space.\n"
            "  - {space_name}: The name of the Confluence space.\n"
            "  - {homepage_id}: The ID of the homepage of the Confluence space.\n"
            "  - {homepage_title}: The title of the homepage of the Confluence space.\n"
            "  - {ancestor_ids}: A slash-separated list of ancestor page IDs.\n"
            "  - {ancestor_titles}: A slash-separated list of ancestor page titles.\n"
            "  - {attachment_id}: The unique ID of the attachment.\n"
            "  - {attachment_title}: The title of the attachment (without file extension).\n"
            "  - {attachment_file_id}: The file ID of the attachment. Falls back to "
            "{attachment_id} on Confluence Data Center / Server, where the API does "
            "not provide a file ID.\n"
            "  - {attachment_extension}: The file extension of the attachment,\n"
            "including the leading dot."
        ),
        examples=["{space_name}/attachments/{attachment_file_id}{attachment_extension}"],
    )

    @field_validator("attachment_path", mode="before")
    @classmethod
    def _migrate_attachment_path(cls, v: object) -> object:
        """Migrate templates that used {attachment_title} as the full filename.

        Before this change, {attachment_title} included the file extension.
        Templates that relied on that (i.e. no explicit {attachment_extension})
        are silently updated so file extensions are preserved.
        """
        if (
            isinstance(v, str)
            and "{attachment_title}" in v
            and "{attachment_extension}" not in v
        ):
            return v.replace("{attachment_title}", "{attachment_title}{attachment_extension}")
        return v

    attachments_export: Literal["referenced", "all", "disabled"] = Field(
        default="referenced",
        title="Attachments Export",
        description=(
            "Which attachments to download to disk:\n"
            "  referenced: only attachments referenced from the page body (default)\n"
            "  all: every attachment on the page (slower, more disk and bandwidth)\n"
            "  disabled: skip the download entirely - no files written, no lockfile\n"
            "    entries, no lockfile lookup. Attachment metadata is still fetched\n"
            "    from the Confluence API so image and file links in the page body\n"
            "    continue to resolve, but the referenced files will not exist locally."
        ),
    )
    image_captions: bool = Field(
        default=False,
        title="Image Captions",
        description=(
            "Whether to export Confluence image captions in the exported Markdown.\n"
            "When enabled, the storage format of each page is fetched via an additional "
            "API expansion to extract caption text from `ac:image` elements.\n"
            "Captions are rendered as an italic line directly below the image:\n"
            "  ![](image.png)\n"
            "  *Caption text*"
        ),
    )
    page_breadcrumbs: bool = Field(
        default=True,
        title="Page Breadcrumbs",
        description="Whether to include breadcrumb links at the top of the page.",
    )
    page_properties_format: Literal[
        "frontmatter",
        "table",
        "frontmatter_and_table",
        "dataview-inline-field",
        "meta-bind-view-fields",
    ] = Field(
        default="frontmatter_and_table",
        title="Page Properties Format",
        description=(
            "How to render Confluence Page Properties macros (Page Properties macro).\n"
            "  frontmatter: extract to YAML front matter only (table removed from content)\n"
            "  table: keep as markdown table only (no metadata)\n"
            "  frontmatter_and_table: front matter + keep original table in content (default)\n"
            "  dataview-inline-field: replace table with Dataview Key:: Value inline fields\n"
            "  meta-bind-view-fields: front matter + Meta Bind VIEW fields inline (requires plugin)"
        ),
    )
    page_properties_report_format: Literal["frozen", "dataview"] = Field(
        default="frozen",
        title="Page Properties Report Format",
        description=(
            "How to render Confluence Page Properties Report macros.\n"
            "  frozen: export the rendered table as a static markdown table (default)\n"
            "  dataview: translate the CQL query to an Obsidian Dataview DQL code block;\n"
            "    requires the Dataview plugin and all referenced child pages to be exported\n"
            "    with their page properties as frontmatter; falls back to frozen on failure"
        ),
    )
    confluence_url_in_frontmatter: Literal["none", "webui", "tinyui", "both"] = Field(
        default="none",
        title="Confluence URL in Front Matter",
        description=(
            "Whether to include the original Confluence page URL in YAML front matter.\n"
            "  none: do not include (default)\n"
            "  webui: include human-readable URL as `confluence_webui_url`\n"
            "  tinyui: include stable short permalink as `confluence_tinyui_url`\n"
            "  both: include both fields\n"
            "If a Page Properties macro already defines one of these keys, "
            "the macro value takes precedence."
        ),
    )
    page_metadata_in_frontmatter: bool = Field(
        default=False,
        title="Page Metadata in Front Matter",
        description=(
            "If True, add eight Confluence page metadata fields to the YAML "
            "front matter of each exported page: confluence_page_id, "
            "confluence_space_key, confluence_type (page or blogpost), "
            "confluence_created (ISO 8601, original creation timestamp), "
            "confluence_created_by (display name of the original author), "
            "confluence_last_modified (ISO 8601, value of the most recent "
            "version including minor edits), confluence_last_modified_by "
            "(display name), confluence_version (integer). Existing keys "
            "with the same name on the page (e.g. via a Page Properties "
            "macro) take precedence."
        ),
    )

    @model_validator(mode="before")
    @classmethod
    def _migrate_page_properties(cls, data: object) -> object:
        """Migrate legacy page_properties_as_front_matter bool to page_properties_format.

        A falsy legacy value ("false"/"0", case-insensitive) maps to "table",
        anything else to "frontmatter". An explicit page_properties_format
        always wins over the legacy key.
        """
        if not isinstance(data, dict) or "page_properties_as_front_matter" not in data:
            return data
        # Shallow copy so the caller's dict is left untouched.
        data = dict(data)
        old_val = data.pop("page_properties_as_front_matter")
        if old_val is not None and "page_properties_format" not in data:
            if str(old_val).lower() in ("false", "0"):
                data["page_properties_format"] = "table"
            else:
                data["page_properties_format"] = "frontmatter"
        return data

    @model_validator(mode="before")
    @classmethod
    def _migrate_attachments_export(cls, data: object) -> object:
        """Migrate legacy attachment_export_all bool to attachments_export literal.

        A truthy legacy value ("true"/"1", case-insensitive) maps to "all",
        anything else to "referenced". An explicit attachments_export always
        wins over the legacy key.
        """
        if not isinstance(data, dict) or "attachment_export_all" not in data:
            return data
        # Shallow copy so the caller's dict is left untouched.
        data = dict(data)
        old_val = data.pop("attachment_export_all")
        if old_val is not None and "attachments_export" not in data:
            data["attachments_export"] = (
                "all" if str(old_val).lower() in ("true", "1") else "referenced"
            )
        return data

    @model_validator(mode="before")
    @classmethod
    def _migrate_inline_comments(cls, data: object) -> object:
        """Migrate legacy inline_comments bool to comments_export literal.

        A truthy legacy value ("true"/"1", case-insensitive) maps to "inline",
        anything else to "none". An explicit comments_export always wins over
        the legacy key.
        """
        if not isinstance(data, dict) or "inline_comments" not in data:
            return data
        # Shallow copy so the caller's dict is left untouched.
        data = dict(data)
        old_val = data.pop("inline_comments")
        if old_val is not None and "comments_export" not in data:
            data["comments_export"] = (
                "inline" if str(old_val).lower() in ("true", "1") else "none"
            )
        return data

    filename_encoding: str = Field(
        default='"<":"_",">":"_",":":"_","\\"":"_","/":"_","\\\\":"_","|":"_","?":"_","*":"_","\\u0000":"_","[":"_","]":"_","\'":"_","’":"_","´":"_","`":"_"',  # noqa: RUF001
        title="Filename Encoding",
        description=(
            "List character-to-replacement pairs, separated by commas. "
            'Each pair is written as "character":"replacement". '
            "Leave empty to disable all character replacements."
        ),
        examples=[
            '" ":"-","-":"%2D"',  # Replace spaces with dash and dashes with %2D
            '"=":" equals "',  # Replace equals sign with " equals "
        ],
    )
    filename_length: int = Field(
        default=255,
        title="Filename Length",
        description="Maximum length of the filename.",
    )
    filename_lowercase: bool = Field(
        default=False,
        title="Enforce lowercase paths",
        description=(
            "Make all paths/files lowercase.\nBy default the original casing will be retained.\n"
        ),
    )
    include_document_title: bool = Field(
        default=True,
        title="Include Document Title",
        description=(
            "Whether to include the document title in the exported markdown file. "
            "If enabled, the title will be added as a top-level heading."
        ),
    )
    include_toc: bool = Field(
        default=True,
        title="Export Table of Contents",
        description=(
            "Whether to export the Confluence Table of Contents macro. "
            "When enabled (default), the TOC is converted to markdown. "
            "When disabled, the TOC macro is removed from the output."
        ),
    )
    include_macro: Literal["inline", "transclusion"] = Field(
        default="inline",
        title="Include Macro Rendering",
        description=(
            "How to render Confluence `include` and `excerpt-include` macros.\n"
            "  inline: expand the referenced page content inline (default)\n"
            "  transclusion: emit an Obsidian-style `![[Page Title]]` embed link;\n"
            "    the referenced page must also be exported for the link to resolve"
        ),
    )
    enable_jira_enrichment: bool = Field(
        default=True,
        title="Enable Jira Enrichment",
        description=(
            "Whether to fetch Jira issue data to enrich Confluence pages. "
            "When enabled, Jira issue links will include the issue summary. "
            "When disabled, only the issue key and link will be included. "
            "Requires Jira auth to be configured."
        ),
    )
    comments_export: Literal["none", "inline", "footer", "all"] = Field(
        default="none",
        title="Export Comments",
        description=(
            "Which comments to export to a sidecar '.comments.md' file placed "
            "next to the exported page file. "
            "'none' — no sidecar. "
            "'inline' — open inline comments only (annotated text shown as a "
            "blockquote, then author/date/body). "
            "'footer' — open page-level (footer) comments only. "
            "'all' — both, in a single sidecar with two sections "
            "('## Inline comments' first, then '## Page comments'). "
            "Resolved comments are skipped. Replies are listed flat below "
            "the parent comment. Disabled by default — adds one to two extra "
            "API calls per page when enabled."
        ),
    )
    convert_status_badges: bool = Field(
        default=True,
        title="Convert Status Badges",
        description=(
            "Whether to convert Confluence status badge macros "
            "(<span class=\"status-macro ...\"/>) "
            "to HTML <mark> elements coloured with the badge's background colour. "
            "When disabled, only the badge label text is kept."
        ),
    )
    convert_text_highlights: bool = Field(
        default=True,
        title="Convert Text Highlights",
        description=(
            "Whether to convert Confluence text highlights "
            "(<span style=\"background-color: rgb(...);\"/>) "
            "to HTML <mark> elements with a hex color. "
            "When disabled, the highlight span is stripped and only the text is kept."
        ),
    )
    convert_font_colors: bool = Field(
        default=True,
        title="Convert Font Colors",
        description=(
            "Whether to convert Confluence font colors "
            "(<span data-colorid=\"...\"/> or <span style=\"color: rgb(...);\"/>) "
            "to HTML <font> elements with a hex color. "
            "When disabled, the color span is stripped and only the text is kept."
        ),
    )
    skip_unchanged: bool = Field(
        default=True,
        title="Skip Unchanged Pages",
        description=(
            "Skip exporting pages that have not changed since last export."
            " Uses a lockfile to track page versions."
        ),
    )
    cleanup_stale: bool = Field(
        default=True,
        title="Cleanup Stale Files",
        description=(
            "After export, delete local files for pages that have been removed "
            "from Confluence or whose export path has changed."
        ),
    )
    lockfile_name: str = Field(
        default="confluence-lock.json",
        title="Lock File Name",
        description="Name of the lock file used to track exported pages.",
    )
    existence_check_batch_size: int = Field(
        default=250,
        title="Existence Check Batch Size",
        description=(
            "Number of page IDs per batch when verifying page existence during cleanup. "
            "For self-hosted Confluence (CQL), this is internally capped at 25."
        ),
    )


class ConfigModel(BaseModel):
    """Root model of the persisted application configuration.

    Used for persistence only: it groups the export, connection and
    authentication sections, each defaulting to a freshly built sub-model.
    """

    # Export behaviour (paths, formats, feature toggles).
    export: ExportConfig = Field(
        default_factory=ExportConfig,
        title="Export Settings",
    )
    # Connection settings section.
    connection_config: ConnectionConfig = Field(
        default_factory=ConnectionConfig,
        title="Connection Configuration",
    )
    # Authentication section.
    auth: AuthConfig = Field(
        default_factory=AuthConfig,
        title="Authentication",
    )


class _JsonConfigSource(PydanticBaseSettingsSource):
    """Settings source backed by the JSON config file (lower priority than ENV vars)."""

    def get_field_value(self, field: Any, field_name: str) -> Any:  # noqa: ANN401
        # Per-field lookup is not used; the whole config is produced by __call__.
        return None, field_name, False

    def field_is_complex(self, field: Any) -> bool:  # noqa: ANN401
        return True

    def __call__(self) -> dict[str, Any]:
        """Return the validated config file contents, or defaults on any failure."""
        if not APP_CONFIG_PATH.exists():
            return ConfigModel().model_dump()
        try:
            file_contents = APP_CONFIG_PATH.read_text(encoding="utf-8")
            parsed = json.loads(file_contents)
            return ConfigModel(**parsed).model_dump()
        except Exception:  # noqa: BLE001
            # Deliberately best-effort: an unreadable or invalid file yields defaults.
            return ConfigModel().model_dump()


class AppSettings(BaseSettings):
    """Effective application settings, where ENV vars win over the config file.

    Environment variables are prefixed with ``CME_`` and use a double
    underscore (``__``) to separate nested fields, mirroring the dot-notation
    config keys in upper case.  For example::

        CME_EXPORT__LOG_LEVEL=DEBUG
        CME_EXPORT__OUTPUT_PATH=/tmp/export
        CME_CONNECTION_CONFIG__MAX_WORKERS=5
        CME_CONNECTION_CONFIG__VERIFY_SSL=false
    """

    model_config = SettingsConfigDict(
        env_prefix="CME_",
        env_nested_delimiter="__",
        extra="ignore",
        populate_by_name=True,
    )

    export: ExportConfig = Field(
        default_factory=ExportConfig,
        title="Export Settings",
    )
    connection_config: ConnectionConfig = Field(
        default_factory=ConnectionConfig,
        title="Connection Configuration",
    )
    auth: AuthConfig = Field(
        default_factory=AuthConfig,
        title="Authentication",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,  # noqa: ARG003
        file_secret_settings: PydanticBaseSettingsSource,  # noqa: ARG003
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Order the sources: init values, then ENV vars, then the JSON config file.

        The dotenv and file-secret sources are intentionally left out.
        """
        json_file_source = _JsonConfigSource(settings_cls)
        ordered_sources = (init_settings, env_settings, json_file_source)
        return ordered_sources


def load_app_data() -> dict[str, dict]:
    """Load application data from the config file, returning a validated dict.

    Falls back to the default configuration when the file is missing, is not
    valid JSON, does not contain a JSON object at the top level, or fails
    model validation.
    """
    data: object = {}
    if APP_CONFIG_PATH.exists():
        with contextlib.suppress(json.JSONDecodeError, ValueError):
            data = json.loads(APP_CONFIG_PATH.read_text(encoding="utf-8"))
    # Valid JSON is not necessarily an object (e.g. "[]" or "null"); unpacking
    # such a value with ** would raise TypeError instead of falling back.
    if not isinstance(data, dict):
        return ConfigModel().model_dump()
    try:
        return ConfigModel(**data).model_dump()
    except ValidationError:
        return ConfigModel().model_dump()


def save_app_data(config_model: ConfigModel) -> None:
    """Write *config_model* to the config file as indented JSON (UTF-8)."""
    # Serialization goes through Pydantic's model_dump_json so that SecretStr
    # fields are handled by the model's own serialization rules.
    APP_CONFIG_PATH.write_text(
        config_model.model_dump_json(indent=2),
        encoding="utf-8",
    )


def get_settings() -> AppSettings:
    """Build and return the effective settings (ENV vars override stored config)."""
    effective_settings = AppSettings()
    return effective_settings


def _set_by_path(obj: dict, path: str, value: object) -> None:
    """Set a value in a nested dict using dot notation path."""
    keys = path.split(".")
    current = obj
    for k in keys[:-1]:
        if k not in current or not isinstance(current[k], dict):
            current[k] = {}
        current = current[k]
    current[keys[-1]] = value


def _set_by_keys(obj: dict, keys: list[str], value: object) -> None:
    """Set a value in a nested dict using an explicit list of key components."""
    current = obj
    for k in keys[:-1]:
        if k not in current or not isinstance(current[k], dict):
            current[k] = {}
        current = current[k]
    current[keys[-1]] = value


def set_setting(path: str, value: object) -> None:
    """Update one setting addressed by dot-path and persist the result.

    Raises:
        ValueError: If the updated configuration fails model validation.
    """
    config = load_app_data()
    _set_by_path(config, path, value)
    try:
        validated = ConfigModel.model_validate(config)
    except ValidationError as exc:
        # Surface validation problems as a plain ValueError for callers.
        raise ValueError(str(exc)) from exc
    save_app_data(validated)


def set_setting_with_keys(keys: list[str], value: object) -> None:
    """Update one setting addressed by explicit path components and persist it.

    Prefer this over ``set_setting`` whenever a path component itself contains
    dots (e.g. a URL used as a dict key:
    ``["auth", "confluence", "https://x.y", "username"]``).

    Raises:
        ValueError: If the updated configuration fails model validation.
    """
    config = load_app_data()
    _set_by_keys(config, keys, value)
    try:
        validated = ConfigModel.model_validate(config)
    except ValidationError as exc:
        # Surface validation problems as a plain ValueError for callers.
        raise ValueError(str(exc)) from exc
    save_app_data(validated)


def get_default_value_by_path(path: str | None = None) -> object:
    """Get the default value for a given config path, or the whole config if path is None.

    Args:
        path: Dot-separated config path (e.g. ``"export.log_level"``). An
            empty or missing path selects the entire default config.

    Returns:
        The default value; sub-models are returned as plain dicts.

    Raises:
        KeyError: If a path component is neither a declared model field nor
            a key of a dict value.
    """
    model = ConfigModel()
    if not path:
        return model.model_dump()
    current: object = model
    for k in path.split("."):
        # Only follow declared fields. A bare hasattr() check would also
        # resolve methods such as "model_dump" or dict attributes like "items".
        if isinstance(current, BaseModel) and k in type(current).model_fields:
            current = getattr(current, k)
        elif isinstance(current, dict) and k in current:
            current = current[k]
        else:
            msg = f"Invalid config path: {path}"
            raise KeyError(msg)
    if isinstance(current, BaseModel):
        return current.model_dump()
    return current


def reset_to_defaults(path: str | None = None) -> None:
    """Restore defaults for the whole config, one section, or one option.

    With ``path=None`` the entire config file is rewritten with defaults.
    Otherwise only the value at the dot-separated *path* is replaced by its
    default, and the result is validated and saved.
    """
    if path is not None:
        config = load_app_data()
        _set_by_path(config, path, get_default_value_by_path(path))
        save_app_data(ConfigModel.model_validate(config))
        return
    save_app_data(ConfigModel())


================================================
FILE: confluence_markdown_exporter/utils/config_interactive.py
================================================
from pathlib import Path
from typing import Literal
from typing import get_args
from typing import get_origin

import jmespath
import questionary
from pydantic import BaseModel
from pydantic import SecretStr
from pydantic import ValidationError
from questionary import Choice
from questionary import Style

from confluence_markdown_exporter.api_clients import ensure_service_gateway_url
from confluence_markdown_exporter.utils.app_data_store import ConfigModel
from confluence_markdown_exporter.utils.app_data_store import get_app_config_path
from confluence_markdown_exporter.utils.app_data_store import get_settings
from confluence_markdown_exporter.utils.app_data_store import reset_to_defaults
from confluence_markdown_exporter.utils.app_data_store import save_app_data
from confluence_markdown_exporter.utils.app_data_store import set_setting
from confluence_markdown_exporter.utils.app_data_store import set_setting_with_keys

# Shared questionary style passed to every prompt in this module. The "key" and
# "value" classes are referenced from Choice titles as "class:key" / "class:value".
custom_style = Style(
    [
        ("key", "fg:#00b8d4 bold"),  # cyan bold for key
        ("value", "fg:#888888 italic"),  # gray italic for value
        ("pointer", "fg:#00b8d4 bold"),
        ("highlighted", "fg:#00b8d4 bold"),
    ]
)


def _get_field_type(model: type[BaseModel], key: str) -> type | None:
    """Return the declared annotation of field *key* on *model*.

    Reads ``model_fields`` when the model exposes it (Pydantic v2) and falls
    back to the raw class annotations otherwise.
    """
    if not hasattr(model, "model_fields"):
        return model.__annotations__[key]
    field_info = model.model_fields[key]
    return field_info.annotation


def _get_submodel(model: type[BaseModel], key: str) -> type[BaseModel] | None:
    """Return the annotation of field *key* if it is a BaseModel subclass, else None."""
    annotation = (
        model.model_fields[key].annotation
        if hasattr(model, "model_fields")
        else model.__annotations__[key]
    )
    if not isinstance(annotation, type):
        return None
    try:
        is_model = issubclass(annotation, BaseModel)
    except TypeError:
        # annotation is not a class or not suitable for issubclass
        return None
    return annotation if is_model else None


def _get_field_metadata(model: type[BaseModel], key: str) -> dict:
    """Return the ``title``, ``description`` and ``examples`` of a model field.

    Args:
        model: The Pydantic model class that declares the field.
        key: Field name, or a dot-separated path whose last component is the
            field name.

    Returns:
        A dict with the keys ``title``, ``description`` and ``examples``;
        each value is None when the field does not define it.

    Raises:
        KeyError: If the field is not declared on *model*.
        AttributeError: If *model* does not expose ``model_fields``.
    """
    # Support dot-separated paths for nested fields: only the last part names the field.
    field_name = key.rsplit(".", 1)[-1]
    # The former "Pydantic v1 fallback" read the same ``model_fields`` attribute
    # and could therefore never succeed; a single lookup raises the same errors.
    field = model.model_fields[field_name]
    return {
        "title": getattr(field, "title", None),
        "description": getattr(field, "description", None),
        "examples": getattr(field, "examples", None),
    }


def _format_prompt_message(key_name: str, model: type[BaseModel]) -> str:
    """Build the multi-line prompt text shown when editing a single field.

    The message consists of the field title (falling back to *key_name* when
    the field has no title), its description, any examples as a bulleted
    list, and a closing "Change ... to:" instruction.
    """
    meta = _get_field_metadata(model, key_name)
    # Use the same fallback for the header and the closing instruction so an
    # untitled field never renders as "Change None to:".
    title = meta["title"] or key_name
    lines = [f"{title}\n"]

    # Description
    if meta["description"]:
        lines.append(meta["description"])

    # Examples
    examples = meta["examples"]
    if examples and isinstance(examples, list | tuple):
        lines.append("\nExamples:")
        lines.extend(f"  • {example}" for example in examples)

    # Instruction
    lines.append(f"\nChange {title} to:")
    return "\n".join(lines)


def _validate_int(val: str) -> bool | str:
    return val.isdigit() or "Must be an integer"


def _validate_pydantic(val: object, model: type[BaseModel], key_name: str) -> bool | str:
    """Check *val* for field *key_name* by validating a default model with it applied.

    Returns True on success, or the message of the first validation error.
    """
    try:
        candidate = model().model_dump()
        candidate[key_name] = val
        model(**candidate)
    except ValidationError as exc:
        first_error = exc.errors()[0]
        return str(first_error["msg"])
    return True


def _prompt_literal(prompt_message: str, field_type: type, current_value: object) -> object:
    """Ask the user to pick one of the options of a ``Literal`` field type."""
    option_labels = [str(option) for option in get_args(field_type)]
    question = questionary.select(
        prompt_message,
        choices=option_labels,
        default=str(current_value),
        style=custom_style,
    )
    return question.ask()


def _prompt_bool(prompt_message: str, current_value: object) -> object:
    """Ask a yes/no question, defaulting to the truthiness of *current_value*."""
    question = questionary.confirm(
        prompt_message,
        default=bool(current_value),
        style=custom_style,
    )
    return question.ask()


def _prompt_path(
    prompt_message: str,
    current_value: object,
    model: type[BaseModel],
    key_name: str,
) -> object:
    """Ask for a filesystem path, validating the input against *model*'s field."""

    def _check(candidate: str) -> bool | str:
        return _validate_pydantic(candidate, model, key_name)

    question = questionary.path(
        prompt_message,
        default=str(current_value),
        validate=_check,
        style=custom_style,
    )
    return question.ask()


def _prompt_int(prompt_message: str, current_value: object) -> object:
    """Ask for an integer; return it as ``int``, or None if cancelled or unparsable."""
    raw_answer = questionary.text(
        prompt_message,
        default=str(current_value),
        validate=_validate_int,
        style=custom_style,
    ).ask()
    if raw_answer is None:
        return None
    try:
        return int(raw_answer)
    except ValueError:
        questionary.print("Invalid integer value.")
        return None


def _prompt_list(prompt_message: str, current_value: object) -> object:
    """Ask for a comma-separated list and convert items to the current element type.

    The element type is taken from the first item of *current_value* when it
    is a non-empty list, and defaults to ``str`` otherwise. Returns None when
    the prompt is cancelled or an item cannot be converted.
    """
    if isinstance(current_value, list):
        prefill = ",".join(str(item) for item in current_value)
        item_type = type(current_value[0]) if len(current_value) > 0 else str
    else:
        prefill = ""
        item_type = str

    raw_answer = questionary.text(
        prompt_message + " (comma-separated)",
        default=prefill,
        style=custom_style,
    ).ask()
    if raw_answer is None:
        return None

    # Tolerate surrounding brackets, stray commas and spaces in the input.
    cleaned = raw_answer.strip().lstrip("[").rstrip("]").strip(",").replace(" ", "")
    try:
        return [item_type(piece.strip()) for piece in cleaned.split(",") if piece.strip()]
    except ValueError:
        questionary.print("Input should be a list (e.g. 1,2,3 or [1,2,3]).")
        return None


def _prompt_str(
    prompt_message: str,
    current_value: object,
    model: type[BaseModel],
    key_name: str,
) -> object:
    """Ask for free text, validating the input against *model*'s field."""

    def _check(candidate: str) -> bool | str:
        return _validate_pydantic(candidate, model, key_name)

    question = questionary.text(
        prompt_message,
        default=str(current_value),
        validate=_check,
        style=custom_style,
    )
    return question.ask()


def get_model_by_path(model: type[BaseModel], path: str) -> type[BaseModel]:
    """Traverse a Pydantic model class using a dot-separated path and return the submodel class.

    For each path component that names a field, the traversal descends into
    the field's model class, or into ``SomeModel`` when the field is annotated
    as ``dict[str, SomeModel]``. Components that are not field names (e.g. an
    instance name inside such a dict) leave the current model unchanged.
    """
    for key in path.split("."):
        # Guard before any lookup: indexing model_fields with an instance name
        # would raise KeyError instead of leaving the model unchanged.
        if key not in getattr(model, "model_fields", {}):
            continue
        # Try direct submodel first, then dict[str, SomeModel].
        sub = _get_submodel(model, key)
        if sub is None:
            sub = _get_dict_value_model(model, key)
        if sub is not None:
            model = sub
    return model


def _get_dict_value_model(model: type[BaseModel], key: str) -> type[BaseModel] | None:
    """Return ``SomeModel`` when field *key* is annotated ``dict[str, SomeModel]``, else None."""
    if hasattr(model, "model_fields"):
        annotation = model.model_fields[key].annotation
    else:
        annotation = model.__annotations__.get(key)
    if annotation is None or get_origin(annotation) is not dict:
        return None
    type_args = get_args(annotation)
    if len(type_args) != 2:  # noqa: PLR2004
        return None
    value_type = type_args[1]
    if not isinstance(value_type, type):
        return None
    try:
        return value_type if issubclass(value_type, BaseModel) else None
    except TypeError:
        return None


def _edit_instance_fields(  # noqa: C901, PLR0912
    instance_key: str,
    instance_data: dict,
    item_model: type[BaseModel],
    parent_path_parts: list[str],
) -> str | None:
    """Edit the fields of a single named instance using set_setting_with_keys.

    This avoids the dot-split path system so URL keys (which contain dots)
    work correctly.

    Args:
        instance_key: Name of the instance being edited; shown in the prompts and used
            as a path segment when saving.
        instance_data: Current field values of the instance. Updated in place after
            each successful save.
        item_model: Model class used for field titles and for choosing the prompt type.
        parent_path_parts: Key path leading to the dict that contains the instance.

    Returns ``"__remove__"`` if the user chose to remove this instance, else ``None``.
    """
    # Remembers the last edited field so the menu cursor returns to it.
    selected_field: str | None = None
    while True:
        # Rebuild the menu on every pass so freshly saved values are displayed.
        choices = []
        for k, v in instance_data.items():
            # Fields whose value is None are not offered for editing.
            if v is None:
                continue
            try:
                meta = _get_field_metadata(item_model, k)
                display_title = meta["title"] if meta and meta["title"] else k
            except (KeyError, AttributeError):
                # Fall back to the raw key when no metadata can be resolved.
                display_title = k
            display_val = "Not set" if isinstance(v, str | SecretStr) and str(v) == "" else v
            choices.append(
                Choice(
                    title=[
                        ("class:key", str(display_title)),
                        ("class:value", f"  {display_val}"),
                    ],
                    value=k,
                )
            )
        choices.append(Choice(title="[Remove]", value="__remove__"))
        choices.append(Choice(title="[Back]", value="__back__"))
        field_key = questionary.select(
            f"Edit credentials for '{instance_key}':",
            choices=choices,
            style=custom_style,
            default=selected_field,
        ).ask()
        # ``ask()`` yielding None is handled the same way as choosing [Back].
        if field_key == "__back__" or field_key is None:
            return None
        if field_key == "__remove__":
            confirm = questionary.confirm(
                f"Remove instance '{instance_key}'?", default=False, style=custom_style
            ).ask()
            if confirm:
                # The caller performs the actual removal.
                return "__remove__"
            continue
        selected_field = field_key
        current_val = instance_data.get(field_key)
        # Retry loop: keep prompting until the value is saved or the user gives up.
        while True:
            new_val = _prompt_for_new_value(field_key, current_val, item_model)
            if new_val is not None:
                try:
                    set_setting_with_keys([*parent_path_parts, instance_key, field_key], new_val)
                    instance_data[field_key] = new_val
                    questionary.print(f"Updated {field_key}.")
                    # Offer cross-service sync for auth credential fields
                    if len(parent_path_parts) >= 2 and parent_path_parts[0] == "auth":  # noqa: PLR2004
                        _maybe_sync_auth_change(
                            parent_path_parts[1], instance_key, field_key, new_val, current_val
                        )
                    break
                except (ValueError, TypeError) as e:
                    questionary.print(f"Error: {e}")
                    retry = questionary.confirm("Try again?", style=custom_style).ask()
                    if not retry:
                        break
            else:
                # The prompt returned None: leave the field unchanged.
                break


# Maps each auth service key to the paired service that credentials can be copied to.
_SERVICE_PAIRS = {"confluence": "jira", "jira": "confluence"}


def _maybe_sync_new_instance(instance_url: str, parent_path_parts: list[str]) -> None:
    """Offer to copy a freshly configured instance's credentials to the paired service.

    Does nothing unless *parent_path_parts* starts with ``auth`` followed by a service
    listed in ``_SERVICE_PAIRS``. When the user confirms, the stored entry for
    *instance_url* is written under the paired service at its gateway URL.
    """
    is_auth_path = len(parent_path_parts) >= 2 and parent_path_parts[0] == "auth"  # noqa: PLR2004
    if not is_auth_path:
        return
    other_service = _SERVICE_PAIRS.get(parent_path_parts[1])
    if not other_service:
        return

    from confluence_markdown_exporter.api_clients import ensure_service_gateway_url

    target_url = ensure_service_gateway_url(instance_url, other_service)
    question = f"Also save the same credentials for {other_service.capitalize()} at '{target_url}'?"
    if not questionary.confirm(question, default=True, style=custom_style).ask():
        return

    # Walk the dumped settings down to the dict that holds the source instance.
    node: dict = get_settings().model_dump()
    for part in parent_path_parts:
        node = node[part]
    entry = node.get(instance_url)
    if not entry:
        return
    set_setting_with_keys(["auth", other_service, target_url], entry)
    questionary.print(f"auth.{other_service}.{target_url} updated to match.")


def _edit_instance_dict_loop(  # noqa: C901, PLR0912, PLR0915
    instances: dict,
    item_model: type[BaseModel],
    parent_key: str,
    new_instance_url: str | None = None,
) -> None:
    """Interactive loop for managing a dict[str, BaseModel] (URL-keyed instances).

    When *new_instance_url* is provided the loop skips the selection list and jumps
    directly to editing that specific URL (creating a blank entry first if needed).
    This is used when an export command detects missing auth for a known URL.

    Instance URLs are always stored in normalised form (surrounding whitespace and
    trailing slashes removed). The "add" prompt validates the *normalised* URL, so
    ``https://host/`` cannot silently overwrite an existing ``https://host`` entry and
    an input consisting only of slashes is rejected instead of creating an empty key.

    Args:
        instances: Mapping of instance URL to its field values; mutated in place.
        item_model: Model class of a single instance; ``item_model()`` provides the
            blank defaults for new entries.
        parent_key: Dot-separated settings path of the dict being edited.
        new_instance_url: Optional URL to open directly instead of showing the list.
    """
    parent_path_parts = parent_key.split(".")

    def _normalize(raw_url: str) -> str:
        """Return the canonical key form of a user-supplied URL."""
        return raw_url.strip().rstrip("/")

    def _create_blank(url: str) -> None:
        """Persist a default-valued instance under *url* and mirror it locally."""
        blank = item_model()
        set_setting_with_keys([*parent_path_parts, url], blank.model_dump())
        instances[url] = blank.model_dump()

    def _remove_instance(url: str) -> None:
        """Drop *url* from the local mapping and from the saved settings."""
        instances.pop(url, None)
        current = get_settings().model_dump()
        sub: dict = current
        for k in parent_path_parts:
            sub = sub[k]
        sub.pop(url, None)
        save_app_data(ConfigModel.model_validate(current))

    def _current_fields(url: str) -> dict:
        """Return the instance's field values as a plain dict."""
        current_val = instances.get(url, {})
        if not isinstance(current_val, dict):
            current_val = current_val.model_dump()  # type: ignore[union-attr]
        return current_val

    def _validate_new_url(raw_url: str) -> str | bool:
        """Validate against the same normalised form that is stored."""
        normalized = _normalize(raw_url)
        if not normalized:
            return "URL cannot be empty"
        if normalized in instances:
            return "Instance already exists"
        return True

    # If a specific URL was requested, jump straight to its editor and then return.
    # A value that normalises to an empty string falls through to the regular menu.
    target_url = _normalize(new_instance_url) if new_instance_url else ""
    if target_url:
        if target_url not in instances:
            _create_blank(target_url)
        result = _edit_instance_fields(
            target_url, _current_fields(target_url), item_model, parent_path_parts
        )
        if result == "__remove__":
            _remove_instance(target_url)
        else:
            _maybe_sync_new_instance(target_url, parent_path_parts)
        return

    while True:
        choices = [
            Choice(title=[("class:key", instance_url)], value=("edit", instance_url))
            for instance_url in instances
        ]
        choices.append(Choice(title="[Add instance]", value=("add", None)))
        choices.append(Choice(title="[Back]", value=("back", None)))

        # ``ask()`` may yield None; treat that like choosing [Back].
        action, instance_url = questionary.select(
            f"Manage instances for '{parent_key}':",
            choices=choices,
            style=custom_style,
        ).ask() or ("back", None)

        if action == "back" or action is None:
            return

        if action == "add":
            new_url = questionary.text(
                "Enter the base URL for the new instance (e.g. https://company.atlassian.net):",
                validate=_validate_new_url,
                style=custom_style,
            ).ask()
            if new_url:
                _create_blank(_normalize(new_url))
            continue

        if action == "edit" and instance_url:
            current_val = _current_fields(instance_url)
            result = _edit_instance_fields(
                instance_url,
                current_val,
                item_model,
                parent_path_parts,
            )
            if result == "__remove__":
                _remove_instance(instance_url)
                continue
            # Refresh from disk
            refreshed: dict = get_settings().model_dump()
            for k in parent_path_parts:
                refreshed = refreshed[k]
            instances[instance_url] = refreshed.get(instance_url, current_val)


def _main_config_menu(settings: dict, default: tuple[str, bool] | None = None) -> tuple:
    """Show the top-level config menu and return the selected ``(key, is_dict)`` pair.

    Dict-valued settings are listed as submenus, every other setting with its current
    value. Two extra entries allow resetting the config and exiting. ``(None, False)``
    is returned when the prompt yields nothing.
    """

    def _entry(field: str, value: object) -> Choice:
        meta = _get_field_metadata(ConfigModel, field)
        label = meta["title"] if meta and meta["title"] else field
        is_section = isinstance(value, dict)
        if is_section:
            rendered = "  [submenu]"
        else:
            shown = "Not set" if isinstance(value, str | SecretStr) and str(value) == "" else value
            rendered = f"  {shown}"
        return Choice(
            title=[("class:key", str(label)), ("class:value", rendered)],
            value=(field, is_section),
        )

    choices = [_entry(field, value) for field, value in settings.items()]
    choices += [
        Choice(title="[Reset config to defaults]", value=("__reset__", False)),
        Choice(title="[Exit]", value=("__exit__", False)),
    ]

    # Only pass a default that corresponds to one of the menu entries.
    default_value = None
    if default is not None:
        default_value = next(
            (c.value for c in choices if hasattr(c, "value") and c.value == default),
            None,
        )

    answer = questionary.select(
        f"Config file location: {get_app_config_path()}\n\nSelect a config to change (or reset):",
        choices=choices,
        style=custom_style,
        default=default_value,
    ).ask()
    return answer or (None, False)


def _prompt_for_new_value(  # noqa: PLR0911
    key_name: str,
    current_value: object,
    model: type[BaseModel],
) -> object:
    """Prompt for a new value using the prompt that fits the field's declared type.

    The type is looked up via ``_get_field_type``; when none is found the field is
    treated as a string. ``SecretStr`` values are unwrapped before being used as the
    default of the text prompt.
    """
    declared_type = _get_field_type(model, key_name)
    type_origin = get_origin(declared_type)
    message = _format_prompt_message(key_name, model)
    # Default to string if no type found
    resolved_type = str if declared_type is None else declared_type

    if type_origin is Literal:
        return _prompt_literal(message, resolved_type, current_value)
    if resolved_type is bool:
        return _prompt_bool(message, current_value)
    if resolved_type is Path:
        return _prompt_path(message, current_value, model, key_name)
    if resolved_type is int:
        return _prompt_int(message, current_value)
    if type_origin is list or resolved_type is list:
        return _prompt_list(message, current_value)
    if not isinstance(current_value, SecretStr):
        return _prompt_str(message, current_value, model, key_name)
    return _prompt_str(message, current_value.get_secret_value(), model, key_name)


def _maybe_sync_auth_change(
    service: str,
    instance_url: str,
    key: str,
    value_cast: object,
    previous_value: object,
) -> None:
    """After changing an auth credential, offer to sync it to the paired service instance.

    The paired service is looked up in ``_SERVICE_PAIRS``; any other *service* is ignored.

    Args:
        service: ``"confluence"`` or ``"jira"``.
        instance_url: The URL key of the instance being edited (may contain dots).
        key: The field name that changed (``"username"``, ``"api_token"``, or ``"pat"``).
        value_cast: The new value.
        previous_value: The old value (the prompt is skipped when it was empty).
    """
    other_service_key = _SERVICE_PAIRS.get(service)
    if other_service_key is None:
        return
    other_service = other_service_key.capitalize()

    # Only ask when replacing an existing (non-empty) value
    if isinstance(previous_value, SecretStr):
        if not previous_value.get_secret_value():
            return
    elif not previous_value:
        return

    # Imported locally, matching _maybe_sync_new_instance.
    from confluence_markdown_exporter.api_clients import ensure_service_gateway_url

    instance_url = ensure_service_gateway_url(instance_url, other_service_key)
    should_sync = questionary.confirm(
        f"Also apply this {key} change to the {other_service} instance '{instance_url}'?",
        default=True,
        style=custom_style,
    ).ask()
    if not should_sync:
        return
    try:
        set_setting_with_keys(["auth", other_service_key, instance_url, key], value_cast)
        questionary.print(f"auth.{other_service_key}.{instance_url}.{key} updated to match.")
    except (ValueError, TypeError) as e:
        # A failed sync is reported but does not undo the original change.
        questionary.print(f"Could not sync to {other_service}: {e}")


def _reset_and_reload(parent_key: str | None, display_title: str | None = None) -> None:
    """Reset the whole config, or one section of it, to defaults after confirmation.

    This function does not hand refreshed settings back to the caller; callers that
    hold a copy of the settings must re-read them via ``get_settings()`` afterwards.

    Args:
        parent_key: Dot-separated path of the section to reset. ``None`` or an empty
            string resets the entire config.
        display_title: Human-readable section name for the messages; falls back to
            *parent_key* when omitted.
    """
    if not parent_key:
        confirm_msg = "Are you sure you want to reset all config to defaults?"
        done_msg = "Config reset to defaults."
    else:
        section_label = display_title or parent_key
        confirm_msg = f"Are you sure you want to reset section '{section_label}' to defaults?"
        done_msg = f"Section '{section_label}' reset to defaults."
    if not questionary.confirm(confirm_msg, style=custom_style).ask():
        return
    reset_to_defaults(parent_key or None)
    questionary.print(done_msg)


def _get_choices(config_dict: dict, model: type[BaseModel]) -> list:
    """Build the menu entries for one config section.

    Entries whose value is ``None`` are left out. Dict values are shown as submenus,
    everything else with its current value. The list always ends with the
    reset-section and back entries.
    """

    def _value_label(value: object) -> str:
        if isinstance(value, dict):
            return "  [submenu]"
        shown = "Not set" if isinstance(value, str | SecretStr) and str(value) == "" else value
        return f"  {shown}"

    menu = []
    for field_name, field_value in config_dict.items():
        if field_value is None:
            continue
        meta = _get_field_metadata(model, field_name)
        heading = meta["title"] if meta and meta["title"] else field_name
        menu.append(
            Choice(
                title=[
                    ("class:key", str(heading)),
                    ("class:value", _value_label(field_value)),
                ],
                value=field_name,
            )
        )
    menu.extend(
        [
            Choice(title="[Reset this group to defaults]", value="__reset_section__"),
            Choice(title="[Back]", value="__back__"),
        ]
    )
    return menu


def _edit_dict_config_loop(  # noqa: C901, PLR0912, PLR0915
    config_dict: dict,
    model: type[BaseModel],
    parent_key: str,
    parent_model: type[BaseModel],
    last_selected: str | None = None,
) -> str | None:
    """Interactive editor for one config section.

    Args:
        config_dict: Current values of the section; updated in place as values change
            or the section is reloaded.
        model: Model class describing the fields of this section.
        parent_key: Dot-separated settings path of this section.
        parent_model: Model class that contains this section; used to look up the
            section's display title.
        last_selected: Key to place the menu cursor on initially.

    Returns:
        The key that was selected last, returned when the user leaves the menu.
    """
    selected_key = last_selected
    while True:
        choices = _get_choices(config_dict, model)
        meta = None
        if hasattr(parent_model, "model_fields") and parent_key:
            meta = _get_field_metadata(parent_model, parent_key)
        display_title = meta["title"] if meta and meta["title"] else parent_key
        # NOTE(review): ``selected_key`` is passed as the default without checking that
        # it is among ``choices`` — confirm how questionary handles an unknown default.
        key = questionary.select(
            f"Edit options for '{display_title}':",
            choices=choices,
            style=custom_style,
            default=selected_key,
        ).ask()
        # ``ask()`` yielding None is handled the same way as choosing [Back].
        if key == "__back__" or key is None:
            return selected_key
        if key == "__reset_section__":
            _reset_and_reload(parent_key, display_title)
            # Reload the updated config_dict for this section from disk
            updated = get_settings().model_dump()
            if parent_key:
                # Traverse to the correct nested dict for jmespath/dot-paths
                keys = parent_key.split(".")
                sub = updated
                for k in keys:
                    sub = sub[k]
                config_dict.clear()
                config_dict.update(sub)
            else:
                config_dict.clear()
                config_dict.update(updated)
            selected_key = None
            continue
        current_value = config_dict[key] if key else None
        # Check for dict[str, BaseModel] (named instances, e.g. auth.confluence)
        dict_value_model = _get_dict_value_model(model, key)
        if isinstance(current_value, dict) and dict_value_model is not None:
            _edit_instance_dict_loop(
                current_value,
                dict_value_model,
                f"{parent_key}.{key}" if parent_key else key,
            )
            selected_key = key
            # Might have updated other service auth config
            # Reload the updated config_dict for this section from disk
            updated = get_settings().model_dump()
            if parent_key:
                # Traverse to the correct nested dict for jmespath/dot-paths
                keys = parent_key.split(".")
                sub = updated
                for k in keys:
                    sub = sub[k]
                config_dict.clear()
                config_dict.update(sub)
            else:
                config_dict.clear()
                config_dict.update(updated)
            continue
        submodel = _get_submodel(model, key)
        if isinstance(current_value, dict) and submodel is not None:
            # Always set selected_key to the submenu key after returning
            _edit_dict_config_loop(
                current_value,
                submodel,
                f"{parent_key}.{key}" if parent_key else key,
                model,
                last_selected=None,
            )
            selected_key = key
        else:
            # Leaf value: prompt until it is saved, cancelled, or the user stops retrying.
            while True:
                value_cast = _prompt_for_new_value(key, current_value, model)
                if value_cast is not None:
                    try:
                        set_setting(f"{parent_key}.{key}" if parent_key else key, value_cast)
                        config_dict[key] = value_cast
                        # NOTE(review): the new value is echoed verbatim — confirm no
                        # secret fields are edited through this path.
                        questionary.print(f"{parent_key}.{key} updated to {value_cast}.")
                        selected_key = key
                        break
                    except (ValueError, TypeError) as e:
                        questionary.print(f"Error: {e}")
                        retry = questionary.confirm("Try again?", style=custom_style).ask()
                        if not retry:
                            break
                else:
                    break
            # After editing, keep cursor at this entry
            selected_key = key


def _edit_dict_config(
    config_dict: dict,
    model: type[BaseModel],
    parent_key: str,
    parent_model: type[BaseModel],
    last_selected: str | None = None,
) -> str | None:
    """Run the section editor loop and return its result unchanged."""
    return _edit_dict_config_loop(
        config_dict=config_dict,
        model=model,
        parent_key=parent_key,
        parent_model=parent_model,
        last_selected=last_selected,
    )


def main_config_menu_loop(  # noqa: C901, PLR0912
    jump_to: str | None = None,
    new_instance_url: str | None = None,
) -> None:
    """Run the interactive configuration menu.

    Args:
        jump_to: Optional dot-separated settings path. When given, the matching section
            (or, for a leaf value, its parent section with the cursor on that value) is
            opened directly and the function returns once that editor is left.
        new_instance_url: Forwarded to the instance editor when *jump_to* points at a
            ``dict[str, BaseModel]`` field, so that a specific URL is opened directly.
    """
    settings = get_settings().model_dump()
    if jump_to:
        submenu = jmespath.search(jump_to, settings)
        preselect: str | None = None
        if not isinstance(submenu, dict):
            # jump_to points to a leaf value — open its parent section with cursor on that item
            leaf_key = jump_to.rsplit(".", 1)[-1]
            jump_to = jump_to.rsplit(".", 1)[0] if "." in jump_to else jump_to
            submenu = jmespath.search(jump_to, settings)
            preselect = leaf_key
        parent_path = jump_to.rsplit(".", 1)[0] if "." in jump_to else None
        parent_model = get_model_by_path(ConfigModel, parent_path) if parent_path else ConfigModel
        # If jump_to resolves to a dict[str, BaseModel] field (URL-keyed instances such as
        # auth.confluence), delegate directly to the instance-dict editor so that
        # URL keys are never mistaken for Pydantic field names.
        last_segment = jump_to.rsplit(".", 1)[-1] if "." in jump_to else jump_to
        dict_value_model = _get_dict_value_model(parent_model, last_segment)
        if dict_value_model is not None and isinstance(submenu, dict):
            _edit_instance_dict_loop(
                submenu, dict_value_model, jump_to, new_instance_url=new_instance_url
            )
            return
        submodel = get_model_by_path(ConfigModel, jump_to)
        _edit_dict_config(submenu, submodel, jump_to, parent_model, last_selected=preselect)
        return
    last_selected = None
    while True:
        # Re-read on every pass so the menu shows the values that are currently stored.
        settings = get_settings().model_dump()
        key, is_dict = _main_config_menu(settings, default=last_selected)
        if key == "__reset__":
            _reset_and_reload(None)
            last_selected = None
            continue
        if key == "__exit__" or key is None:
            break
        # Keep the cursor on the entry that was just opened. The key returned by the
        # section editor names an entry *inside* that section, so it can never match a
        # top-level menu entry and must not replace this selection.
        last_selected = (key, is_dict)
        current_value = settings[key]
        if is_dict:
            submodel = _get_submodel(ConfigModel, key)
            if submodel is not None:
                _edit_dict_config(current_value, submodel, key, ConfigModel, last_selected=None)
        else:
            while True:
                value_cast = _prompt_for_new_value(key, current_value, ConfigModel)
                if value_cast is None or value_cast == current_value:
                    # User cancelled or made no change: do not update config
                    break
                try:
                    set_setting(key, value_cast)
                    questionary.print(f"{key} updated to {value_cast}.")
                    break
                except (ValueError, TypeError) as e:
                    questionary.print(f"Error: {e}")
                    retry = questionary.confirm("Try again?", style=custom_style).ask()
                    if not retry:
                        break


================================================
FILE: confluence_markdown_exporter/utils/drawio_converter.py
================================================
"""Utility module for parsing DrawIO files and extracting mermaid diagrams."""

import html
import json
import logging
from pathlib import Path
from typing import cast

from bs4 import BeautifulSoup

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def load_drawio_file(file_path: str | Path) -> str | None:
    """Load and parse a DrawIO XML file.

    Args:
        file_path: Path to the DrawIO file (.drawio)

    Returns:
        The XML content as a string, or None if file doesn't exist
    """
    file_path = Path(file_path)
    if not file_path.exists():
        return None

    return file_path.read_text(encoding="utf-8")


def extract_mermaid_data(xml_content: str) -> str | None:
    """Pull the ``mermaidData`` attribute out of DrawIO XML.

    Args:
        xml_content: The XML content as a string.

    Returns:
        The HTML-unescaped attribute value of the first ``UserObject`` element, or
        None if the element or attribute is missing or an error occurs.
    """
    try:
        # The XML parser preserves tag and attribute case (UserObject / mermaidData).
        node = BeautifulSoup(xml_content, "xml").find("UserObject")
        if node is None:
            return None
        try:
            node_attrs = cast(
                "dict[str, str]",
                node.attrs,  # type: ignore[attr-defined]
            )
            raw_value = node_attrs.get("mermaidData")
            # Unescape HTML entities if present
            return None if raw_value is None else html.unescape(raw_value)
        except AttributeError:
            return None
    except Exception:  # pylint: disable=broad-except
        logger.exception("Error extracting mermaid data from DrawIO XML")
        return None


def parse_mermaid_json(mermaid_data: str) -> str | None:
    """Parse mermaid data from JSON format and extract the diagram definition.

    The mermaid data is often stored as JSON with a "data" field containing
    the actual mermaid diagram as a string.

    Args:
        mermaid_data: The raw mermaid data string (may be JSON-formatted)

    Returns:
        The mermaid diagram string, or the input if already in plain format
    """
    try:
        # Try to parse as JSON
        parsed = json.loads(mermaid_data)
        if isinstance(parsed, dict) and "data" in parsed:
            return parsed["data"]
    except (json.JSONDecodeError, TypeError):
        # If not JSON, return as-is (already a plain diagram string)
        pass

    return mermaid_data


def format_mermaid_markdown(mermaid_diagram: str) -> str:
    """Wrap a mermaid diagram definition in a fenced ``mermaid`` code block.

    Args:
        mermaid_diagram: The mermaid diagram definition

    Returns:
        The diagram enclosed in a markdown code fence
    """
    fence = "```"
    return "{fence}mermaid\n{body}\n{fence}".format(fence=fence, body=mermaid_diagram)


def load_and_parse_drawio(file_path: str | Path) -> str | None:
    """Turn a DrawIO file into a markdown mermaid code fence.

    Pipeline: load the XML, extract the mermaid data, unwrap JSON if needed, then
    format the diagram as markdown. A step that yields None ends the pipeline.

    Args:
        file_path: Path to the DrawIO file (.drawio)

    Returns:
        The markdown code fence, or None if any step produced nothing
    """
    xml_content = load_drawio_file(file_path)
    mermaid_data = None if xml_content is None else extract_mermaid_data(xml_content)
    diagram = None if mermaid_data is None else parse_mermaid_json(mermaid_data)
    if diagram is None:
        return None

    markdown = format_mermaid_markdown(diagram)
    logger.debug("Extracted mermaid diagram from %s", file_path)
    return markdown


================================================
FILE: confluence_markdown_exporter/utils/export.py
================================================
import json
import logging
import re
from pathlib import Path

from confluence_markdown_exporter.utils.app_data_store import get_settings

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Evaluated once, when this module is imported; the helpers below read
# ``export_options`` from this module-level binding.
settings = get_settings()
export_options = settings.export


def parse_encode_setting(encode_setting: str) -> dict[str, str]:
    """Turn the filename-encoding setting into a character mapping.

    The setting is the inside of a JSON object (no surrounding braces), e.g.
    '"char1":"replacement1","char2":"replacement2"'. Braces are added before parsing.

    Args:
        encode_setting: JSON object content without braces

    Returns:
        The parsed mapping; an empty dict for an empty or unparsable setting

    Examples:
        "" -> {}
        '" ":"%2D","-":"%2D"' -> {" ": "%2D", "-": "%2D"}
        '"=":" equals "' -> {"=": " equals "}
    """
    if not encode_setting:
        return {}

    wrapped = f"{{{encode_setting}}}"
    try:
        decoded = json.loads(wrapped)
    except (json.JSONDecodeError, TypeError):
        # Unparsable settings are treated as "no mapping".
        return {}
    return decoded if isinstance(decoded, dict) else {}


def save_file(file_path: Path, content: str | bytes) -> None:
    """Save content to a file, creating parent directories as needed."""
    file_path.parent.mkdir(parents=True, exist_ok=True)
    if isinstance(content, bytes):
        with file_path.open("wb") as file:
            file.write(content)
    elif isinstance(content, str):
        with file_path.open("w", encoding="utf-8") as file:
            file.write(content)
    else:
        msg = "Content must be either a string or bytes."
        raise TypeError(msg)
    logger.debug("Saved file %s (%d bytes)", file_path, len(content))


def sanitize_filename(filename: str) -> str:
    """Sanitize a filename for cross-platform compatibility.

    Steps, in order: strip control characters, apply the configured character
    mapping, trim trailing spaces and dots, suffix reserved Windows device names
    with ``_``, optionally lowercase, truncate to the configured length, and trim
    trailing spaces and dots once more (truncation can cut right after one).

    Args:
        filename: The original filename.

    Returns:
        A sanitized filename string.
    """
    sanitized = filename

    # Strip control characters (ASCII 0x00-0x1F, 0x7F) invalid on Windows/Linux
    sanitized = re.sub(r"[\x00-\x1f\x7f]", "", sanitized)

    if export_options.filename_encoding:
        encode_map = parse_encode_setting(export_options.filename_encoding)

        # Create pattern from all characters that have mappings
        if encode_map:
            chars_to_encode = "".join(encode_map.keys())
            encode_re = escape_character_class(chars_to_encode)
            encode_pattern = re.compile(f"[{encode_re}]")

            def map_char(m: re.Match[str]) -> str:
                char = m.group(0)
                return encode_map[char]

            sanitized = re.sub(encode_pattern, map_char, sanitized)

    # Trim spaces and dots from the end
    sanitized = sanitized.rstrip(" .")

    # Reserved Windows names (case-insensitive)
    reserved = {
        "CON",
        "PRN",
        "AUX",
        "NUL",
        *(f"COM{i}" for i in range(1, 10)),
        *(f"LPT{i}" for i in range(1, 10)),
    }

    name = Path(sanitized).stem.upper()
    if name in reserved:
        sanitized = f"{sanitized}_"

    if export_options.filename_lowercase:
        sanitized = sanitized.lower()

    # Limit length to the specified number of characters. Cutting the name can leave
    # a space or dot at the end again, so trim those a second time.
    return sanitized[: export_options.filename_length].rstrip(" .")


def sanitize_key(s: str, connector: str = "_") -> str:
    """Convert an input string to a valid Python/YAML-compatible key.

    - Lowercase the string.
    - Replace every character that is not ``a-z``, ``0-9`` or part of *connector*
      with *connector*.
    - Collapse repeated connectors into one.
    - Trim leading/trailing connector characters.
    - Prefix with ``key`` + *connector* if the result does not start with a letter.

    Args:
        s: The text to convert.
        connector: Separator placed between words. It is treated literally, so regex
            metacharacters such as ``.`` or ``+`` are safe to use.

    Returns:
        The sanitized key.
    """
    # Escape the connector so it is matched literally inside the patterns below.
    escaped = re.escape(connector)
    s = s.lower()
    # A callable replacement keeps backslashes in the connector from being
    # interpreted as regex replacement escapes.
    s = re.sub(f"[^a-z0-9{escaped}]", lambda _match: connector, s)
    if connector:
        s = re.sub(f"(?:{escaped})+", lambda _match: connector, s)
        s = s.strip(connector)
    if not re.match(r"^[a-z]", s):
        s = f"key{connector}{s}"
    return s


def github_heading_slug(text: str) -> str:
    """Build a heading anchor slug intended to resolve in GitHub-rendered Markdown.

    The text is lowercased and stripped, punctuation is dropped, runs of whitespace
    and underscores become a single hyphen, and repeated hyphens are collapsed.
    """
    rewrite_steps = (
        (r"[^\w\s-]", ""),  # drop punctuation; keep letters, digits, spaces, hyphens
        (r"[\s_]+", "-"),  # whitespace/underscores → hyphens
        (r"-{2,}", "-"),  # collapse runs of hyphens (e.g. "- word" → "-word")
    )
    slug = text.lower().strip()
    for pattern, replacement in rewrite_steps:
        slug = re.sub(pattern, replacement, slug)
    return slug


def escape_character_class(s: str) -> str:
    """Backslash-escape the characters that are special inside a regex character class.

    Args:
        s: The string containing characters to escape.

    Returns:
        The input with every backslash, hyphen, closing bracket and caret preceded
        by a backslash.
    """
    # A single translation pass escapes each special character exactly once.
    escapes = str.maketrans({special: "\\" + special for special in "\\-]^"})
    return s.translate(escapes)


================================================
FILE: confluence_markdown_exporter/utils/lockfile.py
================================================
"""Lock file handling for tracking exported Confluence pages."""

from __future__ import annotations

import json
import logging
import tempfile
import threading
from datetime import datetime
from datetime import timezone
from pathlib import Path
from typing import TYPE_CHECKING
from typing import ClassVar

from pydantic import BaseModel
from pydantic import Field
from pydantic import ValidationError

from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry
from confluence_markdown_exporter.utils.rich_console import get_stats

if TYPE_CHECKING:
    from confluence_markdown_exporter.confluence import Descendant
    from confluence_markdown_exporter.confluence import Page

logger = logging.getLogger(__name__)

# Current lock file schema version. ConfluenceLock.load() discards any lock file
# whose "lockfile_version" is lower than this and starts from an empty lock.
LOCKFILE_VERSION = 2


class AttachmentEntry(BaseModel):
    """Entry for a single attachment tracked in the lock file."""

    # Version number stored for the attachment.
    # NOTE(review): presumably the Confluence attachment version — confirm against the caller.
    version: int
    # Path stored for the exported attachment.
    # NOTE(review): presumably relative to the export output directory — confirm.
    path: str


class PageEntry(BaseModel):
    """Entry for a single page in the lock file."""

    # Page title at the time the entry was recorded.
    title: str
    # Page version number at the time the entry was recorded.
    version: int
    # String form of the page's export path; LockfileManager joins it onto the
    # configured output path when checking for or deleting the exported file.
    export_path: str
    # Attachment entries for this page, keyed by a string identifier.
    # NOTE(review): the key is presumably the attachment ID — confirm against the caller.
    attachments: dict[str, AttachmentEntry] = Field(default_factory=dict)


class SpaceEntry(BaseModel):
    """Lock file entry for a Confluence space."""

    # Page entries of this space, keyed by the page ID as a string.
    pages: dict[str, PageEntry] = Field(default_factory=dict)


class OrgEntry(BaseModel):
    """Lock file entry for a Confluence organisation (base URL)."""

    # Space entries of this organisation, keyed by space key.
    spaces: dict[str, SpaceEntry] = Field(default_factory=dict)


class ConfluenceLock(BaseModel):
    """Lock file tracking exported Confluence data.

    Entries are nested as organisation (base URL) -> space key -> page ID.
    Lookups by page ID search every organisation and space.
    """

    # Schema version of the lock file; defaults to LOCKFILE_VERSION.
    lockfile_version: int = Field(default=LOCKFILE_VERSION)
    # ISO-8601 UTC timestamp written by save(); empty string by default.
    last_export: str = Field(default="")
    # Organisation entries keyed by the page's base URL.
    orgs: dict[str, OrgEntry] = Field(default_factory=dict)

    @classmethod
    def load(cls, lockfile_path: Path) -> ConfluenceLock:
        """Load lock file from disk, or return empty if not exists or outdated.

        A lock file that cannot be decoded, is not valid JSON, is not a JSON
        object, or fails model validation is treated as corrupt: a warning is
        logged and an empty lock is returned instead of raising.

        Args:
            lockfile_path: Location of the lock file.

        Returns:
            The parsed lock, or a fresh empty lock.
        """
        if not lockfile_path.exists():
            return cls()

        try:
            data = json.loads(lockfile_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            logger.warning("Failed to parse lock file: %s. Starting fresh.", lockfile_path)
            return cls()

        # Valid JSON that is not an object (e.g. a list or a bare string) has no
        # ".get" and can never be a lock file.
        if not isinstance(data, dict):
            logger.warning("Failed to parse lock file: %s. Starting fresh.", lockfile_path)
            return cls()

        file_version = data.get("lockfile_version", 1)
        # Only compare numeric versions; anything else is left for model
        # validation below to accept or reject.
        if isinstance(file_version, (int, float)) and file_version < LOCKFILE_VERSION:
            logger.info(
                "Lock file format is outdated (v%s → v%s). Starting fresh.",
                file_version,
                LOCKFILE_VERSION,
            )
            return cls()

        try:
            return cls.model_validate(data)
        except ValidationError:
            logger.warning("Failed to parse lock file: %s. Starting fresh.", lockfile_path)
            return cls()

    def all_pages(self) -> dict[str, PageEntry]:
        """Return all page entries as a flat dict keyed by page ID.

        If the same page ID occurs in more than one space, the entry visited
        last wins.
        """
        result: dict[str, PageEntry] = {}
        for org in self.orgs.values():
            for space in org.spaces.values():
                result.update(space.pages)
        return result

    def get_page(self, page_id: str) -> PageEntry | None:
        """Return the PageEntry for *page_id*, searching all orgs and spaces.

        Returns:
            The first matching entry, or None if the page is not tracked.
        """
        for org in self.orgs.values():
            for space in org.spaces.values():
                if page_id in space.pages:
                    return space.pages[page_id]
        return None

    def remove_page(self, page_id: str) -> None:
        """Remove *page_id* from every org/space entry that holds it."""
        for org in self.orgs.values():
            for space in org.spaces.values():
                space.pages.pop(page_id, None)

    def add_page(
        self,
        page: Page,
        attachment_entries: dict[str, AttachmentEntry] | None = None,
    ) -> None:
        """Add or update a page entry, placed under its org and space.

        Pages without version information are skipped with a warning.

        Args:
            page: The exported page.
            attachment_entries: Attachment entries to store with the page;
                None is stored as an empty dict.
        """
        if page.version is None:
            logger.warning("Page %s has no version info. Skipping lock entry.", page.id)
            return

        # Create the org/space containers on first use.
        org = self.orgs.setdefault(page.base_url, OrgEntry())
        space = org.spaces.setdefault(page.space.key, SpaceEntry())

        space.pages[str(page.id)] = PageEntry(
            title=page.title,
            version=page.version.number,
            export_path=str(page.export_path),
            attachments=attachment_entries or {},
        )

    def save(  # noqa: C901
        self, lockfile_path: Path, *, delete_ids: set[str] | None = None
    ) -> None:
        """Save lock file to disk.

        To handle concurrent writes, this method reads the existing lock file
        and merges it with the current state before saving. The file is written
        to a temporary file in the same directory and then moved into place.
        Afterwards this instance is updated to the merged state.

        Args:
            lockfile_path: Destination of the lock file; parent directories
                are created if missing.
            delete_ids: Page IDs to drop from the merged state before writing.
        """
        lockfile_path.parent.mkdir(parents=True, exist_ok=True)

        # Read existing lock file and merge to handle concurrent writes
        existing = ConfluenceLock.load(lockfile_path)
        for org_url, org_entry in self.orgs.items():
            if org_url not in existing.orgs:
                existing.orgs[org_url] = OrgEntry()
            for space_key, space_entry in org_entry.spaces.items():
                if space_key not in existing.orgs[org_url].spaces:
                    existing.orgs[org_url].spaces[space_key] = SpaceEntry()
                existing.orgs[org_url].spaces[space_key].pages.update(space_entry.pages)

        if delete_ids:
            for page_id in delete_ids:
                existing.remove_page(page_id)

        # Sort for deterministic output
        for org in existing.orgs.values():
            for space in org.spaces.values():
                space.pages = dict(sorted(space.pages.items()))
            org.spaces = dict(sorted(org.spaces.items()))
        existing.orgs = dict(sorted(existing.orgs.items()))

        existing.last_export = datetime.now(timezone.utc).isoformat()

        json_str = json.dumps(existing.model_dump(), indent=2, ensure_ascii=False)
        tmp_path = None
        try:
            # The temporary file lives next to the destination so the final
            # move stays on one filesystem.
            with tempfile.NamedTemporaryFile(
                mode="w",
                dir=lockfile_path.parent,
                suffix=".tmp",
                delete=False,
                encoding="utf-8",
            ) as fd:
                tmp_path = Path(fd.name)
                fd.write(json_str)
            try:
                tmp_path.replace(lockfile_path)
            except PermissionError:
                # Windows: MoveFileExW(MOVEFILE_REPLACE_EXISTING) can fail when
                # security software holds the destination. Fall back to non-atomic
                # unlink + rename.
                lockfile_path.unlink(missing_ok=True)
                tmp_path.rename(lockfile_path)
        except BaseException:
            # Never leave a stray temporary file behind, even on interruption.
            if tmp_path is not None:
                tmp_path.unlink(missing_ok=True)
            raise

        # Update self to reflect merged state
        self.orgs = existing.orgs
        self.last_export = existing.last_export


class LockfileManager:
    """Manager for lock file operations during export.

    All state is kept on the class. Every operation is a no-op (or returns a
    permissive default) until init() has loaded a lock file, which only
    happens when the ``skip_unchanged`` export setting is enabled.
    """

    # Location of the lock file inside the output directory.
    _lockfile_path: ClassVar[Path | None] = None
    # The loaded lock; None while lock file tracking is inactive.
    _lock: ClassVar[ConfluenceLock | None] = None
    # Root directory that export paths are resolved against.
    _output_path: ClassVar[Path | None] = None
    # Page entries as they were when init() loaded the lock file.
    _all_entries_snapshot: ClassVar[dict[str, PageEntry]] = {}
    # Page IDs (as strings) recorded or marked as seen during the current run.
    _seen_page_ids: ClassVar[set[str]] = set()
    # Serialises lock mutation and saving across export threads.
    _thread_lock: ClassVar[threading.Lock] = threading.Lock()

    @classmethod
    def init(cls) -> None:
        """Initialize the lockfile manager if skip_unchanged is enabled.

        Loads the lock file, snapshots its entries, clears the seen-ID set and
        re-populates PageTitleRegistry from the snapshot.
        """
        from confluence_markdown_exporter.utils.app_data_store import get_settings

        settings = get_settings()
        if not settings.export.skip_unchanged:
            return

        cls._output_path = settings.export.output_path
        cls._lockfile_path = cls._output_path / settings.export.lockfile_name
        cls._lock = ConfluenceLock.load(cls._lockfile_path)
        cls._all_entries_snapshot = dict(cls._lock.all_pages())
        cls._seen_page_ids = set()
        PageTitleRegistry.reset()
        for pid, entry in cls._all_entries_snapshot.items():
            try:
                PageTitleRegistry.register(int(pid), entry.title)
            except (TypeError, ValueError):
                # Keys that are not numeric cannot be registered; skip them.
                continue
        logger.debug(
            "Lockfile initialized: %s (%d tracked page(s))",
            cls._lockfile_path,
            len(cls._all_entries_snapshot),
        )

    @classmethod
    def get_page_attachment_entries(cls, page_id: str) -> dict[str, AttachmentEntry]:
        """Return attachment entries for *page_id* from the lock file, or empty dict."""
        if cls._lock is None:
            return {}
        entry = cls._lock.get_page(page_id)
        return entry.attachments if entry else {}

    @classmethod
    def record_page(
        cls,
        page: Page,
        attachment_entries: dict[str, AttachmentEntry] | None = None,
    ) -> None:
        """Record a page export to the lock file.

        The entry is added, the lock file is saved immediately and the page is
        marked as seen, all under the thread lock. The page title is then
        registered with PageTitleRegistry.

        Args:
            page: The page that was exported.
            attachment_entries: Attachment entries to store with the page.
        """
        if cls._lock is None or cls._lockfile_path is None:
            return

        with cls._thread_lock:
            cls._lock.add_page(page, attachment_entries)
            cls._lock.save(cls._lockfile_path)
            cls._seen_page_ids.add(str(page.id))
        PageTitleRegistry.register(int(page.id), page.title)

    @classmethod
    def mark_seen(cls, page_ids: list[int]) -> None:
        """Mark page IDs as seen in the current export run.

        This avoids unnecessary API existence checks during cleanup for pages
        that were encountered but skipped (e.g. unchanged pages).
        """
        cls._seen_page_ids.update(str(pid) for pid in page_ids)

    @classmethod
    def should_export(cls, page: Page | Descendant) -> bool:
        """Check if a page should be exported based on lockfile state.

        Returns True if the page should be exported (not in lockfile or changed).
        A page is also exported when lock tracking is inactive, when it carries
        no version info, or when its recorded output file is missing on disk.
        """
        if cls._lock is None:
            return True

        page_id = str(page.id)
        entry = cls._lock.get_page(page_id)
        if entry is None:
            logger.debug("Page id=%s not in lockfile — will export", page_id)
            return True

        if page.version is None:
            logger.debug("Page id=%s has no version info — will export", page_id)
            return True

        # Re-export if the output file is missing from disk
        if cls._output_path is not None and not (cls._output_path / entry.export_path).exists():
            logger.debug("Page id=%s output file missing — will re-export", page_id)
            return True

        # Export if version or export_path has changed
        if entry.version != page.version.number or entry.export_path != str(page.export_path):
            logger.debug(
                "Page id=%s changed (v%s -> v%s) — will export",
                page_id,
                entry.version,
                page.version.number,
            )
            return True

        logger.debug("Page id=%s unchanged (v%s) — skipping", page_id, entry.version)
        return False

    @classmethod
    def unseen_ids(cls) -> set[str]:
        """Return lockfile page IDs not encountered during the current export run."""
        if cls._lock is None:
            return set()
        return set(cls._lock.all_pages().keys()) - cls._seen_page_ids

    @classmethod
    def remove_pages(cls, deleted_ids: set[str]) -> None:
        """Remove files and lockfile entries for moved or deleted pages.

        A file is only deleted when no other tracked page (that is not itself
        being deleted) currently exports to the same path; otherwise a page
        that took over the path would lose its freshly written file.

        Args:
            deleted_ids: Page IDs confirmed as deleted from Confluence.
        """
        if cls._lock is None or cls._lockfile_path is None or cls._output_path is None:
            return

        # Which surviving pages currently own which export path.
        owners_by_path: dict[str, set[str]] = {}
        for pid, tracked in cls._lock.all_pages().items():
            if pid not in deleted_ids:
                owners_by_path.setdefault(tracked.export_path, set()).add(pid)

        def _owned_by_other(path: str, owner_id: str) -> bool:
            """Return True if a surviving page other than *owner_id* exports to *path*."""
            return bool(owners_by_path.get(path, set()) - {owner_id})

        result_delete_ids: set[str] = set()

        # Handle moved pages: delete old file when export_path changed
        for page_id in cls._seen_page_ids:
            old_entry = cls._all_entries_snapshot.get(page_id)
            new_entry = cls._lock.get_page(page_id)
            if old_entry is None or new_entry is None:
                continue
            if old_entry.export_path == new_entry.export_path:
                continue
            if _owned_by_other(old_entry.export_path, page_id):
                logger.debug(
                    "Old path of moved page is now used by another page; keeping file: %s",
                    old_entry.export_path,
                )
                continue
            (cls._output_path / old_entry.export_path).unlink(missing_ok=True)
            logger.info("Deleted old path for moved page: %s", old_entry.export_path)

        # Remove files and lockfile entries for pages deleted from Confluence
        for page_id in deleted_ids:
            entry = cls._lock.get_page(page_id)
            if entry is None:
                continue
            if _owned_by_other(entry.export_path, page_id):
                logger.debug(
                    "Path of removed page is now used by another page; keeping file: %s",
                    entry.export_path,
                )
            else:
                (cls._output_path / entry.export_path).unlink(missing_ok=True)
                logger.info("Deleted removed page: %s", entry.export_path)
            # The lock entry goes away either way.
            result_delete_ids.add(page_id)

        if result_delete_ids:
            with cls._thread_lock:
                cls._lock.save(cls._lockfile_path, delete_ids=result_delete_ids)

        stats = get_stats()
        for _ in result_delete_ids:
            stats.inc_removed()


================================================
FILE: confluence_markdown_exporter/utils/measure_time.py
================================================
import logging
import time
from collections.abc import Callable
from collections.abc import Generator
from contextlib import contextmanager
from datetime import datetime
from typing import ParamSpec
from typing import TypeVar

from dateutil.relativedelta import relativedelta
from rich.rule import Rule

from confluence_markdown_exporter.utils.rich_console import console

T = TypeVar("T")
P = ParamSpec("P")

logger = logging.getLogger(__name__)


def _format_duration(delta: relativedelta) -> str:
    """Return a human-readable duration string from a relativedelta.

    Args:
        delta: The duration as a relativedelta.

    Returns:
        A formatted string like "2m 3s" or "45s".
    """
    parts = []
    if delta.hours:
        parts.append(f"{delta.hours}h")
    if delta.minutes:
        parts.append(f"{delta.minutes}m")
    seconds = delta.seconds + round(delta.microseconds / 1_000_000)
    if seconds or not parts:
        parts.append(f"{seconds}s")
    return " ".join(parts)


def measure_time(func: Callable[P, T]) -> Callable[P, T]:
    """Decorator to measure and print the execution time of a function."""

    def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        elapsed_time = end_time - start_time
        logger.info(f"Function '{func.__name__}' took {elapsed_time:.4f} seconds to execute.")
        return result

    return wrapper


@contextmanager
def measure(step: str) -> Generator[None, None, None]:
    """Time the enclosed block and report the result on the console.

    A rule banner carrying the step name is printed on entry. On exit a single
    summary line states whether the step completed, failed (an ``Exception``
    propagated) or was stopped (any other exit), together with the elapsed time.

    Args:
        step: The step name shown in the banner and the summary line.

    Raises:
        Exception: Whatever the enclosed block raises is re-raised unchanged.
    """
    began = datetime.now()
    console.print(Rule(f"[highlight]{step}[/highlight]", style="dim"))
    logger.debug("Started at %s", began.strftime("%Y-%m-%d %H:%M:%S"))

    # Outcome -> (badge markup, verb phrase) used to build the summary line.
    summaries = {
        "ended": ("[success]✓[/success]", "completed in"),
        "failed": ("[error]✗[/error]", "failed after"),
        "stopped": ("[warning]![/warning]", "stopped after"),
    }

    # Stays "stopped" unless the block returns normally or raises an Exception
    # (e.g. a KeyboardInterrupt leaves it untouched).
    outcome = "stopped"
    try:
        yield
    except Exception:
        outcome = "failed"
        raise
    else:
        outcome = "ended"
    finally:
        elapsed = _format_duration(relativedelta(datetime.now(), began))
        badge, verb = summaries[outcome]
        console.print(f"{badge} [dim]{step}[/dim] {verb} [highlight]{elapsed}[/highlight]")


================================================
FILE: confluence_markdown_exporter/utils/page_registry.py
================================================
"""Cross-space page title registry for link disambiguation.

Confluence enforces page-title uniqueness per space, not across spaces.
When pages from multiple spaces are exported into the same vault, two
pages can share a title — Obsidian's wiki link ``[[Title]]`` then
resolves ambiguously. This registry tracks known page titles so the
Markdown converter can emit a path-qualified wiki link
(``[[path/to/file|Title]]``) when a collision is detected.
"""

from __future__ import annotations

import threading
from typing import ClassVar


class PageTitleRegistry:
    """Registry of page-id -> title pairs used to spot duplicate titles.

    Alongside the id -> title mapping it keeps a per-title counter, so a
    title shared by more than one registered page can be detected cheaply.
    All state lives on the class and mutations are guarded by a lock.
    """

    # page id -> currently registered title
    _entries: ClassVar[dict[int, str]] = {}
    # title -> number of registered pages carrying it
    _title_counts: ClassVar[dict[str, int]] = {}
    _lock: ClassVar[threading.Lock] = threading.Lock()

    @classmethod
    def reset(cls) -> None:
        """Forget every registered page and title count."""
        with cls._lock:
            cls._title_counts.clear()
            cls._entries.clear()

    @classmethod
    def register(cls, page_id: int, title: str) -> None:
        """Register *title* for *page_id*, replacing a previously registered title.

        Calls with a falsy page id or title are ignored, as is re-registering
        an unchanged title.
        """
        if not (page_id and title):
            return
        with cls._lock:
            previous = cls._entries.get(page_id)
            if previous == title:
                return
            if previous is not None:
                # The page was renamed: release its claim on the old title.
                remaining = cls._title_counts[previous] - 1
                if remaining > 0:
                    cls._title_counts[previous] = remaining
                else:
                    cls._title_counts.pop(previous, None)
            cls._entries[page_id] = title
            cls._title_counts[title] = cls._title_counts.get(title, 0) + 1

    @classmethod
    def is_ambiguous(cls, title: str) -> bool:
        """Return True if more than one registered page carries *title*."""
        return cls._title_counts.get(title, 0) >= 2

    @classmethod
    def title_count(cls, title: str) -> int:
        """Return how many registered pages carry *title* (0 if unknown)."""
        return cls._title_counts.get(title, 0)


================================================
FILE: confluence_markdown_exporter/utils/rich_console.py
================================================
"""Shared rich console, logging setup, and export statistics tracking."""

import logging
import threading
from dataclasses import dataclass
from dataclasses import field
from os import getenv
from pathlib import Path

from rich.console import Console
from rich.logging import RichHandler
from rich.style import Style
from rich.theme import Theme

# Style table handed to every Console built by get_rich_console(). Keys are
# rich style names, values the Style applied to them.
# NOTE(review): measure_time.measure() prints markup using the tags "highlight",
# "success", "error" and "warning", none of which are defined in this table —
# confirm whether those are meant to render unstyled.
_CME_THEME = Theme(
    {
        "none": Style.null(),
        "reset": Style(
            color="default",
            bgcolor="default",
            dim=False,
            bold=False,
            italic=False,
            underline=False,
            blink=False,
            blink2=False,
            reverse=False,
            conceal=False,
            strike=False,
        ),
        "dim": Style(dim=True),
        "bright": Style(dim=False),
        "bold": Style(bold=True),
        "strong": Style(bold=True),
        "code": Style(color="cyan"),
        "italic": Style(italic=True),
        "emphasize": Style(italic=True),
        "underline": Style(underline=True),
        "blink": Style(blink=True),
        "blink2": Style(blink2=True),
        "reverse": Style(reverse=True),
        "strike": Style(strike=True),
        "black": Style(color="black"),
        "red": Style(color="red"),
        "green": Style(color="green"),
        "yellow": Style(color="yellow"),
        "magenta": Style(color="magenta"),
        "cyan": Style(color="cyan"),
        "white": Style(color="white"),
        "inspect.attr": Style(color="yellow", italic=True),
        "inspect.attr.dunder": Style(color="yellow", italic=True, dim=True),
        "inspect.callable": Style(bold=True, color="red"),
        "inspect.async_def": Style(italic=True, color="bright_cyan"),
        "inspect.def": Style(italic=True, color="bright_cyan"),
        "inspect.class": Style(italic=True, color="bright_cyan"),
        "inspect.error": Style(bold=True, color="red"),
        "inspect.equals": Style(),
        "inspect.help": Style(color="cyan"),
        "inspect.doc": Style(dim=True),
        "inspect.value.border": Style(color="green"),
        "live.ellipsis": Style(bold=True, color="red"),
        "layout.tree.row": Style(dim=False, color="red"),
        "layout.tree.column": Style(dim=False, color="blue"),
        "logging.keyword": Style(bold=True, color="yellow"),
        "logging.level.notset": Style(dim=True),
        "logging.level.debug": Style(color="green"),
        "logging.level.info": Style(color="blue"),
        "logging.level.warning": Style(color="yellow"),
        "logging.level.error": Style(color="red", bold=True),
        "logging.level.critical": Style(color="red", bold=True, reverse=True),
        "log.level": Style.null(),
        "log.time": Style(color="cyan", dim=True),
        "log.message": Style.null(),
        "log.path": Style(dim=True),
        "repr.ellipsis": Style(color="yellow"),
        "repr.indent": Style(color="green", dim=True),
        "repr.error": Style(color="red", bold=True),
        "repr.str": Style(color="green", italic=False, bold=False),
        "repr.brace": Style(bold=True),
        "repr.comma": Style(bold=True),
        "repr.ipv4": Style(bold=True, color="bright_green"),
        "repr.ipv6": Style(bold=True, color="bright_green"),
        "repr.eui48": Style(bold=True, color="bright_green"),
        "repr.eui64": Style(bold=True, color="bright_green"),
        "repr.tag_start": Style(bold=True),
        "repr.tag_name": Style(color="bright_magenta", bold=True),
        "repr.tag_contents": Style(color="default"),
        "repr.tag_end": Style(bold=True),
        "repr.attrib_name": Style(color="yellow", italic=False),
        "repr.attrib_equal": Style(bold=True),
        "repr.attrib_value": Style(color="magenta", italic=False),
        "repr.number": Style(color="cyan", bold=True, italic=False),
        "repr.number_complex": Style(color="cyan", bold=True, italic=False),  # same as repr.number
        "repr.bool_true": Style(color="bright_green", italic=True),
        "repr.bool_false": Style(color="bright_red", italic=True),
        "repr.none": Style(color="magenta", italic=True),
        "repr.url": Style(underline=True, color="bright_blue", italic=False, bold=False),
        "repr.uuid": Style(color="bright_yellow", bold=False),
        "repr.call": Style(color="magenta", bold=True),
        "repr.path": Style(color="magenta"),
        "repr.filename": Style(color="bright_magenta"),
        "rule.line": Style(color="bright_green"),
        "rule.text": Style.null(),
        "json.brace": Style(bold=True),
        "json.bool_true": Style(color="bright_green", italic=True),
        "json.bool_false": Style(color="bright_red", italic=True),
        "json.null": Style(color="magenta", italic=True),
        "json.number": Style(color="cyan", bold=True, italic=False),
        "json.str": Style(color="green", italic=False, bold=False),
        "json.key": Style(color="blue", bold=True),
        "prompt": Style.null(),
        "prompt.choices": Style(color="magenta", bold=True),
        "prompt.default": Style(color="cyan", bold=True),
        "prompt.invalid": Style(color="red"),
        "prompt.invalid.choice": Style(color="red"),
        "pretty": Style.null(),
        "scope.border": Style(color="blue"),
        "scope.key": Style(color="yellow", italic=True),
        "scope.key.special": Style(color="yellow", italic=True, dim=True),
        "scope.equals": Style(color="red"),
        "table.header": Style(bold=True),
        "table.footer": Style(bold=True),
        "table.cell": Style.null(),
        "table.title": Style(italic=True),
        "table.caption": Style(italic=True, dim=True),
        "traceback.error": Style(color="red", italic=True),
        "traceback.border.syntax_error": Style(color="bright_red"),
        "traceback.border": Style(color="red"),
        "traceback.text": Style.null(),
        "traceback.title": Style(color="red", bold=True),
        "traceback.exc_type": Style(color="bright_red", bold=True),
        "traceback.exc_value": Style.null(),
        "traceback.offset": Style(color="bright_red", bold=True),
        "traceback.error_range": Style(underline=True, bold=True),
        "traceback.note": Style(color="green", bold=True),
        "traceback.group.border": Style(color="magenta"),
        "bar.back": Style(color="grey23"),
        "bar.complete": Style(color="rgb(249,38,114)"),
        "bar.finished": Style(color="rgb(114,156,31)"),
        "bar.pulse": Style(color="rgb(249,38,114)"),
        "progress.description": Style.null(),
        "progress.filesize": Style(color="green"),
        "progress.filesize.total": Style(color="green"),
        "progress.download": Style(color="green"),
        "progress.elapsed": Style(color="yellow"),
        "progress.percentage": Style(color="magenta"),
        "progress.remaining": Style(color="cyan"),
        "progress.data.speed": Style(color="red"),
        "progress.spinner": Style(color="green"),
        "status.spinner": Style(color="green"),
        "tree": Style(),
        "tree.line": Style(),
        "markdown.paragraph": Style(),
        "markdown.text": Style(),
        "markdown.em": Style(italic=True),
        "markdown.emph": Style(italic=True),  # For commonmark backwards compatibility
        "markdown.strong": Style(bold=True),
        "markdown.code": Style(color="cyan"),
        "markdown.code_block": Style(color="cyan"),
        "markdown.block_quote": Style(color="magenta"),
        "markdown.list": Style(color="cyan"),
        "markdown.item": Style(),
        "markdown.item.bullet": Style(color="yellow", bold=True),
        "markdown.item.number": Style(color="yellow", bold=True),
        "markdown.hr": Style(color="yellow"),
        "markdown.h1.border": Style(),
        "markdown.h1": Style(bold=True),
        "markdown.h2": Style(bold=True, underline=True),
        "markdown.h3": Style(bold=True),
        "markdown.h4": Style(bold=True, dim=True),
        "markdown.h5": Style(underline=True),
        "markdown.h6": Style(italic=True),
        "markdown.h7": Style(italic=True, dim=True),
        "markdown.link": Style(color="bright_blue"),
        "markdown.link_url": Style(color="blue", underline=True),
        "markdown.s": Style(strike=True),
        "iso8601.date": Style(color="blue"),
        "iso8601.time": Style(color="magenta"),
        "iso8601.timezone": Style(color="yellow"),
    }
)

TERMINAL_WIDTH = getenv("TERMINAL_WIDTH")
MAX_WIDTH = int(TERMINAL_WIDTH) if TERMINAL_WIDTH else None
FORCE_TERMINAL = (
    False
    if getenv("NO_COLOR") or getenv("CI")
    else True
    if getenv("FORCE_COLOR") or getenv("PY_COLORS") or getenv("GITHUB_ACTIONS")
    else None
)


def get_rich_console(*, stderr: bool = False) -> Console:
    """Create a rich Console configured with the project theme.

    Args:
        stderr: Forwarded to ``Console(stderr=...)``.

    Returns:
        A new Console using the shared theme, with automatic highlighting
        turned off and the environment-derived width and terminal settings.
    """
    options = {
        "theme": _CME_THEME,
        "highlight": False,
        # In CI, disable live rendering (no ANSI escapes, no overwriting lines, no colors)
        "force_terminal": FORCE_TERMINAL,
        "width": MAX_WIDTH,
        "stderr": stderr,
    }
    return Console(**options)


# Shared module-level console, created once at import time with the default
# (stderr=False) settings; setup_logging() attaches its RichHandler to it.
console: Console = get_rich_console()


def setup_logging(log_level: str = "INFO", log_file: Path | None = None) -> None:
    """Configure the root logger to use rich output.

    Existing root handlers are removed first, so calling this again replaces
    the previous configuration instead of duplicating output.

    Args:
        log_level: One of DEBUG, INFO, WARNING, ERROR (case-insensitive).
            Unknown names fall back to INFO.
        log_file: Optional path to also write log records to. The file uses
            a plain (non-rich) format so it is grep-friendly. Parent
            directories are created if missing.
    """
    level = getattr(logging, log_level.upper(), logging.INFO)
    if not isinstance(level, int):
        # The name matched some other attribute of the logging module
        # (a function, a format string, ...), not a level constant.
        level = logging.INFO
    handler = RichHandler(
        console=console,
        rich_tracebacks=True,
        # Derived from the resolved level so that "debug" and "DEBUG" behave alike.
        show_path=level == logging.DEBUG,
        markup=False,
        log_time_format="[%X]",
    )
    handler.setLevel(level)
    root = logging.getLogger()
    root.setLevel(level)
    # Remove any existing handlers so we don't double-log
    root.handlers.clear()
    root.addHandler(handler)
    if log_file is not None:
        log_file.parent.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setLevel(level)
        file_handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
        )
        root.addHandler(file_handler)


@dataclass
class ExportStats:
    """Counters describing one export run; increments are serialised by a lock."""

    # Page counters
    total: int = 0
    exported: int = 0
    skipped: int = 0
    failed: int = 0
    removed: int = 0
    # Attachment counters
    attachments_exported: int = 0
    attachments_skipped: int = 0
    attachments_failed: int = 0
    attachments_removed: int = 0
    # Guards every increment; left out of repr() and equality comparison.
    _lock: threading.Lock = field(default_factory=threading.Lock, repr=False, compare=False)

    def _bump(self, counter: str) -> None:
        """Add 1 to the counter attribute named *counter* while holding the lock."""
        with self._lock:
            setattr(self, counter, getattr(self, counter) + 1)

    def inc_exported(self) -> None:
        """Count one more exported page."""
        self._bump("exported")

    def inc_skipped(self) -> None:
        """Count one more skipped page."""
        self._bump("skipped")

    def inc_failed(self) -> None:
        """Count one more failed page."""
        self._bump("failed")

    def inc_removed(self) -> None:
        """Count one more removed page."""
        self._bump("removed")

    def inc_attachments_exported(self) -> None:
        """Count one more exported attachment."""
        self._bump("attachments_exported")

    def inc_attachments_skipped(self) -> None:
        """Count one more skipped attachment."""
        self._bump("attachments_skipped")

    def inc_attachments_failed(self) -> None:
        """Count one more failed attachment."""
        self._bump("attachments_failed")

    def inc_attachments_removed(self) -> None:
        """Count one more removed attachment."""
        self._bump("attachments_removed")


# Module-level stats instance. reset_stats() rebinds it to a fresh ExportStats;
# read it through get_stats() so the current instance is always returned.
_stats: ExportStats = ExportStats()


def reset_stats(total: int = 0) -> ExportStats:
    """Replace the global export stats with a fresh instance for a new run.

    Args:
        total: Total number of pages in the export scope (including skipped).

    Returns:
        The newly created ExportStats, which is now the global instance.
    """
    global _stats  # noqa: PLW0603
    fresh = ExportStats(total=total)
    _stats = fresh
    return fresh


def get_stats() -> ExportStats:
    """Return the current global export stats.

    Returns:
        The ExportStats instance the module-level ``_stats`` name is bound to
        at call time. reset_stats() rebinds that name, so an instance obtained
        before a reset is no longer the current one.
    """
    return _stats


================================================
FILE: confluence_markdown_exporter/utils/table_converter.py
================================================
import re
from typing import cast

from bs4 import BeautifulSoup
from bs4 import Tag
from markdownify import MarkdownConverter
from tabulate import tabulate

# Match a run of whitespace and/or <br>, <br/>, <br /> tags at the very start
# (first pattern) or very end (second pattern) of a string.
_LEADING_BR_OR_WS = re.compile(r"^(?:\s|<br\s*/?>)+")
_TRAILING_BR_OR_WS = re.compile(r"(?:\s|<br\s*/?>)+$")


def _get_int_attr(cell: Tag, attr: str, default: str = "1") -> int:
    val = cell.get(attr, default)
    if isinstance(val, list):
        val = val[0] if val else default
    try:
        return int(str(val))
    except (ValueError, TypeError):
        return int(default)


def pad(rows: list[list[Tag]]) -> list[list[Tag]]:
    """Pad table rows to handle rowspan and colspan for markdown conversion.

    Every cell that spans several columns or rows is followed (in its own row)
    or preceded (in later rows) by empty placeholder cells, so each cell in
    the output occupies exactly one grid position. Rows without any cells are
    dropped. Span values below 1 are treated as 1.

    Args:
        rows: The table rows, each a list of its ``td``/``th`` cells.

    Returns:
        The rows with placeholder cells inserted.
    """
    padded: list[list[Tag]] = []
    # (row, column) -> placeholder reserved by a rowspan of an earlier row.
    occ: dict[tuple[int, int], Tag] = {}
    for r, row in enumerate(rows):
        if not row:
            continue
        cur: list[Tag] = []
        c = 0
        for cell in row:
            # Fill positions reserved by cells spanning down from earlier rows.
            while (r, c) in occ:
                cur.append(occ.pop((r, c)))
                c += 1
            # A span below 1 would leave the column counter out of step with
            # the cells actually emitted, misplacing later placeholders.
            rs = max(1, _get_int_attr(cell, "rowspan", "1"))
            cs = max(1, _get_int_attr(cell, "colspan", "1"))
            cur.append(cell)
            # Append extra cells for colspan
            cur.extend(make_empty_cell() for _ in range(cs - 1))
            # Reserve the positions this cell covers in the following rows.
            for i in range(1, rs):
                for j in range(cs):
                    occ[(r + i, c + j)] = make_empty_cell()
            c += cs
        # Reserved positions to the right of the last real cell.
        while (r, c) in occ:
            cur.append(occ.pop((r, c)))
            c += 1
        padded.append(cur)
    return padded


def make_empty_cell() -> Tag:
    """Return an empty <td> Tag.

    A new ``Tag`` object is constructed on every call.
    """
    return Tag(name="td")


def _normalize_table_cell_text(text: str) -> str:
    """Prepare converted cell content for use inside one pipe-table cell.

    Pipes are backslash-escaped, newlines become ``<br/>``, and any run of
    whitespace or ``<br>`` tags at either end of the text is removed.
    """
    escaped = text.replace("|", "\\|")
    single_line = escaped.replace("\n", "<br/>")
    without_leading = _LEADING_BR_OR_WS.sub("", single_line)
    return _TRAILING_BR_OR_WS.sub("", without_leading)


class TableConverter(MarkdownConverter):
    """Custom MarkdownConverter for converting HTML tables to markdown tables.

    ``convert_table`` converts every cell on its own and lays the results out with
    ``tabulate`` in ``pipe`` format. The other ``convert_*`` overrides keep cell
    content on a single line by using ``<br>``/``<br/>`` instead of newlines.
    """

    def convert_table(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
        """Render a ``<table>`` element as a pipe table.

        The first row that contains cells becomes the header when all of its cells
        are ``<th>``; otherwise a header of empty strings is emitted and every row
        is treated as data.

        Returns:
            The markdown table, or ``""`` when the table has no cells at all.
        """
        rows = [
            cast("list[Tag]", tr.find_all(["td", "th"]))
            for tr in cast("list[Tag]", el.find_all("tr"))
            if tr
        ]

        padded_rows = pad(rows)
        # ``pad`` drops rows without cells, so this also covers tables that consist
        # solely of empty ``<tr>`` elements (previously an IndexError below).
        if not padded_rows:
            return ""

        converted = [[self.convert(str(cell)) for cell in row] for row in padded_rows]

        # Inspect the row that actually ends up first in ``converted``; an empty
        # leading ``<tr>`` must not turn a plain data row into a header.
        first_row = next(row for row in rows if row)
        has_header = all(cell.name == "th" for cell in first_row)
        if has_header:
            return tabulate(converted[1:], headers=converted[0], tablefmt="pipe")

        return tabulate(converted, headers=[""] * len(converted[0]), tablefmt="pipe")

    def convert_th(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
        """Return the header cell's text normalized for use in a pipe-table cell."""
        return _normalize_table_cell_text(text)

    def convert_tr(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
        """Return the text unchanged; this is a No-Op for the <tr> tag."""
        return text

    def convert_td(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
        """Return the data cell's text normalized for use in a pipe-table cell."""
        return _normalize_table_cell_text(text)

    def convert_thead(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
        """Return the text unchanged; this is a No-Op for the <thead> tag."""
        return text

    def convert_tbody(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
        """Return the text unchanged; this is a No-Op for the <tbody> tag."""
        return text

    ParentTags = list[str] | set[str]

    @staticmethod
    def _normalize_parent_tags(
        parent_tags: "TableConverter.ParentTags | bool",
    ) -> "TableConverter.ParentTags":
        """Return ``parent_tags`` if it is a list or set, otherwise an empty set."""
        # markdownify 1.x passes set[str]; older versions passed bool (convert_as_inline)
        return parent_tags if isinstance(parent_tags, list | set) else set()

    def convert_ol(
        self, el: BeautifulSoup, text: str, parent_tags: "TableConverter.ParentTags | bool"
    ) -> str:
        """Convert an ordered list; inside a ``<td>`` it becomes ``<br>``-joined lines.

        Inside a cell every line of ``text`` is numbered, starting at the list's
        ``start`` attribute (1 when it is absent or not an integer). Blank lines
        are rendered as the bare number.
        """
        tags = self._normalize_parent_tags(parent_tags)
        if "td" in tags:
            lines = text.splitlines()
            if not lines:
                return ""
            try:
                start = int(el.get("start") or 1)
            except (TypeError, ValueError):
                # A malformed ``start`` attribute must not abort the whole export.
                start = 1
            numbered = [
                f"{start + i}. {item}".rstrip() if item.strip() else str(start + i)
                for i, item in enumerate(lines)
            ]
            return "<br>".join(numbered)
        return super().convert_ol(el, text, tags)

    def convert_li(
        self, el: BeautifulSoup, text: str, parent_tags: "TableConverter.ParentTags | bool"
    ) -> str:
        """Convert a list item; inside a ``<td>`` it becomes one plain line.

        The trailing ``<br/>`` that ``convert_p`` appends inside cells is removed,
        and the newline lets ``convert_ol``/``convert_ul`` split items again.
        """
        tags = self._normalize_parent_tags(parent_tags)
        if "td" in tags:
            return text.strip().removesuffix("<br/>") + "\n"
        return MarkdownConverter.convert_li(self, el, text, tags)  # type: ignore[attr-defined]

    def convert_ul(
        self, el: BeautifulSoup, text: str, parent_tags: "TableConverter.ParentTags | bool"
    ) -> str:
        """Convert an unordered list; inside a ``<td>`` it becomes ``<br>``-joined bullets.

        Blank lines are ignored. A single remaining item is returned without a
        bullet marker.
        """
        tags = self._normalize_parent_tags(parent_tags)
        if "td" in tags:
            items = [item for item in text.splitlines() if item.strip()]
            if not items:
                return ""
            if len(items) == 1:
                return items[0]
            return "- " + "<br>- ".join(items)
        return super().convert_ul(el, text, tags)

    def convert_p(
        self, el: BeautifulSoup, text: str, parent_tags: "TableConverter.ParentTags | bool"
    ) -> str:
        """Convert a paragraph; inside a ``<td>`` newlines are dropped and ``<br/>`` appended."""
        tags = self._normalize_parent_tags(parent_tags)
        md = super().convert_p(el, text, tags)
        if "td" in tags:
            md = md.replace("\n", "") + "<br/>"
        return md


================================================
FILE: confluence_markdown_exporter/utils/type_converter.py
================================================
def str_to_bool(value: str) -> bool:
    """Interpret a textual flag as a boolean.

    Surrounding whitespace and letter case are ignored. ``true``, ``1``, ``yes``
    and ``on`` map to ``True``; ``false``, ``0``, ``no`` and ``off`` map to ``False``.

    Raises:
        ValueError: If the text is not one of the recognised spellings.
    """
    spellings = {
        **dict.fromkeys(("true", "1", "yes", "on"), True),
        **dict.fromkeys(("false", "0", "no", "off"), False),
    }
    result = spellings.get(value.strip().lower())
    if result is None:
        msg = f"Invalid boolean string: '{value}'"
        raise ValueError(msg)
    return result


================================================
FILE: docs/compatibility.md
================================================
---
id: compatibility
title: Compatibility
sidebar_position: 5
---

# Compatibility

This package is not tested extensively. Please check all output and report any issue on the [issue tracker](https://github.com/Spenhouet/confluence-markdown-exporter/issues).

It has generally been tested on:

- **Confluence Cloud** 1000.0.0-b5426ab8524f (2025-05-28)
- **Confluence Server** 8.5.20

If you successfully use the exporter with a different Confluence version, feel free to open a PR adding it to this list.


================================================
FILE: docs/configuration/authentication.md
================================================
---
id: authentication
title: Authentication
sidebar_position: 3
---

# Authentication

:::note
Auth credentials use URL-keyed nested dicts (e.g. `auth.confluence["https://company.atlassian.net"]`) and cannot be mapped to flat ENV var names. Use `cme config edit auth.confluence` or `cme config set` for auth configuration.
:::

The fastest way to set credentials is the interactive menu:

```sh
cme config edit auth.confluence
cme config edit auth.jira
```

## Confluence

### auth.confluence.url

Confluence instance URL.

- Default: `""`

### auth.confluence.username

Confluence username/email.

- Default: `""`

### auth.confluence.api_token

Confluence API token.

- Default: `""`

### auth.confluence.pat

Confluence Personal Access Token.

- Default: `""`

### auth.confluence.cloud_id

Atlassian Cloud ID for the Confluence instance. When set, API calls are routed through the Atlassian API gateway (`https://api.atlassian.com/ex/confluence/{cloud_id}`), which enables the use of [scoped API tokens](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/).

For Atlassian Cloud instances (`.atlassian.net`) this is fetched and stored **automatically** on first connection. You can also set it manually. See [How to retrieve your Atlassian Cloud ID](https://support.atlassian.com/jira/kb/retrieve-my-atlassian-sites-cloud-id/).

- Default: `""`

## Jira

### auth.jira.url

Jira instance URL.

- Default: `""`

### auth.jira.username

Jira username/email.

- Default: `""`

### auth.jira.api_token

Jira API token.

- Default: `""`

### auth.jira.pat

Jira Personal Access Token.

- Default: `""`

### auth.jira.cloud_id

Atlassian Cloud ID for the Jira instance. Works identically to `auth.confluence.cloud_id` above, routing API calls through `https://api.atlassian.com/ex/jira/{cloud_id}`.

For Atlassian Cloud instances this is fetched and stored **automatically** on first connection.

- Default: `""`

## Generating API tokens

API tokens that are associated with Atlassian Cloud accounts can be generated [in your 'Account Settings'](https://id.atlassian.com/manage-profile/security/api-tokens) (in Jira/Confluence: profile picture in upper-right corner → _Account Settings_ → _Security_ → _Create and Manage API tokens_).

Scoped API tokens **require 'classic' scopes**; these scopes have been tested (giving full read-only access):

```text
read:confluence-content.all
read:account
read:confluence-content.permission
read:confluence-content.summary
read:confluence-groups
read:confluence-props
read:confluence-space.summary
read:confluence-user
read:me
readonly:content.attachment:confluence
search:confluence
```


================================================
FILE: docs/configuration/ci.md
================================================
---
id: ci
title: Running in CI
sidebar_label: CI / non-interactive
sidebar_position: 5
---

# Running in CI / non-interactive environments

The exporter automatically detects CI environments and suppresses rich terminal formatting (colors, spinner animations, progress bar redraws) so that log output is clean and readable in CI logs.

Detection is based on two standard environment variables:

| Variable     | Effect                                                                    |
| ------------ | ------------------------------------------------------------------------- |
| `CI=true`    | Disables ANSI color codes and live terminal output                        |
| `NO_COLOR=1` | Same effect (follows the [no-color.org](https://no-color.org) convention) |

Most CI platforms (GitHub Actions, GitLab CI, CircleCI, Jenkins, etc.) set `CI=true` automatically.

## Controlling log verbosity

You can control output verbosity via the `CME_EXPORT__LOG_LEVEL` env var or the [`export.log_level`](./options.md#exportlog_level) config option:

```sh
# Enable verbose debug logging for a single run (not persisted):
CME_EXPORT__LOG_LEVEL=DEBUG cme pages <page-url>

# Reduce verbosity permanently:
cme config set export.log_level=WARNING

# Or for the current session only:
CME_EXPORT__LOG_LEVEL=WARNING cme pages <page-url>
```

This makes it easy to use different log levels in different environments or in scripts.

## Tips for CI pipelines

- Use a dedicated config file via [`CME_CONFIG_PATH`](./index.md#custom-config-file-location) so CI runs don't share state with developer machines.
- Provide credentials via secrets and set them with `cme config set` at the start of the run, or use ENV var overrides for non-auth options.
- Pin the version using the version-specific installer URL; see [Installation](../installation.md#pinning-a-specific-version).


================================================
FILE: docs/configuration/index.md
================================================
---
id: index
title: Configuration
slug: /configuration/
sidebar_position: 1
---

# Configuration

All configuration and authentication is stored in a single JSON file managed by the application. You do not need to manually edit this file; use the `cme config` commands instead.

## Config commands

| Command                         | Description                                    |
| ------------------------------- | ---------------------------------------------- |
| `cme config`                    | Open the interactive configuration menu        |
| `cme config list`               | Print the full configuration as YAML           |
| `cme config get <key>`          | Print the value of a single config key         |
| `cme config set <key=value>...` | Set one or more config values                  |
| `cme config edit <key>`         | Open the interactive editor for a specific key |
| `cme config path`               | Print the path to the config file              |
| `cme config reset`              | Reset all configuration to defaults            |

### Interactive menu

```sh
cme config
```

Opens a full interactive menu where you can:

- See all config options and their current values
- Select any option to change it (including authentication)
- Navigate into nested sections (e.g. `auth.confluence`)
- Reset all config to defaults

### List current configuration

```sh
cme config list           # YAML (default)
cme config list -o json   # JSON
```

Prints the entire current configuration. Output format defaults to YAML; use `-o json` for JSON.

### Get a single value

```sh
cme config get export.log_level
cme config get connection_config.max_workers
```

Prints the current value of the specified key. Nested sections are printed as YAML.

### Set values

```sh
cme config set export.log_level=DEBUG
cme config set export.output_path=/tmp/export
cme config set export.skip_unchanged=false
```

Sets one or more `key=value` pairs directly. Values are parsed as JSON where possible (so `true`, `false`, and numbers work as expected), falling back to a plain string.

:::note
For auth keys that contain a URL (e.g. `auth.confluence.https://...`), use `cme config edit auth.confluence` instead, which handles URL-based keys correctly.
:::

### Edit a specific key interactively

```sh
cme config edit auth.confluence
cme config edit export.log_level
```

Opens the interactive editor directly at the specified config section, skipping the top-level menu.

### Show config file path

```sh
cme config path
```

Prints the absolute path to the configuration file. Useful when `CME_CONFIG_PATH` is set or when locating the file for backup/inspection.

### Reset to defaults

```sh
cme config reset
cme config reset --yes   # skip confirmation
```

Resets the entire configuration to factory defaults after confirmation.

## ENV var overrides

All options can be set via the config file (using `cme config set`) or overridden for the current session via environment variables.

ENV vars **take precedence** over stored config and are **not** persisted. ENV var names use the `CME_` prefix and `__` (double underscore) as the nested delimiter, matching the key in uppercase. Example: `export.log_level` → `CME_EXPORT__LOG_LEVEL`.

:::note
Auth credentials use URL-keyed nested dicts (e.g. `auth.confluence["https://company.atlassian.net"]`) and cannot be mapped to flat ENV var names. Use `cme config edit auth.confluence` or `cme config set` for auth configuration.
:::

## Custom config file location

By default, configuration is stored in a platform-specific application directory. You can override the config file location by setting the `CME_CONFIG_PATH` environment variable to the desired file path:

```sh
export CME_CONFIG_PATH=/path/to/your/custom_config.json
```

If set, the application will read and write config from this file instead.

## Next

- [Full option reference →](./options.md)
- [Authentication →](./authentication.md)
- [Target-system presets (Obsidian, ADO, …) →](./target-systems.md)
- [Running in CI / non-interactive environments →](./ci.md)


================================================
FILE: docs/configuration/options.md
================================================
---
id: options
title: Configuration options
sidebar_label: Options reference
sidebar_position: 2
---

# Configuration options

Reference for every supported option. All options can be set via `cme config set <key>=<value>` or overridden per-session through the listed environment variable.

## export.\*

### export.log_level

Controls output verbosity: `DEBUG` (every step), `INFO` (key milestones), `WARNING` (warnings/errors only), `ERROR` (errors only).

- Default: `INFO`
- ENV Var: `CME_EXPORT__LOG_LEVEL`

### export.output_path

The directory where all exported files and folders will be written. Used as the base for relative and absolute links.

- Default: `./` (current working directory)
- ENV Var: `CME_EXPORT__OUTPUT_PATH`

### export.page_href

How to generate links to pages in Markdown. Options: `relative` (default), `absolute`, or `wiki`.

- Default: `relative`
- ENV Var: `CME_EXPORT__PAGE_HREF`

| Value      | Output                                 |
| ---------- | -------------------------------------- |
| `relative` | `[Page Title](../path/to/page.md)`     |
| `absolute` | `[Page Title](/space/path/to/page.md)` |
| `wiki`     | `[[Page Title]]`                       |

### export.page_path

Path template for exported pages.

- Default: `{space_name}/{homepage_title}/{ancestor_titles}/{page_title}.md`
- ENV Var: `CME_EXPORT__PAGE_PATH`

### export.attachment_href

How to generate links to attachments in Markdown. Options: `relative` (default), `absolute`, or `wiki`.

- Default: `relative`
- ENV Var: `CME_EXPORT__ATTACHMENT_HREF`

| Value      | Output                                                                             |
| ---------- | ---------------------------------------------------------------------------------- |
| `relative` | `[file.pdf](../path/to/file.pdf)` / `![alt](../path/to/image.png)`                 |
| `absolute` | `[file.pdf](/space/attachments/file.pdf)` / `![alt](/space/attachments/image.png)` |
| `wiki`     | `[[file.pdf\|File Title]]` / `![[image.png]]`                                      |

### export.attachment_path

Path template for attachments.

- Default: `{space_name}/attachments/{attachment_file_id}{attachment_extension}`
- ENV Var: `CME_EXPORT__ATTACHMENT_PATH`

On Confluence Data Center / Server, where the API does not provide `fileId`, `{attachment_file_id}` falls back to the content id, so the default template still produces unique filenames.

### export.attachments_export

Which attachments to download to disk.

| Value        | Behaviour                                                                                                                                                                     |
| ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `referenced` | Only attachments whose ID/filename appears in the page body (default).                                                                                                        |
| `all`        | Every attachment on the page. Large or numerous attachments increase export time.                                                                                             |
| `disabled`   | Skip downloads entirely: no files written, no lockfile entries, no lookup. Body image and file links still point at `attachment_path`, but the files will not exist locally. |

- Default: `referenced`
- ENV Var: `CME_EXPORT__ATTACHMENTS_EXPORT`

### export.image_captions

Whether to export Confluence image captions in the exported Markdown. When enabled, the storage format of each page is fetched (via an additional API body expansion) and `ac:image` captions are extracted and rendered as an italic line directly below the image:

```markdown
![](image.png)
_Caption text_
```

When disabled, no caption is added.

- Default: `False`
- ENV Var: `CME_EXPORT__IMAGE_CAPTIONS`

### export.page_breadcrumbs

Whether to include breadcrumb links at the top of the page.

- Default: `True`
- ENV Var: `CME_EXPORT__PAGE_BREADCRUMBS`

### export.page_properties_format

Controls how Confluence Page Properties macros (key-value tables) are rendered. Duplicate property keys are automatically disambiguated by appending a counter (e.g. `status`, `status_2`, `status_3`).

| Value                   | Description                                                                                                                                  |
| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- |
| `frontmatter`           | Extract to YAML front matter; table is removed from the page body                                                                            |
| `table`                 | Keep as a regular markdown table; no metadata is written                                                                                     |
| `frontmatter_and_table` | Write to YAML front matter **and** keep the original table in the body (default)                                                             |
| `dataview-inline-field` | Replace the table with [Dataview](https://blacksmithgu.github.io/obsidian-dataview/) `Key:: Value` inline fields                             |
| `meta-bind-view-fields` | Write YAML front matter and a table using [Meta Bind](https://www.moritzjung.dev/obsidian-meta-bind-plugin-docs/) `VIEW[{key}][text]` fields |

:::info Migration
The legacy `page_properties_as_front_matter=true/false` is still accepted and maps to `frontmatter` / `table` respectively.
:::

- Default: `frontmatter_and_table`
- ENV Var: `CME_EXPORT__PAGE_PROPERTIES_FORMAT`

### export.page_properties_report_format

Controls how Confluence Page Properties Report macros (dynamic cross-page property tables) are rendered.

| Value      | Description                                                                                                                                                                                                                                                                                                |
| ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `frozen`   | Export the rendered table as a static markdown table snapshot (default)                                                                                                                                                                                                                                    |
| `dataview` | Translate the CQL query to an [Obsidian Dataview](https://blacksmithgu.github.io/obsidian-dataview/) DQL code block; requires the Dataview plugin and all referenced child pages to be exported with their page properties as front matter; falls back to a frozen table if the query cannot be translated |

- Default: `frozen`
- ENV Var: `CME_EXPORT__PAGE_PROPERTIES_REPORT_FORMAT`

### export.confluence_url_in_frontmatter

Whether to include the original Confluence page URL in the YAML front matter of the exported file.

| Value    | Description                                                                                               |
| -------- | --------------------------------------------------------------------------------------------------------- |
| `none`   | Do not include any URL (default)                                                                          |
| `webui`  | Include `confluence_webui_url` (human-readable URL; may change when the page is renamed or moved)         |
| `tinyui` | Include `confluence_tinyui_url` (stable short permalink based on the page ID; survives renames and moves) |
| `both`   | Include both fields                                                                                       |

If a Page Properties macro on the page already defines `confluence_webui_url` or `confluence_tinyui_url`, the value from the macro takes precedence over the URL extracted from the API.

- Default: `none`
- ENV Var: `CME_EXPORT__CONFLUENCE_URL_IN_FRONTMATTER`

### export.page_metadata_in_frontmatter

Add eight Confluence page metadata fields to the YAML front matter of each exported page.

| Field | Source |
| ----- | ------ |
| `confluence_page_id` | Page ID (string) |
| `confluence_space_key` | Space key |
| `confluence_type` | Content type (`page` or `blogpost`) |
| `confluence_created` | ISO 8601 timestamp of when the page was first created (`history.createdDate`) |
| `confluence_created_by` | Display name of the original author (`history.createdBy.displayName`) |
| `confluence_last_modified` | ISO 8601 timestamp of the most recent version (`version.when`), including minor edits |
| `confluence_last_modified_by` | Display name of the last editor |
| `confluence_version` | Version number (integer) |

Fields with empty or zero values are omitted. If a Page Properties macro on the page already defines a key with the same name, the macro value takes precedence.

`confluence_page_id` is intentionally written as a quoted string (e.g. `'629839369'`) rather than an integer. Confluence Cloud page IDs can exceed JavaScript's safe-integer range (`2^53 − 1`), so JS-based static site generators (Hugo, Astro, …) parsing the front matter would silently truncate them. `confluence_created` and `confluence_last_modified` are also quoted because PyYAML wraps ISO-8601 timestamps with timezone offsets to prevent loaders from coercing the value into a `datetime` object.

Example front matter with both `confluence_url_in_frontmatter: webui` and `page_metadata_in_frontmatter: true`:

```yaml
---
tags:
  - team-foo
confluence_webui_url: https://.../wiki/spaces/.../pages/123/Title
confluence_page_id: '123'
confluence_space_key: TEAM
confluence_type: page
confluence_created: "2024-08-15T08:34:12.000+02:00"
confluence_created_by: Sam Creator
confluence_last_modified: "2026-04-12T10:34:00.000+02:00"
confluence_last_modified_by: Alex Johnson
confluence_version: 7
---
```

- Default: `False`
- ENV Var: `CME_EXPORT__PAGE_METADATA_IN_FRONTMATTER`

### export.filename_encoding

Character mapping for filename encoding.

- Default: Default mappings for forbidden characters.
- ENV Var: `CME_EXPORT__FILENAME_ENCODING`

### export.filename_length

Maximum length of filenames.

- Default: `255`
- ENV Var: `CME_EXPORT__FILENAME_LENGTH`

### export.filename_lowercase

Make all exported paths and filenames lowercase. By default the original casing from Confluence is retained.

- Default: `False`
- ENV Var: `CME_EXPORT__FILENAME_LOWERCASE`

### export.include_document_title

Whether to include the document title in the exported markdown file. If enabled, the title will be added as a top-level heading.

- Default: `True`
- ENV Var: `CME_EXPORT__INCLUDE_DOCUMENT_TITLE`

### export.include_toc

Whether to export the Confluence Table of Contents macro. When enabled, the TOC is converted to markdown. When disabled, the TOC macro is removed from the output.

- Default: `True`
- ENV Var: `CME_EXPORT__INCLUDE_TOC`

### export.include_macro

Controls how Confluence `include` and `excerpt-include` macros are rendered. The `include` macro embeds the full content of another page; `excerpt-include` embeds a named excerpt from another page.

| Value          | Behaviour                                                                                                                                                                     |
| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `inline`       | Expand the referenced page content inline at the point of inclusion (default). The body already rendered by Confluence is used, so no extra API calls are required.           |
| `transclusion` | Emit an Obsidian-style `![[Page Title]]` embed link. Obsidian renders the link as an inline preview of the target note. The referenced page must also be exported to resolve. |

- Default: `inline`
- ENV Var: `CME_EXPORT__INCLUDE_MACRO`

### export.enable_jira_enrichment

Fetch Jira issue data to enrich Confluence pages. When enabled, Jira issue links include the issue summary. Requires Jira auth to be configured.

- Default: `True`
- ENV Var: `CME_EXPORT__ENABLE_JIRA_ENRICHMENT`

### export.comments_export

Which comments to export to a sidecar `.comments.md` file placed next to the exported page file, using the same path stem.

| Value      | Behaviour                                                                                |
| ---------- | ---------------------------------------------------------------------------------------- |
| `none`     | No sidecar (default).                                                                    |
| `inline`   | Open inline comments only (annotated text as blockquote, then author / date / body).     |
| `footer`   | Open page-level (footer) comments only.                                                  |
| `all`      | Both, in a single sidecar with `## Inline comments` first, then `## Page comments`.      |

Only open comments are included; resolved comments are skipped. Replies are listed flat below their parent comment. Disabled by default; enabling adds one to two extra API calls per page.

Sidecar example for `comments_export = "all"`:

```markdown
---
confluence_page_id: '123'
confluence_page_title: "Example Page"
confluence_webui_url: "https://example.atlassian.net/wiki/spaces/TEAM/pages/123"
---

## Inline comments

### marked excerpt
> marked excerpt

**Alice** · 2026-04-01

Looks good to me.

## Page comments

### Discussion about the rollout

**Bob** · 2026-04-02

Are we shipping this Friday?
```

The legacy boolean key `inline_comments` is migrated automatically: `true` becomes `"inline"`, `false` becomes `"none"`.

- Default: `none`
- ENV Var: `CME_EXPORT__COMMENTS_EXPORT`

### export.convert_status_badges

Whether to convert Confluence status badge macros to HTML `<mark>` elements coloured with the badge's background colour. Each lozenge variant maps to an Atlassian design-system pastel:

| Lozenge        | Colour          | Hex       |
| -------------- | --------------- | --------- |
| Gray (default) | Gray            | `#dfe1e6` |
| Blue           | Blue            | `#cce0ff` |
| Green          | Green           | `#baf3db` |
| Yellow         | Yellow / Orange | `#f8e6a0` |
| Red            | Red             | `#ffd5d2` |
| Purple         | Purple / Violet | `#dfd8fd` |

When disabled, only the badge label text is kept.

- Default: `True`
- ENV Var: `CME_EXPORT__CONVERT_STATUS_BADGES`

### export.convert_text_highlights

Whether to convert Confluence text highlights (`<span style="background-color: rgb(...);">`) to HTML `<mark>` elements with a hex color value. When disabled, the highlight span is stripped and only the plain text is kept.

- Default: `True`
- ENV Var: `CME_EXPORT__CONVERT_TEXT_HIGHLIGHTS`

### export.convert_font_colors

Whether to convert Confluence font colors to HTML `<font>` elements with a hex color value. Handles both inline-style spans (`<span style="color: rgb(...);">`) and CSS-class-based spans (`<span data-colorid="...">`) used in the Confluence export view. When disabled, the color span is stripped and only the plain text is kept.

- Default: `True`
- ENV Var: `CME_EXPORT__CONVERT_FONT_COLORS`

### export.skip_unchanged

Skip exporting pages that have not changed since last export. Uses a lockfile to track page versions.

- Default: `True`
- ENV Var: `CME_EXPORT__SKIP_UNCHANGED`

### export.cleanup_stale

After export, delete local files for pages removed from Confluence or whose export path has changed.

- Default: `True`
- ENV Var: `CME_EXPORT__CLEANUP_STALE`

### export.lockfile_name

Name of the lock file used to track exported pages.

- Default: `confluence-lock.json`
- ENV Var: `CME_EXPORT__LOCKFILE_NAME`

### export.existence_check_batch_size

Number of page IDs per batch when checking page existence during cleanup. Capped at 25 for self-hosted (CQL).

- Default: `250`
- ENV Var: `CME_EXPORT__EXISTENCE_CHECK_BATCH_SIZE`

## connection_config.\*

### connection_config.backoff_and_retry

Enable or disable automatic retry with exponential backoff on network errors.

- Default: `True`
- ENV Var: `CME_CONNECTION_CONFIG__BACKOFF_AND_RETRY`

### connection_config.backoff_factor

Multiplier for exponential backoff between retries. For example, `2` means each retry waits twice as long as the previous.

- Default: `2`
- ENV Var: `CME_CONNECTION_CONFIG__BACKOFF_FACTOR`

### connection_config.max_backoff_seconds

Maximum seconds to wait between retries.

- Default: `60`
- ENV Var: `CME_CONNECTION_CONFIG__MAX_BACKOFF_SECONDS`

### connection_config.max_backoff_retries

Maximum number of retry attempts before giving up.

- Default: `5`
- ENV Var: `CME_CONNECTION_CONFIG__MAX_BACKOFF_RETRIES`

### connection_config.retry_status_codes

HTTP status codes that trigger a retry.

- Default: `[413, 429, 502, 503, 504]`
- ENV Var: `CME_CONNECTION_CONFIG__RETRY_STATUS_CODES`

### connection_config.timeout

Timeout in seconds for API requests. Prevents hanging on slow or unresponsive servers.

- Default: `30`
- ENV Var: `CME_CONNECTION_CONFIG__TIMEOUT`

### connection_config.verify_ssl

Whether to verify SSL certificates for HTTPS requests. Set to `False` only if you are sure about the security of your connection.

- Default: `True`
- ENV Var: `CME_CONNECTION_CONFIG__VERIFY_SSL`

### connection_config.use_v2_api

Enable Confluence REST API v2 endpoints. Supported on Atlassian Cloud and Data Center 8+. Disable for self-hosted Server instances.

- Default: `False`
- ENV Var: `CME_CONNECTION_CONFIG__USE_V2_API`

### connection_config.max_workers

Maximum number of parallel workers for page export. Set to `1` for serial/debug mode. Higher values improve performance but may hit API rate limits.

- Default: `20`
- ENV Var: `CME_CONNECTION_CONFIG__MAX_WORKERS`


================================================
FILE: docs/contributing.md
================================================
---
id: contributing
title: Contributing
sidebar_position: 7
---

# Contributing

If you would like to contribute to `confluence-markdown-exporter`, please read the [contribution guideline](https://github.com/Spenhouet/confluence-markdown-exporter/blob/main/CONTRIBUTING.md) in the repository.

## Reporting issues

Use the [GitHub issue tracker](https://github.com/Spenhouet/confluence-markdown-exporter/issues). When reporting, include:

1. Your Confluence flavour and version (Cloud, Server, Data Center)
2. The exact command you ran
3. The full output with `cme config set export.log_level=DEBUG` enabled
4. A minimal page (if possible) reproducing the issue

## Docs site

The documentation site is built with [Docusaurus](https://docusaurus.io/) and deployed to GitHub Pages.

- Sources live under `docs/` in the repository as plain Markdown / MDX.
- Local preview: `npm ci && npm start` (serves `http://localhost:3000/confluence-markdown-exporter/`).
- Production build with all versions: `npm run build:versioned` then `npm run serve`.

### Versioning

Versioning is **driven by git release tags**. There are no `versioned_docs/` folders committed to the repo. At build time, `scripts/build-versions.mjs`:

1. Lists git tags matching `^\d+\.\d+\.\d+$` (the project's release pattern).
2. Filters to tags whose tree already contains Docusaurus docs (`docs/intro.md`) and a `sidebars.ts`.
3. Snapshots each eligible tag into `versioned_docs/version-<tag>/` by checking out that tag's docs and running `docusaurus docs:version`.
4. Builds with the newest tag set as the default version; HEAD becomes the `Next 🚧` (unreleased) version.

That means: cutting a new release tag automatically produces a new docs version on the next site build. Old versions cannot be edited after-the-fact; they are sourced directly from their git tag.

## License

This tool is an open source project released under the [MIT License](https://github.com/Spenhouet/confluence-markdown-exporter/blob/main/LICENSE).


================================================
FILE: docs/docker.md
================================================
---
id: docker
title: Docker
sidebar_position: 5
---

# Docker

Prebuilt images are published to Docker Hub at [`spenhouet/confluence-markdown-exporter`](https://hub.docker.com/r/spenhouet/confluence-markdown-exporter).

The Docker image is intended for **non-interactive / CI use**: you supply a pre-defined config (either as a mounted JSON file or as environment variables), and the container runs a single export command and exits.

:::note
The interactive `cme config` menu is **not** supported in this mode. Edit the JSON config file directly or change the env vars instead.
:::

## Available tags

- `latest`: the most recent release
- `<version>` (e.g. `5.1.0`): pinned release version
- `<major>` / `<major>.<minor>` (e.g. `5`, `5.1`): rolling tags following the latest release within that range

## Quick start

```bash
docker pull spenhouet/confluence-markdown-exporter:latest
docker run --rm spenhouet/confluence-markdown-exporter --help
```

The image pins `export.output_path` to `/data/output` (via the `CME_EXPORT__OUTPUT_PATH` env var baked into the image), overriding whatever value the mounted config file has. Bind-mount your host export directory there and exported files appear in it.

## Providing configuration

The image reads its config from `/data/config/app_data.json` (set via `CME_CONFIG_PATH`). Generate this file once on a workstation by running `cme config` locally, then check it in to your CI repository or your secret store and mount it into the container, using the same pattern as a Kubernetes ConfigMap volume:

```bash
docker run --rm \
  -v "$PWD/app_data.json:/data/config/app_data.json:ro" \
  -v "$PWD/output:/data/output" \
  spenhouet/confluence-markdown-exporter \
  pages <page-url>
```

The mounted file must be readable by UID `1000` (the non-root `cme` user inside the image). For a config file managed in a CI runner this is usually already the case; if not, `chmod 644 app_data.json` is enough.

In Docker Compose, the [`configs:`](https://docs.docker.com/reference/compose-file/configs/) top-level key expresses the same mount declaratively:

```yaml
services:
  cme:
    image: spenhouet/confluence-markdown-exporter:latest
    command: ["pages", "<page-url>"]
    configs:
      - source: cme_config
        target: /data/config/app_data.json
    volumes:
      - ./output:/data/output

configs:
  cme_config:
    file: ./app_data.json
```

## Overriding scalar settings via environment variables

Scalar settings can be overridden at runtime with environment variables using the `CME_` prefix and `__` as the nested delimiter:

```bash
docker run --rm \
  -e CME_EXPORT__LOG_LEVEL=DEBUG \
  -e CME_CONNECTION_CONFIG__MAX_WORKERS=5 \
  -v "$PWD/app_data.json:/data/config/app_data.json:ro" \
  -v "$PWD/output:/data/output" \
  spenhouet/confluence-markdown-exporter \
  pages <page-url>
```

See the full [options reference](./configuration/options.md) for every supported `CME_*` env var.

## Auth credentials in environment variables

:::warning
The `auth.confluence` and `auth.jira` settings are dicts keyed by the instance base URL. That URL key cannot be expressed inside an environment variable name.
:::

If you must inject auth credentials via env vars (e.g. to keep secrets out of the JSON file), supply the whole sub-dict as a single JSON-encoded value:

```bash
docker run --rm \
  -v "$PWD/app_data.json:/data/config/app_data.json:ro" \
  -e CME_AUTH__CONFLUENCE="{\"https://company.atlassian.net\":{\"username\":\"$CONFLUENCE_USER\",\"api_token\":\"$CONFLUENCE_API_TOKEN\"}}" \
  -v "$PWD/output:/data/output" \
  spenhouet/confluence-markdown-exporter \
  pages <page-url>
```

For most CI setups it is simpler to template the JSON file from the CI secret store before running the container.

## See also

- [Authentication](./configuration/authentication.md): full credential setup and scoped-token notes
- [CI / non-interactive](./configuration/ci.md): `CI=true`, `NO_COLOR`, log-level control
- [Installation](./installation.md): pip / uv / curl / PowerShell installers


================================================
FILE: docs/features.md
================================================
---
id: features
title: Features
sidebar_position: 3
---

# Features

Exports individual pages, pages with descendants, or entire spaces via the Atlassian API. Skips unchanged pages by default, re-exporting only what has changed since the last run.

## Supported Confluence features

### Content & formatting

- **Rich text**: headings, paragraphs, bold, italic, underline, lists, tables, links, images, attachments, and image captions
- **Code blocks**: language-aware fenced code blocks
- **Task lists**: checkboxes with completion state
- **Text highlights & font colours**: preserved with inline HTML colour styling
- **Status badges**: converted to coloured inline highlights
- **Info / note / tip / warning panels**: converted to Markdown alert blocks (`[!NOTE]`, `[!TIP]`, …)
- **Comments**: open inline and/or page-level (footer) comments exported as sidecar files next to each page
- **Include / excerpt-include macros**: embedded pages either inlined or exported as Obsidian transclusion links (`![[Page Title]]`)

### Page metadata

- **Page properties**: Page Properties macro exported as YAML front matter, [Dataview](https://blacksmithgu.github.io/obsidian-dataview/) inline fields, or [Meta Bind](https://www.moritzjung.dev/obsidian-meta-bind-plugin-docs/) VIEW fields; duplicate keys are disambiguated automatically (configurable via [`export.page_properties_format`](./configuration/options.md#exportpage_properties_format))
- **Page Properties Report**: dynamic cross-page property tables exported as a static snapshot or a live [Dataview](https://blacksmithgu.github.io/obsidian-dataview/) DQL query (configurable via [`export.page_properties_report_format`](./configuration/options.md#exportpage_properties_report_format))
- **Page labels**: exported as `tags` in YAML front matter

### Diagrams & add-ons

- **[draw.io](https://marketplace.atlassian.com/apps/1210933/draw-io-diagrams-uml-bpmn-aws-erd-flowcharts)**: diagram files saved as attachments; embedded Mermaid diagrams extracted as fenced Mermaid blocks
- **[PlantUML](https://marketplace.atlassian.com/apps/1222993/flowchart-plantuml-diagrams-for-confluence)**: exported as fenced PlantUML code blocks
- **[Markdown Extensions](https://marketplace.atlassian.com/apps/1215703/markdown-extensions-for-confluence)**: pass-through of raw Markdown macro content


================================================
FILE: docs/installation.md
================================================
---
id: installation
title: Installation
sidebar_position: 1
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import { VerifyTabs } from '@site/src/components/quickstart';

# Installation

Pick the install method that fits your environment. All methods produce the same `cme` / `confluence-markdown-exporter` CLI.

<Tabs groupId="install-method" queryString>

<TabItem value="linux" label="Linux">

```bash
curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh
```

Uses [uv](https://docs.astral.sh/uv/) under the hood to create an isolated, self-updating environment. No need to manage a virtualenv yourself.

</TabItem>

<TabItem value="macos" label="macOS">

```bash
curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh
```

Uses [uv](https://docs.astral.sh/uv/) under the hood to create an isolated, self-updating environment. No need to manage a virtualenv yourself.

</TabItem>

<TabItem value="windows" label="Windows">

```powershell
powershell -ExecutionPolicy ByPass -c "irm https://uvx.sh/confluence-markdown-exporter/install.ps1 | iex"
```

Uses [uv](https://docs.astral.sh/uv/) under the hood. Run from PowerShell.

</TabItem>

<TabItem value="pip" label="pip">

```bash
pip install confluence-markdown-exporter
```

Installs from PyPI into the active Python environment. Requires Python ≥ 3.10. If you don't already have a project virtualenv, prefer the **uv** or **Linux/macOS/Windows installer** tabs; they isolate the tool for you.

</TabItem>

<TabItem value="uv" label="uv">

```bash
# Install as an isolated, self-managed tool
uv tool install confluence-markdown-exporter

# …or run it once without installing
uvx confluence-markdown-exporter --help
```

[`uv tool install`](https://docs.astral.sh/uv/concepts/tools/) puts the CLI on your PATH inside its own isolated environment. [`uvx`](https://docs.astral.sh/uv/guides/tools/) runs it ephemerally; handy for one-off exports or CI.

</TabItem>

<TabItem value="docker" label="Docker">

```bash
docker pull spenhouet/confluence-markdown-exporter:latest
docker run --rm spenhouet/confluence-markdown-exporter --help
```

The Docker image is intended for **non-interactive / CI use**: you supply a pre-defined config (mounted JSON file or env vars) and the container runs a single export command and exits. The interactive `cme config` menu is not available inside the container. Full setup (mounted config, Compose example, env-var auth) is on the [Docker page](./docker.md).

</TabItem>

</Tabs>

## Pinning a specific version

<Tabs groupId="install-method" queryString>

<TabItem value="linux" label="Linux">

```bash
curl -LsSf uvx.sh/confluence-markdown-exporter/5.1.1/install.sh | sh
```

</TabItem>

<TabItem value="macos" label="macOS">

```bash
curl -LsSf uvx.sh/confluence-markdown-exporter/5.1.1/install.sh | sh
```

</TabItem>

<TabItem value="windows" label="Windows">

```powershell
powershell -ExecutionPolicy ByPass -c "irm https://uvx.sh/confluence-markdown-exporter/5.1.1/install.ps1 | iex"
```

</TabItem>

<TabItem value="pip" label="pip">

```bash
pip install confluence-markdown-exporter==5.1.1
```

</TabItem>

<TabItem value="uv" label="uv">

```bash
uv tool install confluence-markdown-exporter==5.1.1
```

</TabItem>

<TabItem value="docker" label="Docker">

```bash
docker pull spenhouet/confluence-markdown-exporter:5.1.1
```

Pinned tags are kept available indefinitely; rolling tags (`latest`, `<major>`, `<major>.<minor>`) advance with each release. See [Docker → Available tags](./docker.md#available-tags).

</TabItem>

</Tabs>

## Verify the install

<VerifyTabs />

You should see the top-level commands: `pages`, `pages-with-descendants`, `spaces`, `orgs`, and `config`.

## Next steps

- [Authenticate and configure your first export →](./configuration/index.md#interactive-menu) (local install)
- [Export pages or whole spaces →](./usage.md) (local install)
- [Docker page](./docker.md): non-interactive setup (mounted config + env vars)


================================================
FILE: docs/intro.md
================================================
---
id: intro
title: Introduction
sidebar_position: 1
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import { AuthenticateTabs, ExportTabs } from '@site/src/components/quickstart';
import Logo from '@site/static/img/logo.png';

<div style={{textAlign: 'center', padding: '1rem 0 2rem'}}>
  <img src={Logo} alt="confluence-markdown-exporter" style={{maxWidth: '420px', width: '100%'}} />
</div>

> Export Confluence pages to Markdown for Obsidian, Gollum, Azure DevOps, Foam, Dendron and any other Markdown-based platform.

Exports individual pages, pages with descendants, or entire Confluence spaces via the Atlassian API into clean Markdown. Skips unchanged pages by default, re-exporting only what has changed since the last run.

## What's in these docs

- **[Installation](./installation.md)**: install and update the CLI in one command
- **[Usage](./usage.md)**: export pages, descendants, spaces, or organisations
- **[Features](./features.md)**: supported Confluence content, macros, and add-ons
- **[Configuration](./configuration/index.md)**: every option with defaults and ENV vars
- **[Target systems](./configuration/target-systems.md)**: Obsidian, Azure DevOps, …
- **[Troubleshooting](./troubleshooting.md)**: known issues and how to report

## Get started in 60 seconds

### 1. Install

<Tabs groupId="install-method" queryString>

<TabItem value="linux" label="Linux">

```bash
curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh
```

</TabItem>

<TabItem value="macos" label="macOS">

```bash
curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh
```

</TabItem>

<TabItem value="windows" label="Windows">

```powershell
powershell -ExecutionPolicy ByPass -c "irm https://uvx.sh/confluence-markdown-exporter/install.ps1 | iex"
```

</TabItem>

<TabItem value="pip" label="pip">

```bash
pip install confluence-markdown-exporter
```

</TabItem>

<TabItem value="uv" label="uv">

```bash
uv tool install confluence-markdown-exporter
# or, one-shot run without installing:
uvx confluence-markdown-exporter --help
```

</TabItem>

<TabItem value="docker" label="Docker">

```bash
docker pull spenhouet/confluence-markdown-exporter:latest
docker run --rm spenhouet/confluence-markdown-exporter --help
```

The Docker image is intended for non-interactive / CI use; see the [Docker page](./docker.md) for config-file mounts and environment variables.

</TabItem>

</Tabs>

### 2. Authenticate

<AuthenticateTabs />

### 3. Export

<ExportTabs />

Your Markdown lands in the configured `export.output_path` (current directory by default).


================================================
FILE: docs/troubleshooting.md
================================================
---
id: troubleshooting
title: Troubleshooting
sidebar_position: 6
---

# Troubleshooting

## Known issues and limitations

### Missing attachment file ID on Server

For some Confluence Server versions / configurations, the attachment file ID is not returned by the API ([#39](https://github.com/Spenhouet/confluence-markdown-exporter/issues/39)).

In that case, `{attachment_file_id}` automatically falls back to the content id, so the default [`export.attachment_path`](./configuration/options.md#exportattachment_path) template still produces unique filenames out of the box.

If you prefer human-readable filenames over numeric IDs, set `export.attachment_path` to use `{attachment_title}{attachment_extension}`, e.g.:

```sh
cme config set export.attachment_path='{space_name}/attachments/{attachment_title}{attachment_extension}'
```

### Connection issues behind proxy or VPN

There might be connection issues if your Confluence Server is behind a proxy or VPN ([#38](https://github.com/Spenhouet/confluence-markdown-exporter/issues/38)). If you run into this, contributions that help fix it are appreciated.

## Reporting bugs

Open an issue on the [GitHub issue tracker](https://github.com/Spenhouet/confluence-markdown-exporter/issues) and include:

1. Your Confluence flavour and version (Cloud, Server, Data Center)
2. The exact command you ran
3. The full output, ideally with `cme config set export.log_level=DEBUG` enabled
4. A minimal example page (if possible) reproducing the issue


================================================
FILE: docs/usage.md
================================================
---
id: usage
title: Usage
sidebar_position: 2
---

# Usage

Run the exporter with the desired Confluence page URL or space URL. Execute the console application by typing `confluence-markdown-exporter` (or its shorter alias `cme`) followed by one of the commands `pages`, `pages-with-descendants`, `spaces`, `orgs`, or `config`. Add `--help` to any command for additional information.

All export commands accept one or more URLs as space-separated arguments. Each command also has a singular alias (`page`, `page-with-descendants`, `space`, `org`) that behaves identically.

## Export pages

Export one or more Confluence pages by URL:

```sh
cme pages <page-url>
cme pages <page-url-1> <page-url-2> ...

# Singular alias (identical behaviour):
cme page <page-url>
```

Supported page URL formats:

- Confluence Cloud: `https://company.atlassian.net/wiki/spaces/SPACEKEY/pages/123456789/Page+Title`
- Confluence Cloud (API gateway): `https://api.atlassian.com/ex/confluence/CLOUDID/wiki/spaces/SPACEKEY/pages/123456789/Page+Title`
- Confluence Server (long): `https://wiki.company.com/display/SPACEKEY/Page+Title`
- Confluence Server (short): `https://wiki.company.com/SPACEKEY/Page+Title`
- Confluence Server (param): `https://wiki.company.com/pages/viewpage.action?pageId=123456789`

## Export pages with descendants

Export one or more Confluence pages and all their descendant pages by URL:

```sh
cme pages-with-descendants <page-url>
cme pages-with-descendants <page-url-1> <page-url-2> ...

# Singular alias (identical behaviour):
cme page-with-descendants <page-url>
```

## Export spaces

Export all Confluence pages of one or more spaces by URL:

```sh
cme spaces <space-url>
cme spaces <space-url-1> <space-url-2> ...

# Singular alias (identical behaviour):
cme space <space-url>
```

Supported space URL formats:

- Confluence Cloud: `https://company.atlassian.net/wiki/spaces/SPACEKEY`
- Confluence Cloud (API gateway): `https://api.atlassian.com/ex/confluence/CLOUDID/wiki/spaces/SPACEKEY`
- Confluence Server (long): `https://wiki.company.com/display/SPACEKEY`
- Confluence Server (short): `https://wiki.company.com/SPACEKEY`

## Export all spaces of an organization

Export all Confluence pages across all spaces of one or more organizations by URL:

```sh
cme orgs <base-url>
cme orgs <base-url-1> <base-url-2> ...

# Singular alias (identical behaviour):
cme org <base-url>
```

## Output layout

The exported Markdown file(s) will be saved in the configured output directory (see [`export.output_path`](./configuration/options.md#exportoutput_path)), for example:

```text
output_path/
└── MYSPACE/
   ├── MYSPACE.md
   └── MYSPACE/
      ├── My Confluence Page.md
      └── My Confluence Page/
         ├── My nested Confluence Page.md
         └── Another one.md
```


================================================
FILE: docusaurus.config.ts
================================================
import { themes as prismThemes } from "prism-react-renderer";
import type { Config } from "@docusaurus/types";
import type * as Preset from "@docusaurus/preset-classic";

// Docusaurus site configuration for the confluence-markdown-exporter docs.
const config: Config = {
  title: "Confluence Markdown Exporter",
  tagline:
    "Export Confluence pages to Markdown for Obsidian, Gollum, Azure DevOps, Foam, Dendron and more.",
  favicon: "img/favicon.svg",

  // url + baseUrl together place the site at
  // https://spenhouet.github.io/confluence-markdown-exporter/.
  url: "https://spenhouet.github.io",
  baseUrl: "/confluence-markdown-exporter/",

  organizationName: "Spenhouet",
  projectName: "confluence-markdown-exporter",
  trailingSlash: false,

  // Broken internal links abort the build instead of being published.
  onBrokenLinks: "throw",

  i18n: {
    defaultLocale: "en",
    locales: ["en"],
  },

  presets: [
    [
      "classic",
      {
        docs: {
          sidebarPath: "./sidebars.ts",
          // Docs are mounted at the site root; the blog is disabled below.
          routeBasePath: "/",
          editUrl:
            "https://github.com/Spenhouet/confluence-markdown-exporter/edit/main/",
          showLastUpdateAuthor: true,
          showLastUpdateTime: true,
          // Versioning is driven by git tags via scripts/build-versions.mjs.
          // The script writes versioned_docs/, versioned_sidebars/, versions.json
          // at build time and exports DOCS_LAST_VERSION pointing at the newest tag.
          lastVersion: process.env.DOCS_LAST_VERSION || "current",
          versions: {
            // With DOCS_LAST_VERSION set, the working-tree docs are labelled
            // "Next 🚧", served under /next and flagged with the "unreleased"
            // banner. Without it they are the only version, served at the root
            // path with no banner.
            current: {
              label: process.env.DOCS_LAST_VERSION ? "Next 🚧" : "Current",
              path: process.env.DOCS_LAST_VERSION ? "next" : "",
              banner: process.env.DOCS_LAST_VERSION ? "unreleased" : "none",
            },
          },
        },
        blog: false,
        theme: {
          customCss: "./src/css/custom.css",
        },
        sitemap: {
          changefreq: "weekly",
          priority: 0.5,
        },
      } satisfies Preset.Options,
    ],
  ],

  themeConfig: {
    image: "img/logo.png",
    colorMode: {
      defaultMode: "dark",
      respectPrefersColorScheme: true,
    },
    // Dismissible banner asking visitors to star the GitHub repository.
    announcementBar: {
      id: "github_star",
      content:
        '⭐ If you like <strong>confluence-markdown-exporter</strong>, star it on <a target="_blank" rel="noopener noreferrer" href="https://github.com/Spenhouet/confluence-markdown-exporter">GitHub</a>!',
      backgroundColor: "var(--ifm-color-primary-darker)",
      textColor: "#ffffff",
      isCloseable: true,
    },
    navbar: {
      title: "Confluence Markdown Exporter",
      logo: {
        alt: "confluence-markdown-exporter logo",
        src: "img/favicon.svg",
      },
      items: [
        {
          type: "docSidebar",
          sidebarId: "docsSidebar",
          position: "left",
          label: "Docs",
        },
        {
          type: "docsVersionDropdown",
          position: "right",
          dropdownActiveClassDisabled: true,
        },
        {
          href: "https://pypi.org/project/confluence-markdown-exporter/",
          label: "PyPI",
          position: "right",
        },
        {
          // No label: this entry carries only a CSS class and an aria-label.
          href: "https://github.com/Spenhouet/confluence-markdown-exporter",
          position: "right",
          className: "header-github-link",
          "aria-label": "GitHub repository",
        },
      ],
    },
    footer: {
      style: "dark",
      links: [
        {
          title: "Docs",
          items: [
            { label: "Installation", to: "/installation" },
            { label: "Usage", to: "/usage" },
            { label: "Configuration", to: "/configuration/" },
            { label: "Features", to: "/features" },
          ],
        },
        {
          title: "Community",
          items: [
            {
              label: "Issues",
              href: "https://github.com/Spenhouet/confluence-markdown-exporter/issues",
            },
            {
              label: "Discussions",
              href: "https://github.com/Spenhouet/confluence-markdown-exporter/discussions",
            },
          ],
        },
        {
          title: "More",
          items: [
            { label: "Contributing", to: "/contributing" },
            {
              label: "GitHub",
              href: "https://github.com/Spenhouet/confluence-markdown-exporter",
            },
            {
              label: "PyPI",
              href: "https://pypi.org/project/confluence-markdown-exporter/",
            },
          ],
        },
      ],
      // The year is computed when this config is evaluated.
      copyright: `Copyright © ${new Date().getFullYear()} Sebastian Penhouet. Built with Docusaurus.`,
    },
    prism: {
      theme: prismThemes.github,
      darkTheme: prismThemes.dracula,
      additionalLanguages: ["bash", "powershell", "yaml", "json", "toml", "diff"],
    },
    docs: {
      sidebar: {
        hideable: true,
        autoCollapseCategories: false,
      },
    },
    tableOfContents: {
      minHeadingLevel: 2,
      maxHeadingLevel: 4,
    },
  } satisfies Preset.ThemeConfig,

  plugins: [
    [
      // Local search plugin; docsRouteBasePath mirrors the docs
      // routeBasePath ("/") configured in the preset above.
      require.resolve("@easyops-cn/docusaurus-search-local"),
      {
        hashed: true,
        indexBlog: false,
        docsRouteBasePath: "/",
        highlightSearchTermsOnTargetPage: true,
        explicitSearchResultPath: true,
      },
    ],
  ],

  markdown: {
    mermaid: false,
    hooks: {
      // Unlike onBrokenLinks above, broken Markdown links only warn.
      onBrokenMarkdownLinks: "warn",
    },
  },
};

export default config;


================================================
FILE: package.json
================================================
{
  "name": "confluence-markdown-exporter-docs",
  "version": "0.0.0",
  "private": true,
  "scripts": {
    "docusaurus": "docusaurus",
    "start": "docusaurus start",
    "build": "docusaurus build",
    "build:versioned": "node scripts/build-versions.mjs",
    "swizzle": "docusaurus swizzle",
    "deploy": "docusaurus deploy",
    "clear": "docusaurus clear",
    "serve": "docusaurus serve",
    "write-translations": "docusaurus write-translations",
    "write-heading-ids": "docusaurus write-heading-ids",
    "typecheck": "tsc"
  },
  "dependencies": {
    "@docusaurus/core": "^3.10.1",
    "@docusaurus/preset-classic": "^3.10.1",
    "@easyops-cn/docusaurus-search-local": "^0.46.1",
    "@mdx-js/react": "^3.0.0",
    "clsx": "^2.0.0",
    "prism-react-renderer": "^2.3.0",
    "react": "^18.0.0",
    "react-dom": "^18.0.0"
  },
  "devDependencies": {
    "@docusaurus/module-type-aliases": "^3.10.1",
    "@docusaurus/tsconfig": "^3.10.1",
    "@docusaurus/types": "^3.10.1",
    "typescript": "~5.5.2"
  },
  "browserslist": {
    "production": [
      ">0.5%",
      "not dead",
      "not op_mini all"
    ],
    "development": [
      "last 3 chrome version",
      "last 3 firefox version",
      "last 5 safari version"
    ]
  },
  "engines": {
    "node": ">=18.0"
  }
}


================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "confluence-markdown-exporter"
version = "5.1.1"
description = "A tool to export Confluence pages to Markdown"
keywords = ["confluence", "atlassian", "markdown", "export", "conversion", "download"]
readme = "README.md"
license = { text = "MIT" }
authors = [
    { name = "Sebastian Penhouet" }
]
requires-python = ">= 3.10"
dependencies = [
    'atlassian-python-api',
    'jmespath',
    'markdownify',
    'pydantic-settings',
    'pyyaml',
    'questionary',
    'rich',
    'tabulate',
    'typer',
    'python-dateutil',
    "lxml>=6.0.2",
]


[dependency-groups]
dev = [
    "pytest>=8.4.1",
    "ruff>=0.11.13",
]

[project.urls]
Homepage = "https://github.com/Spenhouet/confluence-markdown-exporter"
Documentation = "https://spenhouet.github.io/confluence-markdown-exporter/"
Source = "https://github.com/Spenhouet/confluence-markdown-exporter"
Tracker = "https://github.com/Spenhouet/confluence-markdown-exporter/issues"

[project.scripts]
confluence-markdown-exporter = "confluence_markdown_exporter.main:app"
cme = "confluence_markdown_exporter.main:app"

[tool.hatch.build.targets.wheel]
packages = ["confluence_markdown_exporter"]

[tool.ruff]
# Exclude a variety of commonly ignored directories. This means Ruff will not lint or format files with these names
exclude = [
  ".bzr",
  ".direnv",
  ".eggs",
  ".git",
  ".git-rewrite",
  ".hg",
  ".ipynb_checkpoints",
  ".mypy_cache",
  ".nox",
  ".pants.d",
  ".pyenv",
  ".pytest_cache",
  ".pytype",
  ".ruff_cache",
  ".svn",
  ".tox",
  ".venv",
  ".vscode",
  "__pypackages__",
  "_build",
  "buck-out",
  "build",
  "dist",
  "node_modules",
  "site-packages",
  "venv",
]

indent-width = 4 # each indent is 4 spaces, equivalent to using "tab" 
line-length = 100 # max no of characters in a line. Black default is 88 characters
target-version = "py310" # Assumes Python 3.10 and above

[tool.ruff.lint]
select = [
  "A", # flake8-builtins
  "B", # flake8-bugbear
  "D", # pydocstyle
  "E", # pycodestyle errors
  "F", # pyflakes
  "G", # flake8-logging-format
  "I", # isort
  "N", # pep8-naming
  "S", # flake8-bandit
  "W", # pycodestyle warnings
  "C4", # flake8-comprehensions
  "EM", # flake8-errmsg
  "PD", # pandas-vet
  "PL", # Pylint
  "UP", # pyupgrade - auto-upgrade syntax for current version of Python
  "ANN", # flake8-annotations
  "BLE", # flake8-blind-except
  "C90", # McCabe complexity checker
  "ERA", # eradicate - removes commented out code
  "FBT", # flake8-boolean-trap
  "FLY", # flynt
  "ICN", # flake8-import-conventions
  "LOG", # flake8-logger
  "NPY", # numpy-specific rules
  "PGH", # pygrep-hooks
  "PIE", # flake8-pie
  "RET", # flake8-return
  "RSE", # flake8-raise
  "SIM", # flake8-simplify
  "RUF", # ruff-specific rules
  "TCH", # flake8-type-checking
  "TID", # flake8-tidy-imports
  "TRY", # tryceratops
  "ASYNC", # flake8-async
  "PT", # flake8-pytest-style
  "FAST", # FastAPI,
  "T20", # flake8-print
  "ARG", # flake8-unused-arguments
  "PTH", # flake8-use-pathlib
  "PERF", # Perflint
  "FURB", # refurb
]

ignore = [
  "W191", # lint rule that may clash with Ruff Formatter: tab-indentation
  "E111", # lint rule that may clash with Ruff Formatter: indentation-with-invalid-multiple
  "E114", # lint rule that may clash with Ruff Formatter: indentation-with-invalid-multiple-comment
  "E117", # lint rule that may clash with Ruff Formatter: over-indented
  "D206", # lint rule that may clash with Ruff Formatter: indent-with-spaces
  "D300", # lint rule that may clash with Ruff Formatter: triple-single-quotes      
  "D1", # ignore this to match google docstring convention
  "G004", # ignore this to allow f-strings in logging
  "UP015", # ignore this to allow "with open" statements to have modes explicitly stated 
  "SIM102", # ignore this to avoid changing nested if statements to single if statements, potentially confusing
  "ERA001", # ignore this to keep commented out lines while functionality is not implemented (configs/logos)
  "PERF203", # ignore this as this often is intentional.
  "ARG002", # Many methods in this project share the same signature, independent of variable usage.
  "PLC0415", # Allow lazy loading of imports
]

fixable = ["ALL"] # Allow fix for all enabled rules (when using "Fix all" or when `--fix` is provided to ruff check in CLI)
unfixable = ["F401"] # disable autofix for unused-imports

dummy-variable-rgx = "^(_+\\w*)$" # Allow unused variables when underscore-prefixed
flake8-bugbear.extend-immutable-calls = [
  "fastapi.Depends",
  "fastapi.Query",
] # Allow default arguments like, e.g., `data: List[str] = fastapi.Query(None)`  
pycodestyle.max-doc-length = 100 # max line-length for docstrings
pydocstyle.convention = "google" # docstring convention. Options: "google", "numpy", or "pep257"
pylint.max-args = 10 # max no of args in a function

[tool.ruff.lint.isort]
known-first-party = ["airamed", "main"]
force-single-line = true # force each import to be in its own line

[tool.ruff.format]
docstring-code-format = true # Enable auto-formatting of code examples in docstrings. Markdown, reStructuredText code/literal blocks and doctests are all supported
docstring-code-line-length = "dynamic" # Set line length limit used when formatting code snippets in docstrings. This only has an effect when the `docstring-code-format` setting is enabled
indent-style = "space" # indent with spaces, rather than "tab"
line-ending = "lf" # options: "auto", "lf", "cr-lf", "native"
quote-style = "double" # Use double quotes as voted by majority
skip-magic-trailing-comma = false # respects magic trailing commas

# Ignore S101 (assert) in all test files
[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = [
  "S101",    # Assert in tests is expected
  "S110",    # try-except-pass detected
  "FBT001",  # Often conflicts in tests
  "PLR2004", # Magic numbers are acceptable in tests
]


================================================
FILE: scripts/build-versions.mjs
================================================
#!/usr/bin/env node
/**
 * Build the Docusaurus site with per-tag versioned docs derived from git history.
 *
 * Strategy:
 *   1. List git tags matching the project release pattern (e.g. "5.1.0").
 *   2. Keep only the tags whose tree already contains a Docusaurus-style
 *      docs/ tree and sidebars.ts (i.e. tags cut after docs migration).
 *   3. For each eligible tag (newest -> oldest):
 *        a. Copy docs/ + sidebars.ts from that tag into the working tree.
 *        b. Run `docusaurus docs:version <tag>` to snapshot it.
 *        c. Restore HEAD docs/ + sidebars.ts.
 *   4. Set DOCS_LAST_VERSION env var to the newest tag and invoke `npm run build`.
 *
 * versioned_docs/, versioned_sidebars/, and versions.json are NOT committed to
 * the repo (see .gitignore). They are regenerated on every build.
 */

import { execSync } from "node:child_process";
import { existsSync, mkdtempSync, rmSync, cpSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";

/** Print a progress message prefixed with the script's tag. */
function log(msg) {
  console.log(`[build-versions] ${msg}`);
}

/**
 * Run `cmd` synchronously and return its captured stdout as a UTF-8 string.
 *
 * stdin is ignored and stdout/stderr are piped; any key in `opts` overrides
 * the corresponding default.
 */
function sh(cmd, opts = {}) {
  const defaults = { encoding: "utf8", stdio: ["ignore", "pipe", "pipe"] };
  const merged = { ...defaults, ...opts };
  return execSync(cmd, merged);
}

/**
 * Report whether `tag` contains both sidebars.ts and docs/intro.md.
 *
 * Each path is probed with `git cat-file -e`, which exits non-zero when the
 * object does not exist; any failure is treated as "not eligible".
 */
function tagHasDocusaurusDocs(tag) {
  const requiredPaths = ["sidebars.ts", "docs/intro.md"];
  for (const path of requiredPaths) {
    try {
      execSync(`git cat-file -e ${tag}:${path}`, { stdio: "ignore" });
    } catch {
      return false;
    }
  }
  return true;
}

/**
 * Return the repository's release tags ("<major>.<minor>.<patch>" only),
 * in the order git reports them when sorted by descending version.
 */
function listTags() {
  const releasePattern = /^\d+\.\d+\.\d+$/;
  const raw = sh("git tag --sort=-version:refname").trim();
  if (raw === "") return [];

  const releases = [];
  for (const line of raw.split("\n")) {
    const name = line.trim();
    if (releasePattern.test(name)) releases.push(name);
  }
  return releases;
}

/**
 * Snapshot the docs of one release tag as a Docusaurus version.
 *
 * The tag's docs/ and sidebars.ts are extracted to a temp dir, swapped into
 * the working tree, versioned with `docusaurus docs:version <tag>`, and then
 * the HEAD copies are restored, whether or not the snapshot succeeded.
 *
 * @param {string} tag Release tag to snapshot; it is interpolated into shell
 *   commands, so callers must pass a validated tag (listTags() only yields
 *   digits and dots).
 */
function snapshotTag(tag) {
  log(`Snapshotting ${tag}`);
  const work = mkdtempSync(join(tmpdir(), `docs-${tag}-`));
  // Tracks whether the working-tree docs/ has been touched, so a failure
  // before the swap does not needlessly wipe it.
  let swapped = false;
  try {
    // Extract docs/ and sidebars.ts from the tag into a temp dir.
    // pipefail: surface a failing `git archive` instead of only tar's status.
    // The temp path is quoted because tmpdir() may contain spaces.
    sh(`set -o pipefail; git archive ${tag} docs sidebars.ts | tar -x -C "${work}"`, {
      shell: "/bin/bash",
    });
    // Swap into the working tree, then snapshot.
    swapped = true;
    rmSync("docs", { recursive: true, force: true });
    cpSync(join(work, "docs"), "docs", { recursive: true });
    cpSync(join(work, "sidebars.ts"), "sidebars.ts");
    execSync(`npx --no-install docusaurus docs:version ${tag}`, {
      stdio: "inherit",
    });
  } finally {
    try {
      // `git checkout HEAD -- docs` only rewrites tracked paths; files that
      // exist in the tag but not in HEAD would otherwise linger in docs/ and
      // end up in the final HEAD build. Drop the tag's copy first.
      if (swapped) {
        rmSync("docs", { recursive: true, force: true });
      }
      // Restore HEAD versions of docs/ and sidebars.ts.
      execSync("git checkout HEAD -- docs sidebars.ts", { stdio: "ignore" });
    } finally {
      rmSync(work, { recursive: true, force: true });
    }
  }
}

/**
 * Entry point: snapshot every eligible release tag, then build the site.
 *
 * Exits with status 1 when docs/ or sidebars.ts has uncommitted changes
 * (they are overwritten during snapshots) unless FORCE_BUILD_VERSIONS is set.
 */
function main() {
  const pending = sh("git status --porcelain -- docs sidebars.ts").trim();
  const overrideRequested = Boolean(process.env.FORCE_BUILD_VERSIONS);
  if (pending && !overrideRequested) {
    const refusal =
      "[build-versions] docs/ or sidebars.ts has uncommitted changes; refusing to run.\n" +
      "Commit / stash them first, or set FORCE_BUILD_VERSIONS=1 to override.";
    console.error(refusal);
    process.exit(1);
  }

  const eligible = listTags().filter(tagHasDocusaurusDocs);

  let summary;
  if (eligible.length) {
    summary = `Found ${eligible.length} eligible tag(s): ${eligible.join(", ")}`;
  } else {
    summary = "No eligible release tags found; building HEAD as 'Current' only.";
  }
  log(summary);

  eligible.forEach((tag) => {
    snapshotTag(tag);
  });

  // Tags arrive newest-first, so the head of the list is the latest release.
  const lastVersion = eligible.length > 0 ? eligible[0] : "";
  log(`DOCS_LAST_VERSION=${lastVersion || "(unset, HEAD only)"}`);

  const buildEnv = { ...process.env, DOCS_LAST_VERSION: lastVersion };
  execSync("npm run build", { stdio: "inherit", env: buildEnv });
}

main();


================================================
FILE: scripts/bump-docs-version.sh
================================================
#!/usr/bin/env bash
# Bump every version-pinning reference in README and the documentation tree.
#
# Patterns rewritten:
#   uvx.sh/confluence-markdown-exporter/<version>/install.sh
#   uvx.sh/confluence-markdown-exporter/<version>/install.ps1
#   confluence-markdown-exporter==<version>
#   spenhouet/confluence-markdown-exporter:<version>    (Docker pin; :latest left alone)
#
# Auto-discovers any file under README.md, docs/, or src/ that contains one of
# the patterns above, so no explicit file list needs to be maintained when new
# docs pages adopt version-pinning snippets.
#
# Usage: scripts/bump-docs-version.sh <new-version>
set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "usage: $0 <new-version>" >&2
  exit 1
fi
NEW="$1"

# Validate: accept "1.2.3" plus pip-style suffixes, either attached directly
# ("1.2.3a4", "1.2.3rc1") or separated by "." / "-" ("1.2.3.post1", "1.2.3-beta").
# The restricted alphabet (digits, letters, "." and "-") also keeps $NEW safe to
# splice into the sed replacements below: it cannot contain "|", "&" or "\".
if [[ ! "$NEW" =~ ^[0-9]+\.[0-9]+\.[0-9]+[0-9A-Za-z]*([.-][0-9A-Za-z]+)*$ ]]; then
  echo "error: '$NEW' does not look like a valid version" >&2
  exit 1
fi

# Extended regex used only to discover which files need rewriting.
PATTERN='(uvx\.sh/confluence-markdown-exporter/[^/[:space:]]+/install\.(sh|ps1)|confluence-markdown-exporter==[0-9]|spenhouet/confluence-markdown-exporter:[0-9])'

mapfile -t files < <(
  # Search README.md, docs/, src/ if they exist. Suppress "No such file" noise.
  for root in README.md docs src; do
    [[ -e "$root" ]] || continue
    if [[ -f "$root" ]]; then
      echo "$root"
    else
      find "$root" -type f \( -name '*.md' -o -name '*.mdx' -o -name '*.tsx' -o -name '*.ts' \)
    fi
  done | xargs -r grep -lE "$PATTERN" 2>/dev/null
)

if [[ ${#files[@]} -eq 0 ]]; then
  echo "No files contain version-pin patterns; nothing to update."
  exit 0
fi

# Rewrite every pinned reference in place; one sed expression per pattern.
for f in "${files[@]}"; do
  sed -i \
    -e "s|uvx\.sh/confluence-markdown-exporter/[^/[:space:]]*/install\.sh|uvx.sh/confluence-markdown-exporter/${NEW}/install.sh|g" \
    -e "s|uvx\.sh/confluence-markdown-exporter/[^/[:space:]]*/install\.ps1|uvx.sh/confluence-markdown-exporter/${NEW}/install.ps1|g" \
    -e "s|confluence-markdown-exporter==[0-9A-Za-z.\\-]*|confluence-markdown-exporter==${NEW}|g" \
    -e "s|spenhouet/confluence-markdown-exporter:[0-9][0-9A-Za-z.\\-]*|spenhouet/confluence-markdown-exporter:${NEW}|g" \
    "$f"
  echo "updated: $f"
done


================================================
FILE: sidebars.ts
================================================
import type { SidebarsConfig } from "@docusaurus/plugin-content-docs";

// Sidebar layout for the docs site. Plain strings are doc ids (paths under
// docs/ without the extension); objects are categories grouping further ids.
const sidebars: SidebarsConfig = {
  docsSidebar: [
    "intro",
    {
      type: "category",
      label: "Quickstart",
      collapsed: false,
      items: ["installation", "usage"],
    },
    "features",
    {
      type: "category",
      label: "Configuration",
      collapsed: false,
      // The category label itself links to the configuration index doc.
      link: { type: "doc", id: "configuration/index" },
      items: [
        "configuration/options",
        "configuration/authentication",
        "configuration/target-systems",
        "configuration/ci",
      ],
    },
    "docker",
    "compatibility",
    "troubleshooting",
    "contributing",
  ],
};

export default sidebars;


================================================
FILE: src/components/HomepageFeatures/index.tsx
================================================
import React, { type ReactNode } from "react";
import clsx from "clsx";
import Link from "@docusaurus/Link";
import styles from "./styles.module.css";

/** One card in the homepage feature grid. */
type Feature = {
  // Emoji shown in the card's icon badge (rendered aria-hidden).
  icon: string;
  // Card heading; also used as the React key, so it must be unique.
  title: string;
  // Body content of the card; may contain inline markup.
  description: ReactNode;
  // Link target for the whole card.
  href: string;
};

// Cards shown on the landing page, in display order. Titles double as React
// keys in HomepageFeatures, so keep them unique.
const FEATURES: Feature[] = [
  {
    icon: "🚀",
    title: "One-command install",
    href: "/installation",
    description: (
      <>
        A single curl/PowerShell line installs an isolated, self-updating CLI
        via <code>uv</code>. No virtualenv juggling.
      </>
    ),
  },
  {
    icon: "📚",
    title: "Pages, spaces, orgs",
    href: "/usage",
    description: (
      <>
        Export a single page, a page subtree, an entire space, or every space
        in your Atlassian organisation.
      </>
    ),
  },
  {
    icon: "⚡",
    title: "Incremental by default",
    href: "/features",
    description: (
      <>
        Skips unchanged pages using a lockfile. Re-runs export only what
        actually moved since last time.
      </>
    ),
  },
  {
    icon: "🎯",
    title: "Target presets",
    href: "/configuration/target-systems",
    description: (
      <>
        Pre-baked configurations for Obsidian (wiki links, Dataview, Meta Bind)
        and Azure DevOps wikis (sanitized filenames, attachments folder).
      </>
    ),
  },
  {
    icon: "🧩",
    title: "Macros & add-ons",
    href: "/features",
    description: (
      <>
        Status badges, panels, page properties, draw.io, PlantUML, Mermaid,
        include/excerpt: all converted to portable Markdown.
      </>
    ),
  },
  {
    icon: "🔐",
    title: "Cloud & Server",
    href: "/configuration/authentication",
    description: (
      <>
        Works against Confluence Cloud, the Atlassian API gateway, and
        on-premise Server / Data Center. API tokens, PATs, scoped tokens: all
        supported.
      </>
    ),
  },
];

/** Render one feature as a clickable grid column wrapping a card. */
function FeatureCard({ icon, title, description, href }: Feature) {
  const columnClass = clsx("col col--4", styles.featureCol);
  const cardClass = clsx("feature-card", styles.featureCard);

  return (
    <Link to={href} className={columnClass}>
      <div className={cardClass}>
        <span className="feature-icon" aria-hidden="true">
          {icon}
        </span>
        <h3>{title}</h3>
        <p>{description}</p>
      </div>
    </Link>
  );
}

/** Landing-page section laying out every entry of FEATURES as a card grid. */
export default function HomepageFeatures(): ReactNode {
  const cards = FEATURES.map((feature) => (
    <FeatureCard key={feature.title} {...feature} />
  ));

  return (
    <section className={styles.features}>
      <div className="container">
        <div className="row">{cards}</div>
      </div>
    </section>
  );
}


================================================
FILE: src/components/HomepageFeatures/styles.module.css
================================================
/* Section wrapper around the feature grid. */
.features {
  padding: 4rem 0;
  width: 100%;
}

/* Grid column; it is rendered as a link, so suppress the underline. */
.featureCol {
  margin-bottom: 1.5rem;
  text-decoration: none !important;
}

.featureCol:hover {
  text-decoration: none !important;
}

/* Stack icon, heading and text vertically inside the card. */
.featureCard {
  display: flex;
  flex-direction: column;
  gap: 0.25rem;
}

/* Tighter vertical padding on narrow screens. */
@media (max-width: 768px) {
  .features {
    padding: 2rem 0;
  }
}


================================================
FILE: src/components/quickstart/index.tsx
================================================
import React, { type ReactNode } from "react";
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import CodeBlock from "@theme/CodeBlock";
import Link from "@docusaurus/Link";

/**
 * Build a six-tab group keyed by the install-method groupId, so it stays in
 * sync with the install tabs on landing / intro / installation pages.
 *
 * The five non-docker tabs share the same `local` content; the docker tab
 * shows the container equivalent.
 *
 * @param local  Content rendered in the linux, macos, windows, pip and uv tabs.
 * @param docker Content rendered in the docker tab.
 * @returns The `<Tabs>` element with the tab choice mirrored in the query string.
 */
function makeStepTabs(local: ReactNode, docker: ReactNode) {
  return (
    <Tabs groupId="install-method" queryString>
      <TabItem value="linux" label="Linux">
        {local}
      </TabItem>
      <TabItem value="macos" label="macOS">
        {local}
      </TabItem>
      <TabItem value="windows" label="Windows">
        {local}
      </TabItem>
      <TabItem value="pip" label="pip">
        {local}
      </TabItem>
      <TabItem value="uv" label="uv">
        {local}
      </TabItem>
      <TabItem value="docker" label="Docker">
        {docker}
      </TabItem>
    </Tabs>
  );
}

/**
 * Step 2 of the quickstart: authenticate.
 *
 * Local installs get the interactive `cme config` command; the Docker tab
 * explains how to produce the JSON config elsewhere and mount it.
 */
export function AuthenticateTabs() {
  const localContent = (
    <CodeBlock language="bash">{`cme config edit auth.confluence`}</CodeBlock>
  );

  const dockerContent = (
    <>
      <p>
        The container has no interactive menu. Generate the JSON config on a
        workstation first, then mount it (or pass credentials via{" "}
        <code>CME_AUTH__*</code> env vars):
      </p>
      <CodeBlock language="bash" title="On your workstation">
        {`# Writes ~/.config/confluence-markdown-exporter/app_data.json
cme config edit auth.confluence`}
      </CodeBlock>
      <p>
        Copy that <code>app_data.json</code> to your CI repo or secret store,
        then mount it on every container run (next step). See the{" "}
        <Link to="/docker">Docker page</Link> for the env-var alternative.
      </p>
    </>
  );

  return makeStepTabs(localContent, dockerContent);
}

/**
 * Step 3 of the quickstart: export.
 *
 * Shows the `cme pages / spaces / orgs` commands for local installs and the
 * equivalent `docker run … pages …` invocation for the container.
 */
export function ExportTabs() {
  const localContent = (
    <CodeBlock language="bash">
      {`# A page, a subtree, an entire space, or every space of an org:
cme pages   https://example.atlassian.net/wiki/spaces/SPACE/pages/123/Title
cme spaces  https://example.atlassian.net/wiki/spaces/SPACE
cme orgs    https://example.atlassian.net`}
    </CodeBlock>
  );

  const dockerContent = (
    <CodeBlock language="bash">
      {`docker run --rm \\
  -v "$PWD/app_data.json:/data/config/app_data.json:ro" \\
  -v "$PWD/output:/data/output" \\
  spenhouet/confluence-markdown-exporter \\
  pages https://example.atlassian.net/wiki/spaces/SPACE/pages/123/Title`}
    </CodeBlock>
  );

  return makeStepTabs(localContent, dockerContent);
}

/** Tab group for the installation page's "Verify the install" step. */
export function VerifyTabs() {
  const localContent = <CodeBlock language="bash">{`cme --help`}</CodeBlock>;

  const dockerContent = (
    <CodeBlock language="bash">
      {`docker run --rm spenhouet/confluence-markdown-exporter --help`}
    </CodeBlock>
  );

  return makeStepTabs(localContent, dockerContent);
}


================================================
FILE: src/css/custom.css
================================================
/**
 * Theme overrides for Docusaurus Infima.
 * Primary palette tuned for a modern docs look.
 */

/* Default (light) theme variables. */
:root {
  /* Primary colour with its darker/lighter shades. */
  --ifm-color-primary: #5b6cff;
  --ifm-color-primary-dark: #3c50ff;
  --ifm-color-primary-darker: #2c41ff;
  --ifm-color-primary-darkest: #0026e6;
  --ifm-color-primary-light: #7a88ff;
  --ifm-color-primary-lighter: #8a96ff;
  --ifm-color-primary-lightest: #b5bdff;

  /* Typography: "Inter" is declared via @font-face in this file. */
  --ifm-code-font-size: 90%;
  --ifm-font-family-base: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI",
    Roboto, Oxygen, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
  --ifm-font-family-monospace: "JetBrains Mono", ui-monospace, SFMono-Regular,
    "SF Mono", Consolas, "Liberation Mono", monospace;

  --ifm-heading-font-weight: 700;
  --ifm-h1-font-size: 2.5rem;
  --ifm-h2-font-size: 1.75rem;
  --ifm-h3-font-size: 1.25rem;

  /* Semi-transparent navbar; pairs with the backdrop blur on .navbar. */
  --ifm-navbar-shadow: 0 1px 0 0 rgb(0 0 0 / 5%);
  --ifm-navbar-background-color: rgba(255, 255, 255, 0.85);
  --ifm-navbar-link-hover-color: var(--ifm-color-primary);

  --ifm-toc-border-color: transparent;
  --ifm-table-stripe-background: rgba(0, 0, 0, 0.02);
  --ifm-table-border-color: rgba(0, 0, 0, 0.08);

  --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.08);
}

/* Dark theme overrides: lighter primary shades on dark surfaces. */
[data-theme="dark"] {
  --ifm-color-primary: #8a96ff;
  --ifm-color-primary-dark: #6a7aff;
  --ifm-color-primary-darker: #5b6cff;
  --ifm-color-primary-darkest: #3c50ff;
  --ifm-color-primary-light: #a4adff;
  --ifm-color-primary-lighter: #b5bdff;
  --ifm-color-primary-lightest: #d4d9ff;

  /* Navbar colour is the page background (#0d1117) at 85% opacity. */
  --ifm-background-color: #0d1117;
  --ifm-background-surface-color: #161b22;
  --ifm-navbar-background-color: rgba(13, 17, 23, 0.85);

  --ifm-table-stripe-background: rgba(255, 255, 255, 0.03);
  --ifm-table-border-color: rgba(255, 255, 255, 0.08);

  --docusaurus-highlighted-code-line-bg: rgba(255, 255, 255, 0.08);
}

/*
 * Inter as a single variable font covering weights 100-900, fetched from
 * rsms.me. `font-display: swap` shows fallback text until the font arrives.
 */
@font-face {
  font-family: "Inter";
  font-style: normal;
  font-weight: 100 900;
  font-display: swap;
  src: url("https://rsms.me/inter/font-files/InterVariable.woff2") format("woff2");
}

html {
  scroll-padding-top: var(--ifm-navbar-height);
}

/* Blur content behind the semi-transparent navbar (-webkit- prefix for Safari). */
.navbar {
  backdrop-filter: saturate(180%) blur(20px);
  -webkit-backdrop-filter: saturate(180%) blur(20px);
}

.navbar__title {
  font-weight: 700;
}

/*
 * GitHub mark drawn as a mask over a solid background, so the icon takes the
 * navbar link colour (and the hover colour) instead of a fixed fill.
 * The -webkit- prefixed mask properties are emitted alongside the standard
 * ones: engines that only understand the prefixed form would otherwise show
 * a solid 24x24 square. The SVG lives in a custom property so it is written
 * only once.
 */
.header-github-link::before {
  --header-github-icon: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'%3E%3Cpath d='M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12'/%3E%3C/svg%3E");
  content: "";
  display: inline-block;
  width: 24px;
  height: 24px;
  background-color: var(--ifm-navbar-link-color);
  -webkit-mask-image: var(--header-github-icon);
  mask-image: var(--header-github-icon);
  -webkit-mask-repeat: no-repeat;
  mask-repeat: no-repeat;
  -webkit-mask-size: contain;
  mask-size: contain;
  vertical-align: middle;
}

.header-github-link:hover::before {
  background-color: var(--ifm-color-primary);
}

/* Zero font size collapses the link text so only the ::before icon shows. */
.header-github-link {
  font-size: 0;
  padding: 0.5rem !important;
}

/* Hero */
.hero {
  background: linear-gradient(
    135deg,
    var(--ifm-color-primary-darkest) 0%,
    var(--ifm-color-primary) 50%,
    var(--ifm-color-primary-light) 100%
  );
  color: #fff;
  padding: 4rem 0 5rem;
  position: relative;
  overflow: hidden;
}

.hero::before {
  content: "";
  position: absolute;
  inset: 0;
  background:
    radial-gradient(circle at 20% 20%, rgba(255, 255, 255, 0.15), transparent 50%),
    radial-gradient(circle at 80% 80%, rgba(255, 255, 255, 0.1), transparent 50%);
  pointer-events: none;
}

.hero > .container {
  position: relative;
  z-index: 1;
}

.hero__title {
  font-size: 3rem;
  font-weight: 800;
  letter-spacing: -0.02em;
}

.hero__subtitle {
  font-size: 1.25rem;
  opacity: 0.92;
  max-width: 36rem;
  margin: 1rem auto 0;
}

.hero-logo {
  max-width: 480px;
  width: 80%;
  margin-bottom: 1.5rem;
  filter: drop-shadow(0 12px 32px rgba(0, 0, 0, 0.25));
}

@media (max-width: 768px) {
  .hero-logo {
    max-width: 320px;
    width: 90%;
  }
}

.button--hero {
  background: #fff;
  color: var(--ifm-color-primary-darkest);
  border: none;
  font-weight: 600;
  transition: transform 0.15s ease, box-shadow 0.15s ease;
}

.button--hero:hover {
  background: #fff;
  color: var(--ifm-color-primary-darker);
  transform: translateY(-1px);
  box-shadow: 0 6px 20px rgba(0, 0, 0, 0.2);
}

.button--hero-secondary {
  background: transparent;
  color: #fff;
  border: 1px solid rgba(255, 255, 255, 0.5);
  font-weight: 600;
}

.button--hero-secondary:hover {
  background: rgba(255, 255, 255, 0.1);
  color: #fff;
  border-color: #fff;
}

/* Feature cards */
.feature-card {
  height: 100%;
  padding: 1.75rem;
  border-radius: 12px;
  background: var(--ifm-background-surface-color);
  border: 1px solid var(--ifm-color-emphasis-200);
  transition: transform 0.15s ease, border-color 0.15s ease, box-shadow 0.15s ease;
}

.feature-card:hover {
  transform: translateY(-2px);
  border-color: var(--ifm-color-primary);
  box-shadow: 0 12px 24px -8px rgba(91, 108, 255, 0.25);
}

.feature-card h3 {
  margin: 0.75rem 0 0.5rem;
  font-size: 1.15rem;
}

.feature-card p {
  color: var(--ifm-color-emphasis-700);
  margin: 0;
  font-size: 0.95rem;
}

.feature-icon {
  display: inline-flex;
  align-items: center;
  justify-content: center;
  width: 48px;
  height: 48px;
  border-radius: 12px;
  background: linear-gradient(
    135deg,
    var(--ifm-color-primary) 0%,
    var(--ifm-color-primary-light) 100%
  );
  font-size: 1.5rem;
}

/* Code blocks polish */
.theme-code-block {
  border-radius: 10px;
}

/* Admonition tweaks */
.alert--info,
.alert--note,
.alert--tip,
.alert--warning,
.alert--danger {
  border-left-width: 4px;
}

/* Table polish: overflow is hidden so cell contents are clipped to the rounded corners. */
table {
  border-radius: 8px;
  overflow: hidden;
}


================================================
FILE: src/pages/index.module.css
================================================
.heroBanner {
  text-align: center;
}

.buttons {
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 1rem;
  margin-top: 2rem;
  flex-wrap: wrap;
}

.quickstart {
  padding: 3rem 0 5rem;
}

.quickstartTitle {
  text-align: center;
  font-size: 2rem;
  letter-spacing: -0.01em;
  margin-bottom: 0.5rem;
}

.quickstartLead {
  text-align: center;
  color: var(--ifm-color-emphasis-700);
  margin-bottom: 2rem;
  font-size: 1.05rem;
}

.quickstartFooter {
  text-align: center;
  margin-top: 2rem;
  color: var(--ifm-color-emphasis-600);
  font-size: 0.9rem;
}

/* Heading above each numbered quickstart step. */
.stepTitle {
  font-size: 1.15rem;
  font-weight: 600;
  margin: 2rem 0 0.75rem;
  color: var(--ifm-color-emphasis-900);
  letter-spacing: -0.005em;
}

/*
 * Smaller top margin for the first step. Note that :first-of-type matches by
 * element type among siblings (the first heading of that tag), not by class.
 */
.stepTitle:first-of-type {
  margin-top: 1rem;
}

@media (max-width: 768px) {
  .quickstart {
    padding: 2rem 0;
  }
}


================================================
FILE: src/pages/index.tsx
================================================
import React, { type ReactNode } from "react";
import clsx from "clsx";
import Link from "@docusaurus/Link";
import useDocusaurusContext from "@docusaurus/useDocusaurusContext";
import Layout from "@theme/Layout";
import CodeBlock from "@theme/CodeBlock";
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import HomepageFeatures from "@site/src/components/HomepageFeatures";
import {
  AuthenticateTabs,
  ExportTabs,
} from "@site/src/components/quickstart";
import styles from "./index.module.css";

/** Hero banner: logo, tagline and the two call-to-action buttons. */
function HomepageHeader() {
  const { title, tagline } = useDocusaurusContext().siteConfig;
  const repoUrl = "https://github.com/Spenhouet/confluence-markdown-exporter";

  return (
    <header className={clsx("hero", styles.heroBanner)}>
      <div className="container">
        <img src="img/logo.png" alt={title} className="hero-logo" />
        <p className="hero__subtitle">{tagline}</p>
        <div className={styles.buttons}>
          <Link className="button button--hero button--lg" to="/installation">
            Get started →
          </Link>
          <Link
            className="button button--hero-secondary button--lg"
            to={repoUrl}
          >
            View on GitHub
          </Link>
        </div>
      </div>
    </header>
  );
}

// Install commands shown in step 1 of the landing-page quickstart, keyed by
// the tab value used in InstallTabs. None of them pins a version. The linux
// and macos entries are intentionally identical.
const INSTALL_SNIPPETS = {
  linux: `# Installs an isolated, self-updating CLI via uv.
curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh`,
  macos: `# Installs an isolated, self-updating CLI via uv.
curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh`,
  windows: `powershell -ExecutionPolicy ByPass -c "irm https://uvx.sh/confluence-markdown-exporter/install.ps1 | iex"`,
  pip: `pip install confluence-markdown-exporter`,
  uv: `# Install as an isolated tool…
uv tool install confluence-markdown-exporter

# …or run it once without installing:
uvx confluence-markdown-exporter --help`,
  docker: `# Pull and run the prebuilt image (non-interactive / CI use).
docker pull spenhouet/confluence-markdown-exporter:latest
docker run --rm spenhouet/confluence-markdown-exporter --help`,
};

/**
 * Step 1 tab group: one install snippet per method.
 *
 * Uses the same `install-method` groupId and tab values as the quickstart
 * step tabs, so the selected tab is shared between them. Only the Windows
 * snippet is highlighted as PowerShell; the rest are bash.
 */
function InstallTabs() {
  return (
    <Tabs groupId="install-method" queryString>
      <TabItem value="linux" label="Linux">
        <CodeBlock language="bash">{INSTALL_SNIPPETS.linux}</CodeBlock>
      </TabItem>
      <TabItem value="macos" label="macOS">
        <CodeBlock language="bash">{INSTALL_SNIPPETS.macos}</CodeBlock>
      </TabItem>
      <TabItem value="windows" label="Windows">
        <CodeBlock language="powershell">{INSTALL_SNIPPETS.windows}</CodeBlock>
      </TabItem>
      <TabItem value="pip" label="pip">
        <CodeBlock language="bash">{INSTALL_SNIPPETS.pip}</CodeBlock>
      </TabItem>
      <TabItem value="uv" label="uv">
        <CodeBlock language="bash">{INSTALL_SNIPPETS.uv}</CodeBlock>
      </TabItem>
      <TabItem value="docker" label="Docker">
        <CodeBlock language="bash">{INSTALL_SNIPPETS.docker}</CodeBlock>
      </TabItem>
    </Tabs>
  );
}

/**
 * Landing-page quickstart: three numbered steps (install, authenticate,
 * export), each with its own tab group, followed by a link to the
 * installation docs.
 */
function QuickstartSection() {
  return (
    <section className={styles.quickstart}>
      <div className="container">
        <div className="row">
          {/* Centered column: 8 of 12 grid columns, offset by 2. */}
          <div className="col col--8 col--offset-2">
            <h2 className={styles.quickstartTitle}>Get going in 60 seconds</h2>
            <p className={styles.quickstartLead}>
              Install, authenticate, export. That's the whole flow.
            </p>

            <h3 className={styles.stepTitle}>1. Install</h3>
            <InstallTabs />

            <h3 className={styles.stepTitle}>2. Authenticate</h3>
            <AuthenticateTabs />

            <h3 className={styles.stepTitle}>3. Export</h3>
            <ExportTabs />

            <p className={styles.quickstartFooter}>
              Detailed setup and per-target presets in the{" "}
              <Link to="/installation">installation docs</Link>.
            </p>
          </div>
        </div>
      </div>
    </section>
  );
}

/** Landing page: hero header, feature grid, then the quickstart section. */
export default function Home(): ReactNode {
  const {
    siteConfig: { title, tagline },
  } = useDocusaurusContext();

  return (
    <Layout title={title} description={tagline}>
      <HomepageHeader />
      <main>
        <HomepageFeatures />
        <QuickstartSection />
      </main>
    </Layout>
  );
}


================================================
FILE: tests/__init__.py
================================================
# Test package for confluence-markdown-exporter


================================================
FILE: tests/conftest.py
================================================
"""Shared test fixtures and configuration for confluence-markdown-exporter tests."""

import importlib
import os
import sys
import tempfile
from collections.abc import Generator
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock

# Isolate tests from the developer's user config. The package binds APP_CONFIG_PATH
# at import time from CME_CONFIG_PATH (or, when unset, typer.get_app_dir() which
# resolves to ~/.config/confluence-markdown-exporter/app_data.json on Linux).
# Without this, local settings like `page_href="wiki"` leak into tests that rely
# on the schema defaults.
_test_config_dir = tempfile.mkdtemp(prefix="cme-test-config-")
os.environ["CME_CONFIG_PATH"] = str(Path(_test_config_dir) / "app_data.json")

import pytest  # noqa: E402
from pydantic import SecretStr  # noqa: E402

from confluence_markdown_exporter.utils.app_data_store import ApiDetails  # noqa: E402
from confluence_markdown_exporter.utils.app_data_store import AuthConfig  # noqa: E402
from confluence_markdown_exporter.utils.app_data_store import ConfigModel  # noqa: E402
from confluence_markdown_exporter.utils.app_data_store import ConnectionConfig  # noqa: E402
from confluence_markdown_exporter.utils.app_data_store import ExportConfig  # noqa: E402

# Store original functions before any patching
_original_get_confluence = None
_original_get_jira = None


def pytest_configure(config: pytest.Config) -> None:  # noqa: ARG001
    """Swap the API client factories for mock-returning stubs before collection.

    The real factories are stashed in module globals first so they can be put
    back later (per test or at the end of the session).
    """
    from confluence_markdown_exporter import api_clients

    global _original_get_confluence, _original_get_jira  # noqa: PLW0603

    # Remember the real factories before they are replaced.
    _original_get_confluence, _original_get_jira = (
        api_clients.get_confluence_instance,
        api_clients.get_jira_instance,
    )

    # A single shared mock per client type is handed out for every URL.
    confluence_stub = MagicMock()
    confluence_stub.get_all_spaces.return_value = []
    jira_stub = MagicMock()

    api_clients.get_confluence_instance = lambda _url: confluence_stub
    api_clients.get_jira_instance = lambda _url: jira_stub


def pytest_unconfigure(config: pytest.Config) -> None:  # noqa: ARG001
    """Undo the session-wide test setup.

    Restores the real API client factories (when they were captured by
    ``pytest_configure``) and deletes the temporary config directory created
    at import time, which ``tempfile.mkdtemp`` never removes on its own.
    """
    import shutil

    from confluence_markdown_exporter import api_clients

    # Compare against None explicitly: the saved values are callables, and
    # "was it captured?" is the actual question, not their truthiness.
    if _original_get_confluence is not None:
        api_clients.get_confluence_instance = _original_get_confluence
    if _original_get_jira is not None:
        api_clients.get_jira_instance = _original_get_jira

    # Best-effort cleanup; a failure here must not fail the test session.
    shutil.rmtree(_test_config_dir, ignore_errors=True)


@pytest.fixture(autouse=True)
def restore_api_functions_for_specific_tests(
    request: pytest.FixtureRequest,
) -> Generator[None, None, None]:
    """Give the api_clients factory tests the real factory functions.

    Tests in ``test_api_clients.py`` whose node id mentions
    ``TestGetConfluenceInstance`` or ``TestGetJiraInstance`` exercise the real
    factories, so the session-wide stubs are lifted for them and re-installed
    once the test has finished. Every other test is left untouched.
    """
    from confluence_markdown_exporter import api_clients

    in_api_clients_file = "test_api_clients.py" in str(request.fspath)
    node_id = request.node.nodeid
    targets_factory = (
        "TestGetConfluenceInstance" in node_id or "TestGetJiraInstance" in node_id
    )
    is_api_client_function_test = in_api_clients_file and targets_factory

    if is_api_client_function_test and _original_get_confluence and _original_get_jira:
        # Put the real factories back for the duration of this test.
        api_clients.get_confluence_instance = _original_get_confluence
        api_clients.get_jira_instance = _original_get_jira

        # The test module bound the stubbed names at collection time; reload it
        # so it picks up the restored functions.
        test_module = sys.modules.get("tests.unit.test_api_clients")
        if test_module is not None:
            importlib.reload(test_module)

    yield

    if not is_api_client_function_test:
        return

    # Re-install fresh stubs so later tests stay isolated from the real API.
    confluence_stub = MagicMock()
    confluence_stub.get_all_spaces.return_value = []
    jira_stub = MagicMock()

    api_clients.get_confluence_instance = lambda _url: confluence_stub
    api_clients.get_jira_instance = lambda _url: jira_stub


@pytest.fixture
def temp_config_dir() -> Generator[Path, None, None]:
    """Yield a fresh temporary directory that is removed after the test."""
    with tempfile.TemporaryDirectory() as raw_path:
        config_dir = Path(raw_path)
        yield config_dir


@pytest.fixture
def mock_confluence_client() -> MagicMock:
    """Return a MagicMock Confluence client with one canned space and page."""
    space = {"key": "TEST", "name": "Test Space", "id": "123456"}
    page = {
        "id": "123456",
        "title": "Test Page",
        "body": {"storage": {"value": "<p>Test content</p>"}},
        "space": {"key": "TEST"},
        "version": {"number": 1},
    }

    client = MagicMock()
    client.get_all_spaces.return_value = [space]
    client.get_page_by_id.return_value = page
    return client


@pytest.fixture
def mock_jira_client() -> MagicMock:
    """Return a MagicMock Jira client with one canned project and issue."""
    project = {"key": "TEST", "name": "Test Project", "id": "10000"}
    issue = {
        "key": "TEST-123",
        "fields": {
            "summary": "Test Issue",
            "description": "Test description",
            "status": {"name": "Open"},
        },
    }

    client = MagicMock()
    client.get_all_projects.return_value = [project]
    client.get_issue.return_value = issue
    return client


SAMPLE_CONFLUENCE_URL = "https://test.atlassian.net"


@pytest.fixture
def sample_api_details() -> ApiDetails:
    """Return API credentials populated with dummy secrets."""
    username = SecretStr("test@example.com")
    api_token = SecretStr("test-token")
    pat = SecretStr("test-pat")
    return ApiDetails(username=username, api_token=api_token, pat=pat)


@pytest.fixture
def sample_connection_config() -> ConnectionConfig:
    """Return a connection configuration with retries and SSL verification on."""
    retryable_statuses = [413, 429, 502, 503, 504]
    connection = ConnectionConfig(
        backoff_and_retry=True,
        backoff_factor=2,
        max_backoff_seconds=60,
        max_backoff_retries=5,
        retry_status_codes=retryable_statuses,
        verify_ssl=True,
    )
    return connection


@pytest.fixture
def sample_config_model(
    sample_api_details: ApiDetails,
    sample_connection_config: ConnectionConfig,
    temp_config_dir: Path,
) -> ConfigModel:
    """Assemble a full configuration from the sample fixtures.

    The same credentials are registered for both Confluence and Jira under
    ``SAMPLE_CONFLUENCE_URL``; exports go to ``output`` inside the temp dir.
    """
    credentials_by_url = {SAMPLE_CONFLUENCE_URL: sample_api_details}
    return ConfigModel(
        auth=AuthConfig(
            confluence=dict(credentials_by_url),
            jira=dict(credentials_by_url),
        ),
        export=ExportConfig(output_path=temp_config_dir / "output"),
        connection_config=sample_connection_config,
    )


@pytest.fixture
def confluence_page_response() -> dict[str, Any]:
    """Return a canned Confluence page payload with no ancestors or children."""
    html = "<h1>Test Heading</h1><p>Test content with <strong>bold</strong> text.</p>"
    version = {
        "number": 1,
        "when": "2023-01-01T00:00:00.000Z",
        "by": {"displayName": "Test User", "username": "testuser"},
    }
    links = {
        "webui": "/spaces/TEST/pages/123456/Test+Page",
        "base": "https://test.atlassian.net/wiki",
    }

    response: dict[str, Any] = {
        "id": "123456",
        "type": "page",
        "status": "current",
        "title": "Test Page",
        "space": {"key": "TEST", "name": "Test Space", "id": "123"},
        "version": version,
        "ancestors": [],
        "children": {"page": {"results": [], "size": 0}},
        "descendants": {"page": {"results": [], "size": 0}},
        "body": {"storage": {"value": html, "representation": "storage"}},
        "_links": links,
    }
    return response


@pytest.fixture
def confluence_space_response() -> dict[str, Any]:
    """Provide a canned Confluence space payload including homepage id and links."""
    description = {"plain": {"value": "A test space"}}
    links = {"webui": "/spaces/TEST", "base": "https://test.atlassian.net/wiki"}
    return {
        "id": "123",
        "key": "TEST",
        "name": "Test Space",
        "description": description,
        "homepage": {"id": "123456"},
        "_links": links,
    }


@pytest.fixture
def jira_issue_response() -> dict[str, Any]:
    """Provide a canned Jira issue payload with the commonly read fields filled in."""
    fields = {
        "summary": "Test Issue Summary",
        "description": "This is a test issue description",
        "status": {"name": "Open", "id": "1"},
        "priority": {"name": "Medium", "id": "3"},
        "issuetype": {"name": "Bug", "id": "1"},
        "created": "2023-01-01T00:00:00.000+0000",
        "updated": "2023-01-01T12:00:00.000+0000",
    }
    return {"id": "10000", "key": "TEST-123", "fields": fields}


================================================
FILE: tests/integration/__init__.py
================================================
"""Integration tests for confluence-markdown-exporter."""


================================================
FILE: tests/integration/test_cli_integration.py
================================================
"""Basic tests for confluence-markdown-exporter package."""

import subprocess
import sys

import pytest

import confluence_markdown_exporter.main as main_module
from confluence_markdown_exporter import __version__


def test_package_has_version() -> None:
    """Check that the package exposes ``__version__`` as a non-empty string."""
    version = __version__
    assert version is not None
    assert isinstance(version, str)
    assert version != ""


def test_version_command() -> None:
    """Run the ``version`` command in a subprocess and check its output.

    The output has to contain the package name followed by something more
    (the version itself, whose exact form is not pinned).
    """
    package_name = "confluence-markdown-exporter"
    command = [sys.executable, "-m", "confluence_markdown_exporter.main", "version"]

    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            timeout=10,
        )

        # Package name must show up and the process must exit cleanly.
        assert package_name in completed.stdout
        assert completed.returncode == 0

        # Dev builds may append extra info, so only require "more than the name".
        assert len(completed.stdout.strip()) > len(package_name)

    except subprocess.TimeoutExpired:
        pytest.fail("Version command timed out")
    except subprocess.CalledProcessError as err:
        pytest.fail(f"Version command failed: {err}")
    except Exception as err:  # noqa: BLE001
        pytest.fail(f"Unexpected error testing version command: {err}")


def test_config_list_command() -> None:
    """Run ``config list`` in a subprocess and check that it prints the config as YAML.

    The output must mention the ``auth``, ``export`` and ``connection_config``
    sections and parse to a mapping containing those keys.
    """
    import yaml

    sections = ("auth", "export", "connection_config")
    command = [sys.executable, "-m", "confluence_markdown_exporter.main", "config", "list"]

    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            timeout=10,
        )

        assert completed.returncode == 0
        for section in sections:
            assert f"{section}:" in completed.stdout

        # The whole output has to be parseable as a YAML mapping.
        parsed = yaml.safe_load(completed.stdout)
        assert isinstance(parsed, dict)
        for section in sections:
            assert section in parsed

    except subprocess.TimeoutExpired:
        pytest.fail("Config list command timed out")
    except subprocess.CalledProcessError as err:
        pytest.fail(f"Config list command failed: {err}")
    except Exception as err:  # noqa: BLE001
        pytest.fail(f"Unexpected error testing config list command: {err}")


def test_cli_entry_points() -> None:
    """Test that the main module exposes the ``app`` object used as CLI entry point.

    The module is imported at the top of this file, so an import problem already
    surfaces when the test module is collected. The assertions are deliberately not
    wrapped in ``try``/``except``: ``AssertionError`` derives from ``Exception``, so a
    broad handler would swallow a failed assertion and the test could never fail.
    """
    # The main module must have been imported successfully.
    assert main_module is not None
    # The Typer application must be reachable as ``main_module.app``.
    assert hasattr(main_module, "app")


================================================
FILE: tests/unit/__init__.py
================================================
"""Unit tests for confluence-markdown-exporter."""


================================================
FILE: tests/unit/test_alert_conversion.py
================================================
"""Test Confluence alert/panel macro conversion."""

from __future__ import annotations

from typing import TYPE_CHECKING

import pytest

if TYPE_CHECKING:
    from confluence_markdown_exporter.confluence import Page


def _make_converter(editor2: str = "") -> Page.Converter:
    """Build a ``Page.Converter`` around a stub page carrying the given ``editor2`` markup."""
    from confluence_markdown_exporter.confluence import Page

    class MockPage:
        """Stub page exposing only the attributes set below and an attachment lookup."""

        def __init__(self, editor2_markup: str) -> None:
            self.id = "test-page"
            self.title = "Test Page"
            self.html = ""
            self.labels = []
            self.ancestors = []
            self.editor2 = editor2_markup

        def get_attachment_by_file_id(self, file_id: str) -> None:
            # The stub page has no attachments at all.
            return None

    stub_page = MockPage(editor2)
    return Page.Converter(stub_page)


@pytest.fixture
def converter() -> Page.Converter:
    """Provide a converter for a stub page with empty ``editor2`` markup."""
    return _make_converter(editor2="")


class TestAlertOutsideTable:
    """Alert macros outside of a table are rendered as ``> [!TYPE]`` blockquote alerts."""

    def test_panel_renders_as_note_alert(self, converter: Page.Converter) -> None:
        """A ``panel`` macro becomes a NOTE alert that keeps its body text."""
        markdown = converter.convert('<div data-macro-name="panel"><p>body text</p></div>')
        assert "> [!NOTE]" in markdown
        assert "body text" in markdown

    def test_warning_renders_as_caution_alert(self, converter: Page.Converter) -> None:
        """A ``warning`` macro becomes a CAUTION alert."""
        markdown = converter.convert('<div data-macro-name="warning"><p>danger</p></div>')
        assert "> [!CAUTION]" in markdown


class TestAlertInsideTableCell:
    """Alert macros inside a table cell are rendered as an emoji prefix, not an alert block."""

    @staticmethod
    def _macro_in_cell(macro: str, text: str, cell: str = "td") -> str:
        """Return a one-cell table whose cell holds a ``macro`` div wrapping ``text``."""
        return (
            f"<table><tr><{cell}>"
            f'<div data-macro-name="{macro}"><p>{text}</p></div>'
            f"</{cell}></tr></table>"
        )

    def test_panel_in_td_emits_emoji_no_blockquote(self, converter: Page.Converter) -> None:
        markdown = converter.convert(self._macro_in_cell("panel", "Klinische Abteilung"))
        assert "[!NOTE]" not in markdown
        assert ">" not in markdown.replace("</td>", "")
        assert "\U0001f4dd Klinische Abteilung" in markdown

    def test_info_in_td_emits_important_emoji(self, converter: Page.Converter) -> None:
        markdown = converter.convert(self._macro_in_cell("info", "info text"))
        assert "[!IMPORTANT]" not in markdown
        assert "❗ info text" in markdown

    def test_warning_in_td_emits_caution_emoji(self, converter: Page.Converter) -> None:
        markdown = converter.convert(self._macro_in_cell("warning", "danger"))
        assert "[!CAUTION]" not in markdown
        assert "\U0001f6d1 danger" in markdown

    def test_tip_in_td_emits_tip_emoji(self, converter: Page.Converter) -> None:
        markdown = converter.convert(self._macro_in_cell("tip", "helpful"))
        assert "[!TIP]" not in markdown
        assert "\U0001f4a1 helpful" in markdown

    def test_note_in_td_emits_warning_emoji(self, converter: Page.Converter) -> None:
        markdown = converter.convert(self._macro_in_cell("note", "watch out"))
        assert "[!WARNING]" not in markdown
        assert "⚠️ watch out" in markdown

    def test_panel_in_th_emits_emoji_no_blockquote(self, converter: Page.Converter) -> None:
        markdown = converter.convert(self._macro_in_cell("panel", "header note", cell="th"))
        assert "[!NOTE]" not in markdown
        assert "\U0001f4dd header note" in markdown


class TestCustomPanelEmoji:
    """Emoji selection for ``panel`` macros in table cells, driven by the ``editor2`` markup.

    Each test pairs an ``editor2`` structured-macro definition with rendered HTML
    whose ``data-macro-id`` refers to that macro, then checks the emoji that
    prefixes the cell text.
    """

    def test_custom_panel_icon_text_used_in_table_cell(self) -> None:
        """``panelIconText`` is used as the prefix instead of the default panel emoji."""
        editor2 = (
            '<ac:structured-macro ac:name="panel" ac:macro-id="abc-1">'
            '<ac:parameter ac:name="panelIconId">1f6e0</ac:parameter>'
            '<ac:parameter ac:name="panelIcon">:tools:</ac:parameter>'
            '<ac:parameter ac:name="panelIconText">\U0001f6e0️</ac:parameter>'
            "<ac:rich-text-body><p>Klinische Abteilung</p></ac:rich-text-body>"
            "</ac:structured-macro>"
        )
        converter = _make_converter(editor2)
        html = (
            "<table><tr><td>"
            '<div data-macro-name="panel" data-macro-id="abc-1"><p>Klinische Abteilung</p></div>'
            "</td></tr></table>"
        )
        out = converter.convert(html)
        assert "\U0001f6e0️ Klinische Abteilung" in out
        # The default panel emoji must not appear alongside the custom one.
        assert "\U0001f4dd" not in out

    def test_custom_panel_icon_id_decoded_when_no_text(self) -> None:
        """With only ``panelIconId`` present, the emoji for that hex code is the prefix."""
        editor2 = (
            '<ac:structured-macro ac:name="panel" ac:macro-id="abc-2">'
            '<ac:parameter ac:name="panelIconId">1f6e0</ac:parameter>'
            "<ac:rich-text-body><p>x</p></ac:rich-text-body>"
            "</ac:structured-macro>"
        )
        converter = _make_converter(editor2)
        html = (
            "<table><tr><td>"
            '<div data-macro-name="panel" data-macro-id="abc-2"><p>x</p></div>'
            "</td></tr></table>"
        )
        out = converter.convert(html)
        assert "\U0001f6e0 x" in out

    def test_panel_without_custom_icon_falls_back_to_default(self) -> None:
        """A macro definition without icon parameters yields the default panel emoji."""
        editor2 = (
            '<ac:structured-macro ac:name="panel" ac:macro-id="plain-1">'
            "<ac:rich-text-body><p>plain</p></ac:rich-text-body>"
            "</ac:structured-macro>"
        )
        converter = _make_converter(editor2)
        html = (
            "<table><tr><td>"
            '<div data-macro-name="panel" data-macro-id="plain-1"><p>plain</p></div>'
            "</td></tr></table>"
        )
        out = converter.convert(html)
        assert "\U0001f4dd plain" in out

    def test_unknown_macro_id_falls_back_to_default(self) -> None:
        """A ``data-macro-id`` that is absent from ``editor2`` yields the default panel emoji."""
        converter = _make_converter("")
        html = (
            "<table><tr><td>"
            '<div data-macro-name="panel" data-macro-id="missing"><p>y</p></div>'
            "</td></tr></table>"
        )
        out = converter.convert(html)
        assert "\U0001f4dd y" in out


================================================
FILE: tests/unit/test_api_clients.py
================================================
"""Unit tests for api_clients module."""

import urllib.parse
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
import requests
from atlassian.errors import ApiError
from pydantic import SecretStr

from confluence_markdown_exporter.api_clients import ApiClientFactory
from confluence_markdown_exporter.api_clients import AuthNotConfiguredError
from confluence_markdown_exporter.api_clients import ConfluenceRef
from confluence_markdown_exporter.api_clients import get_confluence_instance
from confluence_markdown_exporter.api_clients import parse_confluence_path
from confluence_markdown_exporter.api_clients import response_hook
from confluence_markdown_exporter.utils.app_data_store import ApiDetails
from confluence_markdown_exporter.utils.app_data_store import AtlassianSdkConnectionConfig
from confluence_markdown_exporter.utils.app_data_store import AuthConfig
from confluence_markdown_exporter.utils.app_data_store import ConfigModel
from tests.conftest import SAMPLE_CONFLUENCE_URL

# (input, expected) pairs for ``parse_confluence_path``.
# An input is either a full URL or a bare path; ``None`` as the expected value means
# that no reference is expected to be parsed from the input.
_PARSE_CONFLUENCE_PATH_CASES = [
    (
        "https://company.atlassian.net/wiki/spaces/SPACEKEY",
        ConfluenceRef(space_key="SPACEKEY"),
    ),
    (
        "https://company.atlassian.net/wiki/spaces/SPACEKEY/pages/123456789/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_id=123456789, page_title="Page Title"),
    ),
    # Non-numeric page id segment: no reference expected.
    (
        "https://company.atlassian.net/wiki/spaces/SPACEKEY/pages/sddssd/Page+Title",
        None,
    ),
    (
        "https://company.atlassian.net/wiki/spaces/SPACEKEY/overview",
        ConfluenceRef(space_key="SPACEKEY"),
    ),
    # api.atlassian.com URLs with an /ex/confluence/<id>/ prefix.
    (
        "https://api.atlassian.com/ex/confluence/CLOUDID/wiki/spaces/SPACEKEY/pages/123456789/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_id=123456789, page_title="Page Title"),
    ),
    (
        "https://api.atlassian.com/ex/confluence/1232132-12312312-21321332/wiki/spaces/SPACEKEY",
        ConfluenceRef(space_key="SPACEKEY"),
    ),
    (
        "https://api.atlassian.com/ex/confluence/1232132-12312312-21321332/wiki/spaces/SPACEKEY/pages/123456789",
        ConfluenceRef(space_key="SPACEKEY", page_id=123456789),
    ),
    # Bare paths (no scheme or host).
    (
        "/wiki/spaces/SPACEKEY/",
        ConfluenceRef(space_key="SPACEKEY"),
    ),
    (
        "/wiki/spaces/SPACEKEY/overview",
        ConfluenceRef(space_key="SPACEKEY"),
    ),
    (
        "/wiki/spaces/SPACEKEY/pages/123456789/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_id=123456789, page_title="Page Title"),
    ),
    (
        "/ex/confluence/CLOUDID/wiki/spaces/SPACEKEY/pages/123456789/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_id=123456789, page_title="Page Title"),
    ),
    (
        "/ex/confluence/1232132-12312312-21321332/wiki/spaces/SPACEKEY",
        ConfluenceRef(space_key="SPACEKEY"),
    ),
    (
        "/ex/confluence/1232132-12312312-21321332/wiki/spaces/SPACEKEY/pages/123456789",
        ConfluenceRef(space_key="SPACEKEY", page_id=123456789),
    ),
    # /display/<space>[/<title>] and /<space>[/<title>] forms (no page id in the path).
    (
        "https://confluence.company.com/display/SPACEKEY",
        ConfluenceRef(space_key="SPACEKEY"),
    ),
    (
        "https://confluence.company.com/display/SPACEKEY/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"),
    ),
    (
        "https://confluence.company.com/SPACEKEY",
        ConfluenceRef(space_key="SPACEKEY"),
    ),
    (
        "https://confluence.company.com/SPACEKEY/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"),
    ),
    (
        "https://company.atlassian.net/display/SPACEKEY/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"),
    ),
    (
        "https://company.atlassian.net/SPACEKEY/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"),
    ),
    (
        "/display/SPACEKEY",
        ConfluenceRef(space_key="SPACEKEY"),
    ),
    (
        "/display/SPACEKEY/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"),
    ),
    (
        "/SPACEKEY",
        ConfluenceRef(space_key="SPACEKEY"),
    ),
    (
        "/SPACEKEY/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"),
    ),
    # /spaces/... without a leading /wiki segment.
    (
        "https://wiki.aaa.aaa/spaces/SPACEKEY/pages/123456789/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_id=123456789, page_title="Page Title"),
    ),
    (
        "/spaces/SPACEKEY/pages/123456789/Page+Title",
        ConfluenceRef(space_key="SPACEKEY", page_id=123456789, page_title="Page Title"),
    ),
]


class TestParseConfluencePath:
    """Table-driven checks for ``parse_confluence_path``."""

    @pytest.mark.parametrize(("url", "expected"), _PARSE_CONFLUENCE_PATH_CASES)
    def test_parse_confluence_path(self, url: str, expected: ConfluenceRef | None) -> None:
        """Parse the path component of ``url`` and compare it with ``expected``."""
        # Full URLs are reduced to their path; bare paths are passed through as-is.
        if "://" in url:
            path = urllib.parse.urlparse(url).path
        else:
            path = url

        parsed = parse_confluence_path(path)

        if expected is None:
            assert parsed is None
            return
        assert parsed is not None
        assert parsed.model_dump() == expected.model_dump()


class TestResponseHook:
    """Checks for the ``response_hook`` callback."""

    def test_successful_response(self, caplog: pytest.LogCaptureFixture) -> None:
        """An OK response is handed back and nothing is logged."""
        ok_response = MagicMock(spec=requests.Response, ok=True, status_code=200)

        returned = response_hook(ok_response)

        assert returned == ok_response
        assert len(caplog.records) == 0

    def test_failed_response(self, caplog: pytest.LogCaptureFixture) -> None:
        """A non-OK response is handed back and exactly one record describing it is logged."""
        failed_response = MagicMock(
            spec=requests.Response,
            ok=False,
            status_code=404,
            url="https://test.atlassian.net/api/test",
            headers={"Content-Type": "application/json"},
        )

        returned = response_hook(failed_response)

        assert returned == failed_response
        assert len(caplog.records) == 1
        message = caplog.records[0].message
        assert "Request to https://test.atlassian.net/api/test failed with status 404" in message
        assert "Response headers: {'Content-Type': 'application/json'}" in message


class TestApiClientFactory:
    """Checks for ``ApiClientFactory`` construction and client creation."""

    def test_init(self) -> None:
        """The factory keeps the AtlassianSdkConnectionConfig it was constructed with."""
        sdk_config = AtlassianSdkConnectionConfig()
        factory = ApiClientFactory(sdk_config)
        assert factory.connection_config == sdk_config
        assert isinstance(factory.connection_config, AtlassianSdkConnectionConfig)

    @patch("confluence_markdown_exporter.api_clients.ConfluenceApiSdk")
    def test_create_confluence_success(
        self, mock_confluence_sdk: MagicMock, sample_api_details: ApiDetails
    ) -> None:
        """A working SDK client is returned after a one-space probe call."""
        client = mock_confluence_sdk.return_value
        client.get_all_spaces.return_value = [{"key": "TEST"}]
        sdk_config = AtlassianSdkConnectionConfig()

        created = ApiClientFactory(sdk_config).create_confluence(
            SAMPLE_CONFLUENCE_URL, sample_api_details
        )

        assert created == client
        mock_confluence_sdk.assert_called_once_with(
            url=SAMPLE_CONFLUENCE_URL,
            username=sample_api_details.username.get_secret_value(),
            password=sample_api_details.api_token.get_secret_value(),
            token=sample_api_details.pat.get_secret_value(),
            **sdk_config.model_dump(),
        )
        client.get_all_spaces.assert_called_once_with(limit=1)

    @patch("confluence_markdown_exporter.api_clients.ConfluenceApiSdk")
    def test_create_confluence_connection_failure(
        self, mock_confluence_sdk: MagicMock, sample_api_details: ApiDetails
    ) -> None:
        """An ApiError from the probe call surfaces as a ConnectionError."""
        client = mock_confluence_sdk.return_value
        client.get_all_spaces.side_effect = ApiError("Connection failed")
        factory = ApiClientFactory(AtlassianSdkConnectionConfig())

        with pytest.raises(ConnectionError, match="Confluence connection failed"):
            factory.create_confluence(SAMPLE_CONFLUENCE_URL, sample_api_details)

    @patch("confluence_markdown_exporter.api_clients.JiraApiSdk")
    def test_create_jira_success(
        self, mock_jira_sdk: MagicMock, sample_api_details: ApiDetails
    ) -> None:
        """A working Jira SDK client is returned after a projects probe call."""
        client = mock_jira_sdk.return_value
        client.get_all_projects.return_value = [{"key": "TEST"}]
        sdk_config = AtlassianSdkConnectionConfig()

        created = ApiClientFactory(sdk_config).create_jira(
            SAMPLE_CONFLUENCE_URL, sample_api_details
        )

        assert created == client
        mock_jira_sdk.assert_called_once_with(
            url=SAMPLE_CONFLUENCE_URL,
            username=sample_api_details.username.get_secret_value(),
            password=sample_api_details.api_token.get_secret_value(),
            token=sample_api_details.pat.get_secret_value(),
            **sdk_config.model_dump(),
        )
        client.get_all_projects.assert_called_once()

    @patch("confluence_markdown_exporter.api_clients.JiraApiSdk")
    def test_create_jira_connection_failure(
        self, mock_jira_sdk: MagicMock, sample_api_details: ApiDetails
    ) -> None:
        """An ApiError from the Jira probe call surfaces as a ConnectionError."""
        client = mock_jira_sdk.return_value
        client.get_all_projects.side_effect = ApiError("Connection failed")
        factory = ApiClientFactory(AtlassianSdkConnectionConfig())

        with pytest.raises(ConnectionError, match="Jira connection failed"):
            factory.create_jira(SAMPLE_CONFLUENCE_URL, sample_api_details)


class TestGetConfluenceInstance:
    """Checks for ``get_confluence_instance`` with the client cache patched to be empty."""

    @patch("confluence_markdown_exporter.api_clients._confluence_clients", {})
    @patch("confluence_markdown_exporter.api_clients.get_settings")
    @patch("confluence_markdown_exporter.api_clients.ApiClientFactory")
    def test_successful_connection(
        self,
        mock_factory_class: MagicMock,
        mock_get_settings: MagicMock,
        sample_config_model: ConfigModel,
    ) -> None:
        """The client built by the factory is returned for the requested URL."""
        mock_get_settings.return_value = sample_config_model
        factory = mock_factory_class.return_value
        client = factory.create_confluence.return_value

        instance = get_confluence_instance(SAMPLE_CONFLUENCE_URL)

        assert instance == client
        mock_factory_class.assert_called_once_with(sample_config_model.connection_config)
        expected_auth = sample_config_model.auth.get_instance(SAMPLE_CONFLUENCE_URL)
        factory.create_confluence.assert_called_once_with(SAMPLE_CONFLUENCE_URL, expected_auth)

    @patch("confluence_markdown_exporter.api_clients._confluence_clients", {})
    @patch("confluence_markdown_exporter.api_clients.get_settings")
    @patch("confluence_markdown_exporter.api_clients.ApiClientFactory")
    def test_connection_failure_raises(
        self,
        mock_factory_class: MagicMock,
        mock_get_settings: MagicMock,
        sample_config_model: ConfigModel,
    ) -> None:
        """A ConnectionError from the factory is reported as AuthNotConfiguredError."""
        mock_get_settings.return_value = sample_config_model
        factory = mock_factory_class.return_value
        factory.create_confluence.side_effect = ConnectionError("Connection failed")

        with pytest.raises(AuthNotConfiguredError) as exc_info:
            get_confluence_instance(SAMPLE_CONFLUENCE_URL)

        error = exc_info.value
        assert error.url == SAMPLE_CONFLUENCE_URL
        assert error.service == "Confluence"
        # Client creation is attempted exactly once.
        assert factory.create_confluence.call_count == 1


class TestAuthConfigContextPath:
    """Auth lookup for instances that are served under a context path (e.g. /confluence)."""

    def _make_config(self, key: str) -> AuthConfig:
        """Return an AuthConfig holding one Confluence entry stored under ``key``."""
        return AuthConfig(
            confluence={key: ApiDetails(username=SecretStr("user"), api_token=SecretStr("token"))}
        )

    @pytest.mark.parametrize(
        ("stored_key", "lookup_url"),
        [
            # Stored without context path, looked up with one.
            ("https://host.example.com", "https://host.example.com/confluence"),
            ("https://host.example.com", "https://host.example.com/confluence/spaces/KEY"),
            ("https://host.example.com", "https://host.example.com/confluence/display/KEY/Title"),
            # Stored with context path, looked up with the same context path.
            ("https://host.example.com/confluence", "https://host.example.com/confluence"),
            (
                "https://host.example.com/confluence",
                "https://host.example.com/confluence/spaces/KEY/pages/123",
            ),
            # Host with an explicit, non-standard port.
            ("https://host.example.com:8443", "https://host.example.com:8443/confluence"),
        ],
    )
    def test_get_instance_matches_context_path_url(
        self, stored_key: str, lookup_url: str
    ) -> None:
        """The stored entry is found for every matching lookup URL."""
        assert self._make_config(stored_key).get_instance(lookup_url) is not None

    @pytest.mark.parametrize(
        ("stored_key", "lookup_url"),
        [
            # Host differs: no match allowed.
            ("https://other.example.com", "https://host.example.com/confluence"),
            # Port differs: no match allowed.
            ("https://host.example.com:8080", "https://host.example.com:9090/confluence"),
            # Gateway URLs with different ids must not match through the shared host.
            (
                "https://api.atlassian.com/ex/confluence/CLOUD1",
                "https://api.atlassian.com/ex/confluence/CLOUD2/wiki/spaces/KEY",
            ),
        ],
    )
    def test_get_instance_no_false_match(self, stored_key: str, lookup_url: str) -> None:
        """No entry is returned when host, port or gateway id differ."""
        assert self._make_config(stored_key).get_instance(lookup_url) is None


================================================
FILE: tests/unit/test_confluence.py
================================================
"""Unit tests for confluence module URL resolution."""

from __future__ import annotations

import types
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from confluence_markdown_exporter.confluence import Attachment
from confluence_markdown_exporter.confluence import Page
from confluence_markdown_exporter.confluence import Space
from confluence_markdown_exporter.confluence import User
from confluence_markdown_exporter.confluence import Version


class MockPage:
    """Stub page for Converter tests: plain string/list attributes plus mocked sub-objects."""

    def __init__(self) -> None:
        # Plain scalar and list attributes.
        self.id = "test-page"
        self.title = "Test Page"
        self.type = ""
        self.html = ""
        self.body_storage = ""
        self.web_url = ""
        self.tiny_url = ""
        self.labels = []
        self.ancestors = []
        # Nested objects are mocks pre-configured with the values set here.
        self.space = MagicMock(key="TEST")
        self.version = MagicMock(number=1, when="", by=MagicMock(display_name=""))
        self.history = MagicMock(created="", created_by=MagicMock(display_name=""))

    def get_attachment_by_file_id(self, file_id: str) -> None:
        """Always report that no attachment exists for ``file_id``."""
        return None


@pytest.fixture
def converter() -> Page.Converter:
    """Provide a ``Page.Converter`` wrapped around a fresh ``MockPage``."""
    stub_page = MockPage()
    return Page.Converter(stub_page)


class TestSquareBracketEscaping:
    """Plain-text square brackets get escaped so they are not read as markdown links."""

    def test_bracket_notation_escaped(self, converter: Page.Converter) -> None:
        markdown = converter.convert("<p>test [R1] test</p>").strip()
        assert markdown == r"test \[R1\] test"

    def test_bracket_at_start(self, converter: Page.Converter) -> None:
        markdown = converter.convert("<p>[R1] test</p>").strip()
        assert markdown == r"\[R1\] test"

    def test_bracket_at_end(self, converter: Page.Converter) -> None:
        markdown = converter.convert("<p>test [R1]</p>").strip()
        assert markdown == r"test \[R1\]"

    def test_multiple_bracket_groups(self, converter: Page.Converter) -> None:
        markdown = converter.convert("<p>[A1] and [B2]</p>").strip()
        assert markdown == r"\[A1\] and \[B2\]"

    def test_bracket_in_code_not_escaped(self, converter: Page.Converter) -> None:
        # Inside inline code the brackets stay untouched.
        markdown = converter.convert("<code>[R1]</code>").strip()
        assert markdown == "`[R1]`"

    def test_real_link_not_affected(self, converter: Page.Converter) -> None:
        # A genuine anchor element still becomes a regular markdown link.
        markdown = converter.convert('<a href="https://example.com">click here</a>').strip()
        assert markdown == "[click here](https://example.com)"


class TestAnchorLinkConversion:
    """Internal anchor links must use the href value for slug, not link text."""

    def test_anchor_uses_href_not_link_text(self, converter: Page.Converter) -> None:
        """Anchor slug derived from href, not display text."""
        html = '<a href="#1.-Request-Service">request service</a>'
        result = converter.convert(html).strip()
        assert result == "[request service](#1-request-service)"

    def test_anchor_plain_heading(self, converter: Page.Converter) -> None:
        """Simple heading anchor round-trips correctly."""
        html = '<a href="#My-Heading">My Heading</a>'
        result = converter.convert(html).strip()
        assert result == "[My Heading](#my-heading)"

    def test_anchor_with_numbers_and_punctuation(self, converter: Page.Converter) -> None:
        """Numbered heading anchors match GitHub markdown anchor format."""
        html = '<a href="#2.-Setup-Steps">setup steps</a>'
        result = converter.convert(html).strip()
        assert result == "[setup steps](#2-setup-steps)"

    def test_wiki_anchor_uses_link_text(self, converter: Page.Converter) -> None:
        """Wiki links use link text for slug, not href."""
        # ``patch`` is the module-level import; the settings object is replaced only
        # while the conversion runs, with ``export.page_href`` forced to "wiki".
        with patch("confluence_markdown_exporter.confluence.settings") as mock_settings:
            mock_settings.export.page_href = "wiki"
            html = '<a href="#1.-Request-Service">Request Service</a>'
            result = converter.convert(html).strip()
        assert result == "[[#Request Service]]"


def _make_attachment(
    att_id: str,
    file_id: str,
    title: str = "file.png",
    media_type: str = "image/png",
) -> Attachment:
    """Build an Attachment with the given ids, title and media type; other fields are fixed."""
    base_url = "https://example.com"
    author = User(account_id="u1", display_name="User", username="user", public_name="", email="")
    test_space = Space(base_url=base_url, key="TS", name="Test", description="", homepage=0)
    first_version = Version(
        number=1,
        by=author,
        when="2024-01-01T00:00:00Z",
        friendly_when="Jan 1",
    )
    return Attachment(
        base_url=base_url,
        title=title,
        space=test_space,
        ancestors=[],
        version=first_version,
        id=att_id,
        file_size=100,
        media_type=media_type,
        media_type_description="",
        file_id=file_id,
        collection_name="",
        download_link="/download",
        comment="",
    )


def _make_page(
    body: str,
    body_export: str,
    attachments: list[Attachment],
    body_storage: str = "",
) -> Page:
    """Build a ``Page`` fixture (id 1, titled "Test Page") around the given bodies.

    The space, author and version are constant placeholders; callers supply
    the view body, export body, attachments and optionally the storage body.
    """
    base_url = "https://example.com"
    author = User(account_id="u1", display_name="User", username="user", public_name="", email="")
    return Page(
        base_url=base_url,
        id=1,
        title="Test Page",
        space=Space(base_url=base_url, key="TS", name="Test", description="", homepage=0),
        ancestors=[],
        version=Version(
            number=1,
            by=author,
            when="2024-01-01T00:00:00Z",
            friendly_when="Jan 1",
        ),
        body=body,
        body_export=body_export,
        editor2="",
        body_storage=body_storage,
        labels=[],
        attachments=attachments,
    )


class TestAttachmentsForExport:
    """Checks which attachments ``Page._attachments_for_export`` selects."""

    @staticmethod
    def _select(page: Page, mode: str) -> list[Attachment]:
        """Call ``_attachments_for_export`` with ``attachments_export`` patched to *mode*."""
        with patch("confluence_markdown_exporter.confluence.settings") as patched:
            patched.export.attachments_export = mode
            return page._attachments_for_export()

    def test_file_id_in_body_included(self) -> None:
        """An attachment whose file id appears as ``data-media-id`` in the body is selected."""
        attachment = _make_attachment("111", "abc-guid-111")
        page = _make_page(
            body='<img data-media-id="abc-guid-111" src="...">',
            body_export="",
            attachments=[attachment],
        )
        assert attachment in self._select(page, "referenced")

    def test_attachment_id_in_body_included(self) -> None:
        """An SVG referenced through ``data-linked-resource-id`` in the body is selected."""
        attachment = _make_attachment(
            "99999", "xyz-guid-99", title="image.svg", media_type="image/svg+xml"
        )
        page = _make_page(
            body='<img data-linked-resource-id="99999" src="...">',
            body_export="",
            attachments=[attachment],
        )
        assert attachment in self._select(page, "referenced")

    def test_attachment_id_in_body_export_included(self) -> None:
        """An attachment referenced only from ``body_export`` (here an MP4) is selected."""
        attachment = _make_attachment(
            "88888", "xyz-guid-88", title="video.mp4", media_type="video/mp4"
        )
        page = _make_page(
            body="",
            body_export='<a data-linked-resource-id="88888" href="...">video.mp4</a>',
            attachments=[attachment],
        )
        assert attachment in self._select(page, "referenced")

    def test_title_in_body_src_url_included(self) -> None:
        """An SVG referenced only by its filename in a ``src`` URL is selected."""
        attachment = _make_attachment(
            "66666", "xyz-guid-66", title="MEP-Symbol_CH-REP.svg", media_type="image/svg+xml"
        )
        page = _make_page(
            body='<img src="/download/attachments/12345/MEP-Symbol_CH-REP.svg?version=1">',
            body_export="",
            attachments=[attachment],
        )
        assert attachment in self._select(page, "referenced")

    def test_title_with_spaces_url_encoded_in_body_export_included(self) -> None:
        """A title with a space is matched against its URL-encoded form in ``body_export``."""
        attachment = _make_attachment(
            "55555", "xyz-guid-55", title="my video.mp4", media_type="video/mp4"
        )
        page = _make_page(
            body="",
            body_export='<img src="/download/attachments/12345/my%20video.mp4">',
            attachments=[attachment],
        )
        assert attachment in self._select(page, "referenced")

    def test_unreferenced_attachment_excluded(self) -> None:
        """An attachment that no body mentions is left out in "referenced" mode."""
        attachment = _make_attachment("77777", "xyz-guid-77", title="unused.png")
        page = _make_page(body="no references here", body_export="", attachments=[attachment])
        assert attachment not in self._select(page, "referenced")

    def test_attachments_export_all_returns_all(self) -> None:
        """In "all" mode every attachment is returned, in order, even when unreferenced."""
        first = _make_attachment("111", "aaa")
        second = _make_attachment("222", "bbb", title="other.svg", media_type="image/svg+xml")
        page = _make_page(body="", body_export="", attachments=[first, second])
        assert self._select(page, "all") == [first, second]


class TestAttachmentsExportFlag:
    """Tests for the ``export.attachments_export`` setting (downloads and metadata)."""

    def _make_attachment_mock(self, att_id: str = "att-1", version: int = 3) -> MagicMock:
        """Return a mock attachment carrying an id, a version number and an export path."""
        mock_att = MagicMock(id=att_id, export_path=Path(f"attachments/{att_id}.bin"))
        mock_att.version.number = version
        return mock_att

    def _make_page_mock(self, attachments: list) -> MagicMock:
        """Return a mock page (id 42) whose ``_attachments_for_export`` returns *attachments*."""
        mock_page = MagicMock(id=42)
        mock_page._attachments_for_export.return_value = attachments
        return mock_page

    def test_referenced_default_exports_attachments(self, tmp_path: Path) -> None:
        """In "referenced" mode the attachment is exported and its id is in the result."""
        attachment = self._make_attachment_mock()
        page = self._make_page_mock([attachment])

        with (
            patch("confluence_markdown_exporter.confluence.settings") as settings_mock,
            patch("confluence_markdown_exporter.confluence.LockfileManager") as lockfile_mock,
            patch("confluence_markdown_exporter.confluence.get_stats"),
        ):
            export_settings = settings_mock.export
            export_settings.attachments_export = "referenced"
            export_settings.output_path = tmp_path
            lockfile_mock.get_page_attachment_entries.return_value = {}

            exported = Page.export_attachments(page)

        attachment.export.assert_called_once()
        assert "att-1" in exported

    def test_disabled_skips_download_and_lockfile(self) -> None:
        """In "disabled" mode nothing is exported and the lockfile is never queried."""
        attachment = self._make_attachment_mock()
        page = self._make_page_mock([attachment])

        with (
            patch("confluence_markdown_exporter.confluence.settings") as settings_mock,
            patch("confluence_markdown_exporter.confluence.LockfileManager") as lockfile_mock,
            patch("confluence_markdown_exporter.confluence.get_stats"),
        ):
            settings_mock.export.attachments_export = "disabled"

            exported = Page.export_attachments(page)

        assert exported == {}
        attachment.export.assert_not_called()
        lockfile_mock.get_page_attachment_entries.assert_not_called()

    def test_metadata_still_populated_when_disabled(self) -> None:
        """``Page.from_json`` still fills ``Page.attachments`` when downloads are disabled.

        Regression guard: attachment metadata loading must not be gated on the
        download flag, because body image and file links need it to resolve.
        """
        base_url = "https://example.atlassian.net"
        space = Space(base_url=base_url, key="K", name="Space", description="", homepage=None)
        author = User(account_id="", username="", display_name="", public_name="", email="")
        attachment = Attachment(
            base_url=base_url,
            id="att-1",
            title="file.png",
            space=space,
            ancestors=[],
            version=Version(number=1, by=author, when="", friendly_when=""),
            file_size=10,
            media_type="image/png",
            media_type_description="",
            file_id="file-id-1",
            collection_name="",
            download_link="",
            comment="",
        )
        # Every body representation is present but empty.
        empty_bodies = {name: {"value": ""} for name in ("view", "export_view", "editor2")}
        page_json = {
            "id": 42,
            "title": "Test",
            "_expandable": {"space": "/rest/api/space/K"},
            "body": empty_bodies,
            "metadata": {"labels": {"results": []}},
            "ancestors": [],
            "version": {},
        }

        with (
            patch(
                "confluence_markdown_exporter.confluence.Attachment.from_page_id",
                return_value=[attachment],
            ),
            patch(
                "confluence_markdown_exporter.confluence.Space.from_key",
                return_value=space,
            ),
            patch("confluence_markdown_exporter.confluence.settings") as settings_mock,
        ):
            settings_mock.export.attachments_export = "disabled"

            loaded = Page.from_json(page_json, base_url)

        assert len(loaded.attachments) == 1
        assert loaded.attachments[0].id == "att-1"


class TestTransformErrorImg:
    """A ``transform-error`` image is resolved through its ``data-encoded-xml`` payload."""

    def test_transform_error_resolves_attachment_by_encoded_xml(self) -> None:
        """The placeholder error URL is replaced by a reference to the named attachment."""
        from pathlib import Path
        from urllib.parse import quote

        svg_title = "MEP-Symbol_CH-REP.svg"

        class MockAttachment:
            title = svg_title
            export_path = Path("TEST/attachments/guid123.svg")

        class MockPageWithSvg:
            def __init__(self) -> None:
                self.id = "test-page"
                self.title = "Test Page"
                self.html = ""
                self.body_storage = ""
                self.labels: list = []
                self.ancestors: list = []
                self.export_path = Path("TEST/Instructions for Use.md")

            # Lookups by file id and by attachment id always miss, so only the
            # title lookup below can resolve the image.
            def get_attachment_by_file_id(self, _fid: str) -> None:
                return None

            def get_attachment_by_id(self, _aid: str) -> None:
                return None

            def get_attachments_by_title(self, title: str) -> list:
                return [MockAttachment()] if title == "MEP-Symbol_CH-REP.svg" else []

        encoded_xml = quote(
            '<ac:image><ri:attachment ri:filename="MEP-Symbol_CH-REP.svg"/></ac:image>'
        )
        img_html = (
            f'<img class="transform-error" data-encoded-xml="{encoded_xml}" '
            f'src="https://example.com/placeholder/error" title="">'
        )

        with patch("confluence_markdown_exporter.confluence.settings") as patched_settings:
            patched_settings.export.attachment_href = "relative"
            patched_settings.export.page_href = "relative"
            converter = Page.Converter(MockPageWithSvg())  # type: ignore[arg-type]
            markdown = converter.convert(img_html).strip()

        assert "placeholder/error" not in markdown
        assert "MEP-Symbol_CH-REP.svg" in markdown or "guid123.svg" in markdown


class TestParseImageCaptions:
    """``_parse_image_captions`` maps image filenames to captions found in storage XML."""

    @staticmethod
    def _captions(storage: str) -> dict[str, str]:
        """Import ``_parse_image_captions`` lazily and apply it to *storage*."""
        from confluence_markdown_exporter.confluence import _parse_image_captions

        return _parse_image_captions(storage)

    def test_cdata_caption_extracted(self) -> None:
        """A caption wrapped in CDATA inside ``ac:plain-text-body`` is extracted."""
        xml = (
            '<ac:image ac:align="center">'
            '<ri:attachment ri:filename="testbild.jpeg"/>'
            "<ac:caption>"
            "<ac:plain-text-body><![CDATA[My Caption]]></ac:plain-text-body>"
            "</ac:caption>"
            "</ac:image>"
        )
        assert self._captions(xml) == {"testbild.jpeg": "My Caption"}

    def test_plain_text_caption_extracted(self) -> None:
        """A caption given as bare text inside ``ac:plain-text-body`` is extracted."""
        xml = (
            "<ac:image>"
            '<ri:attachment ri:filename="photo.png"/>'
            "<ac:caption>"
            "<ac:plain-text-body>Plain Caption</ac:plain-text-body>"
            "</ac:caption>"
            "</ac:image>"
        )
        assert self._captions(xml) == {"photo.png": "Plain Caption"}

    def test_paragraph_caption_extracted(self) -> None:
        """A caption written as a ``<p>`` element is extracted."""
        xml = (
            '<ac:image ac:align="center">'
            '<ri:attachment ri:filename="screenshot.png" ri:version-at-save="1"/>'
            "<ac:caption><p>Dialog in VS Code to create a new branch</p></ac:caption>"
            "</ac:image>"
        )
        expected = {"screenshot.png": "Dialog in VS Code to create a new branch"}
        assert self._captions(xml) == expected

    def test_caption_with_attributes_extracted(self) -> None:
        """Attributes on the ``ac:caption`` element do not prevent extraction."""
        xml = (
            '<ac:image ac:align="center" ac:width="544">'
            '<ri:attachment ri:filename="TissueMap.png" ri:version-at-save="1"/>'
            '<ac:caption ac:local-id="6a5ac213-73a0">'
            "<p>Exemplary Tissue Map</p>"
            "</ac:caption>"
            "</ac:image>"
        )
        assert self._captions(xml) == {"TissueMap.png": "Exemplary Tissue Map"}

    def test_image_without_caption_excluded(self) -> None:
        """An image without an ``ac:caption`` element produces no entry."""
        xml = (
            "<ac:image>"
            '<ri:attachment ri:filename="no-caption.png"/>'
            "</ac:image>"
        )
        assert self._captions(xml) == {}

    def test_multiple_images_mixed(self) -> None:
        """Only the captioned images of a mixed sequence appear in the result."""
        xml = (
            "<ac:image>"
            '<ri:attachment ri:filename="a.png"/>'
            "<ac:caption><ac:plain-text-body>"
            "<![CDATA[Caption A]]></ac:plain-text-body></ac:caption>"
            "</ac:image>"
            "<ac:image>"
            '<ri:attachment ri:filename="b.png"/>'
            "</ac:image>"
            "<ac:image>"
            '<ri:attachment ri:filename="c.jpg"/>'
            "<ac:caption><ac:plain-text-body>"
            "<![CDATA[Caption C]]></ac:plain-text-body></ac:caption>"
            "</ac:image>"
        )
        assert self._captions(xml) == {"a.png": "Caption A", "c.jpg": "Caption C"}

    def test_empty_storage_returns_empty(self) -> None:
        """An empty storage string yields an empty mapping."""
        assert self._captions("") == {}


class TestImageCaptionsInConvertImg:
    """Image captions are rendered in italics under the image only when enabled."""

    # Storage XML declaring a CDATA caption for ``testbild.jpeg``.
    _STORAGE = (
        "<ac:image>"
        '<ri:attachment ri:filename="testbild.jpeg"/>'
        "<ac:caption>"
        "<ac:plain-text-body><![CDATA[My Caption]]></ac:plain-text-body>"
        "</ac:caption>"
        "</ac:image>"
    )
    # View body referencing the attachment through its file id, with an empty alt text.
    _BODY = '<img data-media-id="abc-guid-111" src="/download/testbild.jpeg" alt="">'
    _ATTACHMENT_PATH = "{space_name}/attachments/{attachment_file_id}{attachment_extension}"

    def _render(self, *, image_captions: bool) -> str:
        """Convert the fixture page body with ``image_captions`` patched to the given value."""
        attachment = _make_attachment("111", "abc-guid-111", title="testbild.jpeg")
        page = _make_page(
            body=self._BODY,
            body_export="",
            attachments=[attachment],
            body_storage=self._STORAGE,
        )
        with patch("confluence_markdown_exporter.confluence.settings") as patched_settings:
            export = patched_settings.export
            export.attachment_href = "relative"
            export.attachment_path = self._ATTACHMENT_PATH
            export.page_href = "relative"
            export.page_path = "{space_name}/{page_title}.md"
            export.image_captions = image_captions
            export.include_document_title = False
            export.page_breadcrumbs = False
            return Page.Converter(page).convert(page.body).strip()

    def test_caption_rendered_as_italic_below_image(self) -> None:
        """With captions enabled, the italic caption is the line right after the image."""
        markdown = self._render(image_captions=True)
        assert "![](" in markdown  # image with empty alt
        assert "*My Caption*" in markdown
        rows = markdown.splitlines()
        image_row = next(idx for idx, row in enumerate(rows) if "![](" in row)
        assert rows[image_row + 1] == "*My Caption*"

    def test_caption_disabled_preserves_original_alt(self) -> None:
        """With captions disabled, the caption text does not appear in the output."""
        markdown = self._render(image_captions=False)
        assert "My Caption" not in markdown


class TestPageFromUrl:
    """Tests for ``Page.from_url``."""

    def test_from_url_prefers_page_id_query_parameter_for_legacy_server_url(self) -> None:
        """A legacy ``viewpage.action`` URL is resolved through its ``pageId`` parameter."""
        legacy_url = (
            "https://wiki.example.com/pages/viewpage.action"
            "?pageId=317425825&src=contextnavpagetreemode"
        )

        with (
            patch("confluence_markdown_exporter.confluence.get_confluence_instance"),
            patch("confluence_markdown_exporter.confluence.Page.from_id") as from_id_mock,
            patch("confluence_markdown_exporter.confluence.get_thread_confluence") as client_mock,
        ):
            from_id_mock.return_value = "page"

            resolved = Page.from_url(legacy_url)

        assert resolved == "page"
        from_id_mock.assert_called_once_with(317425825, "https://wiki.example.com")
        # The thread-local client must not be touched on this path.
        client_mock.assert_not_called()


class TestSpanHighlightConversion:
    """Spans with a background colour become ``<mark>`` elements when enabled."""

    def test_background_color_rgb_converted_to_mark(self, converter: Page.Converter) -> None:
        """An ``rgb()`` background is emitted as a hex colour on a ``<mark>`` element."""
        markdown = converter.convert(
            '<p><span style="background-color: rgb(248,230,160);">hello</span></p>'
        ).strip()
        assert '<mark style="background: #f8e6a0;">hello</mark>' in markdown

    def test_multiple_channels_converted_correctly(self, converter: Page.Converter) -> None:
        """Each of the three channels is converted to its own hex pair."""
        markdown = converter.convert(
            '<p><span style="background-color: rgb(198,237,251);">text</span></p>'
        ).strip()
        assert '<mark style="background: #c6edfb;">text</mark>' in markdown

    def test_highlight_disabled_returns_plain_text(self, converter: Page.Converter) -> None:
        """With ``convert_text_highlights`` off, only the plain text is kept."""
        span_html = '<p><span style="background-color: rgb(248,230,160);">hello</span></p>'
        with patch("confluence_markdown_exporter.confluence.settings") as patched_settings:
            patched_settings.export.convert_text_highlights = False
            patched_settings.export.convert_font_colors = True
            markdown = converter.convert(span_html).strip()
        assert "<mark" not in markdown
        assert "hello" in markdown


class TestCellHighlightConversion:
    """``data-highlight-colour`` on ``<td>``/``<th>`` cells becomes a ``<mark>`` wrapper."""

    @staticmethod
    def _row(cells: str) -> str:
        """Wrap *cells* in a single-row table."""
        return '<table><tbody><tr>' + cells + '</tr></tbody></table>'

    def test_td_hex_attribute_wraps_in_mark(self, converter: Page.Converter) -> None:
        """A hex highlight on a ``<td>`` wraps its content in ``<mark>``."""
        table = self._row('<td data-highlight-colour="#fff0b3"><p>2</p></td>')
        assert '<mark style="background: #fff0b3;">2</mark>' in converter.convert(table)

    def test_th_hex_attribute_wraps_in_mark(self, converter: Page.Converter) -> None:
        """A hex highlight on a ``<th>`` wraps its bold content in ``<mark>``."""
        table = self._row(
            '<th data-highlight-colour="#ffd5d2"><p><strong>P / S</strong></p></th>'
        )
        assert '<mark style="background: #ffd5d2;">**P / S**</mark>' in converter.convert(table)

    def test_default_header_gray_not_wrapped(self, converter: Page.Converter) -> None:
        """Confluence's default header gray (#f4f5f7) is not user-chosen and is skipped."""
        table = self._row(
            '<th data-highlight-colour="#f4f5f7"><p><strong>P / S</strong></p></th>'
            '<td data-highlight-colour="#f4f5f7"><p><strong>P5</strong></p></td>'
        )
        assert "<mark" not in converter.convert(table)

    def test_transparent_attribute_not_wrapped(self, converter: Page.Converter) -> None:
        """A ``transparent`` highlight value leaves the cell text unwrapped."""
        table = self._row('<td data-highlight-colour="transparent"><p>plain</p></td>')
        markdown = converter.convert(table)
        assert "<mark" not in markdown
        assert "plain" in markdown

    def test_missing_attribute_not_wrapped(self, converter: Page.Converter) -> None:
        """A cell without the highlight attribute is left unwrapped."""
        table = self._row('<td><p>plain</p></td>')
        markdown = converter.convert(table)
        assert "<mark" not in markdown
        assert "plain" in markdown

    def test_invalid_hex_not_wrapped(self, converter: Page.Converter) -> None:
        """A highlight value that is not a colour produces no ``<mark>``."""
        table = self._row('<td data-highlight-colour="not-a-color"><p>x</p></td>')
        assert "<mark" not in converter.convert(table)

    def test_empty_cell_with_highlight_renders_nbsp(self, converter: Page.Converter) -> None:
        """An empty highlighted cell is rendered as a ``<mark>`` containing ``&nbsp;``."""
        table = self._row('<td data-highlight-colour="#ff8f73"></td>')
        assert '<mark style="background: #ff8f73;">&nbsp;</mark>' in converter.convert(table)

    def test_setting_disabled_returns_plain_text(self, converter: Page.Converter) -> None:
        """With ``convert_text_highlights`` off, the cell text is emitted without ``<mark>``."""
        table = self._row('<td data-highlight-colour="#fff0b3"><p>2</p></td>')
        with patch("confluence_markdown_exporter.confluence.settings") as patched_settings:
            patched_settings.export.convert_text_highlights = False
            patched_settings.export.convert_font_colors = True
            patched_settings.export.convert_status_badges = True
            markdown = converter.convert(table)
        assert "<mark" not in markdown
        assert "2" in markdown


class TestSpanFontColorConversion:
    """Spans with a text colour become ``<font>`` elements when enabled."""

    def test_inline_color_rgb_converted_to_font(self, converter: Page.Converter) -> None:
        """An inline ``rgb()`` colour is emitted as a hex colour on a ``<font>`` element."""
        markdown = converter.convert(
            '<p><span style="color: rgb(7,71,166);">blue text</span></p>'
        ).strip()
        assert '<font style="color: #0747a6;">blue text</font>' in markdown

    def test_background_color_not_matched_as_font_color(self, converter: Page.Converter) -> None:
        """``background-color`` yields a ``<mark>``, never a ``<font>``."""
        markdown = converter.convert(
            '<p><span style="background-color: rgb(248,230,160);">hi</span></p>'
        ).strip()
        assert "<font" not in markdown
        assert '<mark style="background: #f8e6a0;">hi</mark>' in markdown

    def test_data_colorid_resolved_from_style_tag(self) -> None:
        """A ``data-colorid`` span takes the colour of the non-dark-mode style rule."""
        mock_page = MockPage()
        mock_page.html = (
            '<style>[data-colorid=abc123]{color:#ff5630} '
            'html[data-color-mode=dark] [data-colorid=abc123]{color:#cf2600}</style>'
        )
        page_converter = Page.Converter(mock_page)  # type: ignore[arg-type]
        markdown = page_converter.convert(
            '<p><span data-colorid="abc123">colored</span></p>'
        ).strip()
        assert '<font style="color: #ff5630;">colored</font>' in markdown

    def test_data_colorid_unknown_falls_through(self, converter: Page.Converter) -> None:
        """An unknown ``data-colorid`` leaves the text without a ``<font>`` wrapper."""
        markdown = converter.convert(
            '<p><span data-colorid="unknown999">text</span></p>'
        ).strip()
        assert "<font" not in markdown
        assert "text" in markdown

    def test_font_color_disabled_returns_plain_text(self, converter: Page.Converter) -> None:
        """With ``convert_font_colors`` off, only the plain text is kept."""
        span_html = '<p><span style="color: rgb(255,86,48);">red text</span></p>'
        with patch("confluence_markdown_exporter.confluence.settings") as patched_settings:
            patched_settings.export.convert_text_highlights = True
            patched_settings.export.convert_font_colors = False
            markdown = converter.convert(span_html).strip()
        assert "<font" not in markdown
        assert "red text" in markdown


class TestStatusBadgeConversion:
    """Status-macro lozenge spans become coloured ``<mark>`` elements when enabled."""

    def _badge(self, extra_class: str, label: str) -> str:
        """Return a status-macro span for *label*, optionally carrying *extra_class*."""
        # strip() removes the trailing space left behind when extra_class is empty.
        classes = f"status-macro aui-lozenge aui-lozenge-visual-refresh {extra_class}".strip()
        span = f'<span class="{classes}" data-macro-name="status">{label}</span>'
        return f'<p>{span}</p>'

    def _render(self, converter: Page.Converter, extra_class: str, label: str) -> str:
        """Convert a badge built by ``_badge`` and strip surrounding whitespace."""
        return converter.convert(self._badge(extra_class, label)).strip()

    def test_gray_badge(self, converter: Page.Converter) -> None:
        """A badge without a colour class is rendered with #dfe1e6."""
        markdown = self._render(converter, "", "IN PROGRESS")
        assert '<mark style="background: #dfe1e6;">IN PROGRESS</mark>' in markdown

    def test_blue_badge(self, converter: Page.Converter) -> None:
        """``aui-lozenge-complete`` is rendered with #cce0ff."""
        markdown = self._render(converter, "aui-lozenge-complete", "DONE")
        assert '<mark style="background: #cce0ff;">DONE</mark>' in markdown

    def test_green_badge(self, converter: Page.Converter) -> None:
        """``aui-lozenge-success`` is rendered with #baf3db."""
        markdown = self._render(converter, "aui-lozenge-success", "SUCCESS")
        assert '<mark style="background: #baf3db;">SUCCESS</mark>' in markdown

    def test_yellow_badge(self, converter: Page.Converter) -> None:
        """``aui-lozenge-current`` is rendered with #f8e6a0."""
        markdown = self._render(converter, "aui-lozenge-current", "ORANGE")
        assert '<mark style="background: #f8e6a0;">ORANGE</mark>' in markdown

    def test_red_badge(self, converter: Page.Converter) -> None:
        """``aui-lozenge-error`` is rendered with #ffd5d2."""
        markdown = self._render(converter, "aui-lozenge-error", "BLOCKED")
        assert '<mark style="background: #ffd5d2;">BLOCKED</mark>' in markdown

    def test_purple_badge(self, converter: Page.Converter) -> None:
        """``aui-lozenge-progress`` is rendered with #dfd8fd."""
        markdown = self._render(converter, "aui-lozenge-progress", "VIOLET")
        assert '<mark style="background: #dfd8fd;">VIOLET</mark>' in markdown

    def test_badge_disabled_returns_plain_text(self, converter: Page.Converter) -> None:
        """With ``convert_status_badges`` off, only the badge label is kept."""
        with patch("confluence_markdown_exporter.confluence.settings") as patched_settings:
            patched_settings.export.convert_status_badges = False
            patched_settings.export.convert_font_colors = True
            patched_settings.export.convert_text_highlights = True
            markdown = self._render(converter, "aui-lozenge-error", "BLOCKED")
        assert "<mark" not in markdown
        assert "BLOCKED" in markdown


# Minimal Page Properties ("details") macro markup: a two-row key/value table
# (Author / Status) used as the default input of the page-properties tests.
_DETAILS_HTML = """
<div data-macro-name="details">
    <table>
        <tr><th>Author</th><td>John Doe</td></tr>
        <tr><th>Status</th><td>Active</td></tr>
    </table>
</div>
"""


class TestPagePropertiesFormat:
    """The Page Properties macro is rendered according to ``page_properties_format``."""

    def _converter(self) -> Page.Converter:
        """Return a fresh converter bound to a ``MockPage``."""
        return Page.Converter(MockPage())

    def _convert(self, fmt: str, html: str = _DETAILS_HTML) -> tuple[Page.Converter, str]:
        """Convert *html* with ``page_properties_format`` patched to *fmt*.

        Returns the converter (for inspecting ``page_properties``) and the markdown.
        """
        page_converter = self._converter()
        with patch("confluence_markdown_exporter.confluence.settings") as patched_settings:
            patched_settings.export.page_properties_format = fmt
            markdown = page_converter.convert(html)
        return page_converter, markdown

    def test_frontmatter_removes_table(self) -> None:
        """"frontmatter" drops the table and records the properties instead."""
        page_converter, markdown = self._convert("frontmatter")
        assert "Author" not in markdown
        assert "author" in page_converter.page_properties
        assert page_converter.page_properties["author"] == "John Doe"

    def test_table_keeps_table_no_properties(self) -> None:
        """"table" keeps the table and records no properties."""
        page_converter, markdown = self._convert("table")
        assert "Author" in markdown
        assert page_converter.page_properties == {}

    def test_frontmatter_and_table_keeps_both(self) -> None:
        """"frontmatter_and_table" keeps the table and records the properties."""
        page_converter, markdown = self._convert("frontmatter_and_table")
        assert "Author" in markdown
        assert "author" in page_converter.page_properties
        assert page_converter.page_properties["author"] == "John Doe"

    def test_dataview_inline_field(self) -> None:
        """"dataview-inline-field" emits ``Key:: value`` lines and no table or properties."""
        page_converter, markdown = self._convert("dataview-inline-field")
        assert "Author:: John Doe" in markdown
        assert "Status:: Active" in markdown
        assert "|" not in markdown
        assert page_converter.page_properties == {}

    def test_meta_bind_view_fields(self) -> None:
        """"meta-bind-view-fields" emits VIEW field rows and records the properties."""
        page_converter, markdown = self._convert("meta-bind-view-fields")
        assert "| **Author** | `VIEW[{author}][text(renderMarkdown)]` |" in markdown
        assert "| **Status** | `VIEW[{status}][text(renderMarkdown)]` |" in markdown
        assert "author" in page_converter.page_properties
        assert "status" in page_converter.page_properties

    def test_duplicate_keys_get_numeric_suffix(self) -> None:
        """Repeated keys are stored as ``key``, ``key_2``, ``key_3`` in the properties."""
        html = """
        <div data-macro-name="details">
            <table>
                <tr><th>Status</th><td>Draft</td></tr>
                <tr><th>Status</th><td>Review</td></tr>
                <tr><th>Status</th><td>Final</td></tr>
            </table>
        </div>
        """
        page_converter, _ = self._convert("frontmatter", html)
        properties = page_converter.page_properties
        assert properties["status"] == "Draft"
        assert properties["status_2"] == "Review"
        assert properties["status_3"] == "Final"

    def test_duplicate_keys_in_inline_fields(self) -> None:
        """A repeated key is rendered as ``Key 2`` in the inline-field output."""
        html = """
        <div data-macro-name="details">
            <table>
                <tr><th>Tag</th><td>foo</td></tr>
                <tr><th>Tag</th><td>bar</td></tr>
            </table>
        </div>
        """
        _, markdown = self._convert("dataview-inline-field", html)
        assert "Tag:: foo" in markdown
        assert "Tag 2:: bar" in markdown


class TestPagePropertiesMigration:
    """Legacy page_properties_as_front_matter bool migrates to page_properties_format."""

    @staticmethod
    def _validated_format(raw: dict[str, object]) -> str:
        """Validate *raw* as an ExportConfig and return its page_properties_format."""
        from confluence_markdown_exporter.utils.app_data_store import ExportConfig

        return ExportConfig.model_validate(raw).page_properties_format

    def test_old_true_maps_to_frontmatter(self) -> None:
        """Legacy ``True`` resolves to the "frontmatter" format."""
        resolved = self._validated_format({"page_properties_as_front_matter": True})
        assert resolved == "frontmatter"

    def test_old_false_maps_to_table(self) -> None:
        """Legacy ``False`` resolves to the "table" format."""
        resolved = self._validated_format({"page_properties_as_front_matter": False})
        assert resolved == "table"

    def test_new_field_takes_precedence_over_old(self) -> None:
        """When both keys are supplied, the explicit new field wins."""
        both_keys = {
            "page_properties_as_front_matter": True,
            "page_properties_format": "table",
        }
        assert self._validated_format(both_keys) == "table"

    def test_default_is_frontmatter_and_table(self) -> None:
        """A config built without arguments uses "frontmatter_and_table"."""
        from confluence_markdown_exporter.utils.app_data_store import ExportConfig

        assert ExportConfig().page_properties_format == "frontmatter_and_table"


class TestConfluenceUrlInFrontmatter:
    """Confluence page URLs render to YAML front matter according to the setting."""

    _WEBUI = "https://example.atlassian.net/wiki/spaces/TEST/pages/123/Test+Page"
    _TINYUI = "https://example.atlassian.net/wiki/x/AbCdEf"

    def _converter(self, *, with_urls: bool = True) -> Page.Converter:
        """Return a converter for a MockPage, optionally carrying both URL attributes."""
        mock_page = MockPage()
        if not with_urls:
            return Page.Converter(mock_page)
        mock_page.web_url = self._WEBUI
        mock_page.tiny_url = self._TINYUI
        return Page.Converter(mock_page)

    @staticmethod
    def _front_matter(converter: Page.Converter, mode: str) -> str:
        """Read ``converter.front_matter`` while the URL setting is mocked to *mode*."""
        with patch("confluence_markdown_exporter.confluence.settings") as mock_settings:
            mock_settings.export.confluence_url_in_frontmatter = mode
            return converter.front_matter

    def test_get_web_url_combines_base_and_webui(self) -> None:
        """``base`` + ``webui`` are joined into the full web URL."""
        from confluence_markdown_exporter.confluence import _get_web_url

        links = {
            "base": "https://example.atlassian.net/wiki",
            "webui": "/spaces/TEST/pages/123/Test+Page",
        }
        assert _get_web_url({"_links": links}) == self._WEBUI

    def test_get_tiny_url_combines_base_and_tinyui(self) -> None:
        """``base`` + ``tinyui`` are joined into the full tiny URL."""
        from confluence_markdown_exporter.confluence import _get_tiny_url

        links = {
            "base": "https://example.atlassian.net/wiki",
            "tinyui": "/x/AbCdEf",
        }
        assert _get_tiny_url({"_links": links}) == self._TINYUI

    def test_helpers_strip_redundant_separators(self) -> None:
        """A trailing slash on ``base`` does not produce a doubled separator."""
        from confluence_markdown_exporter.confluence import _get_web_url

        links = {
            "base": "https://example.atlassian.net/wiki/",
            "webui": "/spaces/TEST/pages/123/Test+Page",
        }
        assert _get_web_url({"_links": links}) == self._WEBUI

    def test_helpers_return_empty_when_links_missing(self) -> None:
        """Both helpers return an empty string when ``_links`` is absent."""
        from confluence_markdown_exporter.confluence import _get_tiny_url
        from confluence_markdown_exporter.confluence import _get_web_url

        no_links: dict = {}
        assert _get_web_url(no_links) == ""
        assert _get_tiny_url(no_links) == ""

    def test_helpers_return_empty_when_links_not_dict(self) -> None:
        """A string or ``None`` under ``_links`` yields an empty string."""
        from confluence_markdown_exporter.confluence import _get_tiny_url
        from confluence_markdown_exporter.confluence import _get_web_url

        assert _get_web_url({"_links": "broken"}) == ""
        assert _get_tiny_url({"_links": None}) == ""

    def test_helpers_return_empty_when_base_or_rel_missing(self) -> None:
        """Either half of the URL missing yields an empty string."""
        from confluence_markdown_exporter.confluence import _get_web_url

        incomplete_links = (
            {"base": "https://example.com"},
            {"webui": "/spaces/TEST"},
        )
        for links in incomplete_links:
            assert _get_web_url({"_links": links}) == ""

    def test_frontmatter_contains_webui_url_when_mode_webui(self) -> None:
        """Mode "webui" writes only the web UI URL key."""
        rendered = self._front_matter(self._converter(), "webui")
        assert f"confluence_webui_url: {self._WEBUI}" in rendered
        assert "confluence_tinyui_url" not in rendered

    def test_frontmatter_contains_tinyui_url_when_mode_tinyui(self) -> None:
        """Mode "tinyui" writes only the tiny URL key."""
        rendered = self._front_matter(self._converter(), "tinyui")
        assert f"confluence_tinyui_url: {self._TINYUI}" in rendered
        assert "confluence_webui_url" not in rendered

    def test_frontmatter_contains_both_when_mode_both(self) -> None:
        """Mode "both" writes both URL keys."""
        rendered = self._front_matter(self._converter(), "both")
        assert f"confluence_webui_url: {self._WEBUI}" in rendered
        assert f"confluence_tinyui_url: {self._TINYUI}" in rendered

    def test_frontmatter_omits_urls_when_mode_none(self) -> None:
        """Mode "none" writes neither URL key."""
        rendered = self._front_matter(self._converter(), "none")
        for absent_key in ("confluence_webui_url", "confluence_tinyui_url"):
            assert absent_key not in rendered

    def test_frontmatter_skips_when_url_value_is_empty(self) -> None:
        """Without URL attributes on the page, mode "both" still writes no URL key."""
        rendered = self._front_matter(self._converter(with_urls=False), "both")
        for absent_key in ("confluence_webui_url", "confluence_tinyui_url"):
            assert absent_key not in rendered

    def test_macro_value_takes_precedence_over_extracted_url(self) -> None:
        """A pre-existing page property with the same key beats the page's URL."""
        conv = self._converter()
        conv.page_properties["confluence_webui_url"] = "manual-override"
        rendered = self._front_matter(conv, "webui")
        assert "confluence_webui_url: manual-override" in rendered
        assert self._WEBUI not in rendered


class TestPageMetadataInFrontmatter:
    """Page metadata fields render to YAML front matter according to the setting."""

    def _make_page(
        self,
        *,
        display_name: str = "Alex Johnson",
        page_type: str = "page",
        created: str = "2024-08-15T08:34:12.000+02:00",
        created_by: str = "Sam Creator",
    ) -> MockPage:
        """Build a MockPage with id, type, space, version and history filled in."""
        mock_page = MockPage()
        mock_page.id = 123
        mock_page.type = page_type
        mock_page.space = MagicMock(key="TEAM")
        # Keyword arguments to MagicMock are applied as plain attribute assignments.
        mock_page.version = MagicMock(
            when="2026-04-12T10:34:00.000+02:00",
            number=7,
            by=MagicMock(display_name=display_name),
        )
        mock_page.history = MagicMock(
            created=created,
            created_by=MagicMock(display_name=created_by),
        )
        return mock_page

    def _converter(self, **kwargs: object) -> Page.Converter:
        """Return a converter over ``_make_page(**kwargs)``."""
        mock_page = self._make_page(**kwargs)
        return Page.Converter(mock_page)

    @staticmethod
    def _front_matter(converter: Page.Converter, *, metadata: bool) -> str:
        """Read front matter with URL output off and the metadata flag set to *metadata*."""
        with patch("confluence_markdown_exporter.confluence.settings") as mock_settings:
            mock_settings.export.confluence_url_in_frontmatter = "none"
            mock_settings.export.page_metadata_in_frontmatter = metadata
            return converter.front_matter

    def test_default_disabled_writes_no_metadata(self) -> None:
        """With the flag off, none of the eight metadata keys appear."""
        rendered = self._front_matter(self._converter(), metadata=False)
        metadata_keys = (
            "confluence_page_id",
            "confluence_space_key",
            "confluence_type",
            "confluence_created",
            "confluence_created_by",
            "confluence_last_modified",
            "confluence_last_modified_by",
            "confluence_version",
        )
        for key in metadata_keys:
            assert key not in rendered

    def test_enabled_writes_all_eight_keys(self) -> None:
        """With the flag on, every metadata key is written with the expected YAML form."""
        rendered = self._front_matter(self._converter(), metadata=True)
        expected_lines = (
            "confluence_page_id: '123'",
            "confluence_space_key: TEAM",
            "confluence_type: page",
            "confluence_created: '2024-08-15T08:34:12.000+02:00'",
            "confluence_created_by: Sam Creator",
            "confluence_last_modified: '2026-04-12T10:34:00.000+02:00'",
            "confluence_last_modified_by: Alex Johnson",
            "confluence_version: 7",
        )
        for line in expected_lines:
            assert line in rendered
        # The version must be emitted unquoted.
        assert "confluence_version: '7'" not in rendered

    def test_blogpost_type_renders(self) -> None:
        """A page of type "blogpost" is written as such."""
        rendered = self._front_matter(self._converter(page_type="blogpost"), metadata=True)
        assert "confluence_type: blogpost" in rendered

    def test_macro_precedence_for_page_id(self) -> None:
        """A pre-set ``confluence_page_id`` property beats the page's own id."""
        conv = self._converter()
        conv.page_properties["confluence_page_id"] = "macro-override"
        rendered = self._front_matter(conv, metadata=True)
        assert "confluence_page_id: macro-override" in rendered
        assert "confluence_page_id: '123'" not in rendered

    @pytest.mark.parametrize(
        ("key", "macro_value", "api_substring"),
        [
            ("confluence_type", "macro-type", "confluence_type: page"),
            ("confluence_created", "macro-created", "2024-08-15T08:34:12.000+02:00"),
            ("confluence_created_by", "macro-author", "Sam Creator"),
        ],
    )
    def test_macro_precedence_for_history_fields(
        self, key: str, macro_value: str, api_substring: str
    ) -> None:
        """A pre-set property replaces the value taken from the page for *key*."""
        conv = self._converter()
        conv.page_properties[key] = macro_value
        rendered = self._front_matter(conv, metadata=True)
        assert f"{key}: {macro_value}" in rendered
        assert api_substring not in rendered

    def test_empty_display_name_skipped(self) -> None:
        """An empty modifier name drops only ``confluence_last_modified_by``."""
        rendered = self._front_matter(self._converter(display_name=""), metadata=True)
        assert "confluence_last_modified_by" not in rendered
        still_present = (
            "confluence_page_id: '123'",
            "confluence_space_key: TEAM",
            "confluence_last_modified",
            "confluence_version: 7",
        )
        for fragment in still_present:
            assert fragment in rendered

    def test_empty_creator_skipped(self) -> None:
        """An empty creator name drops only ``confluence_created_by``."""
        rendered = self._front_matter(self._converter(created_by=""), metadata=True)
        assert "confluence_created_by" not in rendered
        assert "confluence_created: '2024-08-15T08:34:12.000+02:00'" in rendered
        assert "confluence_type: page" in rendered

    def test_empty_type_skipped(self) -> None:
        """An empty page type drops only ``confluence_type``."""
        rendered = self._front_matter(self._converter(page_type=""), metadata=True)
        assert "confluence_type" not in rendered
        assert "confluence_page_id: '123'" in rendered
        assert "confluence_created_by: Sam Creator" in rendered


class TestInlineCommentsFrontMatter:
    """Pin the YAML front matter keys written into *.comments.md sidecars."""

    def test_front_matter_uses_confluence_prefix(self) -> None:
        """Sidecar front matter uses ``confluence_``-prefixed keys and no legacy keys.

        The page fixture, the inline comment payload and the patched export run all
        come from this module's shared ``_make_comments_page``, ``_inline_comment``
        and ``_run_export_capturing_save`` helpers, so this test exercises the same
        wiring as the other sidecar tests instead of a hand-copied duplicate.
        """
        page = _make_comments_page(
            inline_comments=[_inline_comment()],
            marked_texts={"ref-1": "marked excerpt"},
        )
        mock_save = _run_export_capturing_save(page, "inline")

        assert mock_save.called
        # save_file is called positionally; the second argument is the file content.
        content = mock_save.call_args[0][1]

        # New keys with correct YAML form
        assert "confluence_page_id: '123'" in content
        assert 'confluence_page_title: "My Page"' in content
        assert (
            'confluence_webui_url: "https://example.atlassian.net'
            '/wiki/spaces/TEAM/pages/123"' in content
        )

        # Regression guard: old keys must not reappear
        for legacy_key in ("\npage_id:", "\npage_title:", "\nsource:"):
            assert legacy_key not in content


def _make_comments_page(
    *,
    inline_comments: list[dict] | None = None,
    page_comments: list[dict] | None = None,
    replies: dict[str, list[dict]] | None = None,
    marked_texts: dict[str, str] | None = None,
) -> MockPage:
    """Build a MockPage whose comment fetchers are stubbed with the given fixtures.

    Each fetcher returns a fresh list copy of its fixture (empty when the fixture is
    ``None`` or empty); ``replies`` maps a parent comment id to its reply payloads.
    The real ``Page`` rendering methods are bound onto the mock.
    """
    replies_by_parent = replies or {}

    def fetch_inline() -> list[dict]:
        return list(inline_comments or [])

    def fetch_footer() -> list[dict]:
        return list(page_comments or [])

    def fetch_replies(cid: str) -> list[dict]:
        return list(replies_by_parent.get(cid, []))

    mock_page = MockPage()

    # Identity and location of the page.
    mock_page.id = 123
    mock_page.title = "My Page"
    mock_page.space = MagicMock()
    mock_page.space.key = "TEAM"
    mock_page.base_url = "https://example.atlassian.net"
    mock_page.export_path = Path("TEAM/My Page.md")

    # Comment-related state and stubbed fetchers.
    mock_page._marked_texts = marked_texts or {}
    mock_page._COMMENT_TITLE_MAX_LEN = Page._COMMENT_TITLE_MAX_LEN.default
    mock_page._fetch_inline_comments = fetch_inline
    mock_page._fetch_page_comments = fetch_footer
    mock_page._fetch_comment_replies = fetch_replies

    # Bind the real rendering implementations so they run against the mock.
    for renderer in (Page._render_inline_comments, Page._render_page_comments):
        setattr(mock_page, renderer.__name__, types.MethodType(renderer, mock_page))
    return mock_page


def _run_export_capturing_save(page: MockPage, mode: str) -> MagicMock:
    """Run ``Page.export_comments_sidecar`` on *page* and return the ``save_file`` mock.

    ``save_file`` and ``settings`` in ``confluence_markdown_exporter.confluence`` are
    patched for the duration of the call; *mode* is used as ``comments_export``.
    """
    save_patch = patch("confluence_markdown_exporter.confluence.save_file")
    settings_patch = patch("confluence_markdown_exporter.confluence.settings")
    with save_patch as save_spy, settings_patch as fake_settings:
        export_settings = fake_settings.export
        export_settings.output_path = Path("out")
        export_settings.comments_export = mode
        Page.export_comments_sidecar(page)
    return save_spy


def _inline_comment(
    ref: str = "ref-1",
    body: str = "<p>nice</p>",
    cid: str = "c1",
    author: str = "Alice",
) -> dict:
    return {
        "id": cid,
        "extensions": {"inlineProperties": {"markerRef": ref}},
        "history": {
            "createdBy": {"displayName": author},
            "createdDate": "2026-04-01T10:00:00Z",
        },
        "body": {"view": {"value": body}},
    }


def _page_comment(
    cid: str = "p1",
    body: str = "<p>discussion body</p>",
    author: str = "Bob",
    *,
    resolved: bool = False,
) -> dict:
    return {
        "id": cid,
        "extensions": {"resolution": {"status": "resolved" if resolved else "open"}},
        "history": {
            "createdBy": {"displayName": author},
            "createdDate": "2026-04-02T11:00:00Z",
        },
        "body": {"view": {"value": body}},
    }


class TestPageCommentsSidecarBody:
    """Sidecar rendering for page-level (footer) and combined comments."""

    @staticmethod
    def _saved_content(save: MagicMock) -> str:
        """Assert the save mock was called and return its second positional argument."""
        assert save.called
        positional_args = save.call_args[0]
        return positional_args[1]

    def test_only_footer_writes_only_page_section(self) -> None:
        """Mode "footer" writes the page-comments section and no inline section."""
        page = _make_comments_page(page_comments=[_page_comment()])
        sidecar = self._saved_content(_run_export_capturing_save(page, "footer"))
        assert "## Page comments" in sidecar
        assert "## Inline comments" not in sidecar
        assert "discussion body" in sidecar
        assert "**Bob** · 2026-04-02" in sidecar

    def test_all_writes_both_sections_inline_first(self) -> None:
        """Mode "all" writes both sections, inline comments before page comments."""
        page = _make_comments_page(
            inline_comments=[_inline_comment()],
            page_comments=[_page_comment()],
            marked_texts={"ref-1": "marked excerpt"},
        )
        sidecar = self._saved_content(_run_export_capturing_save(page, "all"))
        assert "## Inline comments" in sidecar
        assert "## Page comments" in sidecar
        inline_at = sidecar.index("## Inline comments")
        footer_at = sidecar.index("## Page comments")
        assert inline_at < footer_at

    def test_none_writes_no_file(self) -> None:
        """Mode "none" never calls save_file, even when comments exist."""
        page = _make_comments_page(
            inline_comments=[_inline_comment()],
            page_comments=[_page_comment()],
        )
        save_spy = _run_export_capturing_save(page, "none")
        assert save_spy.called is False

    def test_inline_only_omits_page_section(self) -> None:
        """Mode "inline" writes the inline section and no page-comments section."""
        page = _make_comments_page(
            inline_comments=[_inline_comment()],
            page_comments=[_page_comment()],
            marked_texts={"ref-1": "marked excerpt"},
        )
        sidecar = self._saved_content(_run_export_capturing_save(page, "inline"))
        assert "## Inline comments" in sidecar
        assert "## Page comments" not in sidecar

    def test_page_comment_title_falls_back_to_comment_id(self) -> None:
        """A comment with an empty body is titled with the first 8 chars of its id."""
        bodyless = _page_comment(cid="abcdef1234567", body="")
        page = _make_comments_page(page_comments=[bodyless])
        sidecar = self._saved_content(_run_export_capturing_save(page, "footer"))
        assert "### Comment abcdef12" in sidecar

    def test_page_comment_replies_render_under_parent(self) -> None:
        """Replies follow their parent comment in the order they were supplied."""

        def reply(rid: str, author: str, when: str, html: str) -> dict:
            # Reply payloads carry no "extensions" entry, unlike top-level comments.
            return {
                "id": rid,
                "history": {
                    "createdBy": {"displayName": author},
                    "createdDate": when,
                },
                "body": {"view": {"value": html}},
            }

        thread = {
            "p1": [
                reply("r1", "Carol", "2026-04-03T11:00:00Z", "<p>reply one</p>"),
                reply("r2", "Dave", "2026-04-03T12:00:00Z", "<p>reply two</p>"),
            ]
        }
        parent = _page_comment(cid="p1", body="<p>parent body</p>", author="Bob")
        page = _make_comments_page(page_comments=[parent], replies=thread)
        sidecar = self._saved_content(_run_export_capturing_save(page, "footer"))
        assert sidecar.index("Bob") < sidecar.index("Carol") < sidecar.index("Dave")
        assert "reply one" in sidecar
        assert "reply two" in sidecar

    def test_fetch_page_comments_filters_resolved(self) -> None:
        """``Page._fetch_page_comments`` drops comments whose resolution is "resolved"."""
        mock_page = MockPage()
        mock_page.id = 123
        mock_page.base_url = "https://example.atlassian.net"

        api_payload = {
            "results": [
                _page_comment(cid="open1", body="<p>open one</p>"),
                _page_comment(cid="resolved1", body="<p>resolved one</p>", resolved=True),
                _page_comment(cid="open2", body="<p>open two</p>"),
            ],
            "_links": {},
        }
        fake_client = MagicMock()
        fake_client.get_page_comments.return_value = api_payload

        with patch(
            "confluence_markdown_exporter.confluence.get_thread_confluence",
            return_value=fake_client,
        ):
            fetched = Page._fetch_page_comments(mock_page)

        assert [comment["id"] for comment in fetched] == ["open1", "open2"]


class TestPagePropertiesReportDataview:
    """Page Properties Report macro can be exported as a Dataview DQL query."""

    # Report macro placeholder as it appears in the page HTML (no rows, only data-* attrs).
    _REPORT_HTML = (
        '<table class="aui metadata-summary-macro null"'
        ' data-cql=\'label = "tool-validation" and parent = "42"\''
        ' data-current-content-id="42"'
        ' data-current-space-key="TS"'
        ' data-first-column-heading="Title"'
        ' data-headings="Tool Version,Approved for Use"'
        ' data-sort-by="Title"'
        ' data-reverse-sort="false">'
        "</table>"
    )

    # Rendered report table handed to the mock page as its body_export.
    _BODY_EXPORT = (
        '<table class="aui metadata-summary-macro null"'
        ' data-cql=\'label = "tool-validation" and parent = "42"\''
        ">"
        "<tr><th>Title</th><th>Tool Version</th><th>Approved for Use</th></tr>"
        "<tr><td>Page A</td><td>1.0</td><td>Yes</td></tr>"
        "</table>"
    )

    class _MockPageWithExport:
        """Minimal page stand-in exposing ``body_export`` and an export path."""

        def __init__(self, body_export: str = "") -> None:
            from pathlib import Path

            self.id = 42
            self.title = "Test Page"
            self.html = ""
            self.labels: list = []
            self.ancestors: list = []
            self.body_export = body_export
            self.export_path = Path("Test Space/Test Page/Test Page.md")

        def get_attachment_by_file_id(self, file_id: str) -> None:
            """Always report that no attachment matches *file_id*."""
            return None

    def _convert(self, html: str, *, report_format: str, body_export: str) -> str:
        """Convert *html* on a mock page with ``page_properties_report_format`` mocked."""
        mock_page = self._MockPageWithExport(body_export=body_export)
        conv = Page.Converter(mock_page)
        with patch("confluence_markdown_exporter.confluence.settings") as mock_settings:
            mock_settings.export.page_properties_report_format = report_format
            return conv.convert(html)

    def _dataview_report(self) -> str:
        """Convert the standard report HTML in "dataview" mode."""
        return self._convert(
            self._REPORT_HTML,
            report_format="dataview",
            body_export=self._BODY_EXPORT,
        )

    def test_dataview_output_contains_table_clause(self) -> None:
        """The query lists each heading as ``<key> AS "<Heading>"`` in a TABLE clause."""
        markdown = self._dataview_report()
        assert "```dataview" in markdown
        columns = 'tool_version AS "Tool Version", approved_for_use AS "Approved for Use"'
        assert f"TABLE {columns}" in markdown

    def test_dataview_output_contains_from_clause(self) -> None:
        """The FROM clause names the "Test Space/Test Page" folder."""
        markdown = self._dataview_report()
        assert 'FROM "Test Space/Test Page"' in markdown

    def test_dataview_from_clause_with_current_content_ancestor(self) -> None:
        """``ancestor = currentContent()`` in the CQL yields the same folder FROM clause."""
        report_html = (
            '<table class="aui metadata-summary-macro null"'
            " data-cql='label = \"tool\" and ancestor = currentContent()'"
            ' data-current-content-id="99"'
            ' data-current-space-key="TS"'
            ' data-first-column-heading="Name"'
            ' data-headings="Vendor"'
            ' data-sort-by="Name"'
            ' data-reverse-sort="false">'
            "</table>"
        )
        markdown = self._convert(report_html, report_format="dataview", body_export="")
        assert 'FROM "Test Space/Test Page"' in markdown

    def test_dataview_output_contains_label_in_from_clause(self) -> None:
        """The CQL label shows up as a ``#tool-validation`` tag."""
        markdown = self._dataview_report()
        assert "#tool-validation" in markdown

    def test_dataview_output_contains_sort_clause(self) -> None:
        """Sorting by "Title" without reverse becomes ``SORT title ASC``."""
        markdown = self._dataview_report()
        assert "SORT title ASC" in markdown

    def test_frozen_table_when_format_is_frozen(self) -> None:
        """Mode "frozen" emits the exported table rows instead of a dataview block."""
        markdown = self._convert(
            self._REPORT_HTML,
            report_format="frozen",
            body_export=self._BODY_EXPORT,
        )
        assert "```dataview" not in markdown
        assert "Page A" in markdown


class TestAttachmentTemplateVars:
    """`attachment_file_id` falls back to the content id when fileId is empty."""

    def test_cloud_style_keeps_file_id(self) -> None:
        """A non-empty fileId is used verbatim as ``attachment_file_id``."""
        cloud_attachment = _make_attachment("content-456", "cloud-guid-123")
        template_vars = cloud_attachment._template_vars
        assert template_vars["attachment_file_id"] == "cloud-guid-123"

    def test_dc_style_falls_back_to_content_id(self) -> None:
        """An empty fileId makes ``attachment_file_id`` equal the content id."""
        dc_attachment = _make_attachment("content-456", "")
        template_vars = dc_attachment._template_vars
        assert template_vars["attachment_file_id"] == "content-456"

    def test_two_dc_attachments_get_distinct_paths(self) -> None:
        """Two attachments with empty fileIds must not share an export path."""
        first = _make_attachment("123", "")
        second = _make_attachment("124", "")
        path_template = "{space_name}/attachments/{attachment_file_id}{attachment_extension}"

        with patch("confluence_markdown_exporter.confluence.settings") as mock_settings:
            mock_settings.export.attachment_path = path_template
            # Paths are read while the mocked template is still active.
            first_path = first.export_path
            second_path = second.export_path

        assert first_path != second_path


class TestWikiLinkDisambiguation:
    """Wiki page links use a vault-relative path when titles collide across spaces."""

    def _make_target_page(self, page_id: int, title: str, space_key: str) -> Page:
        """Build a minimal ``Page`` titled *title* in a space keyed and named *space_key*."""
        space = Space(
            base_url="https://example.com",
            key=space_key,
            name=space_key,
            description="",
            homepage=0,
        )
        version = Version(
            number=1,
            by=User(
                account_id="u1",
                display_name="User",
                username="user",
                public_name="",
                email="",
            ),
            when="2024-01-01T00:00:00Z",
            friendly_when="Jan 1",
        )
        return Page(
            base_url="https://example.com",
            id=page_id,
            title=title,
            space=space,
            ancestors=[],
            version=version,
            body="",
            body_export="",
            editor2="",
            body_storage="",
            labels=[],
            attachments=[],
        )

    def _convert_page_link(self, target: Page, *, page_href: str) -> str:
        """Convert an anchor that links to *target* and return the stripped Markdown.

        ``Page.from_id`` is patched to return *target*; the exporter settings are mocked
        with the given *page_href* and a ``{space_name}/{page_title}.md`` page path.
        """
        source = _make_page(body="", body_export="", attachments=[])
        with (
            patch("confluence_markdown_exporter.confluence.Page.from_id", return_value=target),
            patch("confluence_markdown_exporter.confluence.settings") as s,
        ):
            s.export.page_href = page_href
            s.export.page_path = "{space_name}/{page_title}.md"
            conv = Page.Converter(source)
            html = (
                '<a data-linked-resource-type="page" '
                f'data-linked-resource-id="{target.id}">x</a>'
            )
            return conv.convert(html).strip()

    def test_unique_title_emits_short_wiki_link(self) -> None:
        """A title registered once is linked by its bare title."""
        from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry

        PageTitleRegistry.reset()
        try:
            target = self._make_target_page(101, "Unique Page", "ALPHA")
            PageTitleRegistry.register(target.id, target.title)
            result = self._convert_page_link(target, page_href="wiki")
        finally:
            # Reset even when conversion raises so registered titles never leak
            # into tests that run afterwards.
            PageTitleRegistry.reset()

        assert result == "[[Unique Page]]"

    def test_colliding_title_emits_path_qualified_wiki_link(self) -> None:
        """A title registered for two page ids is linked with its space-qualified path."""
        from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry

        PageTitleRegistry.reset()
        try:
            target_alpha = self._make_target_page(201, "Shared Title", "ALPHA")
            target_beta = self._make_target_page(202, "Shared Title", "BETA")
            PageTitleRegistry.register(target_alpha.id, target_alpha.title)
            PageTitleRegistry.register(target_beta.id, target_beta.title)
            result = self._convert_page_link(target_alpha, page_href="wiki")
        finally:
            # Reset even when conversion raises so registered titles never leak
            # into tests that run afterwards.
            PageTitleRegistry.reset()

        assert result == "[[ALPHA/Shared Title|Shared Title]]"

    def test_relative_link_unaffected(self) -> None:
        """With ``page_href="relative"`` a colliding title still yields a Markdown link."""
        from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry

        PageTitleRegistry.reset()
        try:
            target_alpha = self._make_target_page(201, "Shared Title", "ALPHA")
            target_beta = self._make_target_page(202, "Shared Title", "BETA")
            PageTitleRegistry.register(target_alpha.id, target_alpha.title)
            PageTitleRegistry.register(target_beta.id, target_beta.title)
            result = self._convert_page_link(target_alpha, page_href="relative")
        finally:
            # Reset even when conversion raises so registered titles never leak
            # into tests that run afterwards.
            PageTitleRegistry.reset()

        assert "Shared%20Title.md" in result
        assert result.startswith("[Shared Title](")


class TestAbsoluteUrlPageLinks:
    """Absolute Confluence URLs in href must resolve to page links, not pass through."""

    def _make_target_page(self, page_id: int, title: str, space_key: str) -> Page:
        """Build a minimal ``Page`` on ``https://example.com`` to act as a link target.

        Args:
            page_id: Identifier assigned to the page.
            title: Title assigned to the page.
            space_key: Used as both the key and the name of the page's space.

        Returns:
            A ``Page`` with empty bodies, no ancestors, labels or attachments.
        """
        space = Space(
            base_url="https://example.com",
            key=space_key,
            name=space_key,
            description="",
            homepage=0,
        )
        version = Version(
            number=1,
            by=User(
                account_id="u1",
                display_name="User",
                username="user",
                public_name="",
                email="",
            ),
            when="2024-01-01T00:00:00Z",
            friendly_when="Jan 1",
        )
        return Page(
            base_url="https://example.com",
            id=page_id,
            title=title,
            space=space,
            ancestors=[],
            version=version,
            body="",
            body_export="",
            editor2="",
            body_storage="",
            labels=[],
            attachments=[],
        )

    def _convert_with_target(self, page_id: int, title: str, space_key: str, html: str) -> str:
        """Convert ``html`` in wiki-link mode while ``Page.from_id`` yields a stub target.

        The title registry is reset before the conversion and again in a
        ``finally`` clause, so registry state touched during the conversion is
        cleared even when the conversion raises.

        Args:
            page_id: Identifier of the stub page returned by ``Page.from_id``.
            title: Title of the stub page.
            space_key: Space key (and name) of the stub page.
            html: Markup handed to ``Page.Converter.convert``.

        Returns:
            The converted Markdown with surrounding whitespace stripped.
        """
        from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry

        PageTitleRegistry.reset()
        try:
            target = self._make_target_page(page_id, title, space_key)
            source = _make_page(body="", body_export="", attachments=[])
            with (
                patch(
                    "confluence_markdown_exporter.confluence.Page.from_id",
                    return_value=target,
                ),
                patch("confluence_markdown_exporter.confluence.settings") as s,
            ):
                s.export.page_href = "wiki"
                s.export.page_path = "{space_name}/{page_title}.md"
                return Page.Converter(source).convert(html).strip()
        finally:
            PageTitleRegistry.reset()

    def test_absolute_url_same_host_resolves_page(self) -> None:
        """A ``/wiki/spaces/<key>/pages/<id>`` URL on example.com becomes a wiki link."""
        url = "https://example.com/wiki/spaces/STRUCT/pages/1437663233"
        result = self._convert_with_target(
            1437663233, "Linked Page", "STRUCT", f'<a href="{url}">{url}</a>'
        )
        assert result == "[[Linked Page]]"

    def test_absolute_url_different_host_left_alone(self) -> None:
        """A page-style URL on another host is emitted as a plain autolink."""
        source = _make_page(body="", body_export="", attachments=[])
        conv = Page.Converter(source)
        html = (
            '<a href="https://other.atlassian.net/wiki/spaces/X/pages/9/T">'
            "https://other.atlassian.net/wiki/spaces/X/pages/9/T</a>"
        )
        result = conv.convert(html).strip()
        assert result == "<https://other.atlassian.net/wiki/spaces/X/pages/9/T>"

    def test_legacy_pageid_query_resolves_page(self) -> None:
        """A legacy ``viewpage.action?pageId=<id>`` URL becomes a wiki link."""
        result = self._convert_with_target(
            555,
            "Legacy Page",
            "OLD",
            '<a href="https://example.com/pages/viewpage.action?pageId=555">x</a>',
        )
        assert result == "[[Legacy Page]]"


================================================
FILE: tests/unit/test_emoticon_conversion.py
================================================
"""Test that Confluence emoticon img tags are converted to unicode emoji."""

from __future__ import annotations

from typing import TYPE_CHECKING

import pytest

if TYPE_CHECKING:
    from confluence_markdown_exporter.confluence import Page


@pytest.fixture
def converter() -> Page.Converter:
    """Provide a ``Page.Converter`` built around a minimal stand-in page object."""
    from confluence_markdown_exporter.confluence import Page

    class MockPage:
        """Page double carrying only the attributes assigned in ``__init__``."""

        def __init__(self) -> None:
            self.id, self.title, self.html = "test-page", "Test Page", ""
            self.labels, self.ancestors = [], []

        def get_attachment_by_file_id(self, file_id: str) -> None:
            # The stand-in page never has attachments to return.
            return None

    stub_page = MockPage()
    return Page.Converter(stub_page)


class TestEmoticonConversion:
    """Conversion of Confluence ``<img class="emoticon">`` tags to emoji text."""

    def test_atlassian_check_mark(self, converter: Page.Converter) -> None:
        """The ``atlassian-check_mark`` emoticon converts to the check-mark emoji."""
        tick_img = (
            '<img class="emoticon emoticon-tick" data-emoji-id="atlassian-check_mark"'
            ' data-emoji-fallback=":check_mark:" data-emoji-shortname=":check_mark:"'
            ' alt="(tick)" />'
        )
        markdown = converter.convert(tick_img).strip()
        assert markdown == "✅"

    def test_atlassian_cross_mark(self, converter: Page.Converter) -> None:
        """The ``atlassian-cross_mark`` emoticon converts to the cross-mark emoji."""
        cross_img = (
            '<img class="emoticon emoticon-cross" data-emoji-id="atlassian-cross_mark"'
            ' data-emoji-fallback=":cross_mark:" data-emoji-shortname=":cross_mark:"'
            ' alt="(error)" />'
        )
        markdown = converter.convert(cross_img).strip()
        assert markdown == "❌"

    def test_unicode_emoji_by_hex_id(self, converter: Page.Converter) -> None:
        """An emoticon with a hex emoji id and a unicode fallback yields that emoji."""
        tools_emoji = "\U0001f6e0️"
        tools_img = (
            '<img class="emoticon emoticon-blue-star" data-emoji-id="1f6e0"'
            f' data-emoji-fallback="{tools_emoji}" data-emoji-shortname=":tools:"'
            ' alt="(blue star)" />'
        )
        markdown = converter.convert(tools_img).strip()
        assert markdown == tools_emoji

    def test_unicode_emoji_fallback_direct(self, converter: Page.Converter) -> None:
        """Without a shortname attribute the unicode fallback emoji is still emitted."""
        smile = "\U0001f600"
        smile_img = (
            f'<img class="emoticon" data-emoji-id="1f600" data-emoji-fallback="{smile}"'
            ' alt="smile" />'
        )
        markdown = converter.convert(smile_img).strip()
        assert markdown == smile

    def test_custom_emoji_uuid_falls_back_to_shortname(self, converter: Page.Converter) -> None:
        """A UUID emoji id converts to the ``:alert-1:`` shortname text."""
        custom_img = (
            '<img class="emoticon emoticon-blue-star"'
            ' data-emoji-id="fb5b359f-23fa-44bd-872b-676e207eaaef"'
            ' data-emoji-fallback=":alert-1:" data-emoji-shortname=":alert-1:"'
            ' alt="(blue star)" />'
        )
        markdown = converter.convert(custom_img).strip()
        assert markdown == ":alert-1:"

    def test_non_emoticon_img_unchanged(self, converter: Page.Converter) -> None:
        """A regular image keeps its source URL and gains no emoticon text."""
        markdown = converter.convert(
            '<img src="http://example.com/image.png" alt="photo" />'
        ).strip()
        assert "emoticon" not in markdown
        assert "example.com" in markdown

    def test_emoticon_inline_in_text(self, converter: Page.Converter) -> None:
        """An emoticon between text nodes is converted and trailing text is kept."""
        inline_html = (
            'Status: <img class="emoticon emoticon-tick" data-emoji-id="atlassian-check_mark"'
            ' data-emoji-fallback=":check_mark:" alt="(tick)" /> Done'
        )
        markdown = converter.convert(inline_html).strip()
        assert "✅" in markdown
        assert "Done" in markdown


================================================
FILE: tests/unit/test_include_macro_conversion.py
================================================
"""Unit tests for `include` / `excerpt-include` macro conversion."""

from unittest.mock import MagicMock
from unittest.mock import patch

from bs4 import BeautifulSoup

from confluence_markdown_exporter.confluence import Page


def _make_page(editor2: str) -> MagicMock:
    """Return a ``Page``-spec'd mock whose ``editor2`` attribute is ``editor2``."""
    mock_page = MagicMock(spec=Page)
    mock_page.configure_mock(
        id=12345,
        title="Test Page",
        html="<h1>Test Page</h1>",
        labels=[],
        ancestors=[],
        attachments=[],
        editor2=editor2,
    )
    return mock_page


# editor2 XML fixture: a single ``include`` macro (macro-id ``macro-include-1``) whose
# unnamed parameter links to the page titled "Shared Reference Page".
INCLUDE_EDITOR2 = """<?xml version="1.0" encoding="UTF-8"?>
<ac:structured-macro ac:name="include" ac:schema-version="1"
    ac:local-id="local-1" ac:macro-id="macro-include-1">
    <ac:parameter ac:name="">
        <ac:link><ri:page ri:content-title="Shared Reference Page"
            ri:version-at-save="1" /></ac:link>
    </ac:parameter>
</ac:structured-macro>"""

# editor2 XML fixture: a single ``excerpt-include`` macro (macro-id ``macro-excerpt-1``)
# linking to the page titled "Source Page", with a ``name`` parameter of "Named Excerpt".
EXCERPT_INCLUDE_EDITOR2 = """<?xml version="1.0" encoding="UTF-8"?>
<ac:structured-macro ac:name="excerpt-include" ac:schema-version="1"
    ac:local-id="local-2" ac:macro-id="macro-excerpt-1">
    <ac:parameter ac:name="">
        <ac:link><ri:page ri:content-title="Source Page" /></ac:link>
    </ac:parameter>
    <ac:parameter ac:name="name">Named Excerpt</ac:parameter>
</ac:structured-macro>"""


@patch("confluence_markdown_exporter.confluence.settings")
def test_include_macro_transclusion_mode(mock_settings: MagicMock) -> None:
    """In transclusion mode an ``include`` macro renders as an ``![[title]]`` embed."""
    export_cfg = mock_settings.export
    export_cfg.include_document_title = False
    export_cfg.page_breadcrumbs = False
    export_cfg.include_macro = "transclusion"

    page_converter = Page.Converter(_make_page(INCLUDE_EDITOR2))

    # The macro-id matches the one declared in INCLUDE_EDITOR2.
    macro_markup = (
        '<div data-macro-name="include" data-macro-id="macro-include-1">'
        "<p>fallback inline text</p></div>"
    )
    macro_div = BeautifulSoup(macro_markup, "html.parser").find("div")

    rendered = page_converter.convert_include(macro_div, "fallback inline text", [])

    assert rendered.strip() == "![[Shared Reference Page]]"


@patch("confluence_markdown_exporter.confluence.settings")
def test_excerpt_include_macro_transclusion_mode(mock_settings: MagicMock) -> None:
    """In transclusion mode an ``excerpt-include`` macro renders as ``![[Source Page]]``."""
    export_cfg = mock_settings.export
    export_cfg.include_document_title = False
    export_cfg.page_breadcrumbs = False
    export_cfg.include_macro = "transclusion"

    page_converter = Page.Converter(_make_page(EXCERPT_INCLUDE_EDITOR2))

    # The macro-id matches the one declared in EXCERPT_INCLUDE_EDITOR2.
    macro_markup = (
        '<div data-macro-name="excerpt-include" data-macro-id="macro-excerpt-1">'
        "<p>resolved excerpt body</p></div>"
    )
    macro_div = BeautifulSoup(macro_markup, "html.parser").find("div")

    rendered = page_converter.convert_include(macro_div, "resolved excerpt body", [])

    assert rendered.strip() == "![[Source Page]]"


@patch("confluence_markdown_exporter.confluence.settings")
def test_include_macro_inline_mode(mock_settings: MagicMock) -> None:
    """In inline mode an ``include`` macro produces no ``![[`` embed marker."""
    export_cfg = mock_settings.export
    export_cfg.include_document_title = False
    export_cfg.page_breadcrumbs = False
    export_cfg.include_macro = "inline"

    page_converter = Page.Converter(_make_page(INCLUDE_EDITOR2))

    macro_markup = (
        '<div data-macro-name="include" data-macro-id="macro-include-1">'
        "<p>inlined content</p></div>"
    )
    macro_div = BeautifulSoup(macro_markup, "html.parser").find("div")

    rendered = page_converter.convert_include(macro_div, "inlined content", [])

    assert "![[" not in rendered


@patch("confluence_markdown_exporter.confluence.settings")
def test_excerpt_include_inline_strips_source_page_title_panel(
    mock_settings: MagicMock,
) -> None:
    """The panel header/content wrappers are removed while the excerpt body survives."""
    export_cfg = mock_settings.export
    export_cfg.include_document_title = False
    export_cfg.page_breadcrumbs = False
    export_cfg.include_macro = "inline"

    page_converter = Page.Converter(_make_page(EXCERPT_INCLUDE_EDITOR2))

    panel_markup = (
        '<div class="panel conf-macro output-inline" data-macro-name="excerpt-include"'
        ' data-macro-id="macro-excerpt-1">'
        '<div class="panelHeader"><b>Source Page</b></div>'
        '<div class="panelContent"><table><tr><td>body cell</td></tr></table></div>'
        "</div>"
    )

    cleaned = page_converter._strip_excerpt_include_panel_titles(panel_markup)

    # Neither the source-page title nor either panel wrapper may remain.
    for removed in ("Source Page", "panelHeader", "panelContent"):
        assert removed not in cleaned
    assert "body cell" in cleaned


@patch("confluence_markdown_exporter.confluence.settings")
def test_excerpt_include_inline_keeps_body_when_no_panel(
    mock_settings: MagicMock,
) -> None:
    """An ``excerpt-include`` rendered without a panel keeps its body text."""
    export_cfg = mock_settings.export
    export_cfg.include_document_title = False
    export_cfg.page_breadcrumbs = False
    export_cfg.include_macro = "inline"

    page_converter = Page.Converter(_make_page(EXCERPT_INCLUDE_EDITOR2))

    span_markup = (
        '<span class="conf-macro output-inline" data-macro-name="excerpt-include"'
        ' data-macro-id="macro-excerpt-1">actual excerpt body</span>'
    )

    cleaned = page_converter._strip_excerpt_include_panel_titles(span_markup)

    assert "actual excerpt body" in cleaned


@patch("confluence_markdown_exporter.confluence.settings")
def test_include_macro_transclusion_falls_back_when_target_unresolvable(
    mock_settings: MagicMock,
) -> None:
    """No ``![[`` embed is emitted when the macro-id is absent from editor2."""
    export_cfg = mock_settings.export
    export_cfg.include_document_title = False
    export_cfg.page_breadcrumbs = False
    export_cfg.include_macro = "transclusion"

    page_converter = Page.Converter(_make_page(INCLUDE_EDITOR2))

    # "missing-id" does not occur in INCLUDE_EDITOR2, so the target lookup fails.
    macro_markup = (
        '<div data-macro-name="include" data-macro-id="missing-id"><p>inlined content</p></div>'
    )
    macro_div = BeautifulSoup(macro_markup, "html.parser").find("div")

    rendered = page_converter.convert_include(macro_div, "inlined content", [])

    assert "![[" not in rendered


================================================
FILE: tests/unit/test_main.py
================================================
"""Unit tests for main module."""

import pytest
import typer

from confluence_markdown_exporter.main import app
from confluence_markdown_exporter.main import version


class TestVersionCommand:
    """Test cases for version command."""

    def test_version_output(self, capsys: pytest.CaptureFixture[str]) -> None:
        """Check that ``version()`` prints the package name plus further text."""
        package_name = "confluence-markdown-exporter"

        version()
        stdout = capsys.readouterr().out

        assert package_name in stdout
        # Anything beyond the bare name is taken to be the version information.
        assert len(stdout.strip()) > len(package_name)


class TestAppConfiguration:
    """Test cases for the Typer app configuration."""

    def test_app_is_typer_instance(self) -> None:
        """Check that ``app`` is a ``typer.Typer`` object."""
        assert isinstance(app, typer.Typer)

    def test_app_has_commands(self) -> None:
        """Check that every expected top-level command is registered on ``app``."""
        # Command names are derived from callback function names, with "_" shown as "-".
        registered = {
            info.callback.__name__.replace("_", "-")
            for info in app.registered_commands
            if info.callback is not None
        }

        for command in ("pages", "pages-with-descendants", "spaces", "orgs", "version"):
            assert command in registered

    def test_app_has_config_group(self) -> None:
        """Check that a group named ``config`` is registered on ``app``."""
        assert any(group.name == "config" for group in app.registered_groups)


================================================
FILE: tests/unit/test_nbsp_fix.py
================================================
"""Test that Unicode whitespace (especially &nbsp;) is preserved in inline formatting."""

from __future__ import annotations

from typing import TYPE_CHECKING

import pytest

if TYPE_CHECKING:
    from confluence_markdown_exporter.confluence import Page


class TestNbspPreservation:
    """Checks that nbsp and other Unicode whitespace survive inline-format conversion."""

    @staticmethod
    def _has_any(text: str, *candidates: str) -> bool:
        """Return True when at least one of ``candidates`` occurs in ``text``."""
        return any(candidate in text for candidate in candidates)

    @pytest.fixture
    def converter(self) -> Page.Converter:
        """Provide a ``Page.Converter`` built around a minimal stand-in page."""
        from confluence_markdown_exporter.confluence import Page

        class MockPage:
            """Page double carrying only the attributes assigned in ``__init__``."""

            def __init__(self) -> None:
                self.id, self.title, self.html = "test-page", "Test Page", ""
                self.labels, self.ancestors = [], []

            def get_attachment_by_file_id(self, file_id: str) -> None:
                return None

        return Page.Converter(MockPage())

    def test_em_with_leading_nbsp(self, converter: Page.Converter) -> None:
        """A leading nbsp inside ``<em>`` ends up as a space before the emphasis."""
        bare = converter.convert("<em>&nbsp;text</em>").strip()
        assert bare == "*text*", f"Expected '*text*' but got '{bare}'"

        # With a preceding word the separating space becomes observable.
        in_context = converter.convert("word<em>&nbsp;text</em>").strip()
        assert self._has_any(in_context, "word *text*", "word  *text*")

    def test_em_with_trailing_nbsp(self, converter: Page.Converter) -> None:
        """A trailing nbsp inside ``<em>`` ends up as a space after the emphasis."""
        bare = converter.convert("<em>text&nbsp;</em>").strip()
        assert bare == "*text*", f"Expected '*text*' but got '{bare}'"

        # With a following word the separating space becomes observable.
        in_context = converter.convert("<em>text&nbsp;</em>word").strip()
        assert self._has_any(in_context, "*text* word", "*text*  word")

    def test_em_with_both_nbsp(self, converter: Page.Converter) -> None:
        """Leading and trailing nbsp inside ``<em>`` both remain as spaces."""
        markdown = converter.convert("word<em>&nbsp;text&nbsp;</em>end").strip()
        assert "*text*" in markdown
        assert self._has_any(markdown, "word *text* end", "word  *text*  end")

    def test_strong_with_leading_nbsp(self, converter: Page.Converter) -> None:
        """A leading nbsp inside ``<strong>`` is kept as a space before the bold text."""
        markdown = converter.convert("word<strong>&nbsp;text</strong>").strip()
        assert "**text**" in markdown
        assert self._has_any(markdown, "word **text**", "word  **text**")

    def test_strong_with_trailing_nbsp(self, converter: Page.Converter) -> None:
        """A trailing nbsp inside ``<strong>`` is kept as a space after the bold text."""
        markdown = converter.convert("<strong>text&nbsp;</strong>word").strip()
        assert "**text**" in markdown
        assert self._has_any(markdown, "**text** word", "**text**  word")

    def test_code_with_leading_nbsp(self, converter: Page.Converter) -> None:
        """A leading nbsp inside ``<code>`` is kept as a space before the code span."""
        markdown = converter.convert("word<code>&nbsp;text</code>").strip()
        assert "`text`" in markdown
        assert self._has_any(markdown, "word `text`", "word  `text`")

    def test_code_with_trailing_nbsp(self, converter: Page.Converter) -> None:
        """A trailing nbsp inside ``<code>`` is kept as a space after the code span."""
        markdown = converter.convert("<code>text&nbsp;</code>word").strip()
        assert "`text`" in markdown
        assert self._has_any(markdown, "`text` word", "`text`  word")

    def test_i_tag_with_nbsp(self, converter: Page.Converter) -> None:
        """``<i>`` with a leading nbsp keeps the space, like ``<em>``."""
        markdown = converter.convert("word<i>&nbsp;text</i>").strip()
        assert "*text*" in markdown
        assert self._has_any(markdown, "word *text*", "word  *text*")

    def test_b_tag_with_nbsp(self, converter: Page.Converter) -> None:
        """``<b>`` with a leading nbsp keeps the space, like ``<strong>``."""
        markdown = converter.convert("word<b>&nbsp;text</b>").strip()
        assert "**text**" in markdown
        assert self._has_any(markdown, "word **text**", "word  **text**")

    def test_real_world_confluence_example(self, converter: Page.Converter) -> None:
        """The word before an nbsp-led ``<em>`` must not be glued to the emphasis."""
        markdown = converter.convert("property<em>&nbsp;JungerRoot</em> .").strip()
        # "property*JungerRoot*" would mean the separating space was dropped.
        assert "property*JungerRoot*" not in markdown, "Space was lost!"
        assert "*JungerRoot*" in markdown
        assert "property" in markdown

    def test_multiple_nbsp_in_sequence(self, converter: Page.Converter) -> None:
        """Two consecutive nbsp entities still yield a recognisable emphasis span."""
        markdown = converter.convert("word<em>&nbsp;&nbsp;text</em>").strip()
        assert self._has_any(markdown, "*text*", "* text*")

    def test_mixed_whitespace(self, converter: Page.Converter) -> None:
        """Ordinary spaces around and inside ``<em>`` are left as they are."""
        markdown = converter.convert("see <em>figure 1</em> below").strip()
        assert "see *figure 1* below" in markdown

    def test_normalize_helper_function(self, converter: Page.Converter) -> None:
        """``_normalize_unicode_whitespace`` swaps nbsp for regular spaces."""
        nbsp = "\xa0"
        raw = f"{nbsp}text{nbsp}"

        # Sanity check on the input before normalising.
        assert nbsp in raw

        cleaned = converter._normalize_unicode_whitespace(raw)

        assert nbsp not in cleaned, "nbsp should be replaced"
        assert cleaned.strip() == "text", "Text should be preserved"
        assert cleaned.startswith(" "), "Leading space should be preserved"
        assert cleaned.endswith(" "), "Trailing space should be preserved"

    def test_unicode_em_space(self, converter: Page.Converter) -> None:
        """EM SPACE (U+2003) is replaced by a regular space."""
        em_space = "\u2003"

        cleaned = converter._normalize_unicode_whitespace(f"{em_space}text")

        assert em_space not in cleaned, "EM SPACE should be replaced"
        assert cleaned.strip() == "text"
        assert cleaned.startswith(" "), "Space should be preserved as regular space"

    def test_unicode_thin_space(self, converter: Page.Converter) -> None:
        """THIN SPACE (U+2009) is replaced by a regular space."""
        thin_space = "\u2009"

        cleaned = converter._normalize_unicode_whitespace(f"text{thin_space}end")

        assert thin_space not in cleaned, "THIN SPACE should be replaced"
        assert cleaned == "text end", "Space should be preserved as regular space"

    def test_preserves_newlines_and_tabs(self, converter: Page.Converter) -> None:
        """Text containing only newlines as whitespace comes back unchanged."""
        raw = "text\nwith\nnewlines"

        cleaned = converter._normalize_unicode_whitespace(raw)

        assert "\n" in cleaned
        assert cleaned == raw, "Regular whitespace should not be touched"

    def test_no_modification_when_no_unicode_whitespace(self, converter: Page.Converter) -> None:
        """Text with only ASCII spaces comes back unchanged."""
        raw = "normal text"

        cleaned = converter._normalize_unicode_whitespace(raw)

        assert cleaned == raw, "Normal text should not be modified"


================================================
FILE: tests/unit/test_plantuml_code_block_detection.py
================================================
"""Unit tests for PlantUML auto-detection in code blocks."""

from unittest.mock import MagicMock
from unittest.mock import patch

from bs4 import BeautifulSoup

from confluence_markdown_exporter.confluence import Page


class TestPlantUMLCodeBlockDetection:
    """Test cases for @startuml auto-detection in <pre> code blocks."""

    def _make_page(self) -> MagicMock:
        """Return a ``Page``-spec'd mock with empty ``editor2`` and ``body_storage``."""
        mock_page = MagicMock(spec=Page)
        attributes = {
            "id": 12345,
            "title": "Test Page",
            "html": "<h1>Test Page</h1>",
            "labels": [],
            "ancestors": [],
            "attachments": [],
            "editor2": "",
            "body_storage": "",
        }
        for attr_name, attr_value in attributes.items():
            setattr(mock_page, attr_name, attr_value)
        return mock_page

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_pre_with_startuml_uses_plantuml_fence(self, mock_settings: MagicMock) -> None:
        """A ``brush: java`` block containing @startuml is fenced as plantuml, not java."""
        mock_settings.export.include_document_title = False
        uml_source = "@startuml\nA -> B\n@enduml"
        pre_markup = (
            f'<pre data-syntaxhighlighter-params="brush: java; gutter: false">{uml_source}</pre>'
        )

        page_converter = Page.Converter(self._make_page())
        pre_tag = BeautifulSoup(pre_markup, "html.parser").find("pre")

        fenced = page_converter.convert_pre(pre_tag, uml_source, [])

        assert "```plantuml" in fenced
        assert "```java" not in fenced

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_pre_without_startuml_keeps_original_language(self, mock_settings: MagicMock) -> None:
        """A ``brush: java`` block without @startuml keeps the java fence."""
        mock_settings.export.include_document_title = False
        java_source = "public class Foo {}"
        pre_markup = (
            f'<pre data-syntaxhighlighter-params="brush: java; gutter: false">{java_source}</pre>'
        )

        page_converter = Page.Converter(self._make_page())
        pre_tag = BeautifulSoup(pre_markup, "html.parser").find("pre")

        fenced = page_converter.convert_pre(pre_tag, java_source, [])

        assert "```java" in fenced
        assert "```plantuml" not in fenced

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_pre_empty_text_returns_empty(self, mock_settings: MagicMock) -> None:
        """An empty ``<pre>`` converts to the empty string."""
        mock_settings.export.include_document_title = False

        page_converter = Page.Converter(self._make_page())
        pre_tag = BeautifulSoup("<pre></pre>", "html.parser").find("pre")

        fenced = page_converter.convert_pre(pre_tag, "", [])

        assert fenced == ""

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_pre_no_language_with_startuml(self, mock_settings: MagicMock) -> None:
        """A ``<pre>`` with no brush parameter but @startuml content gets a plantuml fence."""
        mock_settings.export.include_document_title = False
        uml_source = "@startuml\nBob -> Alice\n@enduml"

        page_converter = Page.Converter(self._make_page())
        pre_tag = BeautifulSoup(f"<pre>{uml_source}</pre>", "html.parser").find("pre")

        fenced = page_converter.convert_pre(pre_tag, uml_source, [])

        assert "```plantuml" in fenced


================================================
FILE: tests/unit/test_plantuml_conversion.py
================================================
"""Unit tests for PlantUML diagram conversion."""

from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from bs4 import BeautifulSoup

from confluence_markdown_exporter.confluence import Page


class TestPlantUMLConversion:
    """Cover ``Page.Converter`` PlantUML handling for editor2 and body.storage sources."""

    @pytest.fixture
    def mock_page(self) -> MagicMock:
        """Return a page mock with one PlantUML macro in editor2 (Cloud format)."""
        # The macro's CDATA body wraps the diagram in JSON under "umlDefinition".
        uml_data = '{"umlDefinition":"@startuml\\nAlice -> Bob: Hello\\n@enduml"}'
        editor2_xml = f'''<?xml version="1.0" encoding="UTF-8"?>
<ac:structured-macro ac:name="plantuml" ac:schema-version="1"
    ac:macro-id="test-macro-id-123">
    <ac:parameter ac:name="fileName">plantuml_test</ac:parameter>
    <ac:plain-text-body><![CDATA[{uml_data}]]></ac:plain-text-body>
</ac:structured-macro>'''

        cloud_page = MagicMock(spec=Page)
        cloud_page.configure_mock(
            id=12345,
            title="Test Page",
            html="<h1>Test Page</h1>",
            labels=[],
            ancestors=[],
            attachments=[],
            body_storage="",
            editor2=editor2_xml,
        )
        return cloud_page

    @pytest.fixture
    def mock_server_page(self) -> MagicMock:
        """Return a page mock with one PlantUML macro in body.storage (Server format)."""
        # Here the CDATA body holds the raw diagram text, with no JSON wrapper.
        storage_xml = (
            '<ac:structured-macro ac:name="plantuml">'
            "<ac:plain-text-body>"
            "<![CDATA[@startuml\nAlice -> Bob: Hello\n@enduml]]>"
            "</ac:plain-text-body>"
            "</ac:structured-macro>"
        )

        server_page = MagicMock(spec=Page)
        server_page.configure_mock(
            id=67890,
            title="Server Page",
            html="<h1>Server Page</h1>",
            labels=[],
            ancestors=[],
            attachments=[],
            editor2="",
            body_storage=storage_xml,
        )
        return server_page

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_convert_plantuml_cloud_editor2(
        self, mock_settings: MagicMock, mock_page: MagicMock
    ) -> None:
        """The element's macro-id equals the one in editor2; expect the fenced diagram."""
        export_settings = mock_settings.export
        export_settings.include_document_title = False
        export_settings.page_breadcrumbs = False

        page_converter = Page.Converter(mock_page)
        macro_tag = BeautifulSoup(
            '<div data-macro-name="plantuml" data-macro-id="test-macro-id-123"></div>',
            "html.parser",
        ).find("div")

        markdown = page_converter.convert_plantuml(macro_tag, "", [])

        for fragment in ("```plantuml", "@startuml", "Alice -> Bob: Hello", "@enduml"):
            assert fragment in markdown

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_convert_plantuml_server_storage(
        self, mock_settings: MagicMock, mock_server_page: MagicMock
    ) -> None:
        """With editor2 empty, the diagram held in body.storage must be emitted."""
        mock_settings.export.include_document_title = False

        page_converter = Page.Converter(mock_server_page)

        # Server renders PlantUML as <span> without macro-id
        span_tag = BeautifulSoup(
            '<span class="plantuml-svg-image" data-macro-name="plantuml"></span>',
            "html.parser",
        ).find("span")

        markdown = page_converter.convert_plantuml(span_tag, "", [])

        for fragment in ("```plantuml", "@startuml", "Alice -> Bob: Hello", "@enduml"):
            assert fragment in markdown

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_convert_plantuml_server_multiple_diagrams(
        self, mock_settings: MagicMock
    ) -> None:
        """Two successive conversions return the first and second stored diagram in order."""
        mock_settings.export.include_document_title = False

        two_diagram_storage = (
            '<ac:structured-macro ac:name="plantuml">'
            "<ac:plain-text-body>"
            "<![CDATA[@startuml\nAlice -> Bob: First\n@enduml]]>"
            "</ac:plain-text-body>"
            "</ac:structured-macro>"
            "<p>Some text between diagrams</p>"
            '<ac:structured-macro ac:name="plantuml">'
            "<ac:plain-text-body>"
            "<![CDATA[@startuml\nBob -> Carol: Second\n@enduml]]>"
            "</ac:plain-text-body>"
            "</ac:structured-macro>"
        )

        page = MagicMock(spec=Page)
        page.configure_mock(
            id=11111,
            title="Multi-Diagram Page",
            html="<h1>Multi-Diagram Page</h1>",
            labels=[],
            ancestors=[],
            attachments=[],
            editor2="",
            body_storage=two_diagram_storage,
        )

        page_converter = Page.Converter(page)

        # Both view elements are identical, so only call order can tell them apart.
        span_markup = '<span data-macro-name="plantuml"></span>'
        first_md, second_md = [
            page_converter.convert_plantuml(
                BeautifulSoup(span_markup, "html.parser").find("span"), "", []
            )
            for _ in range(2)
        ]

        assert "Alice -> Bob: First" in first_md
        assert "Bob -> Carol: Second" in second_md

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_convert_plantuml_no_source_available(
        self, mock_settings: MagicMock
    ) -> None:
        """With editor2 and body.storage both empty, a 'source not found' comment results."""
        mock_settings.export.include_document_title = False

        page = MagicMock(spec=Page)
        page.configure_mock(
            id=99999,
            title="Empty Page",
            html="<h1>Empty Page</h1>",
            labels=[],
            ancestors=[],
            attachments=[],
            editor2="",
            body_storage="",
        )

        page_converter = Page.Converter(page)
        macro_tag = BeautifulSoup(
            '<div data-macro-name="plantuml"></div>', "html.parser"
        ).find("div")

        markdown = page_converter.convert_plantuml(macro_tag, "", [])

        assert "<!-- PlantUML diagram" in markdown
        assert "source not found" in markdown

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_convert_plantuml_complex_diagram(self, mock_settings: MagicMock) -> None:
        """A multi-line diagram with skinparam and title survives conversion from editor2."""
        mock_settings.export.include_document_title = False

        # Complex PlantUML diagram - properly escaped for JSON
        uml_definition = (
            "@startuml\\nskinparam backgroundColor white\\ntitle Test Diagram\\n\\n"
            "|Actor|\\nstart\\n:Action 1;\\n:Action 2;\\nstop\\n@enduml"
        )
        editor2_xml = f'''<?xml version="1.0" encoding="UTF-8"?>
<ac:structured-macro ac:name="plantuml" ac:schema-version="1"
    ac:macro-id="complex-macro-id">
    <ac:plain-text-body><![CDATA[{{"umlDefinition":"{uml_definition}"}}]]></ac:plain-text-body>
</ac:structured-macro>'''

        page = MagicMock(spec=Page)
        page.configure_mock(
            id=12345,
            title="Test Page",
            html="<h1>Test Page</h1>",
            labels=[],
            ancestors=[],
            attachments=[],
            body_storage="",
            editor2=editor2_xml,
        )

        page_converter = Page.Converter(page)
        macro_tag = BeautifulSoup(
            '<div data-macro-name="plantuml" data-macro-id="complex-macro-id"></div>',
            "html.parser",
        ).find("div")

        markdown = page_converter.convert_plantuml(macro_tag, "", [])

        expected_fragments = (
            "```plantuml",
            "@startuml",
            "skinparam backgroundColor white",
            "title Test Diagram",
            "@enduml",
        )
        for fragment in expected_fragments:
            assert fragment in markdown

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_convert_plantuml_editor2_fallback_to_storage(
        self, mock_settings: MagicMock
    ) -> None:
        """An element macro-id absent from editor2 must resolve via body.storage instead."""
        mock_settings.export.include_document_title = False

        # editor2 has a macro but with a different ID
        mismatched_editor2 = '''<?xml version="1.0" encoding="UTF-8"?>
<ac:structured-macro ac:name="plantuml" ac:macro-id="different-id">
    <ac:plain-text-body><![CDATA[{"umlDefinition":"@startuml\\nwrong\\n@enduml"}]]></ac:plain-text-body>
</ac:structured-macro>'''

        # body.storage has the correct content
        correct_storage = (
            '<ac:structured-macro ac:name="plantuml">'
            "<ac:plain-text-body>"
            "<![CDATA[@startuml\nCorrect from storage\n@enduml]]>"
            "</ac:plain-text-body>"
            "</ac:structured-macro>"
        )

        page = MagicMock(spec=Page)
        page.configure_mock(
            id=22222,
            title="Fallback Page",
            html="<h1>Fallback Page</h1>",
            labels=[],
            ancestors=[],
            attachments=[],
            editor2=mismatched_editor2,
            body_storage=correct_storage,
        )

        page_converter = Page.Converter(page)

        # View element references an ID not found in editor2
        macro_tag = BeautifulSoup(
            '<div data-macro-name="plantuml" data-macro-id="nonexistent-id"></div>',
            "html.parser",
        ).find("div")

        markdown = page_converter.convert_plantuml(macro_tag, "", [])

        assert "```plantuml" in markdown
        assert "Correct from storage" in markdown

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_convert_plantuml_invalid_json_falls_through(
        self, mock_settings: MagicMock
    ) -> None:
        """An unparseable JSON body in editor2 must not block the body.storage source."""
        mock_settings.export.include_document_title = False

        # The macro-id matches the view element, but the CDATA payload is not valid JSON.
        broken_editor2 = '''<?xml version="1.0" encoding="UTF-8"?>
<ac:structured-macro ac:name="plantuml" ac:macro-id="json-error-id">
    <ac:plain-text-body><![CDATA[{invalid json}]]></ac:plain-text-body>
</ac:structured-macro>'''

        fallback_storage = (
            '<ac:structured-macro ac:name="plantuml">'
            "<ac:plain-text-body>"
            "<![CDATA[@startuml\nFallback content\n@enduml]]>"
            "</ac:plain-text-body>"
            "</ac:structured-macro>"
        )

        page = MagicMock(spec=Page)
        page.configure_mock(
            id=33333,
            title="Invalid JSON Page",
            html="<h1>Invalid JSON Page</h1>",
            labels=[],
            ancestors=[],
            attachments=[],
            editor2=broken_editor2,
            body_storage=fallback_storage,
        )

        page_converter = Page.Converter(page)
        macro_tag = BeautifulSoup(
            '<div data-macro-name="plantuml" data-macro-id="json-error-id"></div>',
            "html.parser",
        ).find("div")

        markdown = page_converter.convert_plantuml(macro_tag, "", [])

        assert "```plantuml" in markdown
        assert "Fallback content" in markdown

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_convert_span_dispatches_plantuml(
        self, mock_settings: MagicMock, mock_server_page: MagicMock
    ) -> None:
        """``convert_span`` on a plantuml-macro span must produce the fenced diagram."""
        export_settings = mock_settings.export
        export_settings.include_document_title = False
        export_settings.convert_text_highlights = False
        export_settings.convert_font_colors = False

        page_converter = Page.Converter(mock_server_page)

        span_markup = (
            '<span class="plantuml-svg-image conf-macro output-inline" '
            'data-hasbody="true" data-macro-name="plantuml">'
            "<svg>...</svg>"
            "</span>"
        )
        span_tag = BeautifulSoup(span_markup, "html.parser").find("span")

        markdown = page_converter.convert_span(span_tag, "svg text", [])

        assert "```plantuml" in markdown
        assert "@startuml" in markdown


================================================
FILE: tests/unit/test_template_placeholders.py
================================================
"""Test that <template> placeholders are escaped for Obsidian compatibility."""

from __future__ import annotations

from typing import TYPE_CHECKING

import pytest

if TYPE_CHECKING:
    from confluence_markdown_exporter.confluence import Page


class TestTemplatePlaceholderEscaping:
    """Check which ``<...>`` spans ``_escape_template_placeholders`` escapes or leaves alone."""

    @pytest.fixture
    def converter(self) -> Page.Converter:
        """Provide a ``Page.Converter`` built around a minimal hand-written page stub."""
        # Imported here because the module-level import only exists under TYPE_CHECKING.
        from confluence_markdown_exporter.confluence import Page

        class MockPage:
            # Minimal stand-in: just the attributes and the lookup method defined below.
            def __init__(self) -> None:
                self.id, self.title, self.html = "test-page", "Test Page", ""
                self.labels, self.ancestors = [], []

            def get_attachment_by_file_id(self, file_id: str) -> None:
                return None

        stub_page = MockPage()
        return Page.Converter(stub_page)

    def test_multi_word_placeholder_escaped(self, converter: Page.Converter) -> None:
        """A placeholder made of several words gets both brackets escaped."""
        escaped = converter._escape_template_placeholders("Replace <medical device> here.")
        assert escaped == "Replace \\<medical device\\> here."

    def test_allcaps_placeholder_escaped(self, converter: Page.Converter) -> None:
        """An all-caps single-word placeholder gets both brackets escaped."""
        source = "Page: Literature Search Report: <TOPIC>"
        escaped = converter._escape_template_placeholders(source)
        assert escaped == "Page: Literature Search Report: \\<TOPIC\\>"

    def test_complex_placeholder_escaped(self, converter: Page.Converter) -> None:
        """Placeholders containing punctuation and parentheses are escaped as well."""
        escaped = converter._escape_template_placeholders(
            "the <(e.g., clinical performance or state of the art)> of <medical device>."
        )
        expected_fragments = (
            "\\<(e.g., clinical performance or state of the art)\\>",
            "\\<medical device\\>",
        )
        for fragment in expected_fragments:
            assert fragment in escaped

    def test_placeholder_with_slash_in_name_escaped(self, converter: Page.Converter) -> None:
        """A slash inside the placeholder text does not prevent escaping."""
        source = "the <medical device/equivalent device> here"
        escaped = converter._escape_template_placeholders(source)
        assert "\\<medical device/equivalent device\\>" in escaped

    def test_fake_closing_tag_placeholder_escaped(self, converter: Page.Converter) -> None:
        """A multi-word span that merely looks like a closing tag is escaped."""
        source = "use the </insert excerpt> function"
        escaped = converter._escape_template_placeholders(source)
        assert "\\</insert excerpt\\>" in escaped

    def test_br_tag_preserved(self, converter: Page.Converter) -> None:
        """A self-closing ``<br/>`` passes through unchanged."""
        source = "text<br/>more text"
        assert converter._escape_template_placeholders(source) == source

    def test_br_with_space_preserved(self, converter: Page.Converter) -> None:
        """``<br />`` with a space before the slash passes through unchanged."""
        source = "text<br />more text"
        assert converter._escape_template_placeholders(source) == source

    def test_br_uppercase_preserved(self, converter: Page.Converter) -> None:
        """An uppercase ``<BR/>`` passes through unchanged."""
        source = "text<BR/>more text"
        assert converter._escape_template_placeholders(source) == source

    def test_closing_html_tag_preserved(self, converter: Page.Converter) -> None:
        """A genuine closing tag such as ``</div>`` passes through unchanged."""
        source = "</div>"
        assert converter._escape_template_placeholders(source) == source

    def test_inline_code_not_modified(self, converter: Page.Converter) -> None:
        """Placeholders inside backtick inline code are left untouched."""
        source = "Use `<TOPIC>` here."
        assert converter._escape_template_placeholders(source) == source

    def test_fenced_code_block_not_modified(self, converter: Page.Converter) -> None:
        """Placeholders inside a backtick-fenced block stay unescaped."""
        escaped = converter._escape_template_placeholders(
            "before\n```\n<TOPIC>\n<medical device>\n```\nafter"
        )
        for untouched in ("<TOPIC>", "<medical device>"):
            assert untouched in escaped
        assert "\\<TOPIC\\>" not in escaped

    def test_tilde_fenced_code_block_not_modified(self, converter: Page.Converter) -> None:
        """Placeholders inside a tilde-fenced block stay unescaped."""
        escaped = converter._escape_template_placeholders("before\n~~~\n<TOPIC>\n~~~\nafter")
        assert "<TOPIC>" in escaped

    def test_text_outside_code_block_still_escaped(self, converter: Page.Converter) -> None:
        """Text around a fenced block is escaped while the block's content is not."""
        source = "Replace <TOPIC> here.\n```\n<TOPIC>\n```\nAlso <medical device>."
        output_lines = converter._escape_template_placeholders(source).split("\n")
        # Lines 0 and 4 are prose; line 2 sits between the fences.
        before_fence, inside_fence, after_fence = (
            output_lines[0],
            output_lines[2],
            output_lines[4],
        )
        assert "\\<TOPIC\\>" in before_fence
        assert "<TOPIC>" in inside_fence
        assert "\\<medical device\\>" in after_fence

    def test_https_autolink_preserved(self, converter: Page.Converter) -> None:
        """An ``https://`` autolink passes through unchanged."""
        source = "URL: <https://api.airamed.de/v1/udi>."
        assert converter._escape_template_placeholders(source) == source

    def test_http_autolink_preserved(self, converter: Page.Converter) -> None:
        """An ``http://`` autolink with a query string passes through unchanged."""
        source = "see <http://example.com/path?q=1>"
        assert converter._escape_template_placeholders(source) == source

    def test_mailto_autolink_preserved(self, converter: Page.Converter) -> None:
        """A ``mailto:`` autolink passes through unchanged."""
        source = "contact <mailto:foo@bar.com>"
        assert converter._escape_template_placeholders(source) == source

    def test_email_autolink_preserved(self, converter: Page.Converter) -> None:
        """A bare e-mail autolink passes through unchanged."""
        source = "contact <foo@bar.com> now"
        assert converter._escape_template_placeholders(source) == source

    def test_autolink_with_space_still_escaped(self, converter: Page.Converter) -> None:
        """A URL-like span containing whitespace is escaped like a placeholder."""
        # Not a valid autolink (contains whitespace) — treat as placeholder
        escaped = converter._escape_template_placeholders("<https://x y>")
        assert escaped == "\\<https://x y\\>"


================================================
FILE: tests/unit/utils/__init__.py
================================================
"""Unit tests for utils module."""


================================================
FILE: tests/unit/utils/test_app_data_store_env.py
================================================
"""Tests for ENV var override support in AppSettings."""

import os
import tempfile
from pathlib import Path
from unittest.mock import patch

import pytest

from confluence_markdown_exporter.utils.app_data_store import AppSettings
from confluence_markdown_exporter.utils.app_data_store import ConfigModel
from confluence_markdown_exporter.utils.app_data_store import ExportConfig
from confluence_markdown_exporter.utils.app_data_store import get_settings
from confluence_markdown_exporter.utils.app_data_store import load_app_data


class TestEnvVarOverrides:
    """Check that ``CME_`` env vars shape ``get_settings()`` without being persisted."""

    def test_log_level_env_override(self) -> None:
        """``get_settings`` reports the log level given in CME_EXPORT__LOG_LEVEL."""
        env = {"CME_EXPORT__LOG_LEVEL": "DEBUG"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.log_level == "DEBUG"

    def test_output_path_env_override(self) -> None:
        """CME_EXPORT__OUTPUT_PATH is exposed as a ``Path`` on ``export.output_path``."""
        env = {"CME_EXPORT__OUTPUT_PATH": "/some/custom/export"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.output_path == Path("/some/custom/export")

    def test_max_workers_env_override(self) -> None:
        """CME_CONNECTION_CONFIG__MAX_WORKERS is parsed into an integer max_workers."""
        env = {"CME_CONNECTION_CONFIG__MAX_WORKERS": "3"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.connection_config.max_workers == 3

    def test_verify_ssl_env_override_false(self) -> None:
        """The string "false" in CME_CONNECTION_CONFIG__VERIFY_SSL turns verify_ssl off."""
        env = {"CME_CONNECTION_CONFIG__VERIFY_SSL": "false"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.connection_config.verify_ssl is False

    def test_skip_unchanged_env_override(self) -> None:
        """The string "false" in CME_EXPORT__SKIP_UNCHANGED turns skip_unchanged off."""
        env = {"CME_EXPORT__SKIP_UNCHANGED": "false"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.skip_unchanged is False

    def test_save_log_to_file_default_false(self) -> None:
        """A default-constructed ``ExportConfig`` has save_log_to_file switched off."""
        default_export = ExportConfig()
        assert default_export.save_log_to_file is False

    def test_save_log_to_file_env_override(self) -> None:
        """The string "true" in CME_EXPORT__SAVE_LOG_TO_FILE turns save_log_to_file on."""
        env = {"CME_EXPORT__SAVE_LOG_TO_FILE": "true"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.save_log_to_file is True

    def test_attachments_export_env_override(self) -> None:
        """CME_EXPORT__ATTACHMENTS_EXPORT sets ``export.attachments_export``."""
        env = {"CME_EXPORT__ATTACHMENTS_EXPORT": "all"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.attachments_export == "all"

    def test_comments_export_env_override(self) -> None:
        """CME_EXPORT__COMMENTS_EXPORT sets ``export.comments_export``."""
        env = {"CME_EXPORT__COMMENTS_EXPORT": "all"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.comments_export == "all"

    def test_confluence_url_in_frontmatter_env_override(self) -> None:
        """CME_EXPORT__CONFLUENCE_URL_IN_FRONTMATTER sets the matching export field."""
        env = {"CME_EXPORT__CONFLUENCE_URL_IN_FRONTMATTER": "both"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.confluence_url_in_frontmatter == "both"

    def test_page_metadata_in_frontmatter_env_override(self) -> None:
        """The string "true" enables ``export.page_metadata_in_frontmatter``."""
        env = {"CME_EXPORT__PAGE_METADATA_IN_FRONTMATTER": "true"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.page_metadata_in_frontmatter is True

    def test_env_var_does_not_persist(self) -> None:
        """Loading settings with an override must not write that value to the config file."""
        with tempfile.TemporaryDirectory() as scratch_dir:
            config_file = Path(scratch_dir, "app_data.json")
            env = {
                "CME_CONFIG_PATH": str(config_file),
                "CME_EXPORT__LOG_LEVEL": "ERROR",
            }
            with patch.dict(os.environ, env):
                loaded = get_settings()
                assert loaded.export.log_level == "ERROR"
                # Config file should not exist (no write triggered by get_settings)
                assert not (config_file.exists() and "ERROR" in config_file.read_text())

    def test_file_config_used_without_env_override(self) -> None:
        """With the override variable absent, the value from the stored JSON is returned."""
        import confluence_markdown_exporter.utils.app_data_store as ads

        stored = ConfigModel()
        stored.export.log_level = "WARNING"  # type: ignore[assignment]
        stored_json = stored.model_dump_json()

        with patch.object(ads, "APP_CONFIG_PATH") as fake_config_path:
            fake_config_path.exists.return_value = True
            fake_config_path.read_text.return_value = stored_json

            # Ensure no override is set
            env_without_override = dict(os.environ)
            env_without_override.pop("CME_EXPORT__LOG_LEVEL", None)
            with patch.dict(os.environ, env_without_override, clear=True):
                loaded = get_settings()
        assert loaded.export.log_level == "WARNING"

    def test_env_override_takes_precedence_over_file(self) -> None:
        """When file and environment disagree, the environment value is the one returned."""
        import confluence_markdown_exporter.utils.app_data_store as ads

        stored = ConfigModel()
        stored.export.log_level = "WARNING"  # type: ignore[assignment]
        stored_json = stored.model_dump_json()

        with patch.object(ads, "APP_CONFIG_PATH") as fake_config_path:
            fake_config_path.exists.return_value = True
            fake_config_path.read_text.return_value = stored_json

            env = {"CME_EXPORT__LOG_LEVEL": "DEBUG"}
            with patch.dict(os.environ, env):
                loaded = get_settings()
        assert loaded.export.log_level == "DEBUG"

    def test_multiple_env_overrides(self) -> None:
        """Several overrides set at once are all reflected in the loaded settings."""
        env = {
            "CME_EXPORT__LOG_LEVEL": "ERROR",
            "CME_EXPORT__FILENAME_LENGTH": "100",
            "CME_CONNECTION_CONFIG__TIMEOUT": "60",
            "CME_CONNECTION_CONFIG__USE_V2_API": "true",
        }
        with patch.dict(os.environ, env):
            loaded = get_settings()
        export_cfg, connection_cfg = loaded.export, loaded.connection_config
        assert export_cfg.log_level == "ERROR"
        assert export_cfg.filename_length == 100
        assert connection_cfg.timeout == 60
        assert connection_cfg.use_v2_api is True

    def test_page_href_env_override(self) -> None:
        """CME_EXPORT__PAGE_HREF sets ``export.page_href``."""
        env = {"CME_EXPORT__PAGE_HREF": "absolute"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.page_href == "absolute"

    def test_attachment_href_env_override(self) -> None:
        """CME_EXPORT__ATTACHMENT_HREF sets ``export.attachment_href``."""
        env = {"CME_EXPORT__ATTACHMENT_HREF": "absolute"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.attachment_href == "absolute"

    def test_cleanup_stale_env_override(self) -> None:
        """The string "false" in CME_EXPORT__CLEANUP_STALE turns cleanup_stale off."""
        env = {"CME_EXPORT__CLEANUP_STALE": "false"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.cleanup_stale is False

    def test_backoff_and_retry_env_override(self) -> None:
        """The string "false" turns ``connection_config.backoff_and_retry`` off."""
        env = {"CME_CONNECTION_CONFIG__BACKOFF_AND_RETRY": "false"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.connection_config.backoff_and_retry is False

    def test_max_backoff_seconds_env_override(self) -> None:
        """CME_CONNECTION_CONFIG__MAX_BACKOFF_SECONDS is parsed into an integer."""
        env = {"CME_CONNECTION_CONFIG__MAX_BACKOFF_SECONDS": "120"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.connection_config.max_backoff_seconds == 120

    def test_enable_jira_enrichment_env_override(self) -> None:
        """The string "false" turns ``export.enable_jira_enrichment`` off."""
        env = {"CME_EXPORT__ENABLE_JIRA_ENRICHMENT": "false"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.enable_jira_enrichment is False

    def test_lockfile_name_env_override(self) -> None:
        """CME_EXPORT__LOCKFILE_NAME sets ``export.lockfile_name``."""
        env = {"CME_EXPORT__LOCKFILE_NAME": "my-lock.json"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.lockfile_name == "my-lock.json"

    def test_existence_check_batch_size_env_override(self) -> None:
        """CME_EXPORT__EXISTENCE_CHECK_BATCH_SIZE is parsed into an integer batch size."""
        env = {"CME_EXPORT__EXISTENCE_CHECK_BATCH_SIZE": "50"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        assert loaded.export.existence_check_batch_size == 50

    def test_app_settings_is_base_settings_subclass(self) -> None:
        """``AppSettings`` derives from pydantic-settings' ``BaseSettings``."""
        from pydantic_settings import BaseSettings

        derives_from_base = issubclass(AppSettings, BaseSettings)
        assert derives_from_base

    def test_invalid_log_level_env_var_raises(self) -> None:
        """A log level outside the accepted values makes ``get_settings`` raise."""
        from pydantic import ValidationError

        bad_env = {"CME_EXPORT__LOG_LEVEL": "INVALID"}
        with patch.dict(os.environ, bad_env), pytest.raises(ValidationError):
            get_settings()


class TestLoadAppData:
    """Check that ``load_app_data`` copes with unreadable config file contents."""

    def test_empty_config_file_returns_defaults(self) -> None:
        """An empty config file still yields a dict instead of raising."""
        import confluence_markdown_exporter.utils.app_data_store as ads

        with patch.object(ads, "APP_CONFIG_PATH") as fake_config_path:
            # Pretend the file exists but contains nothing at all.
            fake_config_path.exists.return_value = True
            fake_config_path.read_text.return_value = ""
            loaded = load_app_data()
        assert isinstance(loaded, dict)

    def test_invalid_json_config_file_returns_defaults(self) -> None:
        """A config file holding malformed JSON still yields a dict instead of raising."""
        import confluence_markdown_exporter.utils.app_data_store as ads

        with patch.object(ads, "APP_CONFIG_PATH") as fake_config_path:
            # Pretend the file exists but holds text that is not parseable JSON.
            fake_config_path.exists.return_value = True
            fake_config_path.read_text.return_value = "not json {"
            loaded = load_app_data()
        assert isinstance(loaded, dict)


class TestAttachmentPathMigration:
    """Check how ``attachment_path`` templates lacking {attachment_extension} are migrated."""

    def test_title_without_extension_gets_migrated(self) -> None:
        """A template ending in {attachment_title} gains {attachment_extension}."""
        migrated = ExportConfig(attachment_path="{space_name}/{attachment_title}")
        expected = "{space_name}/{attachment_title}{attachment_extension}"
        assert migrated.attachment_path == expected

    def test_title_with_other_path_segments_migrated(self) -> None:
        """The leading path segment does not affect the migration."""
        migrated = ExportConfig(attachment_path="{page_title}/{attachment_title}")
        expected = "{page_title}/{attachment_title}{attachment_extension}"
        assert migrated.attachment_path == expected

    def test_title_already_has_extension_not_changed(self) -> None:
        """A template that already carries {attachment_extension} is kept verbatim."""
        template = "{space_name}/{attachment_title}{attachment_extension}"
        assert ExportConfig(attachment_path=template).attachment_path == template

    def test_no_attachment_title_not_changed(self) -> None:
        """A template without {attachment_title} is kept verbatim."""
        template = "{space_name}/attachments/{attachment_file_id}{attachment_extension}"
        assert ExportConfig(attachment_path=template).attachment_path == template

    def test_migration_via_env_var(self) -> None:
        """A template supplied through the environment is migrated the same way."""
        env = {"CME_EXPORT__ATTACHMENT_PATH": "{space_name}/attachments/{attachment_title}"}
        with patch.dict(os.environ, env):
            loaded = get_settings()
        expected = "{space_name}/attachments/{attachment_title}{attachment_extension}"
        assert loaded.export.attachment_path == expected


class TestAttachmentsExportMigration:
    """Check the mapping from the legacy ``attachment_export_all`` flag to ``attachments_export``."""

    def test_legacy_false_maps_to_referenced(self) -> None:
        """A legacy value of False becomes ``attachments_export == "referenced"``."""
        legacy_payload = {"attachment_export_all": False}
        migrated = ExportConfig.model_validate(legacy_payload)
        assert migrated.attachments_export == "referenced"

    def test_legacy_true_maps_to_all(self) -> None:
        """A legacy value of True becomes ``attachments_export == "all"``."""
        legacy_payload = {"attachment_export_all": True}
        migrated = ExportConfig.model_validate(legacy_payload)
        assert migrated.attachments_export == "all"

    def test_new_field_takes_precedence_over_old(self) -> None:
        """If both keys are supplied, the explicit ``attachments_export`` value is kept."""
        mixed_payload = {"attachment_export_all": True, "attachments_export": "disabled"}
        migrated = ExportConfig.model_validate(mixed_payload)
        assert migrated.attachments_export == "disabled"


class TestCommentsExportMigration:
    """Check the mapping from the legacy ``inline_comments`` flag to ``comments_export``."""

    def test_legacy_true_maps_to_inline(self) -> None:
        """A legacy value of True becomes ``comments_export == "inline"``."""
        legacy_payload = {"inline_comments": True}
        migrated = ExportConfig.model_validate(legacy_payload)
        assert migrated.comments_export == "inline"

    def test_legacy_false_maps_to_none(self) -> None:
        """A legacy value of False becomes ``comments_export == "none"``."""
        legacy_payload = {"inline_comments": False}
        migrated = ExportConfig.model_validate(legacy_payload)
        assert migrated.comments_export == "none"

    def test_new_field_takes_precedence_over_old(self) -> None:
        """If both keys are supplied, the explicit ``comments_export`` value is kept."""
        mixed_payload = {"inline_comments": True, "comments_export": "footer"}
        migrated = ExportConfig.model_validate(mixed_payload)
        assert migrated.comments_export == "footer"

    def test_legacy_key_does_not_appear_on_model(self) -> None:
        """After validation the model exposes no ``inline_comments`` attribute."""
        legacy_payload = {"inline_comments": True}
        migrated = ExportConfig.model_validate(legacy_payload)
        assert not hasattr(migrated, "inline_comments")


================================================
FILE: tests/unit/utils/test_drawio_converter.py
================================================
"""Tests for DrawIO converter functionality."""

from pathlib import Path

from confluence_markdown_exporter.utils.drawio_converter import extract_mermaid_data
from confluence_markdown_exporter.utils.drawio_converter import format_mermaid_markdown
from confluence_markdown_exporter.utils.drawio_converter import load_and_parse_drawio
from confluence_markdown_exporter.utils.drawio_converter import load_drawio_file
from confluence_markdown_exporter.utils.drawio_converter import parse_mermaid_json


class TestLoadDrawioFile:
    """Exercises ``load_drawio_file`` against present and absent files."""

    def test_load_existing_file(self, tmp_path: Path) -> None:
        """The text written to a ``.drawio`` file is returned unchanged."""
        expected = "<mxfile><diagram>test</diagram></mxfile>"
        drawio_path = tmp_path / "test.drawio"
        drawio_path.write_text(expected)

        assert load_drawio_file(drawio_path) == expected

    def test_load_nonexistent_file(self, tmp_path: Path) -> None:
        """A path that does not exist yields ``None``."""
        missing_path = tmp_path / "nonexistent.drawio"
        assert load_drawio_file(missing_path) is None


class TestExtractMermaidData:
    """Exercises ``extract_mermaid_data`` on DrawIO XML documents."""

    def test_extract_valid_mermaid_data(self) -> None:
        """A ``UserObject`` carrying ``mermaidData`` yields text containing the diagram."""
        # Tag and attribute names keep their mixed case (UserObject / mermaidData)
        # because the XML parser is case-preserving.
        drawio_xml = """<?xml version="1.0" encoding="UTF-8"?>
<mxfile>
  <diagram>
    <mxGraphModel>
      <root>
        <UserObject mermaidData='{"data": "graph TB\\n  A --> B"}' />
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>"""
        extracted = extract_mermaid_data(drawio_xml)
        assert extracted is not None
        assert "graph TB" in extracted

    def test_extract_no_mermaid_data(self) -> None:
        """A ``UserObject`` without a ``mermaidData`` attribute yields ``None``."""
        drawio_xml = """<?xml version="1.0" encoding="UTF-8"?>
<mxfile>
  <diagram>
    <mxGraphModel>
      <root>
        <UserObject />
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>"""
        assert extract_mermaid_data(drawio_xml) is None

    def test_extract_invalid_xml(self) -> None:
        """Input that is not well-formed XML yields ``None``."""
        broken_xml = "<invalid>xml"
        assert extract_mermaid_data(broken_xml) is None


class TestParseMermaidJson:
    """Exercises ``parse_mermaid_json`` with JSON, plain and malformed inputs."""

    def test_parse_json_with_data_field(self) -> None:
        """A JSON object with a ``data`` key is unwrapped to the diagram text."""
        wrapped = '{"data": "graph TB\\n  A --> B"}'
        assert parse_mermaid_json(wrapped) == "graph TB\n  A --> B"

    def test_parse_plain_diagram(self) -> None:
        """A plain (non-JSON) diagram string is returned unchanged."""
        plain = "graph TB\n  A --> B"
        assert parse_mermaid_json(plain) == plain

    def test_parse_malformed_json(self) -> None:
        """Malformed JSON is handed back exactly as given."""
        truncated = '{"incomplete": '
        assert parse_mermaid_json(truncated) == truncated


class TestFormatMermaidMarkdown:
    """Exercises ``format_mermaid_markdown``."""

    def test_format_diagram(self) -> None:
        """The diagram text is wrapped in a fenced ``mermaid`` code block."""
        source = "graph TB\n  A --> B"
        fenced = format_mermaid_markdown(source)
        assert fenced == "```mermaid\ngraph TB\n  A --> B\n```"


class TestLoadAndParseDrawio:
    """End-to-end checks of ``load_and_parse_drawio`` (file on disk to markdown)."""

    def test_full_pipeline(self, tmp_path: Path) -> None:
        """A DrawIO file with embedded mermaid data is rendered as a mermaid block."""
        # Tag and attribute names keep their mixed case (UserObject / mermaidData)
        # because the XML parser is case-preserving.
        payload = '{"data": "graph TB\\n    A[Start]\\n    B[End]\\n    A --> B"}'
        drawio_xml = f"""<?xml version="1.0" encoding="UTF-8"?>
<mxfile>
  <diagram>
    <mxGraphModel>
      <root>
        <UserObject mermaidData='{payload}' />
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>"""
        drawio_path = tmp_path / "test.drawio"
        drawio_path.write_text(drawio_xml)

        markdown = load_and_parse_drawio(drawio_path)
        assert markdown is not None
        for fragment in ("```mermaid", "graph TB", "A[Start]", "B[End]"):
            assert fragment in markdown

    def test_nonexistent_file(self, tmp_path: Path) -> None:
        """A path that does not exist yields ``None``."""
        missing_path = tmp_path / "nonexistent.drawio"
        assert load_and_parse_drawio(missing_path) is None

    def test_file_without_mermaid_data(self, tmp_path: Path) -> None:
        """A DrawIO file that contains no mermaid data yields ``None``."""
        drawio_xml = """<?xml version="1.0" encoding="UTF-8"?>
<mxfile>
  <diagram>
    <mxGraphModel>
      <root>
        <mxCell />
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>"""
        drawio_path = tmp_path / "test.drawio"
        drawio_path.write_text(drawio_xml)

        assert load_and_parse_drawio(drawio_path) is None


================================================
FILE: tests/unit/utils/test_export.py
================================================
"""Unit tests for export module."""

import tempfile
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from confluence_markdown_exporter.utils.export import escape_character_class
from confluence_markdown_exporter.utils.export import github_heading_slug
from confluence_markdown_exporter.utils.export import parse_encode_setting
from confluence_markdown_exporter.utils.export import sanitize_filename
from confluence_markdown_exporter.utils.export import sanitize_key
from confluence_markdown_exporter.utils.export import save_file


class TestParseEncodeSetting:
    """Exercises ``parse_encode_setting`` with valid and invalid setting strings."""

    def test_empty_string(self) -> None:
        """An empty setting produces an empty mapping."""
        assert parse_encode_setting("") == {}

    def test_simple_mapping(self) -> None:
        """Two characters can be mapped to the same replacement."""
        parsed = parse_encode_setting('" ":"%2D","-":"%2D"')
        assert parsed == {" ": "%2D", "-": "%2D"}

    def test_mixed_mapping(self) -> None:
        """Word and percent-style replacements can be combined in one setting."""
        parsed = parse_encode_setting('" ":"dash","-":"%2D"')
        assert parsed == {" ": "dash", "-": "%2D"}

    def test_equals_mapping(self) -> None:
        """An equals sign can be used as a mapping key."""
        parsed = parse_encode_setting('"=":" equals "')
        assert parsed == {"=": " equals "}

    def test_special_characters(self) -> None:
        """Escaped double quote and backslash keys are decoded to the raw characters."""
        parsed = parse_encode_setting('"\\"":" quote ","\\\\":" backslash "')
        assert parsed == {'"': " quote ", "\\": " backslash "}

    def test_invalid_json(self) -> None:
        """Text that is not JSON produces an empty mapping."""
        assert parse_encode_setting("invalid json") == {}

    def test_non_dict_json(self) -> None:
        """A bare quoted string produces an empty mapping."""
        assert parse_encode_setting('"this is a string"') == {}

    def test_malformed_json(self) -> None:
        """A setting with a trailing comma produces an empty mapping."""
        assert parse_encode_setting('"key":"value",') == {}


class TestSaveFile:
    """Exercises ``save_file`` for text, bytes, nested paths and bad input."""

    def test_save_string_content(self) -> None:
        """A ``str`` payload is written and can be read back as UTF-8 text."""
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir, "test.txt")
            text = "Hello, World!"

            save_file(target, text)

            assert target.exists()
            assert target.read_text(encoding="utf-8") == text

    def test_save_bytes_content(self) -> None:
        """A ``bytes`` payload is written verbatim."""
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir, "test.bin")
            blob = b"Binary content"

            save_file(target, blob)

            assert target.exists()
            assert target.read_bytes() == blob

    def test_create_parent_directories(self) -> None:
        """Missing parent directories of the target are created."""
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir, "subdir", "nested", "test.txt")
            text = "Test content"

            save_file(target, text)

            assert target.exists()
            assert target.read_text(encoding="utf-8") == text

    def test_overwrite_existing_file(self) -> None:
        """A second save to the same path replaces the earlier content."""
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir, "test.txt")
            first, second = "Original content", "New content"

            save_file(target, first)
            save_file(target, second)

            assert target.read_text(encoding="utf-8") == second

    def test_invalid_content_type(self) -> None:
        """Content that is neither ``str`` nor ``bytes`` raises ``TypeError``."""
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir, "test.txt")

            with pytest.raises(TypeError, match=r"Content must be either a string or bytes\."):
                save_file(target, 123)  # type: ignore[arg-type]


class TestSanitizeFilename:
    """Test cases for sanitize_filename function.

    Every test patches ``export_options`` in the export module and configures it
    through ``_configure`` so that all three options the tests rely on
    (``filename_encoding``, ``filename_length``, ``filename_lowercase``) always
    hold real values. An attribute left unset on a ``MagicMock`` is itself a
    truthy mock, which would silently switch a test into lowercase mode.
    """

    @staticmethod
    def _configure(
        mock_export_options: MagicMock,
        *,
        encoding: str = "",
        length: int = 255,
        lowercase: bool = False,
    ) -> None:
        """Set the filename-related options on the patched ``export_options`` mock.

        Args:
            mock_export_options: The mock injected by ``@patch``.
            encoding: Value for ``filename_encoding`` (character mapping string).
            length: Value for ``filename_length``.
            lowercase: Value for ``filename_lowercase``.
        """
        mock_export_options.filename_encoding = encoding
        mock_export_options.filename_length = length
        mock_export_options.filename_lowercase = lowercase

    @patch("confluence_markdown_exporter.utils.export.export_options")
    def test_no_encoding_specified(self, mock_export_options: MagicMock) -> None:
        """Test sanitizing filename with no encoding specified."""
        self._configure(mock_export_options)

        result = sanitize_filename("Test File.txt")
        assert result == "Test File.txt"

    @patch("confluence_markdown_exporter.utils.export.export_options")
    def test_with_encoding_mapping(self, mock_export_options: MagicMock) -> None:
        """Test sanitizing filename with encoding mapping."""
        self._configure(mock_export_options, encoding='" ":"_",":":"_"')

        result = sanitize_filename("Test File: Name.txt")
        assert result == "Test_File__Name.txt"

    @patch("confluence_markdown_exporter.utils.export.export_options")
    def test_with_encoding_mapping_lowercase(self, mock_export_options: MagicMock) -> None:
        """Test sanitizing filename with encoding mapping and lowercasing enabled."""
        self._configure(mock_export_options, encoding='" ":"_",":":"_"', lowercase=True)

        result = sanitize_filename("Test File: Name.txt")
        assert result == "test_file__name.txt"

    @patch("confluence_markdown_exporter.utils.export.export_options")
    def test_trim_trailing_spaces_and_dots(self, mock_export_options: MagicMock) -> None:
        """Test that trailing spaces and dots are trimmed."""
        self._configure(mock_export_options)

        result = sanitize_filename("filename . . ")
        assert result == "filename"

    @patch("confluence_markdown_exporter.utils.export.export_options")
    def test_reserved_windows_names(self, mock_export_options: MagicMock) -> None:
        """Test that reserved Windows names get an underscore suffix, in either case."""
        self._configure(mock_export_options)

        for reserved in ("CON", "PRN", "AUX", "NUL", "COM1", "LPT1"):
            # Check the name as given and its lowercase form (case-insensitive match).
            for candidate in (reserved, reserved.lower()):
                assert sanitize_filename(candidate) == f"{candidate}_"

    @patch("confluence_markdown_exporter.utils.export.export_options")
    def test_filename_length_limit(self, mock_export_options: MagicMock) -> None:
        """Test that filename length is limited."""
        self._configure(mock_export_options, length=10)

        long_filename = "very_long_filename_that_exceeds_limit"
        result = sanitize_filename(long_filename)
        assert len(result) == 10
        assert result == long_filename[:10]

    @patch("confluence_markdown_exporter.utils.export.export_options")
    def test_complex_filename_sanitization(self, mock_export_options: MagicMock) -> None:
        """Test complex filename sanitization with multiple rules."""
        self._configure(mock_export_options, encoding='" ":"_","?":"_",":":"_"', length=50)

        filename = "My Document: What? How?  . ."
        result = sanitize_filename(filename)
        # Character replacements happen first, then rstrip of spaces and dots
        assert result == "My_Document__What__How___._"

    @patch("confluence_markdown_exporter.utils.export.export_options")
    def test_control_characters_removed(self, mock_export_options: MagicMock) -> None:
        """Control characters (e.g. backspace) should be stripped."""
        self._configure(mock_export_options)

        result = sanitize_filename("on-pr\x08emise")
        assert result == "on-premise"

    @patch("confluence_markdown_exporter.utils.export.export_options")
    def test_multiple_control_characters(self, mock_export_options: MagicMock) -> None:
        """Multiple control characters should all be stripped."""
        self._configure(mock_export_options)

        result = sanitize_filename("test\x00\x08\x1fname")
        assert result == "testname"


class TestSanitizeKey:
    """Exercises ``sanitize_key`` across ordinary and degenerate inputs."""

    def test_basic_string(self) -> None:
        """Words are lowercased and joined with an underscore."""
        assert sanitize_key("Test String") == "test_string"

    def test_special_characters(self) -> None:
        """Punctuation and symbols do not survive in the resulting key."""
        key = sanitize_key("Test-Key: With @ Special % Characters!")
        assert key == "test_key_with_special_characters"

    def test_multiple_underscores_collapse(self) -> None:
        """Runs of underscores are reduced to a single underscore."""
        key = sanitize_key("test___multiple___underscores")
        assert key == "test_multiple_underscores"

    def test_trim_leading_trailing_underscores(self) -> None:
        """Underscores at either end are removed."""
        assert sanitize_key("__test_key__") == "test_key"

    def test_starts_with_number(self) -> None:
        """A key that would start with a digit receives the ``key_`` prefix."""
        assert sanitize_key("123test") == "key_123test"

    def test_starts_with_special_character(self) -> None:
        """A leading symbol is dropped, leaving a key that needs no prefix."""
        # Expected pipeline: "@test" -> "_test" (symbol replaced) -> "test" (underscore
        # stripped); the result begins with a letter, so ``key_`` is not prepended.
        assert sanitize_key("@test") == "test"

    def test_custom_connector(self) -> None:
        """The ``connector`` argument replaces the default underscore."""
        assert sanitize_key("Test String", connector="-") == "test-string"

    def test_already_valid_key(self) -> None:
        """A key that is already valid passes through untouched."""
        assert sanitize_key("valid_key") == "valid_key"

    def test_empty_string(self) -> None:
        """An empty input produces the bare ``key_`` prefix."""
        assert sanitize_key("") == "key_"

    def test_only_special_characters(self) -> None:
        """An input made solely of symbols produces the bare ``key_`` prefix."""
        assert sanitize_key("@#$%") == "key_"


class TestGithubHeadingSlug:
    """Exercises ``github_heading_slug`` on a range of heading texts."""

    def test_leading_hyphen_preserved(self) -> None:
        """A heading that begins with a hyphen keeps it (regression for a reported bug)."""
        slug = github_heading_slug("- Final State")
        assert slug == "-final-state"

    def test_plain_heading(self) -> None:
        """A two-word heading becomes a lowercase, hyphen-joined slug."""
        slug = github_heading_slug("Final State")
        assert slug == "final-state"

    def test_uppercase(self) -> None:
        """Capital letters are lowercased in the slug."""
        slug = github_heading_slug("Hello World")
        assert slug == "hello-world"

    def test_special_chars_removed(self) -> None:
        """A comma and an exclamation mark do not appear in the slug."""
        slug = github_heading_slug("Hello, World!")
        assert slug == "hello-world"

    def test_multiple_spaces_collapsed(self) -> None:
        """Two consecutive spaces produce a single hyphen."""
        slug = github_heading_slug("Hello  World")
        assert slug == "hello-world"

    def test_trailing_hyphen(self) -> None:
        """A heading ending in `` -`` yields a slug with one trailing hyphen."""
        slug = github_heading_slug("Hello -")
        assert slug == "hello-"

    def test_empty_string(self) -> None:
        """An empty heading yields an empty slug."""
        slug = github_heading_slug("")
        assert slug == ""


class TestEscapeCharacterClass:
    """Exercises ``escape_character_class`` for the characters it backslash-escapes."""

    def test_escape_backslash(self) -> None:
        """A backslash is doubled."""
        assert escape_character_class("\\") == "\\\\"

    def test_escape_dash(self) -> None:
        """A dash gains a leading backslash."""
        assert escape_character_class("-") == "\\-"

    def test_escape_right_bracket(self) -> None:
        """A closing square bracket gains a leading backslash."""
        assert escape_character_class("]") == "\\]"

    def test_escape_caret(self) -> None:
        """A caret gains a leading backslash."""
        assert escape_character_class("^") == "\\^"

    def test_escape_multiple_characters(self) -> None:
        """Each special character in a run is escaped individually."""
        escaped = escape_character_class("\\-]^")
        assert escaped == "\\\\\\-\\]\\^"

    def test_no_special_characters(self) -> None:
        """Letters and digits are returned unchanged."""
        assert escape_character_class("abc123") == "abc123"

    def test_mixed_characters(self) -> None:
        """Only the special characters are escaped when mixed with ordinary ones."""
        escaped = escape_character_class("a-b]c^d\\e")
        assert escaped == "a\\-b\\]c\\^d\\\\e"

    def test_empty_string(self) -> None:
        """An empty input yields an empty output."""
        assert escape_character_class("") == ""


================================================
FILE: tests/unit/utils/test_lockfile.py
================================================
"""Unit tests for lockfile module."""

import json
import tempfile
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from confluence_markdown_exporter.utils.lockfile import AttachmentEntry
from confluence_markdown_exporter.utils.lockfile import ConfluenceLock
from confluence_markdown_exporter.utils.lockfile import LockfileManager
from confluence_markdown_exporter.utils.lockfile import OrgEntry
from confluence_markdown_exporter.utils.lockfile import PageEntry
from confluence_markdown_exporter.utils.lockfile import SpaceEntry

# File name of the lockfile used throughout these tests.
LOCKFILE_FILENAME = "confluence-lock.json"
# Default base URL for mock pages; also the "orgs" key in lock fixtures built below.
_TEST_BASE_URL = "https://test.atlassian.net"
# Default space key for mock pages; also the "spaces" key in lock fixtures built below.
_TEST_SPACE_KEY = "TEST"


def _make_mock_page(
    page_id: int,
    version_number: int,
    export_path: str,
    *,
    base_url: str = _TEST_BASE_URL,
    space_key: str = _TEST_SPACE_KEY,
) -> MagicMock:
    """Build a ``MagicMock`` standing in for a page handed to ``LockfileManager``.

    The mock exposes ``id``, ``version.number``, ``export_path`` (as a ``Path``),
    ``title`` (``"Page <id>"``), ``base_url`` and ``space.key``.
    """
    stub = MagicMock()
    stub.base_url, stub.space.key = base_url, space_key
    stub.id, stub.title = page_id, f"Page {page_id}"
    stub.version.number = version_number
    stub.export_path = Path(export_path)
    return stub


def _lock_with_pages(
    pages: dict,
    *,
    base_url: str = _TEST_BASE_URL,
    space_key: str = _TEST_SPACE_KEY,
) -> ConfluenceLock:
    """Return a ``ConfluenceLock`` holding *pages* under one org (*base_url*) and one space."""
    space_entry = SpaceEntry(pages=pages)
    org_entry = OrgEntry(spaces={space_key: space_entry})
    return ConfluenceLock(orgs={base_url: org_entry})


def _lock_data(
    pages: dict,
    *,
    base_url: str = _TEST_BASE_URL,
    space_key: str = _TEST_SPACE_KEY,
) -> dict:
    """Return a version-2 lockfile payload (plain dict) with *pages* under one org/space."""
    space_payload = {"pages": pages}
    org_payload = {"spaces": {space_key: space_payload}}
    return {
        "lockfile_version": 2,
        "last_export": "2025-01-01T00:00:00+00:00",
        "orgs": {base_url: org_payload},
    }


@pytest.fixture(autouse=True)
def _reset_lockfile_manager() -> None:
    """Autouse fixture: put ``LockfileManager``'s class-level state back to blank values."""
    # Path/lock references are cleared to None; the tracking containers are
    # replaced with fresh empty objects.
    for attr_name in ("_lockfile_path", "_lock", "_output_path"):
        setattr(LockfileManager, attr_name, None)
    LockfileManager._all_entries_snapshot = {}
    LockfileManager._seen_page_ids = set()


class TestLockfileManagerInit:
    """Exercises ``LockfileManager.init`` with absent, current and legacy lockfiles."""

    @staticmethod
    def _point_settings_at(mock_get_settings: MagicMock, output_dir: Path) -> Path:
        """Configure the mocked settings for *output_dir* and return the lockfile path."""
        export_settings = mock_get_settings.return_value.export
        export_settings.output_path = output_dir
        export_settings.lockfile_name = LOCKFILE_FILENAME
        return output_dir / LOCKFILE_FILENAME

    @staticmethod
    def _write_lockfile(lockfile_path: Path, payload: dict) -> None:
        """Serialize *payload* as JSON into *lockfile_path* (UTF-8)."""
        lockfile_path.write_text(json.dumps(payload), encoding="utf-8")

    @patch("confluence_markdown_exporter.utils.app_data_store.get_settings")
    def test_init_creates_empty_lock_when_no_lockfile(
        self,
        mock_get_settings: MagicMock,
    ) -> None:
        """With no lockfile on disk, init leaves an empty lock and records the path."""
        with tempfile.TemporaryDirectory() as tmp:
            expected_path = self._point_settings_at(mock_get_settings, Path(tmp))

            LockfileManager.init()

            lock = LockfileManager._lock
            assert lock is not None
            assert lock.orgs == {}
            assert LockfileManager._lockfile_path == expected_path

    @patch("confluence_markdown_exporter.utils.app_data_store.get_settings")
    def test_init_loads_existing_lockfile(
        self,
        mock_get_settings: MagicMock,
    ) -> None:
        """With a lockfile on disk, init makes its page entries available."""
        with tempfile.TemporaryDirectory() as tmp:
            lockfile_path = self._point_settings_at(mock_get_settings, Path(tmp))
            payload = _lock_data(
                {"100": {"title": "Page A", "version": 3, "export_path": "space/Page A.md"}}
            )
            self._write_lockfile(lockfile_path, payload)

            LockfileManager.init()

            lock = LockfileManager._lock
            assert lock is not None
            loaded = lock.get_page("100")
            assert loaded is not None
            assert loaded.version == 3

    @patch("confluence_markdown_exporter.utils.app_data_store.get_settings")
    def test_init_snapshots_all_entries(
        self,
        mock_get_settings: MagicMock,
    ) -> None:
        """Init copies every page ID into the snapshot and leaves the seen set empty."""
        with tempfile.TemporaryDirectory() as tmp:
            lockfile_path = self._point_settings_at(mock_get_settings, Path(tmp))
            payload = _lock_data({
                "100": {"title": "A", "version": 1, "export_path": "a.md"},
                "200": {"title": "B", "version": 2, "export_path": "b.md"},
            })
            self._write_lockfile(lockfile_path, payload)

            LockfileManager.init()

            snapshot_ids = set(LockfileManager._all_entries_snapshot.keys())
            assert snapshot_ids == {"100", "200"}
            assert LockfileManager._seen_page_ids == set()

    @patch("confluence_markdown_exporter.utils.app_data_store.get_settings")
    def test_init_discards_v1_lockfile(
        self,
        mock_get_settings: MagicMock,
    ) -> None:
        """A version-1 lockfile (flat ``pages`` dict) results in an empty lock."""
        with tempfile.TemporaryDirectory() as tmp:
            lockfile_path = self._point_settings_at(mock_get_settings, Path(tmp))
            legacy_payload = {
                "lockfile_version": 1,
                "last_export": "2025-01-01T00:00:00+00:00",
                "pages": {
                    "100": {"title": "Old Page", "version": 1, "export_path": "old.md"},
                },
            }
            self._write_lockfile(lockfile_path, legacy_payload)

            LockfileManager.init()

            lock = LockfileManager._lock
            assert lock is not None
            assert lock.orgs == {}


class TestLockfileManagerRecordPage:
    """Exercises ``LockfileManager.record_page``."""

    @staticmethod
    def _arm_manager(directory: str, lock: ConfluenceLock) -> Path:
        """Install *lock* and a lockfile path inside *directory*; return that path."""
        lockfile_path = Path(directory) / LOCKFILE_FILENAME
        LockfileManager._lockfile_path = lockfile_path
        LockfileManager._lock = lock
        return lockfile_path

    @staticmethod
    def _load_saved(lockfile_path: Path) -> dict:
        """Read the lockfile at *lockfile_path* back from disk as a dict."""
        return json.loads(lockfile_path.read_text(encoding="utf-8"))

    def test_record_page_creates_lockfile(self) -> None:
        """Recording a page writes the lockfile to disk with that page's entry."""
        with tempfile.TemporaryDirectory() as tmp:
            lockfile_path = self._arm_manager(tmp, ConfluenceLock())

            LockfileManager.record_page(
                _make_mock_page(page_id=100, version_number=1, export_path="space/Page A.md")
            )

            assert lockfile_path.exists()
            saved = self._load_saved(lockfile_path)
            recorded = saved["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
            assert "100" in recorded
            assert recorded["100"]["version"] == 1

    def test_record_page_does_nothing_when_not_initialized(self) -> None:
        """Calling ``record_page`` before any initialization must not raise."""
        uninitialized_page = _make_mock_page(
            page_id=100, version_number=1, export_path="space/Page A.md"
        )

        # The call completing without an exception is the whole assertion.
        LockfileManager.record_page(uninitialized_page)

    def test_record_page_updates_existing_entry(self) -> None:
        """Recording a page already in the lock stores its new version number."""
        with tempfile.TemporaryDirectory() as tmp:
            existing_lock = _lock_with_pages({
                "100": PageEntry(title="Page A", version=1, export_path="space/Page A.md"),
            })
            lockfile_path = self._arm_manager(tmp, existing_lock)

            LockfileManager.record_page(
                _make_mock_page(page_id=100, version_number=2, export_path="space/Page A.md")
            )

            saved = self._load_saved(lockfile_path)
            recorded = saved["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
            assert recorded["100"]["version"] == 2

    def test_record_page_adds_to_seen_page_ids(self) -> None:
        """Recording a page puts its ID (as a string) into the seen set."""
        with tempfile.TemporaryDirectory() as tmp:
            self._arm_manager(tmp, ConfluenceLock())

            LockfileManager.record_page(
                _make_mock_page(page_id=100, version_number=1, export_path="a.md")
            )

            assert "100" in LockfileManager._seen_page_ids

    def test_record_page_across_multiple_orgs_and_spaces(self) -> None:
        """Pages from distinct orgs and spaces land under their own org/space keys."""
        org_a, org_b = "https://org-a.atlassian.net", "https://org-b.atlassian.net"
        with tempfile.TemporaryDirectory() as tmp:
            lockfile_path = self._arm_manager(tmp, ConfluenceLock())

            first = _make_mock_page(100, 1, "a.md", base_url=org_a, space_key="AAA")
            second = _make_mock_page(200, 1, "b.md", base_url=org_b, space_key="BBB")
            LockfileManager.record_page(first)
            LockfileManager.record_page(second)

            saved_orgs = self._load_saved(lockfile_path)["orgs"]
            assert "100" in saved_orgs[org_a]["spaces"]["AAA"]["pages"]
            assert "200" in saved_orgs[org_b]["spaces"]["BBB"]["pages"]


class TestLockfileManagerShouldExport:
    """Exercises ``LockfileManager.should_export``."""

    def test_page_not_in_lockfile_should_export(self) -> None:
        """A page ID absent from the lock is exported."""
        LockfileManager._lock = _lock_with_pages({
            "999": PageEntry(title="Other", version=1, export_path="other.md"),
        })

        candidate = _make_mock_page(page_id=123, version_number=1, export_path="space/New.md")
        assert LockfileManager.should_export(candidate) is True

    def test_page_in_lockfile_same_version_same_path_should_not_export(self) -> None:
        """Identical version and export path means the page is skipped."""
        LockfileManager._lock = _lock_with_pages({
            "123": PageEntry(title="Page A", version=5, export_path="space/Page A.md"),
        })

        candidate = _make_mock_page(
            page_id=123, version_number=5, export_path="space/Page A.md"
        )
        assert LockfileManager.should_export(candidate) is False

    def test_page_in_lockfile_different_version_should_export(self) -> None:
        """A version number differing from the locked one triggers an export."""
        LockfileManager._lock = _lock_with_pages({
            "123": PageEntry(title="Page A", version=5, export_path="space/Page A.md"),
        })

        candidate = _make_mock_page(
            page_id=123, version_number=6, export_path="space/Page A.md"
        )
        assert LockfileManager.should_export(candidate) is True

    def test_page_in_lockfile_different_export_path_should_export(self) -> None:
        """An export path differing from the locked one triggers an export."""
        LockfileManager._lock = _lock_with_pages({
            "123": PageEntry(title="Page A", version=5, export_path="old/Page A.md"),
        })

        candidate = _make_mock_page(page_id=123, version_number=5, export_path="new/Page A.md")
        assert LockfileManager.should_export(candidate) is True

    def test_lock_is_none_should_export(self) -> None:
        """With no lock installed, every page is exported."""
        # Precondition: no lock has been installed for this test.
        assert LockfileManager._lock is None

        candidate = _make_mock_page(
            page_id=123, version_number=1, export_path="space/Page A.md"
        )
        assert LockfileManager.should_export(candidate) is True

    def test_missing_output_file_should_export(self) -> None:
        """A locked page whose output file is absent from disk is exported again."""
        with tempfile.TemporaryDirectory() as tmp:
            LockfileManager._output_path = Path(tmp)
            LockfileManager._lock = _lock_with_pages({
                "123": PageEntry(title="Page A", version=5, export_path="space/Page A.md"),
            })

            # Nothing is written under the output directory, so the export file is missing.
            candidate = _make_mock_page(
                page_id=123, version_number=5, export_path="space/Page A.md"
            )
            assert LockfileManager.should_export(candidate) is True

    def test_existing_output_file_unchanged_should_not_export(self) -> None:
        """A locked page whose output file exists and matches the lock is skipped."""
        with tempfile.TemporaryDirectory() as tmp:
            output_dir = Path(tmp)
            exported_file = output_dir / "space" / "Page A.md"
            exported_file.parent.mkdir(parents=True)
            exported_file.write_text("content")

            LockfileManager._output_path = output_dir
            LockfileManager._lock = _lock_with_pages({
                "123": PageEntry(title="Page A", version=5, export_path="space/Page A.md"),
            })

            candidate = _make_mock_page(
                page_id=123, version_number=5, export_path="space/Page A.md"
            )
            assert LockfileManager.should_export(candidate) is False


class TestLockfileManagerMarkSeen:
    """Tests for LockfileManager.mark_seen."""

    def test_mark_seen_adds_page_ids(self) -> None:
        """Integer IDs passed to mark_seen show up in the seen set as strings."""
        page_ids = [100, 200, 300]
        LockfileManager.mark_seen(page_ids)
        assert LockfileManager._seen_page_ids == {"100", "200", "300"}

    def test_mark_seen_accumulates(self) -> None:
        """IDs from successive mark_seen calls are all retained."""
        for page_id in (100, 200):
            LockfileManager.mark_seen([page_id])
        assert LockfileManager._seen_page_ids == {"100", "200"}


class TestLockfileManagerCleanup:
    """Test cases for LockfileManager.remove_pages (cleanup of stale files and entries)."""

    def test_cleanup_noop_when_not_initialized(self) -> None:
        """Cleanup does nothing when not initialized."""
        LockfileManager.remove_pages(set())  # Should not raise

    def test_cleanup_deletes_file_for_removed_page(self) -> None:
        """Pages deleted from Confluence have their files removed."""
        with tempfile.TemporaryDirectory() as tmp:
            output = Path(tmp)
            md_file = output / "space" / "Removed.md"
            md_file.parent.mkdir(parents=True)
            md_file.write_text("content")

            lockfile_path = output / LOCKFILE_FILENAME
            LockfileManager._output_path = output
            LockfileManager._lockfile_path = lockfile_path
            LockfileManager._lock = _lock_with_pages({
                "100": PageEntry(title="Removed", version=1, export_path="space/Removed.md"),
            })
            LockfileManager._all_entries_snapshot = dict(LockfileManager._lock.all_pages())
            LockfileManager._seen_page_ids = set()  # page 100 not seen

            LockfileManager.remove_pages({"100"})

            assert not md_file.exists()

    def test_cleanup_removes_entry_from_lockfile(self) -> None:
        """Deleted pages are removed from the lockfile; other pages stay."""
        with tempfile.TemporaryDirectory() as tmp:
            output = Path(tmp)
            lockfile_path = output / LOCKFILE_FILENAME
            LockfileManager._output_path = output
            LockfileManager._lockfile_path = lockfile_path
            LockfileManager._lock = _lock_with_pages({
                "100": PageEntry(title="Removed", version=1, export_path="space/Removed.md"),
                "200": PageEntry(title="Kept", version=1, export_path="space/Kept.md"),
            })
            LockfileManager._all_entries_snapshot = dict(LockfileManager._lock.all_pages())
            LockfileManager._seen_page_ids = {"200"}

            LockfileManager.remove_pages({"100"})

            # Inspect the lockfile written to disk rather than the in-memory lock.
            saved = json.loads(lockfile_path.read_text(encoding="utf-8"))
            pages = saved["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
            assert "100" not in pages
            assert "200" in pages

    def test_cleanup_deletes_old_file_for_moved_page(self) -> None:
        """When a page's export_path changes, the old file is deleted."""
        with tempfile.TemporaryDirectory() as tmp:
            output = Path(tmp)
            old_file = output / "old" / "Page.md"
            old_file.parent.mkdir(parents=True)
            old_file.write_text("old content")

            lockfile_path = output / LOCKFILE_FILENAME
            LockfileManager._output_path = output
            LockfileManager._lockfile_path = lockfile_path
            # The snapshot holds the old path while the current lock holds the new one.
            LockfileManager._all_entries_snapshot = {
                "100": PageEntry(title="Page", version=1, export_path="old/Page.md"),
            }
            LockfileManager._lock = _lock_with_pages({
                "100": PageEntry(title="Page", version=2, export_path="new/Page.md"),
            })
            LockfileManager._seen_page_ids = {"100"}

            LockfileManager.remove_pages(set())

            assert not old_file.exists()

    def test_cleanup_keeps_page_existing_on_confluence(self) -> None:
        """Unseen pages that still exist on Confluence are kept."""
        with tempfile.TemporaryDirectory() as tmp:
            output = Path(tmp)
            md_file = output / "space" / "Still.md"
            md_file.parent.mkdir(parents=True)
            md_file.write_text("content")

            lockfile_path = output / LOCKFILE_FILENAME
            LockfileManager._output_path = output
            LockfileManager._lockfile_path = lockfile_path
            LockfileManager._lock = _lock_with_pages({
                "100": PageEntry(title="Still", version=1, export_path="space/Still.md"),
            })
            LockfileManager._all_entries_snapshot = dict(LockfileManager._lock.all_pages())
            LockfileManager._seen_page_ids = set()

            # Page 100 is not in the deleted set, so it must survive.
            LockfileManager.remove_pages(set())

            assert md_file.exists()
            assert LockfileManager._lock.get_page("100") is not None

    def test_cleanup_keeps_unchanged_seen_pages(self) -> None:
        """A seen, unchanged page that is not reported as deleted stays in the lock."""
        with tempfile.TemporaryDirectory() as tmp:
            output = Path(tmp)
            lockfile_path = output / LOCKFILE_FILENAME
            LockfileManager._output_path = output
            LockfileManager._lockfile_path = lockfile_path
            LockfileManager._lock = _lock_with_pages({
                "100": PageEntry(title="Seen", version=1, export_path="a.md"),
            })
            LockfileManager._all_entries_snapshot = dict(LockfileManager._lock.all_pages())
            LockfileManager._seen_page_ids = {"100"}

            LockfileManager.remove_pages(set())

            # Without this check the test would pass even if the entry were dropped.
            assert LockfileManager._lock.get_page("100") is not None

    def test_cleanup_handles_already_deleted_file(self) -> None:
        """Cleanup does not fail when the file is already gone."""
        with tempfile.TemporaryDirectory() as tmp:
            output = Path(tmp)
            lockfile_path = output / LOCKFILE_FILENAME
            LockfileManager._output_path = output
            LockfileManager._lockfile_path = lockfile_path
            # "space/Gone.md" is never created, so there is nothing on disk to delete.
            LockfileManager._lock = _lock_with_pages({
                "100": PageEntry(title="Gone", version=1, export_path="space/Gone.md"),
            })
            LockfileManager._all_entries_snapshot = dict(LockfileManager._lock.all_pages())
            LockfileManager._seen_page_ids = set()

            LockfileManager.remove_pages({"100"})  # Should not raise

    def test_cleanup_api_failure_keeps_pages(self) -> None:
        """When API check fails, pages are kept (safe default)."""
        with tempfile.TemporaryDirectory() as tmp:
            output = Path(tmp)
            md_file = output / "space" / "Safe.md"
            md_file.parent.mkdir(parents=True)
            md_file.write_text("content")

            lockfile_path = output / LOCKFILE_FILENAME
            LockfileManager._output_path = output
            LockfileManager._lockfile_path = lockfile_path
            LockfileManager._lock = _lock_with_pages({
                "100": PageEntry(title="Safe", version=1, export_path="space/Safe.md"),
            })
            LockfileManager._all_entries_snapshot = dict(LockfileManager._lock.all_pages())
            LockfileManager._seen_page_ids = set()

            # Pass empty set: safe default — don't delete anything on API failure
            LockfileManager.remove_pages(set())

            assert md_file.exists()
            assert LockfileManager._lock.get_page("100") is not None


class TestFetchDeletedPageIds:
    """Tests for fetch_deleted_page_ids."""

    def test_empty_input_returns_empty(self) -> None:
        """With no IDs to check, the result is an empty set."""
        from confluence_markdown_exporter.confluence import fetch_deleted_page_ids

        assert fetch_deleted_page_ids([], _TEST_BASE_URL) == set()

    @patch("confluence_markdown_exporter.confluence.settings")
    @patch("confluence_markdown_exporter.confluence.get_thread_confluence")
    def test_returns_deleted_ids(
        self, mock_get_client: MagicMock, mock_settings: MagicMock
    ) -> None:
        """IDs missing from the API results are reported as deleted."""
        from confluence_markdown_exporter.confluence import fetch_deleted_page_ids

        mock_settings.connection_config.use_v2_api = True
        mock_settings.export.existence_check_batch_size = 250
        # The mocked API lists 100 and 300; 200 is absent from the results.
        api = mock_get_client.return_value
        api.get.return_value = {"results": [{"id": "100"}, {"id": "300"}]}

        deleted = fetch_deleted_page_ids(["100", "200", "300"], _TEST_BASE_URL)
        assert deleted == {"200"}

    @patch("confluence_markdown_exporter.confluence.settings")
    @patch("confluence_markdown_exporter.confluence.get_thread_confluence")
    def test_api_error_returns_no_deleted_ids(
        self, mock_get_client: MagicMock, mock_settings: MagicMock
    ) -> None:
        """If the client's get raises, nothing is reported as deleted."""
        from confluence_markdown_exporter.confluence import fetch_deleted_page_ids

        mock_settings.connection_config.use_v2_api = True
        mock_settings.export.existence_check_batch_size = 250
        api = mock_get_client.return_value
        api.get.side_effect = Exception("Network error")

        deleted = fetch_deleted_page_ids(["100", "200"], _TEST_BASE_URL)
        assert deleted == set()

    @patch("confluence_markdown_exporter.confluence.settings")
    @patch("confluence_markdown_exporter.confluence.get_thread_confluence")
    def test_batches_large_sets(
        self, mock_get_client: MagicMock, mock_settings: MagicMock
    ) -> None:
        """With a batch size of 250, checking 300 IDs issues two get calls."""
        from confluence_markdown_exporter.confluence import fetch_deleted_page_ids

        mock_settings.connection_config.use_v2_api = True
        mock_settings.export.existence_check_batch_size = 250
        api = mock_get_client.return_value
        api.get.return_value = {"results": []}
        page_ids = list(map(str, range(300)))

        fetch_deleted_page_ids(page_ids, _TEST_BASE_URL)

        assert api.get.call_count == 2


class TestConfluenceLockSave:
    """Tests for ConfluenceLock.save."""

    def test_save_is_atomic_on_success(self) -> None:
        """A successful save yields parseable JSON and leaves no *.tmp files."""
        page_a = PageEntry(title="Page A", version=1, export_path="space/Page A.md")
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "confluence-lock.json"

            _lock_with_pages({"100": page_a}).save(target)

            doc = json.loads(target.read_text(encoding="utf-8"))
            saved = doc["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
            assert saved["100"]["version"] == 1
            assert list(Path(workdir).glob("*.tmp")) == []

    def test_save_windows_permission_error_fallback(self) -> None:
        """If Path.replace raises PermissionError, save still writes the lockfile."""
        failing_replace = patch(
            "confluence_markdown_exporter.utils.lockfile.Path.replace",
            side_effect=PermissionError("WinError 5"),
        )
        page_a = PageEntry(title="Page A", version=1, export_path="space/Page A.md")
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "confluence-lock.json"
            lock = _lock_with_pages({"100": page_a})

            with failing_replace:
                lock.save(target)

            doc = json.loads(target.read_text(encoding="utf-8"))
            saved = doc["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
            assert "100" in saved
            assert list(Path(workdir).glob("*.tmp")) == []

    def test_save_cleans_up_tmp_on_error(self) -> None:
        """If Path.replace raises OSError, save re-raises and leaves no *.tmp files."""
        failing_replace = patch(
            "confluence_markdown_exporter.utils.lockfile.Path.replace",
            side_effect=OSError("disk error"),
        )
        page_a = PageEntry(title="Page A", version=1, export_path="space/Page A.md")
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "confluence-lock.json"
            lock = _lock_with_pages({"100": page_a})

            with failing_replace, pytest.raises(OSError, match="disk error"):
                lock.save(target)

            assert list(Path(workdir).glob("*.tmp")) == []

    def test_save_preserves_original_on_error(self) -> None:
        """If Path.replace raises OSError, the lockfile already on disk is untouched."""
        failing_replace = patch(
            "confluence_markdown_exporter.utils.lockfile.Path.replace",
            side_effect=OSError("disk error"),
        )
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "confluence-lock.json"
            # Seed the file with page 100 before trying to save a lock holding page 200.
            seeded = _lock_data({
                "100": {"title": "Page A", "version": 1, "export_path": "space/Page A.md"},
            })
            target.write_text(json.dumps(seeded), encoding="utf-8")

            replacement = _lock_with_pages({
                "200": PageEntry(title="Page B", version=1, export_path="space/Page B.md"),
            })

            with failing_replace, pytest.raises(OSError, match="disk error"):
                replacement.save(target)

            doc = json.loads(target.read_text(encoding="utf-8"))
            on_disk = doc["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
            assert "100" in on_disk
            assert "200" not in on_disk

    def test_save_with_delete_ids(self) -> None:
        """IDs passed as delete_ids are absent from the saved file; others remain."""
        entries = {
            "100": PageEntry(title="A", version=1, export_path="a.md"),
            "200": PageEntry(title="B", version=1, export_path="b.md"),
        }
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "confluence-lock.json"

            _lock_with_pages(entries).save(target, delete_ids={"100"})

            doc = json.loads(target.read_text(encoding="utf-8"))
            saved = doc["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
            assert "100" not in saved
            assert "200" in saved


class TestConfluenceLockSaveSortsKeys:
    """Tests for the key ordering of the JSON written by ConfluenceLock.save."""

    def test_save_sorts_page_keys(self) -> None:
        """Page IDs inserted out of order come back sorted in the saved file."""
        unsorted_pages = {
            "999": PageEntry(title="Page C", version=1, export_path="c.md"),
            "123": PageEntry(title="Page A", version=2, export_path="a.md"),
            "456": PageEntry(title="Page B", version=1, export_path="b.md"),
        }
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "confluence-lock.json"

            _lock_with_pages(unsorted_pages).save(target)

            doc = json.loads(target.read_text(encoding="utf-8"))
            saved = doc["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
            assert list(saved) == ["123", "456", "999"]

    def test_save_preserves_model_field_order(self) -> None:
        """Top-level keys appear as lockfile_version, last_export, orgs."""
        single_page = {
            "100": PageEntry(title="Page A", version=1, export_path="a.md"),
        }
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "confluence-lock.json"

            _lock_with_pages(single_page).save(target)

            doc = json.loads(target.read_text(encoding="utf-8"))
            assert list(doc) == ["lockfile_version", "last_export", "orgs"]

    def test_save_sorts_spaces_and_orgs(self) -> None:
        """Org URLs and space keys inserted out of order come back sorted."""
        z_org_spaces = {
            "ZZZ": SpaceEntry(
                pages={"1": PageEntry(title="P", version=1, export_path="p.md")}
            ),
            "AAA": SpaceEntry(pages={}),
        }
        lock = ConfluenceLock(
            orgs={
                "https://z-org.atlassian.net": OrgEntry(spaces=z_org_spaces),
                "https://a-org.atlassian.net": OrgEntry(spaces={}),
            }
        )
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "confluence-lock.json"

            lock.save(target)

            doc = json.loads(target.read_text(encoding="utf-8"))
            assert list(doc["orgs"]) == [
                "https://a-org.atlassian.net",
                "https://z-org.atlassian.net",
            ]
            saved_spaces = doc["orgs"]["https://z-org.atlassian.net"]["spaces"]
            assert list(saved_spaces) == ["AAA", "ZZZ"]


class TestAttachmentEntryTracking:
    """Tests covering attachment entries stored on pages in the lock file."""

    def test_page_entry_stores_attachments(self) -> None:
        """Attachments given to PageEntry are readable back by attachment ID."""
        attachment = AttachmentEntry(version=3, path="space/attachments/uuid-a.png")
        entry = PageEntry(
            title="Page",
            version=1,
            export_path="a.md",
            attachments={"att1": attachment},
        )

        stored = entry.attachments["att1"]
        assert stored.version == 3
        assert stored.path == "space/attachments/uuid-a.png"

    def test_page_entry_attachments_default_empty(self) -> None:
        """A PageEntry built without attachments exposes an empty dict."""
        assert PageEntry(title="Page", version=1, export_path="a.md").attachments == {}

    def test_lock_file_roundtrip_with_attachments(self) -> None:
        """Attachment version and path are present in the JSON written by save."""
        page = PageEntry(
            title="Page A",
            version=1,
            export_path="a.md",
            attachments={
                "att1": AttachmentEntry(version=2, path="space/attachments/file.png"),
            },
        )
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "confluence-lock.json"

            _lock_with_pages({"100": page}).save(target)

            doc = json.loads(target.read_text(encoding="utf-8"))
            space = doc["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]
            saved_attachment = space["pages"]["100"]["attachments"]["att1"]
            assert saved_attachment["version"] == 2
            assert saved_attachment["path"] == "space/attachments/file.png"

    def test_lock_file_missing_attachments_field_loads_as_empty(self) -> None:
        """A lock file whose page has no 'attachments' key loads with an empty dict."""
        legacy_payload = _lock_data({
            "100": {"title": "Page A", "version": 3, "export_path": "a.md"},
        })
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / "confluence-lock.json"
            target.write_text(json.dumps(legacy_payload), encoding="utf-8")

            loaded_entry = ConfluenceLock.load(target).get_page("100")

            assert loaded_entry is not None
            assert loaded_entry.attachments == {}

    def test_record_page_stores_attachment_entries(self) -> None:
        """Attachment entries passed to record_page appear in the written lock file."""
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / LOCKFILE_FILENAME
            LockfileManager._lockfile_path = target
            LockfileManager._lock = ConfluenceLock()

            recorded_page = _make_mock_page(page_id=100, version_number=1, export_path="a.md")
            LockfileManager.record_page(
                recorded_page,
                {"att42": AttachmentEntry(version=5, path="space/attachments/abc.png")},
            )

            doc = json.loads(target.read_text(encoding="utf-8"))
            saved = doc["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
            saved_attachment = saved["100"]["attachments"]["att42"]
            assert saved_attachment["version"] == 5
            assert saved_attachment["path"] == "space/attachments/abc.png"

    def test_get_page_attachment_entries_returns_entries(self) -> None:
        """get_page_attachment_entries yields the attachments stored for a known page."""
        page = PageEntry(
            title="Page",
            version=1,
            export_path="a.md",
            attachments={
                "att1": AttachmentEntry(version=2, path="space/attachments/x.png"),
            },
        )
        LockfileManager._lock = _lock_with_pages({"100": page})

        found = LockfileManager.get_page_attachment_entries("100")

        assert "att1" in found
        assert found["att1"].version == 2

    def test_get_page_attachment_entries_returns_empty_for_unknown_page(self) -> None:
        """A page ID that is not in the lock yields an empty dict."""
        LockfileManager._lock = _lock_with_pages({})

        found = LockfileManager.get_page_attachment_entries("999")

        assert found == {}

    def test_get_page_attachment_entries_returns_empty_when_not_initialized(self) -> None:
        """With no lock loaded, get_page_attachment_entries yields an empty dict."""
        # Precondition: the test expects to start with no lock loaded.
        assert LockfileManager._lock is None

        found = LockfileManager.get_page_attachment_entries("100")

        assert found == {}


================================================
FILE: tests/unit/utils/test_measure_time.py
================================================
"""Unit tests for the measure_time module."""

import logging
import time
from datetime import datetime
from unittest.mock import patch

import pytest

from confluence_markdown_exporter.utils.measure_time import measure
from confluence_markdown_exporter.utils.measure_time import measure_time


class TestMeasureTime:
    """Tests for the measure_time decorator."""

    def test_measure_time_decorator_logs(self, caplog: pytest.LogCaptureFixture) -> None:
        """A successful decorated call logs one message naming the function."""
        timing_logger = "confluence_markdown_exporter.utils.measure_time"
        caplog.set_level(logging.INFO, logger=timing_logger)

        # The inner function's name is part of the asserted log message.
        @measure_time
        def test_function(x: int, y: int) -> int:
            time.sleep(0.01)
            return x + y

        assert test_function(2, 3) == 5

        captured = [entry.message for entry in caplog.records]
        assert len(captured) == 1
        only_message = captured[0]
        assert "Function 'test_function' took" in only_message
        assert "seconds to execute" in only_message

    def test_measure_time_with_exception(self, caplog: pytest.LogCaptureFixture) -> None:
        """An exception from the decorated function propagates and nothing is logged."""
        timing_logger = "confluence_markdown_exporter.utils.measure_time"
        caplog.set_level(logging.INFO, logger=timing_logger)

        @measure_time
        def failing_function() -> None:
            msg = "Test error"
            raise ValueError(msg)

        with pytest.raises(ValueError, match="Test error"):
            failing_function()

        # No timing message is expected when the call did not complete.
        captured = [entry.message for entry in caplog.records]
        assert len(captured) == 0

    def test_measure_time_with_return_value(self) -> None:
        """The decorated function's return value is passed through unchanged."""

        @measure_time
        def function_with_return() -> str:
            return "test_result"

        assert function_with_return() == "test_result"

    def test_measure_time_with_args_kwargs(self) -> None:
        """Positional and keyword arguments reach the decorated function."""

        @measure_time
        def function_with_params(a: int, b: int, c: int = 3) -> int:
            return a + b + c

        total = function_with_params(1, 2, c=4)
        assert total == 7


class TestMeasureContextManager:
    """Test cases for measure context manager."""

    def test_measure_success(self) -> None:
        """Test measure context manager completes successfully."""
        with measure("Test Operation"):
            time.sleep(0.01)

    def test_measure_with_exception(self) -> None:
        """Test measure context manager re-raises exceptions."""

        def failing_operation() -> None:
            msg = "Test error"
            raise ValueError(msg)

        with pytest.raises(ValueError, match="Test error"), measure("Failing Operation"):
            failing_operation()

    def test_measure_debug_logs_start(self, caplog: pytest.LogCaptureFixture) -> None:
        """Test that measure logs the start time at DEBUG level."""
        logger_name = "confluence_markdown_exporter.utils.measure_time"
        caplog.set_level(logging.DEBUG, logger=logger_name)

        with measure("Debug Operation"):
            pass

        debug_messages = [r.message for r in caplog.records if r.levelno == logging.DEBUG]
        assert any("Started at" in m for m in debug_messages)

    def test_measure_timing_calculation(self) -> None:
        """Test that measure runs to completion with datetime.now mocked to fixed instants."""
        start_time = datetime(2023, 1, 1, 12, 0, 0)
        end_time = datetime(2023, 1, 1, 12, 0, 5)

        # Patch inline so the mock needs no parameter (and no annotation) on the test.
        with patch("confluence_markdown_exporter.utils.measure_time.datetime") as mock_datetime:
            # Successive datetime.now() calls in the module yield these two values in order.
            mock_datetime.now.side_effect = [start_time, end_time]

            with measure("Timed Operation"):
                pass

    def test_measure_no_exception_propagation(self) -> None:
        """Test that measure context manager doesn't suppress exceptions."""

        class CustomError(Exception):
            pass

        def raise_error() -> None:
            msg = "Custom error message"
            raise CustomError(msg)

        with pytest.raises(CustomError), measure("Exception Test"):
            raise_error()


================================================
FILE: tests/unit/utils/test_page_registry.py
================================================
"""Tests for PageTitleRegistry collision detection."""

from __future__ import annotations

from collections.abc import Iterator

import pytest

from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry


@pytest.fixture(autouse=True)
def _clean_registry() -> Iterator[None]:
    """Reset PageTitleRegistry before and after every test in this module.

    The fixture is a generator, so its return annotation is an iterator
    rather than ``None``.
    """
    PageTitleRegistry.reset()
    yield
    PageTitleRegistry.reset()


def test_unique_title_not_ambiguous() -> None:
    """A title registered for a single page is not ambiguous."""
    title = "Shared Title"
    PageTitleRegistry.register(1, title)
    ambiguous = PageTitleRegistry.is_ambiguous(title)
    assert ambiguous is False


def test_two_pages_same_title_ambiguous() -> None:
    """A title registered for two different page IDs is ambiguous."""
    title = "Shared Title"
    for page_id in (1, 2):
        PageTitleRegistry.register(page_id, title)
    ambiguous = PageTitleRegistry.is_ambiguous(title)
    assert ambiguous is True


def test_unknown_title_not_ambiguous() -> None:
    """A title that was never registered is not ambiguous."""
    ambiguous = PageTitleRegistry.is_ambiguous("Never Seen")
    assert ambiguous is False


def test_re_register_same_id_does_not_inflate_count() -> None:
    """Registering the same ID and title three times still counts as one page."""
    title = "Shared Title"
    for _ in range(3):
        PageTitleRegistry.register(1, title)

    assert PageTitleRegistry.is_ambiguous(title) is False
    assert PageTitleRegistry.title_count(title) == 1


def test_renaming_page_updates_counts() -> None:
    """Re-registering a page under a new title moves its count to that title."""
    old_title, new_title = "Old Title", "New Title"
    for page_id in (1, 2):
        PageTitleRegistry.register(page_id, old_title)
    assert PageTitleRegistry.is_ambiguous(old_title) is True

    # Page 1 is registered again under a different title; page 2 keeps the old one.
    PageTitleRegistry.register(1, new_title)

    assert PageTitleRegistry.is_ambiguous(old_title) is False
    assert PageTitleRegistry.title_count(old_title) == 1
    assert PageTitleRegistry.title_count(new_title) == 1


def test_reset_clears_state() -> None:
    """After reset, a previously ambiguous title has count zero and is not ambiguous."""
    title = "X"
    for page_id in (1, 2):
        PageTitleRegistry.register(page_id, title)

    PageTitleRegistry.reset()

    assert PageTitleRegistry.is_ambiguous(title) is False
    assert PageTitleRegistry.title_count(title) == 0


def test_blank_inputs_ignored() -> None:
    """Registering page ID 0 or an empty title leaves both title counts at zero."""
    for page_id, title in ((0, "X"), (1, "")):
        PageTitleRegistry.register(page_id, title)

    for title in ("X", ""):
        assert PageTitleRegistry.title_count(title) == 0


================================================
FILE: tests/unit/utils/test_rich_console.py
================================================
"""Tests for the logging helpers in rich_console."""

import logging
from pathlib import Path

from confluence_markdown_exporter.utils.rich_console import setup_logging


def test_setup_logging_writes_to_file(tmp_path: Path) -> None:
    """Records logged after setup_logging(..., log_file=...) end up in that file."""
    target = tmp_path / "cme.log"
    setup_logging("DEBUG", log_file=target)

    test_logger = logging.getLogger("cme.test")
    test_logger.debug("a debug message")
    test_logger.info("an info message")

    # Flush every handler on the root logger before reading the file back.
    for root_handler in logging.getLogger().handlers:
        root_handler.flush()

    written = target.read_text(encoding="utf-8")
    for expected in ("a debug message", "an info message"):
        assert expected in written


def test_setup_logging_without_file_does_not_create_one(tmp_path: Path) -> None:
    """Calling setup_logging without log_file leaves tmp_path/cme.log absent."""
    setup_logging("INFO")
    logging.getLogger("cme.test").info("hello")

    unexpected = tmp_path / "cme.log"
    assert not unexpected.exists()


================================================
FILE: tests/unit/utils/test_table_converter.py
================================================
"""Tests for the table_converter module."""

from bs4 import BeautifulSoup

from confluence_markdown_exporter.utils.table_converter import TableConverter


class TestTableConverter:
    """Test TableConverter class.

    Covers pipe escaping in cells and headers, list rendering inside table cells,
    and the ``parent_tags`` handling of the ``convert_p`` / ``convert_ol`` /
    ``convert_ul`` hooks.
    """

    def test_pipe_character_in_cell(self) -> None:
        """Test that pipe characters are escaped in table cells."""
        html = """
        <table>
            <tr>
                <th>Column 1</th>
                <th>Column 2</th>
            </tr>
            <tr>
                <td>Value with | pipe</td>
                <td>Normal value</td>
            </tr>
        </table>
        """
        converter = TableConverter()
        result = converter.convert(html)

        # The pipe character should be escaped
        assert "\\|" in result
        # The result should still have proper table structure
        assert "Column 1" in result
        assert "Column 2" in result
        assert "Value with" in result
        assert "pipe" in result

    def test_multiple_pipes_in_cell(self) -> None:
        """Test that multiple pipe characters are escaped in table cells."""
        html = """
        <table>
            <tr>
                <th>Header</th>
            </tr>
            <tr>
                <td>Value | with | multiple | pipes</td>
            </tr>
        </table>
        """
        converter = TableConverter()
        result = converter.convert(html)

        # All pipe characters should be escaped (3 pipes in the content)
        assert result.count("\\|") == 3
        assert "Value" in result
        assert "with" in result
        assert "multiple" in result
        assert "pipes" in result

    def test_pipe_character_in_header(self) -> None:
        """Test that pipe characters are escaped in table header cells."""
        html = """
        <table>
            <tr>
                <th>Column | 1</th>
                <th>Column | 2</th>
            </tr>
            <tr>
                <td>Value 1</td>
                <td>Value 2</td>
            </tr>
        </table>
        """
        converter = TableConverter()
        result = converter.convert(html)

        # The pipe characters in headers should be escaped (2 pipes)
        assert result.count("\\|") == 2
        assert "Column" in result
        assert "Value 1" in result
        assert "Value 2" in result

    def test_table_without_pipes(self) -> None:
        """Test normal table conversion without pipe characters."""
        html = """
        <table>
            <tr>
                <th>Name</th>
                <th>Age</th>
            </tr>
            <tr>
                <td>John</td>
                <td>30</td>
            </tr>
        </table>
        """
        converter = TableConverter()
        result = converter.convert(html)

        assert "Name" in result
        assert "Age" in result
        assert "John" in result
        assert "30" in result
        # Should have proper table structure
        assert "|" in result
        assert "---" in result
        # Should have no escaped pipes
        assert "\\|" not in result

    def test_convert_p_bool_parent_tags_no_crash(self) -> None:
        """convert_p must not crash when markdownify passes bool instead of set."""
        converter = TableConverter()
        el = BeautifulSoup("<p>text.</p>", "html.parser").p
        assert el is not None
        result = converter.convert_p(el, "text.", parent_tags=False)  # type: ignore[arg-type]
        assert "text." in result

    def test_convert_ol_bool_parent_tags_no_crash(self) -> None:
        """convert_ol must not crash when markdownify passes bool instead of set."""
        converter = TableConverter()
        el = BeautifulSoup("<ol><li>item</li></ol>", "html.parser").ol
        assert el is not None
        result = converter.convert_ol(el, "item", parent_tags=False)  # type: ignore[arg-type]
        assert "item" in result

    def test_convert_ul_bool_parent_tags_no_crash(self) -> None:
        """convert_ul must not crash when markdownify passes bool instead of set."""
        converter = TableConverter()
        el = BeautifulSoup("<ul><li>item</li></ul>", "html.parser").ul
        assert el is not None
        result = converter.convert_ul(el, "item", parent_tags=False)  # type: ignore[arg-type]
        assert "item" in result

    def test_single_item_ul_in_cell_strips_list_symbol(self) -> None:
        """Single-item ul in a table cell should not render a leading '- '."""
        html = """
        <table>
            <tr>
                <th>Header</th>
            </tr>
            <tr>
                <td><ul><li>Only item</li></ul></td>
            </tr>
        </table>
        """
        converter = TableConverter()
        result = converter.convert(html)

        assert "Only item" in result
        assert "- Only item" not in result

    def test_multi_item_ul_in_cell_keeps_list_symbols(self) -> None:
        """Multi-item ul in a table cell should still render with '- ' prefixes."""
        html = """
        <table>
            <tr>
                <th>Header</th>
            </tr>
            <tr>
                <td><ul><li>First</li><li>Second</li></ul></td>
            </tr>
        </table>
        """
        converter = TableConverter()
        result = converter.convert(html)

        assert "- First" in result
        assert "- Second" in result

    def test_ol_in_cell_with_empty_paragraph_shows_number(self) -> None:
        """Ol with empty <p> in a table cell should show the CSS-implicit number."""
        html = """
        <table>
            <tr><th>Header</th></tr>
            <tr><td><ol start="1"><li><p></p></li></ol></td></tr>
        </table>
        """
        converter = TableConverter()
        result = converter.convert(html)
        assert "1" in result

    def test_ol_in_cell_with_empty_paragraph_respects_start(self) -> None:
        """Ol with start attribute and empty <p> should use the start number."""
        html = """
        <table>
            <tr><th>Header</th></tr>
            <tr><td><ol start="3"><li><p></p></li></ol></td></tr>
        </table>
        """
        converter = TableConverter()
        result = converter.convert(html)
        assert "3" in result

    def test_ol_in_cell_with_content(self) -> None:
        """Ol with text content in a table cell should number each item."""
        html = """
        <table>
            <tr><th>Header</th></tr>
            <tr><td><ol start="1"><li><p>alpha</p></li><li><p>beta</p></li></ol></td></tr>
        </table>
        """
        converter = TableConverter()
        result = converter.convert(html)
        assert "1. alpha" in result
        assert "2. beta" in result
        # Items inside a cell are expected to be separated by an HTML line break.
        assert "<br>" in result

    def test_ul_in_cell_with_paragraph_items(self) -> None:
        """Ul with <p>-wrapped items in a table cell should use '- ' bullet syntax."""
        html = """
        <table>
            <tr><th>Header</th></tr>
            <tr><td><ul><li><p>First</p></li><li><p>Second</p></li><li><p>Third</p></li></ul></td></tr>
        </table>
        """
        converter = TableConverter()
        result = converter.convert(html)
        assert "- First" in result
        assert "<br>- Second" in result
        assert "<br>- Third" in result

    def test_td_detection_still_works_with_set_parent_tags(self) -> None:
        """set-based parent_tags (markdownify 1.x) must still trigger td-specific behaviour."""
        converter = TableConverter()
        el = BeautifulSoup("<p>text.</p>", "html.parser").p
        assert el is not None
        result = converter.convert_p(el, "text.", {"td", "_inline"})  # type: ignore[arg-type]
        assert result.endswith("<br/>")


================================================
FILE: tests/unit/utils/test_type_converter.py
================================================
"""Unit tests for type_converter module."""

import re

import pytest

from confluence_markdown_exporter.utils.type_converter import str_to_bool


class TestStrToBool:
    """Test cases for str_to_bool function."""

    def test_true_values(self) -> None:
        """Test that various true values are converted correctly."""
        true_values = ["true", "True", "TRUE", "1", "yes", "Yes", "YES", "on", "On", "ON"]
        for value in true_values:
            assert str_to_bool(value) is True, f"Failed for value: {value}"

    def test_false_values(self) -> None:
        """Test that various false values are converted correctly."""
        false_values = [
            "false",
            "False",
            "FALSE",
            "0",
            "no",
            "No",
            "NO",
            "off",
            "Off",
            "OFF",
        ]
        for value in false_values:
            assert str_to_bool(value) is False, f"Failed for value: {value}"

    def test_whitespace_handling(self) -> None:
        """Test that whitespace is properly stripped."""
        assert str_to_bool("  true  ") is True
        assert str_to_bool("\tfalse\t") is False
        assert str_to_bool("\n1\n") is True
        assert str_to_bool("  0  ") is False

    def test_invalid_values(self) -> None:
        """Test that invalid values raise ValueError naming the rejected input."""
        invalid_values = ["maybe", "2", "invalid", "", "true false", "truthy"]
        for value in invalid_values:
            # ``match`` is interpreted as a regular expression, so escape the expected
            # message to compare it literally even if a value contains metacharacters.
            expected = re.escape(f"Invalid boolean string: '{value}'")
            with pytest.raises(ValueError, match=expected):
                str_to_bool(value)

    def test_empty_string(self) -> None:
        """Test that empty string raises ValueError."""
        expected = re.escape("Invalid boolean string: ''")
        with pytest.raises(ValueError, match=expected):
            str_to_bool("")

    def test_none_handling(self) -> None:
        """Test behavior with None (should raise AttributeError for strip method)."""
        with pytest.raises(AttributeError):
            str_to_bool(None)  # type: ignore[arg-type]


================================================
FILE: tsconfig.json
================================================
{
  "extends": "@docusaurus/tsconfig",
  "compilerOptions": {
    "baseUrl": "."
  },
  "exclude": ["build", ".docusaurus", "node_modules"]
}