Repository: Spenhouet/confluence-markdown-exporter Branch: main Commit: 303989bb4f0e Files: 91 Total size: 617.7 KB Directory structure: gitextract_yof8yoxc/ ├── .dockerignore ├── .github/ │ ├── FUNDING.yml │ ├── ISSUE_TEMPLATE/ │ │ ├── 1_bug_report.yaml │ │ ├── 2_feature_request.yaml │ │ ├── 3_question.yaml │ │ └── config.yml │ ├── PULL_REQUEST_TEMPLATE.md │ ├── dependabot.yml │ └── workflows/ │ ├── docker-build.yml │ ├── docker-publish.yml │ ├── docs.yml │ ├── python-build.yml │ ├── python-publish.yml │ └── release.yml ├── .gitignore ├── .python-version ├── .vscode/ │ ├── extensions.json │ ├── launch.json │ ├── settings.json │ └── tasks.json ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── confluence_markdown_exporter/ │ ├── __init__.py │ ├── api_clients.py │ ├── config.py │ ├── confluence.py │ ├── main.py │ └── utils/ │ ├── __init__.py │ ├── app_data_store.py │ ├── config_interactive.py │ ├── drawio_converter.py │ ├── export.py │ ├── lockfile.py │ ├── measure_time.py │ ├── page_registry.py │ ├── rich_console.py │ ├── table_converter.py │ └── type_converter.py ├── docs/ │ ├── compatibility.md │ ├── configuration/ │ │ ├── authentication.md │ │ ├── ci.md │ │ ├── index.md │ │ ├── options.md │ │ └── target-systems.md │ ├── contributing.md │ ├── docker.md │ ├── features.md │ ├── installation.md │ ├── intro.md │ ├── troubleshooting.md │ └── usage.md ├── docusaurus.config.ts ├── package.json ├── pyproject.toml ├── scripts/ │ ├── build-versions.mjs │ └── bump-docs-version.sh ├── sidebars.ts ├── src/ │ ├── components/ │ │ ├── HomepageFeatures/ │ │ │ ├── index.tsx │ │ │ └── styles.module.css │ │ └── quickstart/ │ │ └── index.tsx │ ├── css/ │ │ └── custom.css │ └── pages/ │ ├── index.module.css │ └── index.tsx ├── tests/ │ ├── __init__.py │ ├── conftest.py │ ├── integration/ │ │ ├── __init__.py │ │ └── test_cli_integration.py │ └── unit/ │ ├── __init__.py │ ├── test_alert_conversion.py │ ├── test_api_clients.py │ ├── test_confluence.py │ ├── 
test_emoticon_conversion.py │ ├── test_include_macro_conversion.py │ ├── test_main.py │ ├── test_nbsp_fix.py │ ├── test_plantuml_code_block_detection.py │ ├── test_plantuml_conversion.py │ ├── test_template_placeholders.py │ └── utils/ │ ├── __init__.py │ ├── test_app_data_store_env.py │ ├── test_drawio_converter.py │ ├── test_export.py │ ├── test_lockfile.py │ ├── test_measure_time.py │ ├── test_page_registry.py │ ├── test_rich_console.py │ ├── test_table_converter.py │ └── test_type_converter.py └── tsconfig.json ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ .git .github .claude .venv dist build *.egg-info __pycache__ .pytest_cache .ruff_cache .mypy_cache node_modules tests scratch AIRAscore .vscode .idea *.log .DS_Store ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: Spenhouet ================================================ FILE: .github/ISSUE_TEMPLATE/1_bug_report.yaml ================================================ name: Bug report description: Report an error or unexpected behavior labels: ["bug"] body: - type: markdown attributes: value: | Thank you for taking the time to report an issue! We're glad to have you involved with confluence-markdown-exporter. **Before reporting, please make sure to search through [existing issues](https://github.com/Spenhouet/confluence-markdown-exporter/issues?q=is:issue+is:open+label:bug) (including [closed](https://github.com/Spenhouet/confluence-markdown-exporter/issues?q=is:issue%20state:closed%20label:bug)).** - type: markdown attributes: value: | ### Diagnostic info Run `cme bugreport` and paste the full output in the **Diagnostic info** field below. 
This command prints your version, system details, and configuration — with all secrets automatically redacted. - type: textarea attributes: label: Description description: | A clear and concise description of the bug, including a minimal reproducible example. Be sure to include the command you invoked (e.g., `cme pages https://company.atlassian.net/wiki/spaces/KEY/pages/123/Title`). validations: required: true - type: textarea attributes: label: Diagnostic info description: | Paste the output of `cme bugreport` here. This includes your version, Python/OS info, and configuration with secrets redacted. placeholder: | ## Bug Report Diagnostic Info ### Version confluence-markdown-exporter x.y.z ### System Python: ... Platform: ... Architecture: ... ### Config Config file: ... ```yaml ... ``` render: markdown validations: required: false - type: input attributes: label: Version description: | What version of confluence-markdown-exporter are you using? (Already included in `cme bugreport` output — fill in here only if you didn't run that command.) placeholder: e.g., confluence-markdown-exporter 4.0.3 validations: required: false - type: input attributes: label: Confluence Version description: | What Confluence version are you using? Include whether it's Cloud or Server/Data Center. Example: `Confluence Cloud` or `Confluence Server 7.19.2` placeholder: e.g., Confluence Cloud or Confluence Server 7.19.2 validations: required: false - type: input attributes: label: Jira Version description: | What Jira version are you using, if any? Include whether it's Cloud or Server/Data Center. 
Example: `Jira Cloud` or `Jira Server 8.20.5` placeholder: e.g., Jira Cloud or Jira Server 8.20.5 validations: required: false ================================================ FILE: .github/ISSUE_TEMPLATE/2_feature_request.yaml ================================================ name: Feature request description: Suggest a new feature or enhancement labels: ["enhancement"] body: - type: markdown attributes: value: | Thank you for taking the time to suggest a feature! We're glad to have you involved with confluence-markdown-exporter. **Before submitting, please make sure to search through [existing feature requests](https://github.com/Spenhouet/confluence-markdown-exporter/issues?q=is:issue+is:open+label:enhancement) (including [closed](https://github.com/Spenhouet/confluence-markdown-exporter/issues?q=is:issue%20state:closed%20label:enhancement)).** - type: textarea attributes: label: Problem Description description: | A clear and concise description of the problem or limitation you're experiencing. What is the use case? What workflow or task would this feature enable or improve? validations: required: true - type: textarea attributes: label: Proposed Solution description: | A clear and concise description of what you want to happen. How do you envision this feature working? What would the ideal implementation look like? If you have ideas about commands, options, or configuration, please include examples: ```bash # Example command or usage confluence-markdown-exporter ``` validations: required: true - type: textarea attributes: label: Alternatives Considered description: | A clear and concise description of any alternative solutions or features you've considered. Are there workarounds you're currently using? What other tools or approaches have you tried? validations: required: false - type: textarea attributes: label: Use Cases description: | Describe specific scenarios where this feature would be helpful. 
Please provide concrete examples of how you (or others) would use this feature in practice. validations: required: false ================================================ FILE: .github/ISSUE_TEMPLATE/3_question.yaml ================================================ name: Question description: Ask a question about confluence-markdown-exporter labels: ["question"] body: - type: textarea attributes: label: Question description: Describe your question in detail. validations: required: true - type: input attributes: label: Version description: What version of confluence-markdown-exporter are you using? (see `confluence-markdown-exporter version`) placeholder: e.g., confluence-markdown-exporter 3.0.3 validations: required: false - type: input attributes: label: Confluence Version description: | What Confluence version are you using? Include whether it's Cloud or Server/Data Center. Example: `Confluence Cloud` or `Confluence Server 7.19.2` placeholder: e.g., Confluence Cloud or Confluence Server 7.19.2 validations: required: false - type: input attributes: label: Jira Version description: | What Jira version are you using, if any? Include whether it's Cloud or Server/Data Center. 
Example: `Jira Cloud` or `Jira Server 8.20.5` placeholder: e.g., Jira Cloud or Jira Server 8.20.5 validations: required: false ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false contact_links: - name: Documentation url: https://github.com/Spenhouet/confluence-markdown-exporter#readme about: Read the project documentation and README ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ ## Summary ## Test Plan ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" groups: actions: patterns: - "*" ================================================ FILE: .github/workflows/docker-build.yml ================================================ name: Build Docker image on: pull_request: branches: [main] paths: - Dockerfile - .dockerignore - .github/workflows/docker-build.yml - pyproject.toml - uv.lock - confluence_markdown_exporter/** # Also build on push to main so the GHA cache is primed on the default # branch. Tag-triggered publish runs fall back to the default branch's # cache, which would otherwise stay cold until the first release. push: branches: [main] paths: - Dockerfile - .dockerignore - .github/workflows/docker-build.yml - pyproject.toml - uv.lock - confluence_markdown_exporter/** permissions: contents: read jobs: build: name: Build image (PR verification) runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v6 - name: Set up QEMU uses: docker/setup-qemu-action@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 - name: Build (no push) uses: docker/build-push-action@v7 with: context: . 
file: ./Dockerfile platforms: linux/amd64,linux/arm64 push: false cache-from: type=gha cache-to: type=gha,mode=max,ignore-error=true ================================================ FILE: .github/workflows/docker-publish.yml ================================================ name: Publish Docker image on: workflow_call: inputs: version: description: "Release version to publish (e.g. 5.1.0)" required: true type: string workflow_dispatch: inputs: version: description: "Release version to publish (e.g. 5.1.0). Must match an existing git tag." required: true type: string permissions: contents: read jobs: publish: name: Publish image to Docker Hub runs-on: ubuntu-latest environment: name: dockerhub url: https://hub.docker.com/r/${{ vars.DOCKERHUB_IMAGE || 'spenhouet/confluence-markdown-exporter' }} env: IMAGE_NAME: ${{ vars.DOCKERHUB_IMAGE || 'spenhouet/confluence-markdown-exporter' }} steps: - name: Checkout release tag uses: actions/checkout@v6 with: ref: ${{ inputs.version }} - name: Set up QEMU uses: docker/setup-qemu-action@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 - name: Log in to Docker Hub uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Extract metadata id: meta uses: docker/metadata-action@v6 with: images: ${{ env.IMAGE_NAME }} tags: | type=semver,pattern={{version}},value=${{ inputs.version }} type=semver,pattern={{major}}.{{minor}},value=${{ inputs.version }} type=semver,pattern={{major}},value=${{ inputs.version }} type=raw,value=latest labels: | org.opencontainers.image.title=confluence-markdown-exporter org.opencontainers.image.description=Export Confluence pages to Markdown org.opencontainers.image.url=https://github.com/${{ github.repository }} org.opencontainers.image.source=https://github.com/${{ github.repository }} org.opencontainers.image.version=${{ inputs.version }} org.opencontainers.image.licenses=MIT - name: Build and push uses: 
docker/build-push-action@v7 with: context: . file: ./Dockerfile platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max,ignore-error=true provenance: true - name: Update Docker Hub description uses: peter-evans/dockerhub-description@v5 continue-on-error: true with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} repository: ${{ env.IMAGE_NAME }} short-description: Export Confluence pages to Markdown (CLI) readme-filepath: ./README.md ================================================ FILE: .github/workflows/docs.yml ================================================ name: Deploy docs on: push: branches: [main] paths: - "docs/**" - "versioned_docs/**" - "versioned_sidebars/**" - "versions.json" - "src/**" - "static/**" - "docusaurus.config.ts" - "sidebars.ts" - "tsconfig.json" - "package.json" - "package-lock.json" - ".github/workflows/docs.yml" workflow_dispatch: permissions: contents: read pages: write id-token: write concurrency: group: pages cancel-in-progress: false jobs: build: name: Build docs runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Setup Node uses: actions/setup-node@v6 with: node-version: 20 cache: npm - name: Install dependencies run: npm ci - name: Build site (with versioned docs from git tags) run: npm run build:versioned - name: Upload artifact uses: actions/upload-pages-artifact@v5 with: path: build deploy: name: Deploy to GitHub Pages needs: build runs-on: ubuntu-latest environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} steps: - name: Deploy to GitHub Pages id: deployment uses: actions/deploy-pages@v5 ================================================ FILE: .github/workflows/python-build.yml ================================================ name: Build Python package on: push: branches: [main] pull_request: branches: [main] jobs: 
test: name: Test, lint and build runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true - name: Install dependencies run: uv sync --locked --all-groups - name: Run linting with ruff run: uv run ruff check - name: Run tests with pytest run: uv run pytest - name: Test build (with sources for development) run: uv build - name: Test build (without sources for publication) run: | rm -rf dist/ uv build --no-sources - name: Test package installation and import run: | uv run --with dist/*.whl --no-project -- python -c "import confluence_markdown_exporter; print('Package imports successfully')" - name: Test CLI commands run: | uv run --with dist/*.whl --no-project confluence-markdown-exporter --help uv run --with dist/*.whl --no-project cme --help - name: Upload build artifacts for inspection uses: actions/upload-artifact@v7 with: name: build-artifacts path: dist/ retention-days: 5 ================================================ FILE: .github/workflows/python-publish.yml ================================================ name: Publish Python package on: workflow_call: inputs: version: description: "Release version to publish (e.g. 5.1.0)" required: true type: string workflow_dispatch: inputs: version: description: "Release version to publish (e.g. 5.1.0). Must match an existing git tag." 
required: true type: string permissions: contents: write id-token: write attestations: write jobs: publish: name: Publish to PyPI runs-on: ubuntu-latest environment: name: release url: https://pypi.org/p/confluence-markdown-exporter steps: - name: Checkout release tag uses: actions/checkout@v6 with: ref: ${{ inputs.version }} - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true - name: Install dependencies run: uv sync --locked --all-groups - name: Build distributions run: uv build --no-sources - name: Generate artifact attestations uses: actions/attest-build-provenance@v4.1.0 with: subject-path: "dist/*" - name: Publish to PyPI run: uv publish - name: Sign the distributions with Sigstore uses: sigstore/gh-action-sigstore-python@v3.3.0 with: inputs: >- ./dist/*.tar.gz ./dist/*.whl - name: Upload signed artifacts to GitHub Release env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh release upload "${{ inputs.version }}" dist/** \ --repo "$GITHUB_REPOSITORY" ================================================ FILE: .github/workflows/release.yml ================================================ name: Release on: workflow_dispatch: inputs: version_bump: description: "Version bump type" required: true default: "patch" type: choice options: - patch - minor - major - alpha - beta - rc custom_version: description: "Custom version (leave empty to use bump type)" required: false type: string permissions: contents: write id-token: write attestations: write jobs: release: name: Bump version and create release runs-on: ubuntu-latest permissions: contents: write outputs: version: ${{ steps.export-version.outputs.value }} steps: - uses: actions/checkout@v6 with: token: ${{ secrets.GITHUB_TOKEN }} - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true - name: Install dependencies run: uv sync --locked --all-groups - name: Update version (custom) if: ${{ github.event.inputs.custom_version != '' }} run: | uv version ${{ 
github.event.inputs.custom_version }} echo "NEW_VERSION=${{ github.event.inputs.custom_version }}" >> $GITHUB_ENV - name: Update version (bump) if: ${{ github.event.inputs.custom_version == '' }} run: | NEW_VERSION=$(uv version --bump ${{ github.event.inputs.version_bump }} | awk '{print $NF}') echo "NEW_VERSION=${NEW_VERSION}" >> $GITHUB_ENV - name: Export version as job output id: export-version run: echo "value=${NEW_VERSION}" >> "$GITHUB_OUTPUT" - name: Test build with new version run: | uv build --no-sources uv run --with dist/*.whl --no-project -- python -c "import confluence_markdown_exporter; print('Package imports successfully')" - name: Update version references in README and docs run: scripts/bump-docs-version.sh "${{ env.NEW_VERSION }}" - name: Commit version update run: | git config --local user.email "action@github.com" git config --local user.name "GitHub Action" # -u: stage modifications to tracked files only; never add untracked files. git add -u pyproject.toml uv.lock README.md docs src git diff --cached --quiet || git commit -m "Bump version to ${{ env.NEW_VERSION }}" git push - name: Create release tag run: | git tag "${{ env.NEW_VERSION }}" git push origin "${{ env.NEW_VERSION }}" - name: Create and publish GitHub Release env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh release create "${{ env.NEW_VERSION }}" \ --title "Release ${{ env.NEW_VERSION }}" \ --generate-notes publish-python: name: Publish Python package needs: release uses: ./.github/workflows/python-publish.yml with: version: ${{ needs.release.outputs.version }} secrets: inherit publish-docker: name: Publish Docker image needs: release uses: ./.github/workflows/docker-publish.yml with: version: ${{ needs.release.outputs.version }} secrets: inherit ================================================ FILE: .gitignore ================================================ ### Custom ### **/*.env scratch/ log/ .ssh/ _tmp/* *.tar.gz *.sh~ *.zip *.jpg ### LLM Agents ### # The source stays 
vendor agnostic .claude/ ### Virtual Environments ### .venv/ .venv-*/ # Created by https://www.gitignore.io/api/code,python # Edit at https://www.gitignore.io/?templates=code,python ### Code ### .vscode/* !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json !.vscode/extensions.json ### Docusaurus ### node_modules/ .docusaurus/ .docusaurus-faster/ docs-build/ # Versioned docs are generated at build time from git tags by scripts/build-versions.mjs versioned_docs/ versioned_sidebars/ versions.json ### Python ### # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. 
#Pipfile.lock # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # Mr Developer .mr.developer.cfg .project .pydevproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # End of https://www.gitignore.io/api/code,python # Beads / Dolt files (added by bd init) .dolt/ *.db .beads-credential-key ================================================ FILE: .python-version ================================================ 3.10.12 ================================================ FILE: .vscode/extensions.json ================================================ { // See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations. // Extension identifier format: ${publisher}.${name}. Example: vscode.csharp // List of extensions which should be recommended for users of this workspace. "recommendations": [ "astral-sh.ty", "charliermarsh.ruff", "github.vscode-github-actions", "ms-python.python", "njpwerner.autodocstring", ], // List of extensions recommended by VS Code that should not be recommended for users of this workspace. "unwantedRecommendations": [] } ================================================ FILE: .vscode/launch.json ================================================ { // Use IntelliSense to learn about possible attributes. // Hover to view descriptions of existing attributes. 
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ { "name": "Python: Current File", "type": "debugpy", "request": "launch", "program": "${file}", "justMyCode": false, "console": "integratedTerminal", "cwd": "${workspaceFolder}", "env": { "PYTHONPATH": "${workspaceRoot}" } }, { "name": "Python: Export Page(s)", "type": "debugpy", "request": "launch", "program": "${workspaceFolder}/confluence_markdown_exporter/main.py", "justMyCode": false, "args": [ "pages", "" ], "console": "integratedTerminal", "cwd": "${workspaceFolder}", "env": { "PYTHONPATH": "${workspaceRoot}", "CME_CONFIG_PATH": "scratch/cme_config.json", "CME_EXPORT__LOG_LEVEL": "DEBUG", "CME_EXPORT__OUTPUT_PATH": "scratch" } }, { "name": "Python: Export Page(s) with Descendants", "type": "debugpy", "request": "launch", "program": "${workspaceFolder}/confluence_markdown_exporter/main.py", "justMyCode": false, "args": [ "pages-with-descendants", "" ], "console": "integratedTerminal", "cwd": "${workspaceFolder}", "env": { "PYTHONPATH": "${workspaceRoot}", "CME_CONFIG_PATH": "scratch/cme_config.json", "CME_EXPORT__LOG_LEVEL": "DEBUG", "CME_EXPORT__OUTPUT_PATH": "scratch" } }, { "name": "Python: Export Space(s)", "type": "debugpy", "request": "launch", "program": "${workspaceFolder}/confluence_markdown_exporter/main.py", "justMyCode": false, "args": [ "spaces", "" ], "console": "integratedTerminal", "cwd": "${workspaceFolder}", "env": { "PYTHONPATH": "${workspaceRoot}", "CME_CONFIG_PATH": "scratch/cme_config.json", "CME_EXPORT__LOG_LEVEL": "DEBUG", "CME_EXPORT__OUTPUT_PATH": "scratch" } }, { "name": "Python: Export Org(s)", "type": "debugpy", "request": "launch", "program": "${workspaceFolder}/confluence_markdown_exporter/main.py", "justMyCode": false, "args": [ "orgs", "" ], "console": "integratedTerminal", "cwd": "${workspaceFolder}", "env": { "PYTHONPATH": "${workspaceRoot}", "CME_CONFIG_PATH": "scratch/cme_config.json", 
"CME_EXPORT__LOG_LEVEL": "DEBUG", "CME_EXPORT__OUTPUT_PATH": "scratch" } }, { "name": "Python: Config (Interactive)", "type": "debugpy", "request": "launch", "program": "${workspaceFolder}/confluence_markdown_exporter/main.py", "justMyCode": false, "args": [ "config" ], "console": "integratedTerminal", "cwd": "${workspaceFolder}", "env": { "PYTHONPATH": "${workspaceRoot}", "CME_CONFIG_PATH": "scratch/cme_config.json", "CME_EXPORT__LOG_LEVEL": "DEBUG" } } ] } ================================================ FILE: .vscode/settings.json ================================================ { "files.eol": "\n", "editor.formatOnSave": true, "autoDocstring.docstringFormat": "google", "autoDocstring.startOnNewLine": true, "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python", "jupyter.notebookFileRoot": "${workspaceFolder}", "task.autoDetect": "off", "[python]": { "editor.defaultFormatter": "charliermarsh.ruff", "editor.codeActionsOnSave": { "source.fixAll": "explicit", "source.organizeImports": "explicit" } }, "[json]": { "editor.defaultFormatter": "vscode.json-language-features" }, "jupyter.debugJustMyCode": false, "debugpy.debugJustMyCode": false, "[markdown]": { "diffEditor.ignoreTrimWhitespace": false, "editor.unicodeHighlight.ambiguousCharacters": false, "editor.unicodeHighlight.invisibleCharacters": false, "editor.wordWrap": "on", "editor.quickSuggestions": { "comments": "off", "strings": "off", "other": "on" }, "editor.fontLigatures": true, "editor.glyphMargin": false, "editor.minimap.enabled": false, "editor.wrappingIndent": "indent", "editor.overviewRulerBorder": false, "editor.lineHeight": 24, "editor.renderWhitespace": "none", "editor.suggest.showSnippets": false, "editor.tabSize": 2, "editor.wordBasedSuggestions": "off", "files.autoSave": "onFocusChange", "files.insertFinalNewline": true, }, "markdown.updateLinksOnFileMove.enabled": "prompt", 
"markdown.validate.enabled": true, } ================================================ FILE: .vscode/tasks.json ================================================ { "version": "2.0.0", "tasks": [] } ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing Any contribution is welcome! This document provides guidelines for contributing to the confluence-markdown-exporter project. ## Table of Contents - [Getting Started](#getting-started) - [Development Workflow](#development-workflow) - [Testing](#testing) - [Code Quality](#code-quality) - [Release Process](#release-process) - [Pull Request Guidelines](#pull-request-guidelines) ## Getting Started ### Prerequisites - Python 3.10 or higher - Git - `uv` (Python package manager) - `jq` (for JSON processing) ### Install jq ```bash sudo apt-get install jq ``` ### Install `uv` Following the [uv installation guide](https://docs.astral.sh/uv/getting-started/installation): ```bash curl -LsSf https://astral.sh/uv/install.sh | sh ``` Add shell completion (optional): ```bash echo 'eval "$(uv generate-shell-completion bash)"' >> ~/.bashrc ``` ### Project Setup 1. **Fork and Clone the Repository** ```bash git clone https://github.com/Spenhouet/confluence-markdown-exporter.git cd confluence-markdown-exporter ``` 2. **Install Dependencies** ```bash uv sync --all-groups ``` This will: - Create a virtual environment - Install all dependencies (including development dependencies via dependency groups) - Install the project in editable mode 3. 
**Verify Installation** ```bash uv run confluence-markdown-exporter --help uv run cme --help ``` ## Development Workflow ### Running the Application ```bash # Run with uv (recommended) uv run confluence-markdown-exporter [commands] uv run cme [commands] # Or activate the virtual environment source .venv/bin/activate confluence-markdown-exporter [commands] ``` ### Adding Dependencies ```bash # Add runtime dependency uv add package-name # Add development dependency (to dev group) uv add --group dev package-name # Add to custom dependency group uv add --group group-name package-name ``` ### Updating Dependencies ```bash # Update all dependencies uv sync --upgrade # Update specific dependency uv sync --upgrade-package package-name ``` ## Testing We use `pytest` for testing. Tests are located in the `tests/` directory. ### Running Tests ```bash # Run all tests uv run pytest # Run tests with verbose output uv run pytest -v # Run specific test file uv run pytest tests/test_basic.py # Run specific test uv run pytest tests/test_basic.py::test_package_imports ``` ### Writing Tests 1. **Create test files** in the `tests/` directory with the prefix `test_` 2. **Follow naming conventions**: `test_*.py` files, `test_*` functions 3. **Use descriptive test names** that explain what is being tested 4. **Add docstrings** to explain complex test scenarios Example test structure: ```python def test_feature_description() -> None: """Test that the feature works as expected.""" # Arrange input_data = "test input" # Act result = function_under_test(input_data) # Assert assert result == expected_output ``` ## Code Quality ### Linting with Ruff We use `ruff` for Python linting and code formatting. 
```bash # Check code quality uv run ruff check # Auto-fix issues where possible uv run ruff check --fix # Check specific files or directories uv run ruff check confluence_markdown_exporter/ uv run ruff check tests/ ``` ### Code Style Guidelines - **Line length**: Maximum 100 characters - **Docstring style**: Google docstring convention - **Import formatting**: One import per line (enforced by ruff) - **Type hints**: Use type annotations for new code ### Pre-commit Workflow Before committing: 1. **Run linting**: `uv run ruff check` 2. **Run tests**: `uv run pytest` 3. **Fix any issues** before committing ## Release Process > [!NOTE] > Only relevant for maintainers. ### Automated Release We use GitHub Actions for automated releases: 1. **Trigger Release Workflow** - Go to GitHub Actions tab - Run "Release" workflow - Choose version bump type (patch/minor/major) or specify custom version 2. **Automated Steps** - Updates version in `pyproject.toml` - Runs tests and builds - Creates Git tag - Publishes to PyPI - Creates GitHub release with auto-generated notes - Publishes the multi-arch Docker image to Docker Hub ## Pull Request Guidelines ### Before Submitting 1. **Create a feature branch** ```bash git checkout -b feature/your-feature-name ``` 2. **Run the full test suite** ```bash uv run ruff check uv run pytest uv build --no-sources # Test build ``` 3. 
**Update documentation** if needed ### PR Requirements - ✅ **All tests pass** (verified by CI) - ✅ **Code passes linting** (ruff check) - ✅ **Descriptive PR title** and description - ✅ **Reference related issues** if applicable - ✅ **Update tests** for new functionality - ✅ **Update documentation** for user-facing changes ## Development Environment ### Recommended Tools - **IDE**: VS Code with Python extension - **Git client**: Command line or your preferred GUI - **Terminal**: Any modern terminal with shell completion ### VS Code Extensions Recommended extensions for development: - Python (Microsoft) - Ruff (Astral Software) - GitLens (GitKraken) - markdownlint (David Anson) ### Project Structure ```text confluence-markdown-exporter/ ├── .github/workflows/ # CI/CD workflows ├── confluence_markdown_exporter/ # Main package │ ├── __init__.py │ ├── main.py # CLI entry point │ ├── confluence.py # Core functionality │ ├── api_clients.py # API integrations │ └── utils/ # Utility modules ├── tests/ # Test suite ├── .ruff.toml # Ruff configuration ├── pyproject.toml # Project configuration ├── uv.lock # Dependency lock file └── CONTRIBUTING.md # This file ``` ## Getting Help - **GitHub Issues**: For bug reports and feature requests - **GitHub Discussions**: For questions and general discussion - **Documentation**: Check the README and code comments Thank you for contributing to confluence-markdown-exporter! 🚀 ================================================ FILE: Dockerfile ================================================ # syntax=docker/dockerfile:1.7 # ---- builder --------------------------------------------------------------- FROM python:3.12-slim AS builder ARG TARGETARCH COPY --from=ghcr.io/astral-sh/uv:0.8 /uv /uvx /usr/local/bin/ ENV UV_LINK_MODE=copy \ UV_COMPILE_BYTECODE=1 \ UV_PYTHON_DOWNLOADS=never WORKDIR /app # Install runtime dependencies only. This layer is cached unless uv.lock or # pyproject.toml change. 
Metadata is bind-mounted so it does not get baked # into the layer and invalidate it on unrelated edits. RUN --mount=type=cache,target=/root/.cache/uv,id=uv-$TARGETARCH \ --mount=type=bind,source=uv.lock,target=uv.lock \ --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ --mount=type=bind,source=README.md,target=README.md \ uv sync --locked --no-install-project --no-editable --no-dev # Install the project itself into the venv. Invalidates on source edits. COPY pyproject.toml uv.lock README.md ./ COPY confluence_markdown_exporter ./confluence_markdown_exporter RUN --mount=type=cache,target=/root/.cache/uv,id=uv-$TARGETARCH \ uv sync --locked --no-editable --no-dev # ---- runtime --------------------------------------------------------------- FROM python:3.12-slim AS runtime ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ PATH="/app/.venv/bin:$PATH" \ HOME=/data/config \ XDG_CONFIG_HOME=/data/config \ CME_CONFIG_PATH=/data/config/app_data.json \ CME_EXPORT__OUTPUT_PATH=/data/output RUN groupadd --system --gid 1000 cme \ && useradd --system --uid 1000 --gid cme --home-dir /data/config --shell /usr/sbin/nologin cme \ && mkdir -p /data/output /data/config \ && chown -R cme:cme /data # Copy only the venv, not the source. `--no-editable` made the install # self-contained so the source tree is not needed at runtime. 
COPY --from=builder /app/.venv /app/.venv USER cme WORKDIR /data/output ENTRYPOINT ["confluence-markdown-exporter"] CMD ["--help"] ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2025 Sebastian Penhouet Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================

confluence-markdown-exporter

The confluence-markdown-exporter exports Confluence pages in Markdown format. This exporter helps in migrating content from Confluence to platforms that support Markdown, e.g. Obsidian, Gollum, Azure DevOps (ADO), Foam, Dendron, and more.

Build Python package Build and publish to PyPI PyPI version Docker Hub version Documentation

## What it does Exports individual pages, pages with descendants, or entire Confluence spaces via the Atlassian API into clean Markdown. Skips unchanged pages by default, re-exporting only what has changed since the last run. Supported targets include Obsidian, Gollum, Azure DevOps (ADO) wikis, Foam, Dendron, and anything else that consumes Markdown. Full feature list, configuration reference, and target-system presets live in the **[documentation site](https://spenhouet.github.io/confluence-markdown-exporter/)**. ## Quickstart ### 1. Install **macOS and Linux** ```bash curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh ``` **Windows** ```powershell powershell -ExecutionPolicy ByPass -c "irm https://uvx.sh/confluence-markdown-exporter/install.ps1 | iex" ``` Installing a specific version: ```bash curl -LsSf uvx.sh/confluence-markdown-exporter/5.1.1/install.sh | sh ``` Alternative install methods (PyPI via `pip` / `uv`, prebuilt Docker image) are covered in the [installation docs](https://spenhouet.github.io/confluence-markdown-exporter/installation) and the [Docker page](https://spenhouet.github.io/confluence-markdown-exporter/docker). > **Using the Docker image?** Steps 2 and 3 below use the local `cme` CLI. Inside the Docker image there is no interactive `cme config` menu; you supply a pre-defined config (mounted JSON file or `CME_*` environment variables) and run a single export command per container invocation. See the [Docker page](https://spenhouet.github.io/confluence-markdown-exporter/docker) for the non-interactive flow. ### 2. Authenticate Set Confluence credentials interactively (URL, username, API token / PAT): ```sh cme config edit auth.confluence ``` See [Authentication](https://spenhouet.github.io/confluence-markdown-exporter/configuration/authentication) for token scopes and Jira setup. ### 3. 
Export ```sh # A single page cme pages # A page and all its descendants cme pages-with-descendants # An entire space cme spaces # Every space of an organisation cme orgs ``` Output goes to the configured `export.output_path` (current directory by default). ## Documentation The full documentation lives at **[spenhouet.github.io/confluence-markdown-exporter](https://spenhouet.github.io/confluence-markdown-exporter/)** and includes: - [Installation](https://spenhouet.github.io/confluence-markdown-exporter/installation) (curl / PowerShell / pip / uv) - [Usage guide](https://spenhouet.github.io/confluence-markdown-exporter/usage): pages, descendants, spaces, orgs, output layout - [Feature list](https://spenhouet.github.io/confluence-markdown-exporter/features): supported Confluence content, macros, and add-ons - [Configuration](https://spenhouet.github.io/confluence-markdown-exporter/configuration): config commands, ENV vars, full option reference - [Target-system presets](https://spenhouet.github.io/confluence-markdown-exporter/configuration/target-systems): Obsidian, Azure DevOps, … - [Docker](https://spenhouet.github.io/confluence-markdown-exporter/docker): prebuilt images for non-interactive / CI use - [CI / non-interactive use](https://spenhouet.github.io/confluence-markdown-exporter/configuration/ci) - [Compatibility](https://spenhouet.github.io/confluence-markdown-exporter/compatibility) and [Troubleshooting](https://spenhouet.github.io/confluence-markdown-exporter/troubleshooting) ## Contributing If you would like to contribute, please read [our contribution guideline](CONTRIBUTING.md). ## License This tool is an open source project released under the [MIT License](LICENSE).
================================================ FILE: confluence_markdown_exporter/__init__.py ================================================ """Confluence Markdown Exporter package.""" try: from importlib.metadata import version __version__ = version("confluence-markdown-exporter") except Exception: # noqa: BLE001 # fallback if package not installed or metadata not available __version__ = "unknown" ================================================ FILE: confluence_markdown_exporter/api_clients.py ================================================ import logging import re import urllib.parse from threading import Lock from threading import local from typing import Annotated import requests from atlassian import Confluence as ConfluenceApiSdk from atlassian import Jira as JiraApiSdk from pydantic import AfterValidator from pydantic import BaseModel from confluence_markdown_exporter.utils.app_data_store import ApiDetails from confluence_markdown_exporter.utils.app_data_store import AtlassianSdkConnectionConfig from confluence_markdown_exporter.utils.app_data_store import get_settings from confluence_markdown_exporter.utils.app_data_store import normalize_instance_url from confluence_markdown_exporter.utils.app_data_store import set_setting_with_keys logger = logging.getLogger(__name__) # URL-keyed caches for API clients _confluence_clients: dict[str, ConfluenceApiSdk] = {} _jira_clients: dict[str, JiraApiSdk] = {} _clients_lock = Lock() # Thread-local storage for per-URL Confluence clients (one per worker thread per URL) _thread_local = local() _CLOUD_DOMAIN = ".atlassian.net" _GATEWAY_PREFIX = "https://api.atlassian.com/ex" def parse_gateway_url(url: str) -> tuple[str, str] | None: m = re.search(r"https://api\.atlassian\.com/ex/(confluence|jira)/([^/?#]+)", url) return (m.group(1).lower(), m.group(2)) if m else None def build_gateway_url(service: str, cloud_id: str) -> str: return f"{_GATEWAY_PREFIX}/{service.lower()}/{cloud_id}" def ensure_service_gateway_url(url: 
str, service: str | None = None) -> str: """Ensure the gateway URL uses the specified service. ``https://api.atlassian.com/ex/confluence/{cloudId}`` becomes ``https://api.atlassian.com/ex/jira/{cloudId}``. Non-gateway URLs are returned as-is. """ if parsed := parse_gateway_url(url): return build_gateway_url(service or parsed[0], parsed[1]) return url def _is_standard_atlassian_cloud_url(url: str) -> bool: """Return True if *url* looks like a standard Atlassian Cloud instance URL.""" try: hostname = urllib.parse.urlparse(url).hostname or "" return hostname.endswith(_CLOUD_DOMAIN) except Exception: # noqa: BLE001 return False def _try_fetch_cloud_id(base_url: str) -> str | None: """Try to fetch the Atlassian Cloud ID from the public tenant info endpoint. Returns the cloud ID string, or None if the fetch fails (e.g. for Server instances). """ try: resp = requests.get(f"{base_url}/_edge/tenant_info", timeout=5) if resp.ok: return resp.json().get("cloudId") except Exception as e: # noqa: BLE001 logger.debug("Could not fetch Cloud ID from %s/_edge/tenant_info: %s", base_url, e) return None def _get_confluence_sdk_url(base_url: str, auth: ApiDetails) -> str: """Return the SDK URL for Confluence, using the API gateway when a Cloud ID is configured.""" if auth.cloud_id: return f"{_GATEWAY_PREFIX}/confluence/{auth.cloud_id}" return base_url def _get_jira_sdk_url(base_url: str, auth: ApiDetails) -> str: """Return the SDK URL for Jira, using the API gateway when a Cloud ID is configured.""" if auth.cloud_id: return f"{_GATEWAY_PREFIX}/jira/{auth.cloud_id}" return base_url def _decode_url_part(v: str | None) -> None | str: if v is None or v == "": return None return urllib.parse.unquote_plus(v) class ConfluenceRef(BaseModel): space_key: Annotated[str, AfterValidator(_decode_url_part)] page_id: int | None = None page_title: Annotated[str | None, AfterValidator(_decode_url_part)] = None # 1) Cloud [/wiki]/spaces/{space_key}[/pages/{page_id}[/{page_title}]] _CLOUD_URL_RE = 
re.compile( r"^(?:/ex/confluence/[^/]+)?(?:/wiki)?/spaces/" r"(?P[A-Za-z0-9_~-]+)" r"(?:/pages/(?P\d+)(?:/(?P[^/?#]+))?)?" r"(?:/(?!pages/)[^/?#]+)?/?$" ) # 2) Server [/display]/{space_key}[/{page_title}] _SERVER_URL_RE = re.compile( r"^(?:/display)?" r"/(?P[A-Za-z0-9._-]+)" r"(?:/(?P[^/?#]+))?/?$" ) def parse_confluence_path(path: str) -> ConfluenceRef | None: """Parse only the path portion of a Confluence URL and return a ConfluenceRef dict. Matching order: 1) Cloud [/wiki]/spaces/{space_key}[/pages/{page_id}[/{page_title}]] 2) Server [/display]/{space_key}[/{page_title}] """ if not path: return None if not path.startswith("/"): path = "/" + path path = path.rstrip("/") if m := _CLOUD_URL_RE.match(path) or _SERVER_URL_RE.match(path): return ConfluenceRef.model_validate(m.groupdict()) return None class AuthNotConfiguredError(BaseException): """Raised when a connection attempt fails and no valid auth is configured for the URL. Inherits from BaseException (not Exception) so that broad ``except Exception`` handlers in export loops do not accidentally swallow it — it must propagate to the app boundary. 
""" def __init__(self, url: str, service: str = "Confluence") -> None: self.url = url self.service = service super().__init__(f"No valid authentication configured for {service} at {url}") class JiraAuthenticationError(Exception): """Raised when a Jira API response indicates an authentication failure.""" def _jira_auth_failure_hook( response: requests.Response, *_args: object, **_kwargs: object ) -> requests.Response: """Raise JiraAuthenticationError when Jira signals authentication failure.""" if response.headers.get("X-Seraph-Loginreason") == "AUTHENTICATED_FAILED": msg = f"Jira authentication failed for request to {response.url}" raise JiraAuthenticationError(msg) return response def response_hook( response: requests.Response, *_args: object, **_kwargs: object ) -> requests.Response: """Log response headers when requests fail.""" if not response.ok: logger.warning( "Request to %s failed with status %s. Response headers: %s", response.url, response.status_code, dict(response.headers), ) return response class ApiClientFactory: """Factory for creating authenticated Confluence and Jira API clients with retry config.""" def __init__(self, connection_config: AtlassianSdkConnectionConfig) -> None: # Reconstruct as the base SDK type so model_dump() only yields SDK-compatible fields, # even when a ConnectionConfig subclass is passed. 
self.connection_config = AtlassianSdkConnectionConfig.model_validate( connection_config.model_dump() ) def create_confluence(self, url: str, auth: ApiDetails) -> ConfluenceApiSdk: try: instance = ConfluenceApiSdk( url=url, username=auth.username.get_secret_value() if auth.api_token else None, password=auth.api_token.get_secret_value() if auth.api_token else None, token=auth.pat.get_secret_value() if auth.pat else None, **self.connection_config.model_dump(), ) instance.get_all_spaces(limit=1) except Exception as e: msg = f"Confluence connection failed: {e}" raise ConnectionError(msg) from e return instance def create_jira(self, url: str, auth: ApiDetails) -> JiraApiSdk: try: instance = JiraApiSdk( url=url, username=auth.username.get_secret_value() if auth.api_token else None, password=auth.api_token.get_secret_value() if auth.api_token else None, token=auth.pat.get_secret_value() if auth.pat else None, **self.connection_config.model_dump(), ) instance.get_all_projects() except Exception as e: msg = f"Jira connection failed: {e}" raise ConnectionError(msg) from e return instance def get_confluence_instance(url: str) -> ConfluenceApiSdk: """Get authenticated Confluence API client for *url*. Creates a new client if one doesn't exist for that URL yet and caches it. Prompts for auth config on connection failure. When the configured auth for *url* includes a Cloud ID, API calls are routed through the Atlassian API gateway (``https://api.atlassian.com/ex/confluence/{cloud_id}``), which enables the use of scoped API tokens. For standard Atlassian Cloud instances (``.atlassian.net``) the Cloud ID is fetched and stored automatically on first connection. 
""" url = normalize_instance_url(ensure_service_gateway_url(url, "confluence")) with _clients_lock: if url in _confluence_clients: logger.debug("Confluence client cache hit for %s", url) return _confluence_clients[url] settings = get_settings() auth = settings.auth.get_instance(url) if auth is None: raise AuthNotConfiguredError(url, "Confluence") logger.debug("Creating new Confluence client for %s", url) # Auto-fetch and store the Cloud ID for standard Atlassian Cloud instances if not auth.cloud_id and _is_standard_atlassian_cloud_url(url): cloud_id = _try_fetch_cloud_id(url) if cloud_id: logger.info("Auto-fetched Atlassian Cloud ID for %s — storing in config", url) set_setting_with_keys(["auth", "confluence", url, "cloud_id"], cloud_id) settings = get_settings() auth = settings.auth.get_instance(url) or ApiDetails() sdk_url = _get_confluence_sdk_url(url, auth) try: client = ApiClientFactory(settings.connection_config).create_confluence(sdk_url, auth) logger.info("Connected to Confluence at %s", sdk_url) except ConnectionError as e: logger.exception("[red bold]Confluence authentication failed for %s.[/red bold]", url) raise AuthNotConfiguredError(url, "Confluence") from e if settings.export.log_level == "DEBUG": client.session.hooks["response"] = [response_hook] with _clients_lock: _confluence_clients[url] = client return client def get_thread_confluence(base_url: str) -> ConfluenceApiSdk: """Get or create a thread-local Confluence client for *base_url*. The atlassian-python-api Confluence client uses requests.Session, which is NOT thread-safe. Each worker thread keeps its own dict of clients keyed by base URL so that multi-instance exports are also thread-safe. 
""" base_url = normalize_instance_url(base_url) if not hasattr(_thread_local, "clients"): _thread_local.clients = {} if base_url not in _thread_local.clients: logger.debug("Initializing thread-local Confluence client for %s", base_url) _thread_local.clients[base_url] = get_confluence_instance(base_url) return _thread_local.clients[base_url] def get_jira_instance(url: str) -> JiraApiSdk: """Get authenticated Jira API client for *url*. Creates a new client if one doesn't exist for that URL yet and caches it. When the input is a Confluence gateway URL (``/ex/confluence/{cloudId}``), it is automatically converted to the Jira gateway URL (``/ex/jira/{cloudId}``) before auth lookup and SDK connection. This handles the common case where the caller derives the Jira URL from a Confluence page's ``base_url``. When the configured auth for *url* includes a Cloud ID, API calls are routed through the Atlassian API gateway (``https://api.atlassian.com/ex/jira/{cloud_id}``). For standard Atlassian Cloud instances the Cloud ID is fetched and stored automatically. """ # Always work with the Jira gateway URL, even if the caller passed the Confluence one. url = normalize_instance_url(ensure_service_gateway_url(url, "jira")) settings = get_settings() if not settings.export.enable_jira_enrichment: msg = "Jira API client was requested eventhough Jira enrichment is disabled." 
raise RuntimeWarning(msg) with _clients_lock: if url in _jira_clients: logger.debug("Jira client cache hit for %s", url) return _jira_clients[url] auth = settings.auth.get_jira_instance(url) if auth is None: raise AuthNotConfiguredError(url, "Jira") logger.debug("Creating new Jira client for %s", url) # Auto-fetch and store the Cloud ID for standard Atlassian Cloud instances if not auth.cloud_id and _is_standard_atlassian_cloud_url(url): cloud_id = _try_fetch_cloud_id(url) if cloud_id: logger.info("Auto-fetched Atlassian Cloud ID for %s — storing in config", url) set_setting_with_keys(["auth", "jira", url, "cloud_id"], cloud_id) settings = get_settings() auth = settings.auth.get_jira_instance(url) or auth sdk_url = _get_jira_sdk_url(url, auth) try: client = ApiClientFactory(settings.connection_config).create_jira(sdk_url, auth) logger.info("Connected to Jira at %s", sdk_url) except ConnectionError as e: logger.exception("[red bold]Jira authentication failed for %s.[/red bold]", url) raise AuthNotConfiguredError(url, "Jira") from e client.session.hooks["response"].append(_jira_auth_failure_hook) if settings.export.log_level == "DEBUG": client.session.hooks["response"].append(response_hook) with _clients_lock: _jira_clients[url] = client return client def invalidate_confluence_client(url: str) -> None: """Remove a cached Confluence client so the next call creates a fresh one.""" with _clients_lock: _confluence_clients.pop(normalize_instance_url(url), None) def invalidate_jira_client(url: str) -> None: """Remove a cached Jira client so the next call creates a fresh one.""" with _clients_lock: _jira_clients.pop(normalize_instance_url(url), None) def handle_jira_auth_failure(url: str) -> None: """Handle a Jira authentication failure by invalidating the cached client and raising.""" invalidate_jira_client(url) raise AuthNotConfiguredError(url, "Jira") ================================================ FILE: confluence_markdown_exporter/config.py 
================================================ """Config sub-app for the cme CLI.""" import json import logging from typing import Annotated import jmespath import typer import yaml from confluence_markdown_exporter.utils.app_data_store import APP_CONFIG_PATH from confluence_markdown_exporter.utils.app_data_store import get_settings from confluence_markdown_exporter.utils.app_data_store import reset_to_defaults from confluence_markdown_exporter.utils.app_data_store import set_setting logger = logging.getLogger(__name__) # Each table row must be its own \n\n-separated block so typer's epilog # renderer keeps single \n between rows, forming valid markdown table syntax. _CONFIG_KEYS_EPILOG = ( "---\n\n" "**Available config keys** (run `cme config list` to see all current values):\n\n" "| Key | Description |\n\n" "| --- | ----------- |\n\n" "| `export.output_path` | Directory where exported files are saved |\n\n" "| `export.log_level` | Verbosity: `DEBUG`, `INFO`, `WARNING`, `ERROR` |\n\n" "| `export.save_log_to_file` | Also write logs to `cme.log` next to the config file |\n\n" "| `export.skip_unchanged` | Skip pages unchanged since last export |\n\n" "| `export.cleanup_stale` | Delete local files for removed pages |\n\n" "| `export.page_path` | File path template for exported pages |\n\n" "| `export.attachment_path` | File path template for exported attachments |\n\n" "| `export.page_href` | Link style for pages: `relative` or `absolute` |\n\n" "| `export.attachment_href` | Link style for attachments: `relative` or `absolute` |\n\n" "| `export.include_document_title` | Prepend H1 title to each page |\n\n" "| `export.include_toc` | Export Table of Contents macro (`true`/`false`) |\n\n" "| `export.include_macro` | How to render `include`/`excerpt-include` macros:" " `inline` (default) or `transclusion` (Obsidian `![[Page Title]]` embed) |\n\n" "| `export.page_breadcrumbs` | Include breadcrumb links at top of page |\n\n" "| `export.confluence_url_in_frontmatter` | 
Include Confluence page URL in YAML " "front matter: `none`, `webui`, `tinyui`, `both` |\n\n" "| `export.page_metadata_in_frontmatter` | Add Confluence page metadata " "fields (page_id, space_key, type, created, created_by, last_modified, " "last_modified_by, version) to YAML front matter (`true`/`false`) |\n\n" "| `export.enable_jira_enrichment` | Fetch Jira data for enriched links |\n\n" "| `export.attachments_export` | Which attachments to download:" " `referenced` (default), `all`, `disabled` |\n\n" "| `export.image_captions` | Use image captions as markdown alt text (`true`/`false`) |\n\n" "| `export.comments_export` | Which comments to export to sidecar " "`.comments.md` files: `none` (default), `inline`, `footer`, `all` |\n\n" "| `export.convert_status_badges` | Convert Confluence status badges to `` elements |\n\n" "| `export.convert_text_highlights` | Convert background-color spans to `` elements |\n\n" "| `export.convert_font_colors` | Convert font-color spans to `` elements |\n\n" "| `export.filename_length` | Maximum filename length (default: 255) |\n\n" "| `connection_config.max_workers` | Parallel export workers (default: 20) |\n\n" "| `connection_config.use_v2_api` | Use Confluence REST API v2 (`true`/`false`) |\n\n" "| `connection_config.verify_ssl` | Verify SSL certificates (`true`/`false`) |\n\n" "| `connection_config.timeout` | API request timeout in seconds |\n\n" "| `auth.confluence` | Credentials keyed by instance URL — use `cme config edit` |\n\n" "| `auth.jira` | Jira credentials keyed by instance URL — use `cme config edit` |\n\n" "---\n\n" "Env var override: prefix with `CME_` and `__` as delimiter. 
@app.command(
    help=(
        "Reset configuration to defaults.\n\n"
        "Without a `KEY` argument, resets the **entire configuration** to factory defaults. "
        "Pass a dot-notation key to reset only that key or section.\n\n"
        "Use `--yes` / `-y` to skip the confirmation prompt (useful in scripts)."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme config reset` — reset everything (prompts for confirmation)\n\n"
        "- `cme config reset --yes` — skip confirmation prompt\n\n"
        "- `cme config reset export.log_level` — reset a single key to its default\n\n"
        "- `cme config reset connection_config` — reset a whole section to defaults\n\n"
    ),
)
def reset(
    key: Annotated[
        str | None,
        typer.Argument(
            help=(
                "Dot-notation config key or section to reset to its default. "
                "If omitted, the entire configuration is reset. "
                "Examples: `export.log_level`, `connection_config`, `export`."
            ),
            metavar="KEY",
        ),
    ] = None,
    yes: Annotated[  # noqa: FBT002
        bool,
        typer.Option("--yes", "-y", help="Skip the confirmation prompt."),
    ] = False,
) -> None:
    """Reset one key, one section, or the whole configuration to defaults.

    Unless ``yes`` is set, the user is asked to confirm first (default answer
    is "no"); declining aborts without touching the configuration.

    Raises:
        typer.Abort: If the confirmation prompt is declined.
    """
    # Empty when no key was given, so each message can pick its own fallback.
    quoted_key = f"'{key}'" if key else ""

    if not yes:
        prompt = f"Reset {quoted_key or 'all configuration'} to defaults?"
        if not typer.confirm(prompt, default=False):
            raise typer.Abort

    reset_to_defaults(key)
    typer.echo(f"{quoted_key or 'Configuration'} reset to defaults.")
@app.command(
    help=(
        "Print the current value of a single config key.\n\n"
        "Keys use dot notation to address nested settings "
        "(e.g. `export.log_level`, `connection_config.max_workers`). "
        "Nested sections are printed as YAML. "
        "Run `cme config list` to see all available keys."
    ),
    epilog=(
        "**Examples:**\n\n"
        "- `cme config get export.log_level`\n\n"
        "- `cme config get export.output_path`\n\n"
        "- `cme config get connection_config.max_workers`\n\n"
        "- `cme config get connection_config` — prints the whole section as YAML\n\n"
        "- `cme config get export` — prints all export settings\n\n" + _CONFIG_KEYS_EPILOG
    ),
)
def get(
    key: Annotated[
        str,
        typer.Argument(
            help=(
                "Config key in dot notation. "
                "Examples: `export.log_level`, `connection_config.max_workers`, `export`."
            ),
            metavar="KEY",
        ),
    ],
) -> None:
    """Output the current value of a config key.

    The settings are dumped to plain JSON data and *key* is evaluated against
    them as a JMESPath expression. Dicts and lists are printed as YAML; any
    other value is printed via ``str()``.

    Raises:
        typer.Exit: With code 1 when *key* is not a valid expression or when it
            resolves to nothing. A setting whose value is ``null`` is
            indistinguishable from a missing key and is reported as not found.
    """
    current_settings = get_settings()
    data = json.loads(current_settings.model_dump_json())

    try:
        value = jmespath.search(key, data)
    except jmespath.exceptions.JMESPathError as e:
        # Keys that are not valid JMESPath (e.g. URL-shaped auth keys) would
        # otherwise surface as a raw traceback.
        typer.echo(f"Invalid key '{key}': {e}", err=True)
        raise typer.Exit(code=1) from e

    if value is None:
        typer.echo(f"Key '{key}' not found.", err=True)
        raise typer.Exit(code=1)

    if isinstance(value, dict | list):
        # yaml.dump already ends with a newline, so suppress echo's own.
        typer.echo(yaml.dump(value, default_flow_style=False, allow_unicode=True), nl=False)
    else:
        typer.echo(str(value))
), metavar="KEY=VALUE", ), ], ) -> None: """Set one or more configuration values.""" for kv in key_values: if "=" not in kv: typer.echo(f"Invalid format '{kv}': expected key=value.", err=True) raise typer.Exit(code=1) key, _, raw_value = kv.partition("=") value = _parse_value(raw_value) try: set_setting(key.strip(), value) except (ValueError, KeyError) as e: typer.echo(f"Failed to set '{key.strip()}': {e}", err=True) raise typer.Exit(code=1) from e typer.echo("Configuration updated.") @app.command( help=( "Open the interactive editor for a specific config key.\n\n" "Launches the interactive configuration menu pre-navigated to the given key. " "Especially useful for editing authentication credentials, " "where the instance URL is part of the key and cannot be set via `cme config set`." ), epilog=( "**Examples:**\n\n" "- `cme config edit auth.confluence` — add or update Confluence credentials\n\n" "- `cme config edit auth.jira` — edit Jira credentials\n\n" "- `cme config edit export.log_level` — edit a setting interactively\n\n" "- `cme config edit export.output_path` — set output path interactively\n\n" ), ) def edit( key: Annotated[ str, typer.Argument( help=( "Config key to open in the interactive editor, using dot notation. " "Examples: `auth.confluence`, `auth.jira`, `export.log_level`." ), metavar="KEY", ), ], ) -> None: """Open the interactive editor for a specific config key.""" from confluence_markdown_exporter.utils.config_interactive import main_config_menu_loop main_config_menu_loop(key) def _parse_value(value_str: str) -> object: """Parse a CLI value string, trying JSON first then falling back to raw string. Handles JSON scalars (true/false, numbers, null), arrays, and objects. Also accepts Python-style True/False for convenience. 
""" try: return json.loads(value_str) except json.JSONDecodeError: pass lower = value_str.lower() if lower == "true": return True if lower == "false": return False return value_str ================================================ FILE: confluence_markdown_exporter/confluence.py ================================================ """Confluence API documentation. https://developer.atlassian.com/cloud/confluence/rest/v1/intro """ import functools import json import logging import mimetypes import os import re import urllib.parse from collections.abc import Set from concurrent.futures import ThreadPoolExecutor from concurrent.futures import as_completed from os import PathLike from pathlib import Path from string import Template from typing import Any from typing import ClassVar from typing import Literal from typing import TypeAlias from typing import cast from urllib.parse import unquote from urllib.parse import urlparse import yaml from atlassian.errors import ApiError from atlassian.errors import ApiNotFoundError from bs4 import BeautifulSoup from bs4 import Tag from markdownify import ATX from markdownify import MarkdownConverter from pydantic import BaseModel from pydantic import Field from requests import HTTPError from requests import RequestException from rich.progress import BarColumn from rich.progress import MofNCompleteColumn from rich.progress import Progress from rich.progress import SpinnerColumn from rich.progress import TaskProgressColumn from rich.progress import TextColumn from rich.progress import TimeElapsedColumn from rich.progress import TimeRemainingColumn from tabulate import tabulate from confluence_markdown_exporter.api_clients import JiraAuthenticationError from confluence_markdown_exporter.api_clients import build_gateway_url from confluence_markdown_exporter.api_clients import get_confluence_instance from confluence_markdown_exporter.api_clients import get_jira_instance from confluence_markdown_exporter.api_clients import 
get_thread_confluence from confluence_markdown_exporter.api_clients import handle_jira_auth_failure from confluence_markdown_exporter.api_clients import parse_confluence_path from confluence_markdown_exporter.api_clients import parse_gateway_url from confluence_markdown_exporter.utils.app_data_store import get_settings from confluence_markdown_exporter.utils.app_data_store import normalize_instance_url from confluence_markdown_exporter.utils.drawio_converter import load_and_parse_drawio from confluence_markdown_exporter.utils.export import github_heading_slug from confluence_markdown_exporter.utils.export import sanitize_filename from confluence_markdown_exporter.utils.export import sanitize_key from confluence_markdown_exporter.utils.export import save_file from confluence_markdown_exporter.utils.lockfile import AttachmentEntry from confluence_markdown_exporter.utils.lockfile import LockfileManager from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry from confluence_markdown_exporter.utils.rich_console import ExportStats from confluence_markdown_exporter.utils.rich_console import console from confluence_markdown_exporter.utils.rich_console import get_stats from confluence_markdown_exporter.utils.rich_console import reset_stats from confluence_markdown_exporter.utils.table_converter import TableConverter JsonResponse: TypeAlias = dict StrPath: TypeAlias = str | PathLike[str] logger = logging.getLogger(__name__) _MAX_UNICODE_CODEPOINT = 0x10FFFF _RE_RGB_BG = re.compile(r"background-color:\s*rgb\((\d+),\s*(\d+),\s*(\d+)\)") _RE_RGB_COLOR = re.compile(r"(?\w])\[data-colorid=(\w+)\]\{color:(#[0-9a-fA-F]+)\}") _RE_HEX_COLOR = re.compile(r"^#(?:[0-9a-fA-F]{3}|[0-9a-fA-F]{6})$") # Confluence default header backgrounds — applied automatically to cells, and # (in matrix-style tables) to row-label s. Treated as "no user-chosen colour". 
_DEFAULT_HEADER_BGS = frozenset({"#f4f5f7", "#f2f2f2"}) def _rgb_to_hex(r: int, g: int, b: int) -> str: return f"#{r:02x}{g:02x}{b:02x}" def _extract_cell_highlight_hex(el: Tag) -> str | None: """Return Confluence cell background hex from data-highlight-colour, or None. Confluence Cloud sets `data-highlight-colour="#rrggbb"` (or `"transparent"`) on `` / `` when a cell background colour is applied. """ val = el.get("data-highlight-colour") if not isinstance(val, str): return None val = val.strip().lower() if not val or val == "transparent" or val in _DEFAULT_HEADER_BGS: return None if _RE_HEX_COLOR.match(val): return val return None # Background colours for Confluence status-badge lozenges (Atlassian design token pastels). _LOZENGE_COLORS: dict[str, str] = { "aui-lozenge-complete": "#cce0ff", # blue "aui-lozenge-success": "#baf3db", # green "aui-lozenge-current": "#f8e6a0", # yellow / orange "aui-lozenge-error": "#ffd5d2", # red "aui-lozenge-progress": "#dfd8fd", # purple / violet } def _require_dict(response: object, context: str) -> JsonResponse: """Validate that an API response is a dict, not an HTML redirect or error string. SAML SSO redirects and session-expiry responses are returned as raw HTML strings by the atlassian-python-api client instead of raising an exception. Calling .get() on such a string produces a confusing AttributeError; this helper surfaces a clear message instead. """ if isinstance(response, dict): return response preview = str(response)[:120].replace("\n", " ") if "SAMLRequest" in str(response) or "SAMLResponse" in str(response): msg = ( f"Authentication failed for {context}: received a SAML SSO redirect instead of JSON. " "Check that your Confluence token/credentials are correct and not expired." ) else: msg = f"Unexpected non-dict response for {context}: {preview!r}" raise ValueError(msg) def _extract_base_url(url: str) -> str: """Extract the base URL from a Confluence or Jira URL. 
For Atlassian Cloud URLs (``*.atlassian.net``) returns ``{scheme}://{hostname}``. For Atlassian API gateway URLs of the form ``https://api.atlassian.com/ex/{service}/{cloudId}/...`` returns ``https://api.atlassian.com/ex/{service}/{cloudId}`` so that the Cloud ID is preserved as part of the base URL used for auth lookup and SDK initialisation. For Server/Data Center instances with a context path (e.g. ``https://host/confluence/spaces/KEY``), the context path is preserved so the SDK client hits the correct REST endpoints. """ parsed = urllib.parse.urlparse(url) if parsed.scheme is None or parsed.hostname is None: msg = ( "Invalid URL: a scheme (http:// or https://) and hostname are required. " "Expected format: 'https://[:port]/...'." ) raise ValueError(msg) if gateway := parse_gateway_url(url): return normalize_instance_url(build_gateway_url(*gateway)) # For Server/DC instances the Confluence webapp may be deployed under a # context path (e.g. ``/confluence``). Preserve everything before the # first path segment that belongs to Confluence's own routing. 
_confluence_route_segments = { "wiki", "display", "spaces", "rest", "pages", "plugins", "dosearchsite.action", } segments = [s for s in parsed.path.split("/") if s] context_parts: list[str] = [] for segment in segments: if segment.lower() in _confluence_route_segments: break context_parts.append(segment) base = f"{parsed.scheme}://{parsed.hostname}" if parsed.port and parsed.port not in (80, 443): base = f"{parsed.scheme}://{parsed.hostname}:{parsed.port}" if context_parts: base = f"{base}/{'/'.join(context_parts)}" return normalize_instance_url(base) def _join_confluence_link(data: JsonResponse, key: str) -> str: links = data.get("_links", {}) if not isinstance(links, dict): return "" base = links.get("base") rel = links.get(key) if not isinstance(base, str) or not isinstance(rel, str) or not base or not rel: return "" return f"{base.rstrip('/')}/{rel.lstrip('/')}" def _get_web_url(data: JsonResponse) -> str: return _join_confluence_link(data, "webui") def _get_tiny_url(data: JsonResponse) -> str: return _join_confluence_link(data, "tinyui") _JIRA_ROUTE_SEGMENTS = { "agile", "backlog", "board", "browse", "issues", "plugins", "projects", "rest", "secure", "servicedesk", "software", } _HTML_ELEMENTS = frozenset( { "a", "abbr", "acronym", "address", "area", "article", "aside", "audio", "b", "base", "bdi", "bdo", "blockquote", "body", "br", "button", "canvas", "caption", "cite", "code", "col", "colgroup", "data", "datalist", "dd", "del", "details", "dfn", "dialog", "div", "dl", "dt", "em", "embed", "fieldset", "figcaption", "figure", "font", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "i", "iframe", "img", "input", "ins", "kbd", "keygen", "label", "legend", "li", "link", "main", "map", "mark", "menu", "menuitem", "meta", "meter", "nav", "noscript", "object", "ol", "optgroup", "option", "output", "p", "picture", "pre", "progress", "q", "rp", "rt", "ruby", "s", "samp", "script", "section", "select", "small", "source", 
"span", "strong", "style", "sub", "summary", "sup", "table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead", "time", "title", "tr", "track", "u", "ul", "var", "video", "wbr", } ) _ANGLE_BRACKET_RE = re.compile(r"<([^<>\n]*)>") _CODE_FENCE_RE = re.compile(r"^(`{3,}|~{3,})") _INLINE_CODE_RE = re.compile(r"`[^`\n]*`") _AUTOLINK_URI_RE = re.compile(r"^[A-Za-z][A-Za-z0-9+.\-]{1,31}:[^\s<>]*$") _AUTOLINK_EMAIL_RE = re.compile( r"^[A-Za-z0-9.!#$%&'*+/=?^_`{|}~\-]+@[A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?" r"(?:\.[A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?)*$" ) def _extract_jira_base_url(url: str) -> str | None: """Extract the Jira instance base URL from a Jira issue URL. Strips Jira-specific routing segments (e.g. ``browse``) so that the context path is preserved for Server/DC deployments (e.g. ``https://host/jira``), matching the key format used in ``auth.jira`` configuration. Returns ``None`` when *url* is not an absolute URL. """ parsed = urllib.parse.urlparse(url) if not parsed.scheme or not parsed.hostname: return None if gateway := parse_gateway_url(url): return normalize_instance_url(build_gateway_url(*gateway)) segments = [s for s in parsed.path.split("/") if s] context_parts: list[str] = [] for segment in segments: if segment.lower() in _JIRA_ROUTE_SEGMENTS: break context_parts.append(segment) base = f"{parsed.scheme}://{parsed.hostname}" if parsed.port and parsed.port not in (80, 443): base = f"{parsed.scheme}://{parsed.hostname}:{parsed.port}" if context_parts: base = f"{base}/{'/'.join(context_parts)}" return normalize_instance_url(base) settings = get_settings() class JiraIssue(BaseModel): key: str summary: str description: str | None status: str @classmethod def from_json(cls, data: JsonResponse) -> "JiraIssue": fields = data.get("fields", {}) return cls( key=data.get("key", ""), summary=fields.get("summary", ""), description=fields.get("description", ""), status=fields.get("status", {}).get("name", ""), ) @classmethod def 
# A Confluence user as described by the REST API's user payloads.
# Every field defaults to "" when the payload omits the corresponding key.
class User(BaseModel):
    account_id: str
    username: str
    display_name: str
    public_name: str
    email: str

    @classmethod
    def from_json(cls, data: JsonResponse) -> "User":
        """Build a ``User`` from an API payload, using ``""`` for absent keys."""
        # model field -> key in the JSON payload
        json_keys = {
            "account_id": "accountId",
            "username": "username",
            "display_name": "displayName",
            "public_name": "publicName",
            "email": "email",
        }
        values = {field: data.get(json_key, "") for field, json_key in json_keys.items()}
        return cls(**values)

    @classmethod
    @functools.lru_cache(maxsize=100)
    def from_username(cls, username: str, base_url: str = "") -> "User":
        """Look up a user by username; results are memoised (up to 100 entries)."""
        client = get_thread_confluence(base_url)
        payload = client.get_user_details_by_username(username)
        return cls.from_json(cast("JsonResponse", payload))

    @classmethod
    @functools.lru_cache(maxsize=100)
    def from_userkey(cls, userkey: str, base_url: str = "") -> "User":
        """Look up a user by user key; results are memoised (up to 100 entries)."""
        client = get_thread_confluence(base_url)
        payload = client.get_user_details_by_userkey(userkey)
        return cls.from_json(cast("JsonResponse", payload))

    @classmethod
    @functools.lru_cache(maxsize=100)
    def from_accountid(cls, accountid: str, base_url: str = "") -> "User":
        """Look up a user by account ID; results are memoised (up to 100 entries)."""
        client = get_thread_confluence(base_url)
        payload = client.get_user_details_by_accountid(accountid)
        return cls.from_json(cast("JsonResponse", payload))
class Organization(BaseModel):
    """All spaces of one Confluence instance, identified by its base URL."""

    base_url: str
    spaces: list["Space"]

    @property
    def pages(self) -> list["Page | Descendant"]:
        """Return the pages of every space, concatenated in space order."""
        collected = []
        for space in self.spaces:
            collected.extend(space.pages)
        return collected

    def export(self) -> None:
        """Export all pages across all spaces, showing per-space discovery progress."""
        total = len(self.spaces)
        logger.info("Exporting %d space(s) from %s", total, self.base_url)

        discovered: list[Page | Descendant] = []
        with console.status("", spinner="dots") as status:
            for position, space in enumerate(self.spaces, start=1):
                progress = (
                    f"[dim]Fetching pages for space [highlight]{space.name}[/highlight]"
                    f" ({position}/{total})…[/dim]"
                )
                status.update(progress)
                discovered.extend(space.pages)

        logger.info("Discovered %d page(s) across %d space(s)", len(discovered), total)
        export_pages(discovered)

    @classmethod
    def from_json(cls, data: JsonResponse, base_url: str) -> "Organization":
        """Build an organization from a space-list response.

        Args:
            data: Response mapping whose ``results`` entry lists the spaces
                (treated as empty when missing).
            base_url: Base URL passed on to every ``Space``.
        """
        raw_spaces = data.get("results", [])
        return cls(
            base_url=base_url,
            spaces=[Space.from_json(raw, base_url) for raw in raw_spaces],
        )

    @classmethod
    @functools.lru_cache(maxsize=100)
    def from_url(cls, base_url: str) -> "Organization":
        """Fetch the current global spaces of ``base_url`` (LRU-cached per URL)."""
        logger.debug("Fetching space list from %s", base_url)
        spinner_text = f"[dim]Fetching space list from [highlight]{base_url}[/highlight]…[/dim]"
        with console.status(spinner_text):
            client = get_thread_confluence(base_url)
            listing = client.get_all_spaces(
                space_type="global", space_status="current", expand="homepage"
            )
            org = cls.from_json(cast("JsonResponse", listing), base_url)
        logger.info("Found %d space(s) in %s", len(org.spaces), base_url)
        return org
@classmethod
def from_url(cls, space_url: str) -> "Space":
    """Resolve a Confluence space URL to a ``Space``.

    The base URL is derived from ``space_url`` and a client for it is
    requested up front. The remainder of the URL path is then parsed for a
    space key.

    Supports standard instance URLs
    (``https://company.atlassian.net/wiki/spaces/KEY``) and Atlassian API
    gateway URLs
    (``https://api.atlassian.com/ex/confluence/{cloudId}/wiki/spaces/KEY``).

    Raises:
        ValueError: If no space key can be extracted from the URL.
    """
    base_url = _extract_base_url(space_url)
    # Called for its side effect: makes sure a client exists for this host
    # (creates/prompts if it is the first time for this host).
    get_confluence_instance(base_url)

    # Strip the base URL's path prefix so only the Confluence route remains.
    prefix = urllib.parse.urlparse(base_url).path.rstrip("/")
    url_path = urllib.parse.urlparse(space_url).path
    match = parse_confluence_path(url_path[len(prefix) :])

    if not match or not match.space_key:
        msg = f"Could not parse space URL {space_url}."
        raise ValueError(msg)

    logger.debug("Resolved space key '%s' from URL %s", match.space_key, space_url)
    return cls.from_key(match.space_key, base_url)
@classmethod
def from_json(cls, data: JsonResponse, base_url: str) -> "Attachment":
    """Build an ``Attachment`` from an attachment JSON object.

    Args:
        data: Attachment payload; ``extensions``, ``container``, ``_links``,
            ``_expandable`` and ``version`` are read and may be missing.
        base_url: Base URL of the Confluence instance the payload came from.

    Returns:
        The attachment. Its ancestor chain is the container's ancestors
        followed by the container itself, with the first element dropped.
    """
    ext = data.get("extensions", {})
    parent = data.get("container", {})
    # The space key is the last path segment of the expandable "space" link.
    space_key = data.get("_expandable", {}).get("space", "").split("/")[-1]
    return cls(
        base_url=base_url,
        id=data.get("id", ""),
        title=data.get("title", ""),
        space=Space.from_key(space_key, base_url),
        file_size=ext.get("fileSize", 0),
        media_type=ext.get("mediaType", ""),
        media_type_description=ext.get("mediaTypeDescription", ""),
        file_id=ext.get("fileId", ""),
        collection_name=ext.get("collectionName", ""),
        download_link=data.get("_links", {}).get("download", ""),
        comment=ext.get("comment", ""),
        # Every node is converted (container last) before the first is dropped.
        ancestors=[
            Ancestor.from_json(node, base_url)
            for node in [*parent.get("ancestors", []), parent]
        ][1:],
        version=Version.from_json(data.get("version", {})),
    )
def export(self) -> None:
    """Download this attachment to its export path unless the file already exists.

    The method never raises for download problems: HTTP and transport errors
    are logged as warnings and counted with ``stats.inc_attachments_failed()``.
    A successful download is written with ``save_file`` and counted with
    ``stats.inc_attachments_exported()``.
    """
    stats = get_stats()
    filepath = settings.export.output_path / self.export_path
    if filepath.exists():
        logger.debug("Skipping attachment '%s' — already exists at %s", self.title, filepath)
        return

    logger.debug("Downloading attachment '%s' to %s", self.title, filepath)
    client = get_thread_confluence(self.base_url)
    try:
        response = client.request(
            method="GET",
            path=client.url + self.download_link,
            absolute=True,
            advanced_mode=True,
        )
        response.raise_for_status()  # Raise error if request fails
    except HTTPError as e:
        # The response may be missing on the exception, so read it defensively.
        status = getattr(e.response, "status_code", None)
        if status == 404:  # noqa: PLR2004
            logger.warning("There is no attachment with title '%s'. Skipping export.", self.title)
        else:
            # Permission or server errors must not be reported as a missing file.
            logger.warning(
                "Failed to download attachment '%s' (HTTP %s): %s. Skipping.",
                self.title,
                status,
                e,
            )
        stats.inc_attachments_failed()
        return
    except RequestException as e:
        logger.warning("Failed to download attachment '%s': %s. Skipping.", self.title, e)
        stats.inc_attachments_failed()
        return

    save_file(filepath, response.content)
    logger.debug("Saved attachment '%s' (%d bytes)", self.title, len(response.content))
    stats.inc_attachments_exported()
class Descendant(Document):
    """Page record built from a content-search result, without body content."""

    id: int

    @property
    def _template_vars(self) -> dict[str, str]:
        """Return the base template variables plus ``page_id`` and ``page_title``."""
        variables = dict(super()._template_vars)
        variables["page_id"] = str(self.id)
        variables["page_title"] = sanitize_filename(self.title)
        return variables

    @property
    def export_path(self) -> Path:
        """Return the export path obtained by filling the configured page path template."""
        # Turn "{name}" placeholders into "${name}" so string.Template can fill them.
        pattern = settings.export.page_path.replace("{", "${")
        return Path(Template(pattern).safe_substitute(self._template_vars))

    @classmethod
    def from_json(cls, data: JsonResponse, base_url: str) -> "Descendant":
        """Build a ``Descendant`` from a page JSON object.

        The first entry of ``ancestors`` is dropped after all entries have
        been converted.
        """
        space_key = data.get("_expandable", {}).get("space", "").split("/")[-1]
        return cls(
            base_url=base_url,
            id=data.get("id", 0),
            title=data.get("title", ""),
            space=Space.from_key(space_key, base_url),
            ancestors=[
                Ancestor.from_json(node, base_url) for node in data.get("ancestors", [])
            ][1:],
            version=Version.from_json(data.get("version", {})),
        )

text

@property
def descendants(self) -> list["Descendant"]:
    """Return all pages below this page, found with a CQL ``ancestor`` search.

    Result pages are followed through the ``_links.next`` path of each
    response. Any error while fetching yields an empty list (results
    collected so far are discarded); the error is always logged.
    """
    client = get_thread_confluence(self.base_url)
    params = {
        "cql": f"type=page AND ancestor={self.id}",
        "expand": "metadata.properties,ancestors,version",
        "limit": 250,
    }
    results: list[dict] = []

    try:
        response = cast("dict", client.get("rest/api/content/search", params=params))
        while True:
            results.extend(response.get("results", []))
            next_path = response.get("_links", {}).get("next")
            if not next_path:
                break
            response = cast("dict", client.get(next_path))
    except HTTPError as e:
        # The response may be missing on the exception, so read it defensively.
        status = getattr(e.response, "status_code", None)
        if status == 404:  # noqa: PLR2004
            logger.warning(
                "Content with ID %s not found (404) when fetching descendants.", self.id
            )
        else:
            # Previously swallowed silently; surface it so that an empty
            # descendant list can be traced back to the failed request.
            logger.warning(
                "HTTP error (status %s) when fetching descendants for content ID %s: %s",
                status,
                self.id,
                e,
            )
        return []
    except Exception:
        logger.exception(
            "Unexpected error when fetching descendants for content ID %s.", self.id
        )
        return []

    return [Descendant.from_json(result, self.base_url) for result in results]

{self.title}

def export(self) -> dict[str, AttachmentEntry]:
    """Export this page: attachments, Markdown and optionally comments.

    Returns:
        The attachment entries returned by ``export_attachments``, or an
        empty dict when the page is the "Page not accessible" placeholder.
    """
    page_id = self.id
    if self.title == "Page not accessible":
        logger.warning("Skipping export for inaccessible page id=%s", page_id)
        return {}

    logger.debug("Exporting page id=%s '%s'", page_id, self.title)
    debug_enabled = settings.export.log_level == "DEBUG"
    if debug_enabled:
        # Raw body dumps are only written when debug logging is configured.
        self.export_body()

    # Attachments go first so the files can be used during Markdown conversion.
    logger.debug("Exporting attachments for page id=%s", page_id)
    entries = self.export_attachments()

    logger.debug("Converting to Markdown for page id=%s", page_id)
    self.export_markdown()

    comments_enabled = settings.export.comments_export != "none"
    if comments_enabled:
        logger.debug("Exporting comments for page id=%s", page_id)
        self.export_comments_sidecar()

    destination = settings.export.output_path / self.export_path
    logger.info("Exported '%s' -> %s", self.title, destination)
    return entries
def _fetch_page_comments(self) -> list[dict]:
    """Return the open footer comments of this page.

    Comments are requested with ``location="footer"`` and further result
    pages are followed through ``_links.next``. A comment counts as open
    when its resolution status is ``"open"`` or not present. On any error a
    warning is logged and the comments gathered so far are returned.
    """
    open_comments: list[dict] = []
    client = get_thread_confluence(self.base_url)
    try:
        batch = cast(
            "dict",
            client.get_page_comments(
                self.id,
                location="footer",
                expand="extensions.resolution,body.view,history.createdBy",
                limit=50,
            ),
        )
        while True:
            for item in batch.get("results", []):
                resolution = item.get("extensions", {}).get("resolution", {})
                if resolution.get("status", "open") == "open":
                    open_comments.append(item)
            following = batch.get("_links", {}).get("next")
            if not following:
                break
            batch = cast("dict", client.get(following))
    except Exception:  # noqa: BLE001
        logger.warning("Failed to fetch page comments for page id=%s", self.id)
    return open_comments
def export_comments_sidecar(self) -> None:
    """Write the page's comments to a ``<page stem>.comments.md`` sidecar file.

    Which comments are fetched depends on ``settings.export.comments_export``:
    ``"inline"`` and ``"footer"`` select one kind, ``"all"`` selects both.
    Nothing is written when no comments are found. The file starts with a
    YAML front matter block (page id, title, web URL) followed by one section
    per comment kind.
    """
    import json  # local import: only needed to quote the front matter values

    mode = settings.export.comments_export
    inline = self._fetch_inline_comments() if mode in ("inline", "all") else []
    page = self._fetch_page_comments() if mode in ("footer", "all") else []
    if not inline and not page:
        return

    source_url = f"{self.base_url}/wiki/spaces/{self.space.key}/pages/{self.id}"
    # A JSON string literal is also a valid YAML double-quoted scalar, so
    # quotes and backslashes in the title are escaped instead of breaking the
    # front matter. ensure_ascii=False keeps non-ASCII titles readable.
    quoted_title = json.dumps(self.title, ensure_ascii=False)
    quoted_url = json.dumps(source_url, ensure_ascii=False)
    lines: list[str] = [
        "---",
        f"confluence_page_id: '{self.id}'",
        f"confluence_page_title: {quoted_title}",
        f"confluence_webui_url: {quoted_url}",
        "---",
        "",
    ]

    if inline:
        lines.append("## Inline comments")
        lines.append("")
        self._render_inline_comments(lines, inline)

    if page:
        lines.append("## Page comments")
        lines.append("")
        self._render_page_comments(lines, page)

    save_file(
        settings.export.output_path
        / self.export_path.parent
        / f"{self.export_path.stem}.comments.md",
        "\n".join(lines),
    )
def _attachments_for_export(self) -> list["Attachment"]:
    """Return the subset of attachments that should be exported for this page.

    With ``attachments_export == "all"`` every attachment is returned.
    Otherwise an attachment is kept only when the page body references it:
    as a draw.io diagram name, or through its file id, content id, title or
    the title with spaces encoded as ``%20``.

    Empty identifiers are never used for matching. An empty string is a
    substring of every string, so an attachment without a ``file_id`` (or
    without a title) would otherwise always count as referenced.
    """
    if settings.export.attachments_export == "all":
        return list(self.attachments)

    body = self.body
    body_export = self.body_export
    bodies = body + body_export

    def _is_referenced(att: "Attachment") -> bool:
        title = att.title
        encoded_title = title.replace(" ", "%20")
        filename = att.filename
        if title:
            if filename.endswith(".drawio") and f"diagramName={title}" in body:
                return True
            if filename.endswith((".drawio.png", ".drawio")) and encoded_title in body_export:
                return True
        identifiers = (att.file_id, att.id, title, encoded_title)
        return any(ident and ident in bodies for ident in identifiers)

    return [a for a in self.attachments if _is_referenced(a)]
""" for a in self.attachments: if attachment_id in a.id: return a if a.file_id and attachment_id in a.file_id: return a return None def get_attachment_by_file_id(self, file_id: str) -> Attachment | None: for a in self.attachments: if a.file_id and file_id in a.file_id: return a return None def get_attachments_by_title(self, title: str) -> list[Attachment]: return [attachment for attachment in self.attachments if attachment.title == title] @classmethod def from_json(cls, data: JsonResponse, base_url: str) -> "Page": return cls( base_url=base_url, id=data.get("id", 0), type=data.get("type", ""), web_url=_get_web_url(data), tiny_url=_get_tiny_url(data), title=data.get("title", ""), space=Space.from_key( data.get("_expandable", {}).get("space", "").split("/")[-1], base_url ), body=data.get("body", {}).get("view", {}).get("value", ""), body_export=data.get("body", {}).get("export_view", {}).get("value", ""), editor2=data.get("body", {}).get("editor2", {}).get("value", ""), body_storage=data.get("body", {}).get("storage", {}).get("value", ""), labels=[ Label.from_json(label) for label in data.get("metadata", {}).get("labels", {}).get("results", []) ], attachments=Attachment.from_page_id(data.get("id", 0), base_url), ancestors=[ Ancestor.from_json(ancestor, base_url) for ancestor in data.get("ancestors", []) ][1:], version=Version.from_json(data.get("version", {})), history=History.from_json(data.get("history", {})), ) @classmethod @functools.lru_cache(maxsize=1000) def from_id(cls, page_id: int, base_url: str) -> "Page": _empty_space = Space(base_url=base_url, key="", name="", description="", homepage=0) if page_id is None: logger.warning("Page ID is None, returning empty page") return cls( base_url=base_url, id=0, title="Page not accessible", space=_empty_space, body="", body_export="", editor2="", labels=[], attachments=[], ancestors=[], ) logger.debug("Fetching page id=%s from %s", page_id, base_url) expand = ( 
"body.view,body.export_view,body.editor2,body.storage,metadata.labels," "metadata.properties,ancestors,version,history,history.createdBy" ) try: return cls.from_json( _require_dict( get_thread_confluence(base_url).get_page_by_id( page_id, expand=expand, ), f"page id={page_id} at {base_url}", ), base_url, ) except (ApiError, HTTPError): logger.warning("Could not access page id=%s — treating as inaccessible", page_id) return cls( base_url=base_url, id=page_id, title="Page not accessible", space=_empty_space, body="", body_export="", editor2="", labels=[], attachments=[], ancestors=[], version=Version.from_json({}), ) @classmethod def from_url(cls, page_url: str) -> "Page": """Retrieve a Page object given a Confluence page URL. The Confluence instance is selected automatically by matching the URL's hostname against configured instances. If no match is found, a new entry is registered in the auth config so the user can fill in credentials via the interactive config menu. Supports standard instance URLs and Atlassian API gateway URLs of the form ``https://api.atlassian.com/ex/confluence/{cloudId}/wiki/spaces/KEY/pages/123``. 
""" base_url = _extract_base_url(page_url) # Ensure a client exists (creates/prompts if first time for this host) get_confluence_instance(base_url) parsed = urllib.parse.urlparse(page_url) query_params = urllib.parse.parse_qs(parsed.query) page_id_param = next( ( values[0] for key, values in query_params.items() if key.lower() == "pageid" and values and values[0] ), None, ) if page_id_param and page_id_param.isdigit(): page_id = int(page_id_param) logger.debug( "Resolved page id=%s from Confluence query string in URL %s", page_id, page_url ) return Page.from_id(page_id, base_url) base_path = urllib.parse.urlparse(base_url).path.rstrip("/") relative_path = parsed.path[len(base_path) :] if match := parse_confluence_path(relative_path): if match.page_id: logger.debug("Resolved page id=%s from Confluence URL %s", match.page_id, page_url) return Page.from_id(match.page_id, base_url) if match.space_key and match.page_title: logger.debug( "Resolving page '%s' in space '%s' from Confluence URL %s", match.page_title, match.space_key, page_url, ) page_data = _require_dict( get_thread_confluence(base_url).get_page_by_title( space=match.space_key, title=match.page_title, expand="version" ), f"page title={match.page_title!r} space={match.space_key!r} at {base_url}", ) return Page.from_id(page_data["id"], base_url) msg = f"Could not parse page URL {page_url}." 
@property
def _colorid_map(self) -> dict[str, str]:
    """Return the color-id mapping parsed from the page's ``<style>`` tags.

    Every ``_RE_COLORID_CSS`` match contributes its first capture group as
    key and its second as value; the first occurrence of a key wins. The
    mapping is built on first access and cached on the converter.
    """
    if self._colorid_map_cache is not None:
        return self._colorid_map_cache

    mapping: dict[str, str] = {}
    document = BeautifulSoup(self.page.html, "html.parser")
    for style_tag in document.find_all("style"):
        for found in _RE_COLORID_CSS.finditer(style_tag.get_text()):
            # setdefault keeps the value of the first match for a given id.
            mapping.setdefault(found.group(1), found.group(2))

    self._colorid_map_cache = mapping
    return mapping
@staticmethod
def _extract_panel_emoji(macro: Tag) -> str | None:
    """Return the custom icon of a panel macro, or ``None`` if it has none.

    Only direct ``parameter`` children of ``macro`` are considered. A
    non-empty ``panelIconText`` parameter is returned as is. Otherwise
    ``panelIconId`` is read as dash-separated hexadecimal code points and
    converted to the corresponding characters, provided each code point lies
    between 0 and ``_MAX_UNICODE_CODEPOINT``.
    """
    params: dict[str, str] = {
        str(name): child.get_text(strip=True)
        for child in macro.find_all("parameter", recursive=False)
        if isinstance(child, Tag) and (name := child.get("name"))
    }

    icon_text = params.get("panelIconText")
    if icon_text:
        return icon_text

    icon_id = params.get("panelIconId")
    if not icon_id:
        return None

    try:
        codepoints = [int(part, 16) for part in icon_id.split("-")]
        in_range = all(0 <= value <= _MAX_UNICODE_CODEPOINT for value in codepoints)
        if in_range:
            return "".join(chr(value) for value in codepoints)
    except (OverflowError, ValueError):
        # Malformed ids are treated the same as a missing icon.
        pass
    return None
def _add_page_metadata_properties(self) -> None:
    """Add Confluence page metadata to the front matter properties.

    Does nothing unless ``settings.export.page_metadata_in_frontmatter`` is
    enabled. Values equal to ``None``, ``""`` or ``0`` are skipped, and keys
    already present in ``page_properties`` are left untouched.
    """
    if not settings.export.page_metadata_in_frontmatter:
        return

    page = self.page
    version, history = page.version, page.history
    candidates = (
        # Stored as str to stay JS-safe-integer compatible: Confluence
        # Cloud page IDs can exceed 2^53, which JS-based SSGs (Hugo,
        # Astro, ...) parsing the front matter would silently truncate.
        ("confluence_page_id", str(page.id)),
        ("confluence_space_key", page.space.key),
        ("confluence_type", page.type),
        ("confluence_created", history.created),
        ("confluence_created_by", history.created_by.display_name),
        ("confluence_last_modified", version.when),
        ("confluence_last_modified_by", version.by.display_name),
        ("confluence_version", version.number),
    )
    for raw_key, value in candidates:
        if value in (None, "", 0):
            continue
        # setdefault: never overwrite a property that is already set.
        self.page_properties.setdefault(sanitize_key(raw_key), value)
"td"])) for tr in cast("list[Tag]", el.find_all("tr")) if tr ] if not rows: return None props: dict[str, str] = {} key_counts: dict[str, int] = {} for row in rows: if len(row) == 2: # noqa: PLR2004 raw_key = row[0].get_text(strip=True) count = key_counts.get(raw_key, 0) + 1 key_counts[raw_key] = count unique_key = raw_key if count == 1 else f"{raw_key} {count}" props[unique_key] = self.convert(str(row[1])).strip() if fmt in ("frontmatter", "frontmatter_and_table", "meta-bind-view-fields"): self.set_page_properties(**props) if fmt == "frontmatter": return None if fmt == "frontmatter_and_table": return text if fmt == "dataview-inline-field": lines = "\n".join(f"{k}:: {v}" for k, v in props.items()) return f"\n{lines}\n" # meta-bind-view-fields: two-column table with VIEW fields in value column table_data = [ (f"**{k}**", f"`VIEW[{{{sanitize_key(k)}}}][text(renderMarkdown)]`") for k in props ] return "\n\n" + tabulate(table_data, headers=["", ""], tablefmt="pipe") + "\n" def convert_alert(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: """Convert Confluence info macros to Markdown GitHub style alerts. GitHub specific alert types: https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts Inside table cells GitHub alerts don't render in most viewers (Obsidian, etc.), so emit a leading emoji + plain text instead. 
""" alert_type_map = { "info": "IMPORTANT", "panel": "NOTE", "tip": "TIP", "note": "WARNING", "warning": "CAUTION", } alert_emoji_map = { "NOTE": "\U0001f4dd", "TIP": "\U0001f4a1", "IMPORTANT": "❗", "WARNING": "⚠️", "CAUTION": "\U0001f6d1", } alert_type = alert_type_map.get(str(el["data-macro-name"]), "NOTE") macro_id = el.get("data-macro-id") custom_emoji = self._panel_icon_map.get(str(macro_id)) if macro_id else None emoji = custom_emoji or alert_emoji_map[alert_type] tags = parent_tags if isinstance(parent_tags, list | set) else set() if "td" in tags or "th" in tags: return f"{emoji} {text.strip()}" blockquote = super().convert_blockquote(el, text, parent_tags) return f"\n> [!{alert_type}]{blockquote}" def convert_div(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: # Handle Confluence macros if el.has_attr("data-macro-name"): macro_name = str(el["data-macro-name"]) if macro_name in self.options["macros_to_ignore"]: return "" macro_handlers = { "panel": self.convert_alert, "info": self.convert_alert, "note": self.convert_alert, "tip": self.convert_alert, "warning": self.convert_alert, "details": self.convert_page_properties, "drawio": self.convert_drawio, "plantuml": self.convert_plantuml, "scroll-ignore": self.convert_hidden_content, "toc": self.convert_toc, "jira": self.convert_jira_table, "attachments": self.convert_attachments, "markdown": self.convert_markdown, "mohamicorp-markdown": self.convert_markdown, "include": self.convert_include, "excerpt-include": self.convert_include, } if macro_name in macro_handlers: return macro_handlers[macro_name](el, text, parent_tags) class_handlers = { "expand-container": self.convert_expand_container, "columnLayout": self.convert_column_layout, } for class_name, handler in class_handlers.items(): if class_name in str(el.get("class", "")): return handler(el, text, parent_tags) return super().convert_div(el, text, parent_tags) def convert_expand_container( self, el: BeautifulSoup, text: str, parent_tags: 
list[str] ) -> str: """Convert expand-container div to HTML details element.""" # Extract summary text from expand-control-text summary_element = el.find("span", class_="expand-control-text") summary_text = ( summary_element.get_text().strip() if summary_element else "Click here to expand..." ) # Extract content from expand-content content_element = el.find("div", class_="expand-content") # Recursively convert the content content = ( self.process_tag(content_element, parent_tags).strip() if content_element else "" ) # Return as details element return f"\n
\n{summary_text}\n\n{content}\n\n
def convert_span(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:  # noqa: C901, PLR0911
    """Convert a span, dispatching on macro name, CSS class and inline style.

    Checks run in this order: Jira / status / PlantUML macros, inline comment
    markers, text highlights, then font colors (the last two only when the
    matching export setting is enabled). The first handler that produces a
    result wins; otherwise ``text`` is returned unchanged.
    """
    macro_name = el["data-macro-name"] if el.has_attr("data-macro-name") else None
    if macro_name == "jira":
        return self.convert_jira_issue(el, text, parent_tags)
    if macro_name == "status":
        badge = self._span_status_badge(el, text)
        if badge is not None:
            return badge
        # No badge produced: continue with the generic span handling below.
    elif macro_name == "plantuml":
        return self.convert_plantuml(el, text, parent_tags)

    if el.has_attr("class") and "inline-comment-marker" in el["class"]:
        return self.convert_inline_comment_marker(el, text, parent_tags)

    css = el.get("style", "")
    if not isinstance(css, str):
        css = ""

    if (
        settings.export.convert_text_highlights
        and (highlighted := self._span_highlight(css, text)) is not None
    ):
        return highlighted

    if (
        settings.export.convert_font_colors
        and (colored := self._span_font_color(el, css, text)) is not None
    ):
        return colored

    return text
{file_header_text}{modified_header_text}
{row['file']}{row['modified']}
""" return ( f"\n\n{self.convert_table(BeautifulSoup(html, 'html.parser'), text, parent_tags)}\n" ) def convert_column_layout( self, el: BeautifulSoup, text: str, parent_tags: list[str] ) -> str: cells = el.find_all("div", {"class": "cell"}) if len(cells) < 2: # noqa: PLR2004 return super().convert_div(el, text, parent_tags) html = f"{''.join([f'' for cell in cells])}
{cell!s}
def convert_jira_table(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
    """Render the Jira table macro from the exported page body.

    The table is looked up in ``self.page.body_export`` (``div`` elements with
    class ``jira-table``) rather than in ``el``. Exactly one such table must
    exist; otherwise the problem is logged and ``text`` is returned unchanged.

    Args:
        el: The macro element being converted (not inspected here).
        text: Already-converted inner text, used as the fallback result.
        parent_tags: Parent tag names forwarded to ``process_tag``.

    Returns:
        The converted table, or ``text`` when zero or several tables exist.
    """
    jira_tables = BeautifulSoup(self.page.body_export, "html.parser").find_all(
        "div", {"class": "jira-table"}
    )

    if not jira_tables:
        logger.warning("No Jira table found. Ignoring.")
        return text

    if len(jira_tables) > 1:
        # Not inside an exception handler, so log a plain error instead of
        # ``logger.exception`` (which would append an empty traceback).
        logger.error("Multiple Jira tables are not supported. Ignoring.")
        return text

    return self.process_tag(jira_tables[0], parent_tags)
def convert_a(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:  # noqa: PLR0911, PLR0912, C901
    """Convert an anchor element to a Markdown (or wiki-style) link.

    Handlers are tried in order and the first match returns:

    1. User mentions (class contains ``user-mention``).
    2. Broken "create page" links: a warning is logged, then a link with the
       same text but a different href is searched in ``self.page.editor2``
       and converted instead; failing that, ``[[text]]`` is returned.
    3. Links whose ``data-linked-resource-type`` contains ``page`` and carry
       a usable ``data-linked-resource-id``.
    4. Links whose ``data-linked-resource-type`` contains ``attachment``.
    5. Hrefs without a hostname, or on the page's own host, that carry a
       ``pageId`` query parameter or a path ``parse_confluence_path`` resolves
       to a page id.
    6. In-page anchors (href starting with ``#``).

    Anything else is delegated to the parent converter.
    """
    if "user-mention" in str(el.get("class")):
        return self.convert_user_mention(el, text, parent_tags)
    if "createpage.action" in str(el.get("href")) or "createlink" in str(el.get("class")):
        logger.warning(
            f"Broken link detected: '{text}' on page '{self.page.title}' "
            f"(ID: {self.page.id}). This is likely a Confluence bug. "
            f"Please report this issue to Atlassian Support."
        )
        # Find fallback link without using string= parameter to avoid
        # BeautifulSoup recursion bug. The string= parameter triggers
        # recursive .string property access which fails on Fabric
        # Editor v2 HTML with fab:media tags
        try:
            soup = BeautifulSoup(self.page.editor2, "html.parser")
            for link in soup.find_all("a"):
                # Use get_text() instead of .string to avoid recursion issues
                link_text = link.get_text(strip=True)
                if link_text == text:
                    # Prevent infinite recursion if fallback is the same element
                    if isinstance(link, Tag) and link.get("href") != el.get("href"):
                        return self.convert_a(link, text, parent_tags)  # type: ignore[arg-type]
        except RecursionError:
            # editor2 HTML contains problematic tags (e.g., fab:media)
            # that cause BS4 recursion. Skip fallback and return
            # wiki-style link
            pass
        # If no matching link found, return wiki-style link
        return f"[[{text}]]"
    if "page" in str(el.get("data-linked-resource-type")):
        page_id = str(el.get("data-linked-resource-id", ""))
        # A missing id or the literal string "null" falls through to the
        # remaining handlers below.
        if page_id and page_id != "null":
            return self.convert_page_link(int(page_id))
    if "attachment" in str(el.get("data-linked-resource-type")):
        link = self.convert_attachment_link(el, text, parent_tags)
        # convert_attachment_link may return None if the attachment meta is incomplete
        return link or f"[{text}]({el.get('href')})"
    href_str = str(el.get("href", ""))
    if href_str:
        parsed_href = urlparse(href_str)
        base_host = urlparse(getattr(self.page, "base_url", "") or "").hostname
        # Only relative links and links to the page's own host are inspected
        # for page ids.
        if not parsed_href.hostname or parsed_href.hostname == base_host:
            query_params = urllib.parse.parse_qs(parsed_href.query)
            # The parameter name is matched case-insensitively; the first
            # non-empty value is used.
            page_id_param = next(
                (
                    values[0]
                    for key, values in query_params.items()
                    if key.lower() == "pageid" and values and values[0]
                ),
                None,
            )
            if page_id_param and page_id_param.isdigit():
                return self.convert_page_link(int(page_id_param))
            if match := parse_confluence_path(parsed_href.path):
                if match.page_id:
                    return self.convert_page_link(match.page_id)
    if (href := href_str).startswith("#"):
        if settings.export.page_href == "wiki":
            return f"[[#{text}]]"
        return f"[{text}](#{github_heading_slug(href[1:])})"
    return super().convert_a(el, text, parent_tags)
def convert_attachment_link(
    self, el: BeautifulSoup, text: str, parent_tags: list[str]
) -> str:
    """Return a Markdown link pointing at the attachment referenced by ``el``.

    The attachment is resolved from ``data-linked-resource-file-id``, then
    ``data-media-id``, then ``data-linked-resource-id``; the first lookup that
    yields an attachment wins. When none does, the element's own ``href``
    (or, lacking one, ``text``) is used as the link target so the export
    does not fail on incomplete attachment metadata.
    """
    lookups = (
        ("data-linked-resource-file-id", self.page.get_attachment_by_file_id),
        ("data-media-id", self.page.get_attachment_by_file_id),
        ("data-linked-resource-id", self.page.get_attachment_by_id),
    )
    attachment = None
    for attr_name, resolve in lookups:
        if attachment:
            break
        reference = el.get(attr_name)
        if reference:
            attachment = resolve(str(reference))

    if attachment is None:
        target = el.get("href") or text
        return f"[{text}]({target})"

    href_mode = settings.export.attachment_href
    if href_mode == "wiki":
        return f"[[{attachment.export_path.name}|{attachment.title}]]"

    link_path = self._get_path_for_href(attachment.export_path, settings.export.attachment_href)
    return f"[{attachment.title}]({link_path.replace(' ', '%20')})"
def convert_user_name(self, name: str) -> str:
    """Strip Confluence account-status markers from a display name.

    Trailing ``(Unlicensed)`` and ``(Deactivated)`` markers are removed in any
    order and any combination, together with the whitespace around them.

    Args:
        name: Display name as delivered by Confluence.

    Returns:
        The name without trailing status markers or surrounding whitespace.
    """
    status_suffixes = ("(Unlicensed)", "(Deactivated)")
    cleaned = name.strip()
    # Whitespace sits between stacked markers ("X (Deactivated) (Unlicensed)"),
    # so trim after every removal and repeat until no marker is left.
    while cleaned.endswith(status_suffixes):
        for suffix in status_suffixes:
            cleaned = cleaned.removesuffix(suffix).rstrip()
    return cleaned
def convert_img(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:  # noqa: C901, PLR0911, PLR0912
    """Convert an image element to Markdown.

    Resolution order:

    1. Emoticon images are returned as whatever ``_convert_emoticon`` yields.
    2. The backing attachment is looked up via ``data-media-id``,
       ``data-linked-resource-file-id``, ``data-linked-resource-id`` and
       finally the ``ri:filename`` found in ``data-encoded-xml``.
    3. For ``.drawio.png`` sources an embedded mermaid diagram is preferred;
       otherwise the PNG attachment of the same name is used as fallback.
    4. Without an attachment, a plain image link to ``href``/``text`` or the
       ``src`` URL is emitted (or just ``text`` when neither exists).

    When image captions are enabled and one is known for the attachment, it
    is appended in italics on the following line.
    """
    if emoticon := self._convert_emoticon(el):
        return emoticon

    # Each identifying attribute is resolved at most once, in priority order.
    attachment = None
    lookups = (
        ("data-media-id", self.page.get_attachment_by_file_id),
        ("data-linked-resource-file-id", self.page.get_attachment_by_file_id),
        ("data-linked-resource-id", self.page.get_attachment_by_id),
    )
    for attr_name, resolve in lookups:
        if reference := el.get(attr_name):
            attachment = resolve(str(reference))
        if attachment:
            break

    if not attachment and (encoded_xml := el.get("data-encoded-xml")):
        decoded = unquote(str(encoded_xml))
        if m := re.search(r'ri:filename="([^"]+)"', decoded):
            matches = self.page.get_attachments_by_title(m.group(1))
            if matches:
                attachment = matches[0]

    url_src = str(el.get("src", ""))
    if ".drawio.png" in url_src:
        filename = unquote(urlparse(url_src).path.split("/")[-1])
        drawio_result = self._convert_drawio_embedded_mermaid(filename)
        if drawio_result:
            return drawio_result

        # If no mermaid diagram extracted, use PNG as attachment fallback
        if attachment is None:
            drawio_images = self.page.get_attachments_by_title(filename)
            if len(drawio_images) > 0:
                attachment = drawio_images[0]

    if attachment is None:
        href = el.get("href") or text
        if href:
            return f"![{text}]({href})"
        if url_src:
            return f"![{text}]({url_src})"
        return text

    caption = (
        self._image_captions.get(attachment.title, "")
        if settings.export.image_captions
        else ""
    )

    if settings.export.attachment_href == "wiki":
        img_md = f"![[{attachment.export_path.name}]]"
        return f"{img_md}\n*{caption}*" if caption else img_md

    path = self._get_path_for_href(attachment.export_path, settings.export.attachment_href)
    el["src"] = path.replace(" ", "%20")

    tags = parent_tags if isinstance(parent_tags, list | set) else set()
    if "_inline" in tags:
        # Always show images: drop the marker on a copy so the caller's
        # collection is left untouched.
        tags = set(tags)
        tags.discard("_inline")

    img_md = super().convert_img(el, text, tags)  # type: ignore[union-attr]
    return f"{img_md}\n*{caption}*" if caption else img_md
def convert_em(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str:
    """Convert emphasis elements without losing Unicode whitespace.

    The text is passed through ``_normalize_unicode_whitespace`` before the
    parent converter handles the element.
    """
    return super().convert_em(el, self._normalize_unicode_whitespace(text), parent_tags)
whitespace entities.""" text = self._normalize_unicode_whitespace(text) return super().convert_i(el, text, parent_tags) def convert_b(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: """Convert tags, preserving spaces from Unicode whitespace entities.""" text = self._normalize_unicode_whitespace(text) return super().convert_b(el, text, parent_tags) def _convert_drawio_embedded_mermaid(self, filename: str) -> str | None: """Extract mermaid diagram from DrawIO PNG preview image. Args: filename: The filename of the drawio diagram image. Returns: Markdown formatted mermaid diagram or None if not found. """ drawio_title = filename.removesuffix(".png") drawio_attachments = self.page.get_attachments_by_title(drawio_title) if len(drawio_attachments) == 0: return None drawio_filepath = settings.export.output_path / drawio_attachments[0].export_path if not drawio_filepath.exists(): return None # Extract mermaid diagram from DrawIO file return load_and_parse_drawio(str(drawio_filepath)) def convert_drawio(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: if match := re.search(r"\|diagramName=(.+?)\|", str(el)): drawio_name = match.group(1) preview_name = f"{drawio_name}.png" drawio_attachments = self.page.get_attachments_by_title(drawio_name) preview_attachments = self.page.get_attachments_by_title(preview_name) if not drawio_attachments or not preview_attachments: return f"\n\n\n" if settings.export.attachment_href == "wiki": preview_filename = preview_attachments[0].export_path.name drawio_filename = drawio_attachments[0].export_path.name drawio_image_embedding = f"![[{preview_filename}|{drawio_name}]]" drawio_link = f"[[{drawio_filename}|{drawio_image_embedding}]]" else: drawio_path = self._get_path_for_href( drawio_attachments[0].export_path, settings.export.attachment_href ) preview_path = self._get_path_for_href( preview_attachments[0].export_path, settings.export.attachment_href ) drawio_image_embedding = 
f"![{drawio_name}]({preview_path.replace(' ', '%20')})" drawio_link = f"[{drawio_image_embedding}]({drawio_path.replace(' ', '%20')})" return f"\n{drawio_link}\n\n" return "" def _extract_uml_from_editor2(self, macro_id: str) -> str | None: """Extract PlantUML source from editor2 XML by macro-id (Cloud format).""" if not self.page.editor2: return None wrapped = f"{self.page.editor2}" soup = BeautifulSoup(wrapped, "xml") for macro in soup.find_all("structured-macro"): if not isinstance(macro, Tag): continue if macro.get("name") != "plantuml" or macro.get("macro-id") != macro_id: continue plain_text_body = macro.find("plain-text-body") if not isinstance(plain_text_body, Tag): continue cdata = plain_text_body.get_text(strip=True) if not cdata: continue try: return json.loads(cdata).get("umlDefinition") or None except json.JSONDecodeError: return None return None def _extract_uml_from_storage(self) -> str | None: """Extract PlantUML source from body.storage by position (Server format).""" storage_macros = self._storage_plantuml_macros idx = self._plantuml_index self._plantuml_index += 1 if idx >= len(storage_macros): return None plain_text_body = storage_macros[idx].find("plain-text-body") if not isinstance(plain_text_body, Tag): return None uml = plain_text_body.get_text(strip=True) return uml or None def convert_plantuml(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: """Convert PlantUML diagrams to Markdown code blocks. Supports two Confluence formats: 1. **Cloud / editor2**: The editor2 XML contains structured macros with the UML definition in a JSON CDATA section (``{"umlDefinition": "..."}``). Each macro carries a ``macro-id`` that is also present in the view HTML as ``data-macro-id``. 2. **Server / Data Center**: ``editor2`` is often empty. The UML source lives as raw ``@startuml`` text inside ```` CDATA sections in ``body.storage``. The view HTML renders each diagram inside a ```` without a ``macro-id``. 
def _strip_excerpt_include_panel_titles(self, html: str) -> str:
    """Remove the title panel wrapped around ``excerpt-include`` bodies.

    Every element whose ``data-macro-name`` is ``excerpt-include`` is handed
    to ``_unwrap_excerpt_include_panel``, which edits the parsed tree in
    place.

    Args:
        html: The page HTML to clean up.

    Returns:
        The re-serialized HTML after all such elements were processed.
    """
    document = BeautifulSoup(html, "html.parser")
    includes = document.find_all(attrs={"data-macro-name": "excerpt-include"})
    for include in includes:
        self._unwrap_excerpt_include_panel(include)
    return str(document)
""" wrapped_editor2 = f"{self.page.editor2}" soup_editor2 = BeautifulSoup(wrapped_editor2, "xml") for macro in soup_editor2.find_all("structured-macro"): if not isinstance(macro, Tag): continue if macro.get("name") not in ("include", "excerpt-include"): continue if macro.get("macro-id") != macro_id: continue ri_page = macro.find("page") if isinstance(ri_page, Tag): title = ri_page.get("content-title") if isinstance(title, str) and title: return title return None def _find_element_with_namespace(self, parent: BeautifulSoup, tag_name: str) -> Tag | None: """Find an element with or without namespace prefix.""" result = parent.find(f"ac:{tag_name}") or parent.find(tag_name) return result if isinstance(result, Tag) else None def _find_structured_macro(self, el: BeautifulSoup) -> Tag | None: """Find structured-macro element with or without namespace.""" return self._find_element_with_namespace(el, "structured-macro") def _extract_plain_text_body(self, el: BeautifulSoup | Tag) -> str | None: """Extract markdown content from plain-text-body element.""" plain_text_body = self._find_element_with_namespace(el, "plain-text-body") # type: ignore[arg-type] if plain_text_body: return plain_text_body.get_text() return None def _extract_markdown_parameter(self, el: BeautifulSoup | Tag) -> str | None: """Extract markdown content from parameter element.""" param = el.find("ac:parameter", {"ac:name": "markdown"}) if param is None: param = el.find("parameter", {"name": "markdown"}) if isinstance(param, Tag): return param.get_text() return None def _extract_markdown_from_body(self, el: BeautifulSoup) -> str | None: """Extract markdown content from body HTML.""" # Try plain-text-body first (standard markdown macro) markdown_content = self._extract_plain_text_body(el) if markdown_content: return markdown_content # Check in structured-macro child element structured_macro = self._find_structured_macro(el) if structured_macro: markdown_content = 
self._extract_plain_text_body(structured_macro) if markdown_content: return markdown_content # Try parameter for mohamicorp-markdown markdown_content = self._extract_markdown_parameter(el) if markdown_content: return markdown_content # Check parameter in structured-macro child if structured_macro: markdown_content = self._extract_markdown_parameter(structured_macro) if markdown_content: return markdown_content return None def _extract_markdown_from_editor2(self, macro_id: str) -> str | None: """Extract markdown content from editor2 XML.""" wrapped_editor2 = f"{self.page.editor2}" soup_editor2 = BeautifulSoup(wrapped_editor2, "xml") # BeautifulSoup strips namespace prefixes, so ac:structured-macro # becomes structured-macro markdown_macros = soup_editor2.find_all("structured-macro") for macro in markdown_macros: if not isinstance(macro, Tag): continue if ( macro.get("name") in ("markdown", "mohamicorp-markdown") and macro.get("macro-id") == macro_id ): # Try plain-text-body first plain_text_body = macro.find("plain-text-body") if isinstance(plain_text_body, Tag): return plain_text_body.get_text(strip=True) # Try parameter for mohamicorp-markdown param = macro.find("parameter", {"name": "markdown"}) if isinstance(param, Tag): return param.get_text(strip=True) return None def convert_markdown(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: """Convert Markdown macro fragments to Markdown. Supports both standard 'markdown' macro and 'mohamicorp-markdown' macro. The content is already in Markdown format, so we just extract and return it. 
""" macro_name = el.get("data-macro-name", "") # First, try to extract from body HTML markdown_content = self._extract_markdown_from_body(el) # If not found, try editor2 XML (similar to plantuml) if not markdown_content: macro_id = el.get("data-macro-id") if macro_id and isinstance(macro_id, str): markdown_content = self._extract_markdown_from_editor2(macro_id) if not markdown_content: logger.warning( f"Markdown macro ({macro_name}) found but no content could be extracted" ) return f"\n\n\n" # Return the markdown content directly (it's already in markdown format) # Add newlines for proper spacing return f"\n{markdown_content}\n\n" def convert_table(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: if el.has_attr("class") and "metadata-summary-macro" in el["class"]: return self.convert_page_properties_report(el, text, parent_tags) return super().convert_table(el, text, parent_tags) def convert_page_properties_report( self, el: BeautifulSoup, text: str, parent_tags: list[str] ) -> str: data_cql = el.get("data-cql") if not data_cql: return "" if settings.export.page_properties_report_format == "dataview": dql = self._cql_to_dataview(el, str(data_cql)) if dql is not None: return f"\n```dataview\n{dql}\n```\n" soup = BeautifulSoup(self.page.body_export, "html.parser") table = soup.find("table", {"data-cql": data_cql}) if not table: return "" return super().convert_table(table, "", parent_tags) # type: ignore - def _cql_to_dataview(self, el: BeautifulSoup, cql: str) -> str | None: """Translate a Confluence CQL query to an Obsidian Dataview DQL query. Returns None if the CQL cannot be meaningfully translated. 
""" current_content_id = str(el.get("data-current-content-id", "")) headings_raw = str(el.get("data-headings", "")) first_col = str(el.get("data-first-column-heading", "Title")) sort_by = str(el.get("data-sort-by", first_col)) reverse_sort = str(el.get("data-reverse-sort", "false")).lower() == "true" label_conditions = [ m.group(1) for m in re.finditer(r'label\s*=\s*"([^"]+)"', cql, re.IGNORECASE) ] parent_match = re.search(r'parent\s*=\s*"?(\d+)"?', cql, re.IGNORECASE) current_content_match = re.search( r'(?:ancestor|parent)\s*=\s*currentContent\s*\(\s*\)', cql, re.IGNORECASE ) from_clause: str | None = None if current_content_match or ( parent_match and parent_match.group(1) == current_content_id ): folder = str(self.page.export_path.parent).replace("\\", "/") from_clause = f'"{folder}"' if from_clause is None and not label_conditions: return None lines: list[str] = [] if headings_raw: headings = [h.strip() for h in headings_raw.split(",") if h.strip()] col_names = ", ".join(f'{sanitize_key(h)} AS "{h}"' for h in headings) lines.append(f"TABLE {col_names}") else: lines.append("TABLE") from_parts = ([from_clause] if from_clause else []) + [ f"#{lbl}" for lbl in label_conditions ] if from_parts: lines.append("FROM " + " AND ".join(from_parts)) sort_col = sanitize_key(sort_by) sort_dir = "DESC" if reverse_sort else "ASC" lines.append(f"SORT {sort_col} {sort_dir}") return "\n".join(lines) def _get_path_for_href( self, path: Path, style: Literal["absolute", "relative", "wiki"] ) -> str: """Get the path to use in href attributes based on settings.""" if style == "absolute": # Note that usually absolute would be # something like this: (settings.export.output_path / path).absolute() # In this case the URL will be "absolute" to the export path. # This is useful for local file links. 
def fetch_deleted_page_ids(page_ids: list[str], base_url: str) -> set[str]:
    """Return the subset of *page_ids* that no longer exist in Confluence.

    Uses the v2 REST API when ``connection_config.use_v2_api`` is enabled
    (multiple ``id`` query params, up to ``export.existence_check_batch_size``
    IDs per request), or the v1 CQL content search otherwise (capped at
    :data:`_CQL_MAX_BATCH_SIZE` IDs per request).

    Per-batch API failures are handled safely: affected IDs are assumed to
    still exist so they are never accidentally deleted.

    Args:
        page_ids: Page IDs to check.
        base_url: Base URL of the Confluence instance to query.

    Returns:
        The IDs from *page_ids* that were not returned by the API.
    """
    if not page_ids:
        return set()

    use_v2 = settings.connection_config.use_v2_api
    batch_size = settings.export.existence_check_batch_size
    effective_batch_size = batch_size if use_v2 else min(batch_size, _CQL_MAX_BATCH_SIZE)
    # A configured size of 0 (or less) would divide by zero below and make
    # range() reject its step; degrade to one ID per request instead.
    effective_batch_size = max(1, effective_batch_size)
    n_batches = -(-len(page_ids) // effective_batch_size)  # ceil division

    logger.debug(
        "Checking existence of %d page(s) in %d batch(es) via %s API",
        len(page_ids),
        n_batches,
        "v2" if use_v2 else "v1 CQL",
    )

    fetch_batch = _fetch_page_ids_v2_batch if use_v2 else _fetch_page_ids_cql_batch
    existing: set[str] = set()
    for i in range(0, len(page_ids), effective_batch_size):
        batch = page_ids[i : i + effective_batch_size]
        try:
            existing.update(fetch_batch(batch, base_url))
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "Failed to check page existence for batch (%d IDs). "
                "Skipping deletion for these pages.",
                len(batch),
            )
            # Keep the cause available for troubleshooting without noisy output.
            logger.debug("Existence check failed with: %r", exc)
            # Treat every ID of the failed batch as still existing.
            existing.update(batch)

    return set(page_ids) - existing
def export_pages(pages: list["Page | Descendant"]) -> None:
    """Export a list of Confluence pages to Markdown.

    Every page is marked as seen in the lockfile and registered in the title
    registry; pages the lockfile reports as unchanged are counted as skipped.
    The remaining pages are exported one by one when the log level is DEBUG
    or ``settings.connection_config.max_workers`` is 1 or less, and through a
    ThreadPoolExecutor with that many workers otherwise. A failure on one page
    is logged and counted; it does not abort the remaining pages.

    Args:
        pages: List of pages to export.
    """
    # Mark all pages as seen so cleanup skips API checks for unchanged pages
    LockfileManager.mark_seen([candidate.id for candidate in pages])
    for candidate in pages:
        PageTitleRegistry.register(int(candidate.id), candidate.title)

    pending = [candidate for candidate in pages if LockfileManager.should_export(candidate)]
    unchanged = len(pages) - len(pending)

    stats = reset_stats(total=len(pages))
    for _ in range(unchanged):
        stats.inc_skipped()
    if unchanged:
        logger.info("Skipping %d unchanged page(s).", unchanged)

    if not pending:
        logger.info("All %d page(s) unchanged — nothing to export.", len(pages))
        return

    worker_limit = settings.connection_config.max_workers
    run_serially = settings.export.log_level == "DEBUG" or worker_limit <= 1
    if run_serially:
        mode_label = "serial"
    else:
        mode_label = f"parallel ({worker_limit} workers)"
    logger.debug("Export mode: %s, pages to export: %d", mode_label, len(pending))

    with _make_progress() as progress:
        task = progress.add_task(
            f"[cyan]Exporting {len(pending)} page(s)[/cyan]",
            total=len(pending),
        )

        def settle(page: "Page | Descendant", outcome) -> None:  # noqa: ANN001
            """Run *outcome*, record a failure for *page* if it raises, advance the bar."""
            try:
                outcome()
            except Exception:
                logger.exception("Failed to export page %s", page.id)
                stats.inc_failed()
            finally:
                progress.advance(task)

        if run_serially:
            for page in pending:
                progress.update(task, description=f"[cyan]Page {page.id}[/cyan]")
                settle(page, lambda: _export_page_worker(page, stats))  # noqa: B023
            return

        with ThreadPoolExecutor(max_workers=worker_limit) as executor:
            submitted = {
                executor.submit(_export_page_worker, page, stats): page for page in pending
            }
            for finished in as_completed(submitted):
                # result() re-raises whatever the worker raised.
                settle(submitted[finished], finished.result)
We then open the config menu at the exact failing URL and exit — no traceback, no per-command boilerplate. """ def __call__(self, *args: object, **kwargs: object) -> None: from confluence_markdown_exporter.api_clients import AuthNotConfiguredError try: super().__call__(*args, **kwargs) except AuthNotConfiguredError as e: from confluence_markdown_exporter.utils.config_interactive import main_config_menu_loop console.print( f"Please configure {e.service} credentials for {e.url} and re-run the export." ) main_config_menu_loop(f"auth.{e.service.lower()}", new_instance_url=e.url) sys.exit(1) except ValueError as e: console.print( f"[red bold]{e}[/red bold]\n" "See [code]--help[/code] or [code]README.md[/code] for more information." ) sys.exit(1) # Each list item must be its own \n\n-separated block so typer's epilog renderer # keeps single \n between items, forming a valid markdown bullet list. _QUICKSTART_EPILOG = ( "**Quick start:**\n\n" "- Configure credentials: `cme config edit auth.confluence`\n\n" "- Set output path: `cme config set export.output_path=./output`\n\n" "- Export a page: `cme pages https://company.atlassian.net/wiki/spaces/KEY/pages/123/Title`\n\n" "- Export a space: `cme spaces https://company.atlassian.net/wiki/spaces/MYSPACE`\n\n" "- Export everything: `cme orgs https://company.atlassian.net`\n\n" "- Each command also has a singular alias" " (`page`, `space`, `org`) that behaves identically.\n\n" ) _PAGE_URL_FORMATS = ( "**Supported URL formats:**\n\n" "- **Cloud**: `https://company.atlassian.net/wiki/spaces/KEY/pages/123/Title`\n\n" "- **Server (long)**: `https://confluence.company.com/display/KEY/Title`\n\n" "- **Server (short)**: `https://confluence.company.com/KEY/Title`\n\n" ) _SPACE_URL_FORMATS = ( "**Supported URL formats:**\n\n" "- **Cloud**: `https://company.atlassian.net/wiki/spaces/SPACEKEY`\n\n" "- **Server (long)**: `https://confluence.company.com/display/SPACEKEY`\n\n" "- **Server (short)**: 
def _init_logging() -> None:
    """Configure logging from the export settings.

    Passes the configured log level to ``setup_logging``. When
    ``save_log_to_file`` is enabled, a ``cme.log`` file next to the app config
    file is passed as the log file; otherwise no log file is used.
    """
    export_cfg = get_settings().export
    if export_cfg.save_log_to_file:
        log_target = APP_CONFIG_PATH.parent / "cme.log"
    else:
        log_target = None
    setup_logging(export_cfg.log_level, log_file=log_target)
str(stats.attachments_skipped)) if stats.attachments_removed: grid.add_row(" [dim]Removed[/dim]", str(stats.attachments_removed)) if stats.attachments_failed: grid.add_row(" [error]Failed[/error]", f"[error]{stats.attachments_failed}[/error]") grid.add_row("Output", str(output_path)) if stats.failed: title = "[warning]Export finished with errors[/warning]" else: title = "[success]Export complete[/success]" console.print(Panel(grid, title=title, expand=False)) @app.command( help=( "Export one or more Confluence pages by URL to Markdown.\n\n" "Fetches each page via the Confluence API and writes a Markdown file to the " "configured output directory (`export.output_path`). " "Pages that have not changed since the last export are skipped by default " "(`export.skip_unchanged=true`)." ), epilog=( "**Examples:**\n\n" "- `cme pages https://company.atlassian.net/wiki/spaces/KEY/pages/123/My+Page`\n\n" "- `cme pages https://...page1 https://...page2` — export multiple pages at once\n\n" "- `cme page URL` — singular alias, identical behaviour\n\n" "---\n\n" + _PAGE_URL_FORMATS ), ) def pages( page_urls: Annotated[ list[str], typer.Argument( help=( "One or more Confluence page URLs. " "Supports Cloud and Server URL formats. 
" "Example: https://company.atlassian.net/wiki/spaces/KEY/pages/123/Title" ), metavar="PAGE_URL", ), ], ) -> None: from confluence_markdown_exporter.confluence import Page from confluence_markdown_exporter.confluence import sync_removed_pages from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry _init_logging() stats = reset_stats(total=len(page_urls)) with measure(f"Export pages {', '.join(page_urls)}"): LockfileManager.init() exported_urls: set[str] = set() fetched_pages: list[Page] = [] for page_url in page_urls: with console.status(f"[dim]Fetching [highlight]{page_url}[/highlight]…[/dim]"): page = Page.from_url(page_url) PageTitleRegistry.register(int(page.id), page.title) fetched_pages.append(page) for page in fetched_pages: LockfileManager.mark_seen([page.id]) if not LockfileManager.should_export(page): stats.inc_skipped() exported_urls.add(page.base_url) continue try: with console.status(f"[dim]Exporting [highlight]{page.title}[/highlight]…[/dim]"): attachment_entries = page.export() LockfileManager.record_page(page, attachment_entries) stats.inc_exported() except Exception: logger.exception("Failed to export page %s", page.title) stats.inc_failed() exported_urls.add(page.base_url) for base_url in exported_urls: sync_removed_pages(base_url) _print_summary() app.command( name="page", help=( "Alias for `pages`. Export one or more Confluence pages by URL to Markdown.\n\n" "See `cme pages --help` for full documentation and all supported URL formats." ), epilog=( "**Example:**\n\n" "- `cme page https://company.atlassian.net/wiki/spaces/KEY/pages/123/My+Page`\n\n" ), )(pages) @app.command( help=( "Export one or more Confluence pages **and all their descendants** by URL to Markdown.\n\n" "Recursively fetches the given page(s) and every child page beneath them, " "then writes Markdown files to the configured output directory. " "Useful for exporting entire page trees without exporting a whole space." 
), epilog=( "**Examples:**\n\n" "- `cme pages-with-descendants https://company.atlassian.net/wiki/spaces/KEY/pages/123/Root`\n\n" "- `cme pages-with-descendants https://...root1 https://...root2` — multiple trees\n\n" "- `cme page-with-descendants URL` — singular alias, identical behaviour\n\n" "---\n\n" + _PAGE_URL_FORMATS ), ) def pages_with_descendants( page_urls: Annotated[ list[str], typer.Argument( help=( "One or more Confluence page URLs. " "Each page and all its descendants will be exported. " "Example: https://company.atlassian.net/wiki/spaces/KEY/pages/123/Title" ), metavar="PAGE_URL", ), ], ) -> None: from confluence_markdown_exporter.confluence import Page from confluence_markdown_exporter.confluence import sync_removed_pages _init_logging() with measure(f"Export pages {', '.join(page_urls)} with descendants"): LockfileManager.init() exported_urls: set[str] = set() for page_url in page_urls: page = Page.from_url(page_url) page.export_with_descendants() exported_urls.add(page.base_url) for base_url in exported_urls: sync_removed_pages(base_url) _print_summary() app.command( name="page-with-descendants", help=( "Alias for `pages-with-descendants`. " "Export a Confluence page and all its descendants by URL to Markdown.\n\n" "See `cme pages-with-descendants --help` for full documentation." ), epilog=( "**Example:**\n\n" "- `cme page-with-descendants https://company.atlassian.net/wiki/spaces/KEY/pages/123/Root`\n\n" ), )(pages_with_descendants) @app.command( help=( "Export **all pages** in one or more Confluence spaces by URL to Markdown.\n\n" "Fetches every page in each space via the Confluence API and writes Markdown files " "to the configured output directory. " "Pages that have not changed since the last export are skipped by default." 
), epilog=( "**Examples:**\n\n" "- `cme spaces https://company.atlassian.net/wiki/spaces/MYSPACE`\n\n" "- `cme spaces https://...SPACE1 https://...SPACE2` — export multiple spaces\n\n" "- `cme space URL` — singular alias, identical behaviour\n\n" "---\n\n" + _SPACE_URL_FORMATS ), ) def spaces( space_urls: Annotated[ list[str], typer.Argument( help=( "One or more Confluence space URLs. " "All pages within each space will be exported. " "Example: https://company.atlassian.net/wiki/spaces/MYSPACE" ), metavar="SPACE_URL", ), ], ) -> None: from confluence_markdown_exporter.confluence import Space from confluence_markdown_exporter.confluence import sync_removed_pages _init_logging() with measure(f"Export spaces {', '.join(space_urls)}"): LockfileManager.init() exported_urls: set[str] = set() for space_url in space_urls: space = Space.from_url(space_url) space.export() exported_urls.add(space.base_url) for base_url in exported_urls: sync_removed_pages(base_url) _print_summary() app.command( name="space", help=( "Alias for `spaces`. Export all pages in a Confluence space by URL to Markdown.\n\n" "See `cme spaces --help` for full documentation and all supported URL formats." ), epilog=("**Example:**\n\n- `cme space https://company.atlassian.net/wiki/spaces/MYSPACE`\n\n"), )(spaces) @app.command( help=( "Export **all spaces** of one or more Confluence organizations to Markdown.\n\n" "Iterates over every space in the organization and exports all pages in each. " "This is the broadest export scope — use `spaces` to target specific spaces, " "or `pages` / `pages-with-descendants` for finer-grained control.\n\n" "The base URL is the root of the Confluence instance, " "e.g. `https://company.atlassian.net`." 
), epilog=( "**Examples:**\n\n" "- `cme orgs https://company.atlassian.net` — export everything\n\n" "- `cme orgs https://company1.atlassian.net https://company2.atlassian.net`" " — multiple orgs\n\n" "- `cme org URL` — singular alias, identical behaviour\n\n" ), ) def orgs( base_urls: Annotated[ list[str], typer.Argument( help=( "One or more Confluence base URLs (root of the instance). " "All spaces and pages within each organization will be exported. " "Example: https://company.atlassian.net" ), metavar="BASE_URL", ), ], ) -> None: from confluence_markdown_exporter.confluence import Organization from confluence_markdown_exporter.confluence import sync_removed_pages _init_logging() with measure("Export all spaces"): LockfileManager.init() for base_url in base_urls: org = Organization.from_url(base_url) org.export() sync_removed_pages(base_url) _print_summary() app.command( name="org", help=( "Alias for `orgs`. " "Export all spaces of a Confluence organization to Markdown.\n\n" "See `cme orgs --help` for full documentation." ), epilog=("**Example:**\n\n- `cme org https://company.atlassian.net`\n\n"), )(orgs) @app.command( help="Show the installed version of confluence-markdown-exporter.", ) def version() -> None: """Display the current version.""" typer.echo(f"confluence-markdown-exporter {__version__}") _ATLASSIAN_NET = "atlassian.net" _REDACTED = "[redacted]" def _redact_url(url: str) -> str: """Redact the instance URL. Atlassian Cloud URLs (``*.atlassian.net``) are kept as ``******.atlassian.net`` so the instance type is still visible. All other URLs are fully replaced with ``[redacted]``. """ parsed = urllib.parse.urlparse(url) host = parsed.hostname or "" if host == _ATLASSIAN_NET or host.endswith(f".{_ATLASSIAN_NET}"): return f"https://******.{_ATLASSIAN_NET}" return _REDACTED def _redact_config(data: dict) -> dict: """Return a deep copy of the config dict with sensitive values redacted. 
Redacted fields: ``api_token``, ``pat``, ``username``, ``cloud_id`` (when non-empty), ``export.output_path``, and instance URL keys in ``auth.confluence`` / ``auth.jira``. """ import copy data = copy.deepcopy(data) for service in ("confluence", "jira"): auth_section: dict = data.get("auth", {}).get(service, {}) redacted_section: dict = {} for url, details in auth_section.items(): if isinstance(details, dict): for field in ("api_token", "pat", "username", "cloud_id"): if details.get(field): details[field] = _REDACTED redacted_section[_redact_url(url)] = details data.setdefault("auth", {})[service] = redacted_section if data.get("export", {}).get("output_path"): data["export"]["output_path"] = _REDACTED return data @app.command( help=( "Print diagnostic information for filing a bug report.\n\n" "Outputs the app version, Python and OS details, and the current configuration " "with all secrets redacted (API tokens and PATs are masked; " "instance URL hostnames are partially hidden).\n\n" "Paste the full output into your GitHub issue when reporting a bug." 
def get_app_config_path() -> Path:
    """Resolve the app config file location and make sure its directory exists.

    The ``CME_CONFIG_PATH`` environment variable, when set to a non-empty
    value, is used as the file path. Otherwise the file is ``app_data.json``
    inside the application directory reported by ``get_app_dir`` for
    ``confluence-markdown-exporter``. Missing parent directories are created;
    the file itself is not.
    """
    override = os.environ.get("CME_CONFIG_PATH")
    config_file = (
        Path(override)
        if override
        else Path(get_app_dir("confluence-markdown-exporter")) / "app_data.json"
    )
    config_file.parent.mkdir(parents=True, exist_ok=True)
    return config_file
APP_CONFIG_PATH = get_app_config_path() class AtlassianSdkConnectionConfig(BaseModel): """Connection parameters forwarded directly to the Atlassian SDK client constructors. Only fields that are valid constructor keyword arguments for atlassian.Confluence (ConfluenceApiSdk) and atlassian.Jira (JiraApiSdk) may be added here. """ backoff_and_retry: bool = Field( default=True, title="Enable Retry", description="Enable or disable automatic retry with exponential backoff on network errors.", ) backoff_factor: int = Field( default=2, title="Backoff Factor", description=( "Multiplier for exponential backoff between retries. " "For example, 2 means each retry waits twice as long as the previous." ), ) max_backoff_seconds: int = Field( default=60, title="Max Backoff Seconds", description="Maximum number of seconds to wait between retries.", ) max_backoff_retries: int = Field( default=5, title="Max Retries", description="Maximum number of retry attempts before giving up.", ) retry_status_codes: list[int] = Field( default_factory=lambda: [413, 429, 502, 503, 504], title="Retry Status Codes", description="HTTP status codes that should trigger a retry.", ) verify_ssl: bool = Field( default=True, title="Verify SSL", description=( "Whether to verify SSL certificates for HTTPS requests. " "Set to False only if you are sure about the security of your connection." ), ) timeout: int = Field( default=30, title="Request Timeout", description=( "Timeout in seconds for API requests. Prevents hanging on slow/unresponsive servers." ), ) class ConnectionConfig(AtlassianSdkConnectionConfig): """Full connection configuration, extending the Atlassian SDK config with app-level settings.""" use_v2_api: bool = Field( default=False, title="Use Confluence v2 REST API", description=( "Enable Confluence REST API v2 endpoints where available. " "Supported by Atlassian Cloud and Confluence Data Center 8+. " "Must be disabled for older self-hosted Confluence Server instances." 
), ) max_workers: int = Field( default=20, title="Max Workers", description=( "Maximum number of parallel workers for page export. " "Set to 1 for serial mode (useful for debugging). " "Higher values improve performance but may hit API rate limits." ), ) class ApiDetails(BaseModel): """API authentication credentials for a single instance. The instance URL is used as the dict key in AuthConfig, not stored here. """ username: SecretStr = Field( default=SecretStr(""), title="Username (email)", description="Username or email for API authentication.", ) api_token: SecretStr = Field( default=SecretStr(""), title="API Token", description=( "API token for authentication (if required). " "Create an Atlassian API token at " "https://id.atlassian.com/manage-profile/security/api-tokens. " "See Atlassian documentation for details." ), ) pat: SecretStr = Field( default=SecretStr(""), title="Personal Access Token (PAT)", description=( "Personal Access Token for authentication. " "Set this if you use a PAT instead of username+API token. " "See your Atlassian instance documentation for how to create a PAT." ), ) cloud_id: str = Field( default="", title="Cloud ID", description=( "Atlassian Cloud ID for this instance. When set, API calls are routed through " "the Atlassian API gateway (https://api.atlassian.com/ex/confluence/{cloud_id}), " "which enables the use of scoped API tokens. " "For Atlassian Cloud instances this is fetched and stored automatically. " "To find your Cloud ID manually, see " "https://support.atlassian.com/jira/kb/retrieve-my-atlassian-sites-cloud-id/." 
@model_validator(mode="before")
@classmethod
def _migrate(cls, data: object) -> object:  # noqa: C901, PLR0912
    """Migrate legacy config formats to the current URL-keyed dict format.

    Also normalises all instance URL keys (strips trailing slashes) so that
    entries written with and without a trailing slash are treated as identical.

    Three input shapes are handled per service (``confluence`` / ``jira``):

    * Legacy v1 — a single credentials dict with a top-level ``url`` field.
    * Legacy v2 — a dict keyed by arbitrary names whose values carry ``url``,
      plus an ``active_*`` pointer key.
    * Current — a dict keyed by instance URL.

    Non-dict input is returned unchanged. Dict input is updated in place and
    returned.
    """
    if not isinstance(data, dict):
        return data

    stale_keys = ("active_confluence", "active_jira")

    for service in ("confluence", "jira"):
        val = data.get(service)
        if not isinstance(val, dict):
            continue

        # Legacy v1: single ApiDetails with a 'url' field at the top level
        # e.g. {"url": "https://...", "username": "...", ...}
        if "url" in val and not _looks_like_url_keyed(val):
            url = val.pop("url", "") or ""
            # Remove stale active_* fields that were in the same dict
            val.pop("active_confluence", None)
            val.pop("active_jira", None)
            data[service] = {url.rstrip("/"): val} if url else {}

        # Legacy v2: named-key dict from the previous multi-instance refactor.
        # e.g. {"default": {"url": "https://...", ...}, "active_confluence": "default"}
        elif not _looks_like_url_keyed(val):
            migrated: dict = {}
            for k, v in val.items():
                if k in stale_keys:
                    continue
                if isinstance(v, dict):
                    inner_url = v.pop("url", "") or ""
                    if inner_url:
                        migrated[inner_url.rstrip("/")] = v
                    elif v:
                        migrated[k] = v  # keep as-is if no URL
            if migrated:
                data[service] = migrated
            else:
                # Nothing could be migrated. Still drop the stale pointer keys:
                # their string values can never be valid instance entries.
                data[service] = {k: v for k, v in val.items() if k not in stale_keys}

        else:
            # Current URL-keyed format: normalise any trailing slashes on existing keys
            normalised: dict = {}
            for k, v in val.items():
                normalised[k.rstrip("/")] = v
            data[service] = normalised

    # Drop top-level active_* fields that were stored in auth
    data.pop("active_confluence", None)
    data.pop("active_jira", None)
    return data
@staticmethod def _match_by_host(instances: dict[str, ApiDetails], url: str) -> ApiDetails | None: import urllib.parse parsed = urllib.parse.urlparse(url) host = parsed.hostname or url # Gateway URLs must match exactly — multiple tenants share api.atlassian.com. if host == "api.atlassian.com": return None for key, details in instances.items(): key_parsed = urllib.parse.urlparse(key) # Skip gateway-style keys when doing hostname-only matching if key_parsed.hostname == "api.atlassian.com": continue if key_parsed.hostname != host or key_parsed.port != parsed.port: continue # Key stored without a context path matches any context path on the same host # (e.g. stored as "https://host", URL is "https://host/confluence/spaces/...") if not key_parsed.path.strip("/"): return details # Key stored with a context path must be a prefix of the lookup URL's path # (e.g. stored as "https://host/confluence", URL is "https://host/confluence/spaces/...") if parsed.path.startswith(key_parsed.path): return details return None def _looks_like_url_keyed(d: dict) -> bool: """Return True if the dict looks like it's already keyed by URLs (not by field names).""" return any(k.startswith(("http://", "https://")) for k in d) def normalize_instance_url(url: str) -> str: """Strip trailing slashes from an instance URL for consistent key storage.""" return url.rstrip("/") class ExportConfig(BaseModel): """Export settings for markdown and attachments.""" log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = Field( default="INFO", title="Log Level", description=( "Controls how much output the exporter prints. " "DEBUG shows every step, INFO shows key milestones, " "WARNING shows only warnings and errors, ERROR shows only errors. " "In CI environments (CI=true / NO_COLOR set) rich formatting is suppressed " "automatically." ), ) save_log_to_file: bool = Field( default=False, title="Save Log To File", description=( "Also write log records to a file alongside the console output. 
" "The file is named 'cme.log' and lives next to the config file " "(see 'cme config path'). Useful for capturing long DEBUG runs." ), ) output_path: Path = Field( default=Path(), title="Output Path", description=("Directory where exported pages and attachments will be saved."), examples=[ "`.`: Output will be saved relative to the current working directory.", ( "`./confluence_export`: Output will be saved in a folder `confluence_export` " "relative to the current working directory." ), "`/path/to/export`: Output will be saved in the specified absolute path.", ], ) page_href: Literal["absolute", "relative", "wiki"] = Field( default="relative", title="Page Href Style", description=( "How to generate page href paths. Options: absolute, relative, wiki.\n" " - `relative` links are relative to the page\n" " - `absolute` links start from the configured output path\n" " - `wiki` generates Obsidian-style [[Page Title]] wiki links" ), ) page_path: str = Field( default="{space_name}/{homepage_title}/{ancestor_titles}/{page_title}.md", title="Page Path Template", description=( "Template for exported page file paths.\n" "Available variables:\n" " - {space_key}: The key of the Confluence space.\n" " - {space_name}: The name of the Confluence space.\n" " - {homepage_id}: The ID of the homepage of the Confluence space.\n" " - {homepage_title}: The title of the homepage of the Confluence space.\n" " - {ancestor_ids}: A slash-separated list of ancestor page IDs.\n" " - {ancestor_titles}: A slash-separated list of ancestor page titles.\n" " - {page_id}: The unique ID of the Confluence page.\n" " - {page_title}: The title of the Confluence page.\n" ), examples=["{space_name}/{page_title}.md"], ) attachment_href: Literal["absolute", "relative", "wiki"] = Field( default="relative", title="Attachment Href Style", description=( "How to generate attachment href paths. 
Options: absolute, relative, wiki.\n" " - `relative` links are relative to the page\n" " - `absolute` links start from the configured output path\n" " - `wiki` generates Obsidian-style ![[Attachment Name]] wiki links" ), ) attachment_path: str = Field( default="{space_name}/attachments/{attachment_file_id}{attachment_extension}", title="Attachment Path Template", description=( "Template for exported attachment file paths.\n" "Available variables:\n" " - {space_key}: The key of the Confluence space.\n" " - {space_name}: The name of the Confluence space.\n" " - {homepage_id}: The ID of the homepage of the Confluence space.\n" " - {homepage_title}: The title of the homepage of the Confluence space.\n" " - {ancestor_ids}: A slash-separated list of ancestor page IDs.\n" " - {ancestor_titles}: A slash-separated list of ancestor page titles.\n" " - {attachment_id}: The unique ID of the attachment.\n" " - {attachment_title}: The title of the attachment (without file extension).\n" " - {attachment_file_id}: The file ID of the attachment. Falls back to " "{attachment_id} on Confluence Data Center / Server, where the API does " "not provide a file ID.\n" " - {attachment_extension}: The file extension of the attachment,\n" "including the leading dot." ), examples=["{space_name}/attachments/{attachment_file_id}{attachment_extension}"], ) @field_validator("attachment_path", mode="before") @classmethod def _migrate_attachment_path(cls, v: object) -> object: """Migrate templates that used {attachment_title} as the full filename. Before this change, {attachment_title} included the file extension. Templates that relied on that (i.e. no explicit {attachment_extension}) are silently updated so file extensions are preserved. 
""" if ( isinstance(v, str) and "{attachment_title}" in v and "{attachment_extension}" not in v ): return v.replace("{attachment_title}", "{attachment_title}{attachment_extension}") return v attachments_export: Literal["referenced", "all", "disabled"] = Field( default="referenced", title="Attachments Export", description=( "Which attachments to download to disk:\n" " referenced: only attachments referenced from the page body (default)\n" " all: every attachment on the page (slower, more disk and bandwidth)\n" " disabled: skip the download entirely - no files written, no lockfile\n" " entries, no lockfile lookup. Attachment metadata is still fetched\n" " from the Confluence API so image and file links in the page body\n" " continue to resolve, but the referenced files will not exist locally." ), ) image_captions: bool = Field( default=False, title="Image Captions", description=( "Whether to export Confluence image captions in the exported Markdown.\n" "When enabled, the storage format of each page is fetched via an additional " "API expansion to extract caption text from `ac:image` elements.\n" "Captions are rendered as an italic line directly below the image:\n" " ![](image.png)\n" " *Caption text*" ), ) page_breadcrumbs: bool = Field( default=True, title="Page Breadcrumbs", description="Whether to include breadcrumb links at the top of the page.", ) page_properties_format: Literal[ "frontmatter", "table", "frontmatter_and_table", "dataview-inline-field", "meta-bind-view-fields", ] = Field( default="frontmatter_and_table", title="Page Properties Format", description=( "How to render Confluence Page Properties macros (Page Properties macro).\n" " frontmatter: extract to YAML front matter only (table removed from content)\n" " table: keep as markdown table only (no metadata)\n" " frontmatter_and_table: front matter + keep original table in content (default)\n" " dataview-inline-field: replace table with Dataview Key:: Value inline fields\n" " meta-bind-view-fields: 
front matter + Meta Bind VIEW fields inline (requires plugin)" ), ) page_properties_report_format: Literal["frozen", "dataview"] = Field( default="frozen", title="Page Properties Report Format", description=( "How to render Confluence Page Properties Report macros.\n" " frozen: export the rendered table as a static markdown table (default)\n" " dataview: translate the CQL query to an Obsidian Dataview DQL code block;\n" " requires the Dataview plugin and all referenced child pages to be exported\n" " with their page properties as frontmatter; falls back to frozen on failure" ), ) confluence_url_in_frontmatter: Literal["none", "webui", "tinyui", "both"] = Field( default="none", title="Confluence URL in Front Matter", description=( "Whether to include the original Confluence page URL in YAML front matter.\n" " none: do not include (default)\n" " webui: include human-readable URL as `confluence_webui_url`\n" " tinyui: include stable short permalink as `confluence_tinyui_url`\n" " both: include both fields\n" "If a Page Properties macro already defines one of these keys, " "the macro value takes precedence." ), ) page_metadata_in_frontmatter: bool = Field( default=False, title="Page Metadata in Front Matter", description=( "If True, add eight Confluence page metadata fields to the YAML " "front matter of each exported page: confluence_page_id, " "confluence_space_key, confluence_type (page or blogpost), " "confluence_created (ISO 8601, original creation timestamp), " "confluence_created_by (display name of the original author), " "confluence_last_modified (ISO 8601, value of the most recent " "version including minor edits), confluence_last_modified_by " "(display name), confluence_version (integer). Existing keys " "with the same name on the page (e.g. via a Page Properties " "macro) take precedence." 
), ) @model_validator(mode="before") @classmethod def _migrate_page_properties(cls, data: object) -> object: """Migrate legacy page_properties_as_front_matter bool to page_properties_format.""" if not isinstance(data, dict): return data old_val = data.pop("page_properties_as_front_matter", None) if old_val is not None and "page_properties_format" not in data: if str(old_val).lower() in ("false", "0"): data["page_properties_format"] = "table" else: data["page_properties_format"] = "frontmatter" return data @model_validator(mode="before") @classmethod def _migrate_attachments_export(cls, data: object) -> object: """Migrate legacy attachment_export_all bool to attachments_export literal.""" if not isinstance(data, dict): return data old_val = data.pop("attachment_export_all", None) if old_val is not None and "attachments_export" not in data: data["attachments_export"] = ( "all" if str(old_val).lower() in ("true", "1") else "referenced" ) return data @model_validator(mode="before") @classmethod def _migrate_inline_comments(cls, data: object) -> object: """Migrate legacy inline_comments bool to comments_export literal.""" if not isinstance(data, dict): return data old_val = data.pop("inline_comments", None) if old_val is not None and "comments_export" not in data: data["comments_export"] = ( "inline" if str(old_val).lower() in ("true", "1") else "none" ) return data filename_encoding: str = Field( default='"<":"_",">":"_",":":"_","\\"":"_","/":"_","\\\\":"_","|":"_","?":"_","*":"_","\\u0000":"_","[":"_","]":"_","\'":"_","’":"_","´":"_","`":"_"', # noqa: RUF001 title="Filename Encoding", description=( "List character-to-replacement pairs, separated by commas. " 'Each pair is written as "character":"replacement". ' "Leave empty to disable all character replacements." 
), examples=[ '" ":"-","-":"%2D"', # Replace spaces with dash and dashes with %2D '"=":" equals "', # Replace equals sign with " equals " ], ) filename_length: int = Field( default=255, title="Filename Length", description="Maximum length of the filename.", ) filename_lowercase: bool = Field( default=False, title="Enforce lowercase paths", description=( "Make all paths/files lowercase.\nBy default the original casing will be retained.\n" ), ) include_document_title: bool = Field( default=True, title="Include Document Title", description=( "Whether to include the document title in the exported markdown file. " "If enabled, the title will be added as a top-level heading." ), ) include_toc: bool = Field( default=True, title="Export Table of Contents", description=( "Whether to export the Confluence Table of Contents macro. " "When enabled (default), the TOC is converted to markdown. " "When disabled, the TOC macro is removed from the output." ), ) include_macro: Literal["inline", "transclusion"] = Field( default="inline", title="Include Macro Rendering", description=( "How to render Confluence `include` and `excerpt-include` macros.\n" " inline: expand the referenced page content inline (default)\n" " transclusion: emit an Obsidian-style `![[Page Title]]` embed link;\n" " the referenced page must also be exported for the link to resolve" ), ) enable_jira_enrichment: bool = Field( default=True, title="Enable Jira Enrichment", description=( "Whether to fetch Jira issue data to enrich Confluence pages. " "When enabled, Jira issue links will include the issue summary. " "When disabled, only the issue key and link will be included. " "Requires Jira auth to be configured." ), ) comments_export: Literal["none", "inline", "footer", "all"] = Field( default="none", title="Export Comments", description=( "Which comments to export to a sidecar '.comments.md' file placed " "next to the exported page file. " "'none' — no sidecar. 
" "'inline' — open inline comments only (annotated text shown as a " "blockquote, then author/date/body). " "'footer' — open page-level (footer) comments only. " "'all' — both, in a single sidecar with two sections " "('## Inline comments' first, then '## Page comments'). " "Resolved comments are skipped. Replies are listed flat below " "the parent comment. Disabled by default — adds one to two extra " "API calls per page when enabled." ), ) convert_status_badges: bool = Field( default=True, title="Convert Status Badges", description=( "Whether to convert Confluence status badge macros " "() " "to HTML elements coloured with the badge's background colour. " "When disabled, only the badge label text is kept." ), ) convert_text_highlights: bool = Field( default=True, title="Convert Text Highlights", description=( "Whether to convert Confluence text highlights " "() " "to HTML elements with a hex color. " "When disabled, the highlight span is stripped and only the text is kept." ), ) convert_font_colors: bool = Field( default=True, title="Convert Font Colors", description=( "Whether to convert Confluence font colors " "( or ) " "to HTML elements with a hex color. " "When disabled, the color span is stripped and only the text is kept." ), ) skip_unchanged: bool = Field( default=True, title="Skip Unchanged Pages", description=( "Skip exporting pages that have not changed since last export." " Uses a lockfile to track page versions." ), ) cleanup_stale: bool = Field( default=True, title="Cleanup Stale Files", description=( "After export, delete local files for pages that have been removed " "from Confluence or whose export path has changed." 
class _JsonConfigSource(PydanticBaseSettingsSource):
    """Settings source backed by the JSON config file.

    It is listed after the environment source, so values from ENV vars win
    over whatever this source returns.
    """

    def get_field_value(self, field: Any, field_name: str) -> Any:  # noqa: ANN401
        # Per-field lookup is not used; the whole mapping comes from __call__.
        return None, field_name, False

    def field_is_complex(self, field: Any) -> bool:  # noqa: ANN401
        return True

    def __call__(self) -> dict[str, Any]:
        """Return the stored config as a plain dict, or the defaults if unusable."""
        if not APP_CONFIG_PATH.exists():
            return ConfigModel().model_dump()

        try:
            stored = json.loads(APP_CONFIG_PATH.read_text(encoding="utf-8"))
            validated = ConfigModel(**stored)
            return validated.model_dump()
        except Exception:  # noqa: BLE001
            # Best effort: an unreadable or invalid file must not block startup.
            return ConfigModel().model_dump()
def load_app_data() -> dict[str, dict]:
    """Load application data from the config file, returning a validated dict.

    Loading is best effort. The defaults are returned when the file is missing,
    cannot be read, is not valid JSON, does not contain a JSON object at the top
    level, or fails model validation.

    Returns:
        The config as produced by ``ConfigModel.model_dump()``.
    """
    data: object = {}
    if APP_CONFIG_PATH.exists():
        # ValueError covers JSONDecodeError and UnicodeDecodeError; OSError covers
        # a file that vanished or became unreadable after the exists() check.
        with contextlib.suppress(OSError, ValueError):
            data = json.loads(APP_CONFIG_PATH.read_text(encoding="utf-8"))

    # Valid JSON need not be an object ("[]", "null", "3"); unpacking such a
    # value with ** would raise TypeError instead of falling back to defaults.
    if not isinstance(data, dict):
        data = {}

    try:
        return ConfigModel(**data).model_dump()
    except ValidationError:
        return ConfigModel().model_dump()
def get_default_value_by_path(path: str | None = None) -> object:
    """Get the default value for a given config path, or the whole config if path is None.

    Only declared model fields and dict keys are traversed. Other attributes
    that happen to exist on the objects (for example ``model_dump`` on a model
    or ``items`` on a dict) are rejected rather than returned.

    Args:
        path: Dot-separated config path such as ``"export.log_level"``. An empty
            string or ``None`` selects the whole config.

    Returns:
        The default value at *path*; sub-models are returned as plain dicts.

    Raises:
        KeyError: If any component of *path* is not a field or dict key.
    """
    model = ConfigModel()
    if not path:
        return model.model_dump()

    current: object = model
    for k in path.split("."):
        if isinstance(current, BaseModel) and k in type(current).model_fields:
            current = getattr(current, k)
        elif isinstance(current, dict) and k in current:
            current = current[k]
        else:
            msg = f"Invalid config path: {path}"
            raise KeyError(msg)

    if isinstance(current, BaseModel):
        return current.model_dump()
    return current
Otherwise, reset the specified path. """ if path is None: save_app_data(ConfigModel()) return data = load_app_data() default_value = get_default_value_by_path(path) _set_by_path(data, path, default_value) settings = ConfigModel.model_validate(data) save_app_data(settings) ================================================ FILE: confluence_markdown_exporter/utils/config_interactive.py ================================================ from pathlib import Path from typing import Literal from typing import get_args from typing import get_origin import jmespath import questionary from pydantic import BaseModel from pydantic import SecretStr from pydantic import ValidationError from questionary import Choice from questionary import Style from confluence_markdown_exporter.api_clients import ensure_service_gateway_url from confluence_markdown_exporter.utils.app_data_store import ConfigModel from confluence_markdown_exporter.utils.app_data_store import get_app_config_path from confluence_markdown_exporter.utils.app_data_store import get_settings from confluence_markdown_exporter.utils.app_data_store import reset_to_defaults from confluence_markdown_exporter.utils.app_data_store import save_app_data from confluence_markdown_exporter.utils.app_data_store import set_setting from confluence_markdown_exporter.utils.app_data_store import set_setting_with_keys custom_style = Style( [ ("key", "fg:#00b8d4 bold"), # cyan bold for key ("value", "fg:#888888 italic"), # gray italic for value ("pointer", "fg:#00b8d4 bold"), ("highlighted", "fg:#00b8d4 bold"), ] ) def _get_field_type(model: type[BaseModel], key: str) -> type | None: # Handles both Pydantic v1 and v2 if hasattr(model, "model_fields"): # v2 return model.model_fields[key].annotation return model.__annotations__[key] def _get_submodel(model: type[BaseModel], key: str) -> type[BaseModel] | None: if hasattr(model, "model_fields"): sub = model.model_fields[key].annotation else: sub = model.__annotations__[key] # Only return 
submodel if it's a subclass of BaseModel if isinstance(sub, type): try: if issubclass(sub, BaseModel): return sub except TypeError: # sub is not a class or not suitable for issubclass return None return None def _get_field_metadata(model: type[BaseModel], key: str) -> dict: # Support jmespath-style dot-separated paths for nested fields if "." in key: keys = key.split(".") key = keys[-1] # Returns dict with title, description, examples for a field if hasattr(model, "model_fields"): # Pydantic v2 field = model.model_fields[key] return { "title": getattr(field, "title", None), "description": getattr(field, "description", None), "examples": getattr(field, "examples", None), } # Pydantic v1 fallback field = model.model_fields[key] return { "title": getattr(field, "title", None), "description": getattr(field, "description", None), "examples": getattr(field, "example", None), } def _format_prompt_message(key_name: str, model: type[BaseModel]) -> str: meta = _get_field_metadata(model, key_name) lines = [] # Title if meta["title"]: lines.append(f"{meta['title']}\n") else: lines.append(f"{key_name}\n") # Description if meta["description"]: lines.append(meta["description"]) # Examples if meta["examples"]: ex = meta["examples"] if isinstance(ex, list | tuple) and ex: lines.append("\nExamples:") lines.extend(f" • {example}" for example in ex) # Instruction lines.append(f"\nChange {meta['title']} to:") return "\n".join(lines) def _validate_int(val: str) -> bool | str: return val.isdigit() or "Must be an integer" def _validate_pydantic(val: object, model: type[BaseModel], key_name: str) -> bool | str: try: data = model().model_dump() data[key_name] = val model(**data) except ValidationError as e: return str(e.errors()[0]["msg"]) else: return True def _prompt_literal(prompt_message: str, field_type: type, current_value: object) -> object: options = list(get_args(field_type)) return questionary.select( prompt_message, choices=[str(opt) for opt in options], 
def _prompt_int(prompt_message: str, current_value: object) -> object:
    """Ask for an integer and return it, or ``None`` if cancelled or unparsable."""
    question = questionary.text(
        prompt_message,
        default=str(current_value),
        validate=_validate_int,
        style=custom_style,
    )
    raw_answer = question.ask()
    if raw_answer is None:
        return None

    try:
        parsed = int(raw_answer)
    except ValueError:
        questionary.print("Invalid integer value.")
        return None
    return parsed
1,2,3 or [1,2,3]).") return None def _prompt_str( prompt_message: str, current_value: object, model: type[BaseModel], key_name: str, ) -> object: return questionary.text( prompt_message, default=str(current_value), validate=lambda val: _validate_pydantic(val, model, key_name), style=custom_style, ).ask() def get_model_by_path(model: type[BaseModel], path: str) -> type[BaseModel]: """Traverse a Pydantic model class using a dot-separated path and return the submodel class.""" keys = path.split(".") for key in keys: # Try direct submodel first sub = _get_submodel(model, key) if sub is not None: model = sub continue # Try dict[str, SomeModel] — the key may be a field name or an instance name if hasattr(model, "model_fields") and key in model.model_fields: dict_sub = _get_dict_value_model(model, key) if dict_sub is not None: model = dict_sub continue # key is an instance name inside a dict[str, SomeModel] — model stays the same return model def _get_dict_value_model(model: type[BaseModel], key: str) -> type[BaseModel] | None: """If the field annotation is dict[str, SomeModel], return SomeModel; else None.""" if hasattr(model, "model_fields"): annotation = model.model_fields[key].annotation else: annotation = model.__annotations__.get(key) if annotation is None: return None origin = get_origin(annotation) if origin is dict: args = get_args(annotation) if len(args) == 2 and isinstance(args[1], type): # noqa: PLR2004 try: if issubclass(args[1], BaseModel): return args[1] except TypeError: pass return None def _edit_instance_fields( # noqa: C901, PLR0912 instance_key: str, instance_data: dict, item_model: type[BaseModel], parent_path_parts: list[str], ) -> str | None: """Edit the fields of a single named instance using set_setting_with_keys. This avoids the dot-split path system so URL keys (which contain dots) work correctly. Returns ``"__remove__"`` if the user chose to remove this instance, else ``None``. 
""" selected_field: str | None = None while True: choices = [] for k, v in instance_data.items(): if v is None: continue try: meta = _get_field_metadata(item_model, k) display_title = meta["title"] if meta and meta["title"] else k except (KeyError, AttributeError): display_title = k display_val = "Not set" if isinstance(v, str | SecretStr) and str(v) == "" else v choices.append( Choice( title=[ ("class:key", str(display_title)), ("class:value", f" {display_val}"), ], value=k, ) ) choices.append(Choice(title="[Remove]", value="__remove__")) choices.append(Choice(title="[Back]", value="__back__")) field_key = questionary.select( f"Edit credentials for '{instance_key}':", choices=choices, style=custom_style, default=selected_field, ).ask() if field_key == "__back__" or field_key is None: return None if field_key == "__remove__": confirm = questionary.confirm( f"Remove instance '{instance_key}'?", default=False, style=custom_style ).ask() if confirm: return "__remove__" continue selected_field = field_key current_val = instance_data.get(field_key) while True: new_val = _prompt_for_new_value(field_key, current_val, item_model) if new_val is not None: try: set_setting_with_keys([*parent_path_parts, instance_key, field_key], new_val) instance_data[field_key] = new_val questionary.print(f"Updated {field_key}.") # Offer cross-service sync for auth credential fields if len(parent_path_parts) >= 2 and parent_path_parts[0] == "auth": # noqa: PLR2004 _maybe_sync_auth_change( parent_path_parts[1], instance_key, field_key, new_val, current_val ) break except (ValueError, TypeError) as e: questionary.print(f"Error: {e}") retry = questionary.confirm("Try again?", style=custom_style).ask() if not retry: break else: break _SERVICE_PAIRS = {"confluence": "jira", "jira": "confluence"} def _maybe_sync_new_instance(instance_url: str, parent_path_parts: list[str]) -> None: """After configuring a new instance, offer to copy its credentials to the paired service. 
def _remove_persisted_instance(parent_path_parts: list[str], instance_url: str) -> None:
    """Delete *instance_url* from the dict at *parent_path_parts* and save the config."""
    current = get_settings().model_dump()
    sub: dict = current
    for k in parent_path_parts:
        sub = sub[k]
    sub.pop(instance_url, None)
    save_app_data(ConfigModel.model_validate(current))


def _edit_instance_dict_loop(  # noqa: C901, PLR0912, PLR0915
    instances: dict,
    item_model: type[BaseModel],
    parent_key: str,
    new_instance_url: str | None = None,
) -> None:
    """Interactive loop for managing a dict[str, BaseModel] (URL-keyed instances).

    When *new_instance_url* is provided the loop skips the selection list and jumps
    directly to editing that specific URL (creating a blank entry first if needed).
    This is used when an export command detects missing auth for a known URL.

    Args:
        instances: The in-memory instance dict; it is updated in place to mirror
            what is written to the config file.
        item_model: Model class used to create a blank entry for a new instance.
        parent_key: Dot-separated config path of the dict, e.g. ``"auth.confluence"``.
        new_instance_url: Optional URL to edit directly instead of showing the menu.
    """
    parent_path_parts = parent_key.split(".")

    def _validate_new_url(raw: str) -> bool | str:
        # Validate the key exactly as it will be stored (whitespace and trailing
        # slashes removed). Otherwise "https://x/" passes the duplicate check
        # and then overwrites the existing "https://x" entry with a blank one.
        candidate = raw.strip().rstrip("/")
        if not candidate:
            return "URL cannot be empty"
        if candidate in instances:
            return "Instance already exists"
        return True

    # If a specific URL was requested, jump straight to its editor and then return.
    if new_instance_url:
        new_instance_url = new_instance_url.strip().rstrip("/")
        if new_instance_url not in instances:
            blank = item_model()
            set_setting_with_keys([*parent_path_parts, new_instance_url], blank.model_dump())
            instances[new_instance_url] = blank.model_dump()
        current_val = instances.get(new_instance_url, {})
        if not isinstance(current_val, dict):
            current_val = current_val.model_dump()  # type: ignore[union-attr]
        result = _edit_instance_fields(new_instance_url, current_val, item_model, parent_path_parts)
        if result == "__remove__":
            instances.pop(new_instance_url, None)
            _remove_persisted_instance(parent_path_parts, new_instance_url)
        else:
            _maybe_sync_new_instance(new_instance_url, parent_path_parts)
        return

    while True:
        choices = [
            Choice(title=[("class:key", instance_url)], value=("edit", instance_url))
            for instance_url in instances
        ]
        choices.append(Choice(title="[Add instance]", value=("add", None)))
        choices.append(Choice(title="[Back]", value=("back", None)))

        # ask() returns None when the prompt is cancelled; treat that as "back".
        action, instance_url = questionary.select(
            f"Manage instances for '{parent_key}':",
            choices=choices,
            style=custom_style,
        ).ask() or ("back", None)

        if action == "back" or action is None:
            return

        if action == "add":
            new_url = questionary.text(
                "Enter the base URL for the new instance (e.g. https://company.atlassian.net):",
                validate=_validate_new_url,
                style=custom_style,
            ).ask()
            if new_url:
                new_url = new_url.strip().rstrip("/")
            if new_url:
                new_instance = item_model()
                set_setting_with_keys([*parent_path_parts, new_url], new_instance.model_dump())
                instances[new_url] = new_instance.model_dump()
            continue

        if action == "edit" and instance_url:
            current_val = instances.get(instance_url, {})
            if not isinstance(current_val, dict):
                current_val = current_val.model_dump()  # type: ignore[union-attr]
            result = _edit_instance_fields(
                instance_url,
                current_val,
                item_model,
                parent_path_parts,
            )
            if result == "__remove__":
                instances.pop(instance_url, None)
                _remove_persisted_instance(parent_path_parts, instance_url)
                continue
            # Refresh from the effective settings so the menu shows the saved values
            updated = get_settings().model_dump()
            sub = updated
            for k in parent_path_parts:
                sub = sub[k]
            instances[instance_url] = sub.get(instance_url, current_val)
def _prompt_for_new_value(
    key_name: str,
    current_value: object,
    model: type[BaseModel],
) -> object:
    """Ask the user for a new value of *key_name*, using a prompt suited to its type.

    The field's annotation on *model* selects the prompt helper: literal
    choices, bool, path, int, list, and otherwise free text. A ``SecretStr``
    current value is unwrapped before being handed to the text prompt.

    Args:
        key_name: Name of the field on *model* being edited.
        current_value: The value currently stored for the field.
        model: Pydantic model that declares the field.

    Returns:
        Whatever the selected prompt helper returns.
    """
    annotation = _get_field_type(model, key_name)
    origin = get_origin(annotation)
    message = _format_prompt_message(key_name, model)
    if annotation is None:
        # No annotation found: fall back to a plain text prompt.
        annotation = str

    if origin is Literal:
        answer = _prompt_literal(message, annotation, current_value)
    elif annotation is bool:
        answer = _prompt_bool(message, current_value)
    elif annotation is Path:
        answer = _prompt_path(message, current_value, model, key_name)
    elif annotation is int:
        answer = _prompt_int(message, current_value)
    elif annotation is list or origin is list:
        answer = _prompt_list(message, current_value)
    else:
        shown = (
            current_value.get_secret_value()
            if isinstance(current_value, SecretStr)
            else current_value
        )
        answer = _prompt_str(message, shown, model, key_name)
    return answer
def _reset_and_reload(parent_key: str | None, display_title: str | None = None) -> None:
    """Ask for confirmation, then reset the whole config or one section to defaults.

    This function only performs the reset and reports it. It does not return
    the new settings, so callers that hold a copy of the config must read it
    again afterwards.

    Args:
        parent_key: Dot-separated path of the section to reset. ``None`` (or an
            empty string) resets the entire config.
        display_title: Human-readable section name used in the messages. Falls
            back to *parent_key* when omitted.
    """
    # Decide once whether this is a section reset, so the confirmation text, the
    # reset itself and the final message always describe the same action.
    section_label = (display_title or parent_key) if parent_key else None

    if section_label is None:
        confirm_msg = "Are you sure you want to reset all config to defaults?"
    else:
        confirm_msg = f"Are you sure you want to reset section '{section_label}' to defaults?"

    if not questionary.confirm(confirm_msg, style=custom_style).ask():
        return

    reset_to_defaults(parent_key or None)

    if section_label is None:
        questionary.print("Config reset to defaults.")
    else:
        questionary.print(f"Section '{section_label}' reset to defaults.")
selected_key if key == "__reset_section__": _reset_and_reload(parent_key, display_title) # Reload the updated config_dict for this section from disk updated = get_settings().model_dump() if parent_key: # Traverse to the correct nested dict for jmespath/dot-paths keys = parent_key.split(".") sub = updated for k in keys: sub = sub[k] config_dict.clear() config_dict.update(sub) else: config_dict.clear() config_dict.update(updated) selected_key = None continue current_value = config_dict[key] if key else None # Check for dict[str, BaseModel] (named instances, e.g. auth.confluence) dict_value_model = _get_dict_value_model(model, key) if isinstance(current_value, dict) and dict_value_model is not None: _edit_instance_dict_loop( current_value, dict_value_model, f"{parent_key}.{key}" if parent_key else key, ) selected_key = key # Might have updated other service auth config # Reload the updated config_dict for this section from disk updated = get_settings().model_dump() if parent_key: # Traverse to the correct nested dict for jmespath/dot-paths keys = parent_key.split(".") sub = updated for k in keys: sub = sub[k] config_dict.clear() config_dict.update(sub) else: config_dict.clear() config_dict.update(updated) continue submodel = _get_submodel(model, key) if isinstance(current_value, dict) and submodel is not None: # Always set selected_key to the submenu key after returning _edit_dict_config_loop( current_value, submodel, f"{parent_key}.{key}" if parent_key else key, model, last_selected=None, ) selected_key = key else: while True: value_cast = _prompt_for_new_value(key, current_value, model) if value_cast is not None: try: set_setting(f"{parent_key}.{key}" if parent_key else key, value_cast) config_dict[key] = value_cast questionary.print(f"{parent_key}.{key} updated to {value_cast}.") selected_key = key break except (ValueError, TypeError) as e: questionary.print(f"Error: {e}") retry = questionary.confirm("Try again?", style=custom_style).ask() if not retry: break else: 
def main_config_menu_loop(  # noqa: C901, PLR0912
    jump_to: str | None = None,
    new_instance_url: str | None = None,
) -> None:
    """Run the interactive configuration menu.

    With *jump_to* set, the matching section is opened directly and the
    function returns once that editor is closed. Without it, the top-level menu
    is shown repeatedly until the user exits or cancels.

    Args:
        jump_to: Dot-separated settings path to open directly. It may point at
            a section or at a single value; for a value, the containing section
            is opened with that value preselected.
        new_instance_url: Passed on to the instance editor when *jump_to*
            resolves to a URL-keyed instance dict.
    """
    settings = get_settings().model_dump()
    if jump_to:
        submenu = jmespath.search(jump_to, settings)
        preselect: str | None = None
        if not isinstance(submenu, dict):
            # jump_to points to a leaf value — open its parent section with cursor on that item
            leaf_key = jump_to.rsplit(".", 1)[-1]
            jump_to = jump_to.rsplit(".", 1)[0] if "." in jump_to else jump_to
            submenu = jmespath.search(jump_to, settings)
            preselect = leaf_key
        # Model that declares the section being opened (the root model for top-level keys).
        parent_path = jump_to.rsplit(".", 1)[0] if "." in jump_to else None
        parent_model = get_model_by_path(ConfigModel, parent_path) if parent_path else ConfigModel
        # If jump_to resolves to a dict[str, BaseModel] field (URL-keyed instances such as
        # auth.confluence), delegate directly to the instance-dict editor so that
        # URL keys are never mistaken for Pydantic field names.
        last_segment = jump_to.rsplit(".", 1)[-1] if "." in jump_to else jump_to
        dict_value_model = _get_dict_value_model(parent_model, last_segment)
        if dict_value_model is not None and isinstance(submenu, dict):
            _edit_instance_dict_loop(
                submenu, dict_value_model, jump_to, new_instance_url=new_instance_url
            )
            return
        submodel = get_model_by_path(ConfigModel, jump_to)
        _edit_dict_config(submenu, submodel, jump_to, parent_model, last_selected=preselect)
        return

    # No jump target: show the top-level menu until the user leaves it.
    last_selected = None
    while True:
        # Re-read the settings on every pass so the menu shows the latest values.
        settings = get_settings().model_dump()
        key, is_dict = _main_config_menu(settings, default=last_selected)
        if key == "__reset__":
            _reset_and_reload(None)
            last_selected = None
            continue
        if key == "__exit__" or key is None:
            break
        last_selected = (key, is_dict)
        current_value = settings[key]
        if is_dict:
            submodel = _get_submodel(ConfigModel, key)
            if submodel is not None:
                returned_key = _edit_dict_config(
                    current_value, submodel, key, ConfigModel, last_selected=None
                )
                # NOTE(review): returned_key is the key last selected inside the submenu.
                # The top-level menu presumably preselects only values equal to one of
                # its own (key, is_dict) choices — confirm (returned_key, True) is intended.
                last_selected = (key, is_dict) if returned_key is None else (returned_key, True)
        else:
            # Keep prompting until the value is saved, left unchanged, or the user gives up.
            while True:
                value_cast = _prompt_for_new_value(key, current_value, ConfigModel)
                if value_cast is None or value_cast == current_value:
                    # User cancelled or made no change: do not update config
                    break
                try:
                    set_setting(key, value_cast)
                    questionary.print(f"{key} updated to {value_cast}.")
                    last_selected = (key, is_dict)
                    break
                except (ValueError, TypeError) as e:
                    questionary.print(f"Error: {e}")
                    retry = questionary.confirm("Try again?", style=custom_style).ask()
                    if not retry:
                        break
Args: file_path: Path to the DrawIO file (.drawio) Returns: The XML content as a string, or None if file doesn't exist """ file_path = Path(file_path) if not file_path.exists(): return None return file_path.read_text(encoding="utf-8") def extract_mermaid_data(xml_content: str) -> str | None: """Extract mermaid data from DrawIO XML. Args: xml_content: The XML content as a string. Returns: The extracted mermaid data string or None if not found. """ try: soup = BeautifulSoup(xml_content, "xml") # Search for UserObject tag (XML parser preserves case) user_object = soup.find("UserObject") if user_object is None: return None try: attrs = cast( "dict[str, str]", user_object.attrs, # type: ignore[attr-defined] ) # XML parser preserves attribute case as mermaidData mermaid_data_attr = attrs.get("mermaidData") if mermaid_data_attr is None: return None # Unescape HTML entities if present return html.unescape(mermaid_data_attr) except AttributeError: return None except Exception: # pylint: disable=broad-except logger.exception("Error extracting mermaid data from DrawIO XML") return None def parse_mermaid_json(mermaid_data: str) -> str | None: """Parse mermaid data from JSON format and extract the diagram definition. The mermaid data is often stored as JSON with a "data" field containing the actual mermaid diagram as a string. Args: mermaid_data: The raw mermaid data string (may be JSON-formatted) Returns: The mermaid diagram string, or the input if already in plain format """ try: # Try to parse as JSON parsed = json.loads(mermaid_data) if isinstance(parsed, dict) and "data" in parsed: return parsed["data"] except (json.JSONDecodeError, TypeError): # If not JSON, return as-is (already a plain diagram string) pass return mermaid_data def format_mermaid_markdown(mermaid_diagram: str) -> str: """Format mermaid diagram as a markdown code fence. 
def parse_encode_setting(encode_setting: str) -> dict[str, str]:
    """Turn the filename-encoding setting into a character replacement mapping.

    The setting is the inside of a JSON object, written without the
    surrounding curly braces; they are added here before parsing.

    Args:
        encode_setting: JSON object content without braces
            '"char1":"replacement1","char2":"replacement2"'

    Returns:
        Dictionary mapping characters to their replacements. Empty when the
        setting is empty or cannot be parsed.

    Examples:
        "" -> {}
        '" ":"%2D","-":"%2D"' -> {" ": "%2D", "-": "%2D"}
        '"=":" equals "' -> {"=": " equals "}
    """
    if not encode_setting:
        return {}

    wrapped = f"{{{encode_setting}}}"
    try:
        decoded = json.loads(wrapped)
    except (json.JSONDecodeError, TypeError):
        # Unparseable setting: behave as if no mapping was configured.
        return {}
    return decoded if isinstance(decoded, dict) else {}
def sanitize_key(s: str, connector: str = "_") -> str:
    """Convert an input string to a valid Python/YAML-compatible key.

    - Lowercase the string.
    - Replace every character other than ``a-z``, ``0-9`` and the connector
      with the connector.
    - Collapse repeated connectors into one.
    - Trim leading/trailing connector characters.
    - Prefix with ``key`` + connector if the result does not start with a
      letter (this includes an empty result).

    Args:
        s: The text to convert.
        connector: The separator placed between words. It is treated as
            literal text, so characters such as ``.`` or ``+`` are safe.

    Returns:
        The sanitized key.
    """
    # The connector is user-supplied text, not a pattern: escape it before it is
    # placed into a regex, otherwise "." matches everything and "+" fails to compile.
    literal = re.escape(connector)

    def _connector(_match: re.Match[str]) -> str:
        # A function replacement keeps backslashes in the connector literal.
        return connector

    s = s.lower()
    s = re.sub(f"[^a-z0-9{literal}]", _connector, s)
    s = re.sub(f"(?:{literal})+", _connector, s)
    s = s.strip(connector)
    if not re.match(r"^[a-z]", s):
        s = f"key{connector}{s}"
    return s
def escape_character_class(s: str) -> str:
    """Escape characters for use in a regex character class.

    The backslash, hyphen, closing bracket and caret each get a leading
    backslash. All other characters are copied unchanged.

    Args:
        s: The string containing characters to escape.

    Returns:
        The input string with special regex character class characters escaped.
    """
    escaped = {"\\": "\\\\", "-": "\\-", "]": "\\]", "^": "\\^"}
    # Each character is mapped exactly once, so an inserted backslash is never re-escaped.
    return "".join(escaped.get(char, char) for char in s)
class ConfluenceLock(BaseModel):
    """Lock file tracking exported Confluence data.

    Pages are stored per organisation (base URL) and per space, keyed by page
    ID as a string.
    """

    lockfile_version: int = Field(default=LOCKFILE_VERSION)
    last_export: str = Field(default="")
    orgs: dict[str, OrgEntry] = Field(default_factory=dict)

    @classmethod
    def load(cls, lockfile_path: Path) -> ConfluenceLock:
        """Load lock file from disk, or return empty if not exists or outdated.

        A file that cannot be decoded, is not valid JSON, is not a JSON object,
        carries a non-integer version, or fails model validation is reported
        with a warning and replaced by an empty lock instead of raising.

        Args:
            lockfile_path: Location of the lock file.

        Returns:
            The parsed lock, or an empty one when the file is missing,
            outdated or unusable.
        """
        if not lockfile_path.exists():
            return cls()

        try:
            data = json.loads(lockfile_path.read_text(encoding="utf-8"))
        except ValueError:
            # Covers both invalid JSON and undecodable bytes.
            logger.warning("Failed to parse lock file: %s. Starting fresh.", lockfile_path)
            return cls()

        # Valid JSON is not necessarily an object: "[]" or "null" have no .get().
        version = data.get("lockfile_version", 1) if isinstance(data, dict) else None
        if not isinstance(version, int):
            logger.warning("Failed to parse lock file: %s. Starting fresh.", lockfile_path)
            return cls()

        if version < LOCKFILE_VERSION:
            logger.info(
                "Lock file format is outdated (v%s → v%s). Starting fresh.",
                version,
                LOCKFILE_VERSION,
            )
            return cls()

        try:
            return cls.model_validate(data)
        except ValidationError:
            logger.warning("Failed to parse lock file: %s. Starting fresh.", lockfile_path)
            return cls()

    def all_pages(self) -> dict[str, PageEntry]:
        """Return all page entries as a flat dict keyed by page ID."""
        result: dict[str, PageEntry] = {}
        for org in self.orgs.values():
            for space in org.spaces.values():
                result.update(space.pages)
        return result

    def get_page(self, page_id: str) -> PageEntry | None:
        """Return the PageEntry for *page_id*, searching all orgs and spaces."""
        for org in self.orgs.values():
            for space in org.spaces.values():
                if page_id in space.pages:
                    return space.pages[page_id]
        return None

    def remove_page(self, page_id: str) -> None:
        """Remove *page_id* from whichever org/space entry holds it."""
        for org in self.orgs.values():
            for space in org.spaces.values():
                space.pages.pop(page_id, None)

    def add_page(
        self,
        page: Page,
        attachment_entries: dict[str, AttachmentEntry] | None = None,
    ) -> None:
        """Add or update a page entry, placed under its org and space.

        Pages without version information are skipped with a warning.
        """
        if page.version is None:
            logger.warning("Page %s has no version info. Skipping lock entry.", page.id)
            return

        org_url = page.base_url
        space_key = page.space.key

        if org_url not in self.orgs:
            self.orgs[org_url] = OrgEntry()
        if space_key not in self.orgs[org_url].spaces:
            self.orgs[org_url].spaces[space_key] = SpaceEntry()

        self.orgs[org_url].spaces[space_key].pages[str(page.id)] = PageEntry(
            title=page.title,
            version=page.version.number,
            export_path=str(page.export_path),
            attachments=attachment_entries or {},
        )

    def save(  # noqa: C901
        self, lockfile_path: Path, *, delete_ids: set[str] | None = None
    ) -> None:
        """Save lock file to disk.

        To handle concurrent writes, this method reads the existing lock file
        and merges it with the current state before saving.

        Args:
            lockfile_path: Destination of the lock file.
            delete_ids: Page IDs to drop from the merged state before writing.
        """
        lockfile_path.parent.mkdir(parents=True, exist_ok=True)

        # Read existing lock file and merge to handle concurrent writes
        existing = ConfluenceLock.load(lockfile_path)
        for org_url, org_entry in self.orgs.items():
            if org_url not in existing.orgs:
                existing.orgs[org_url] = OrgEntry()
            for space_key, space_entry in org_entry.spaces.items():
                if space_key not in existing.orgs[org_url].spaces:
                    existing.orgs[org_url].spaces[space_key] = SpaceEntry()
                existing.orgs[org_url].spaces[space_key].pages.update(space_entry.pages)

        if delete_ids:
            for page_id in delete_ids:
                existing.remove_page(page_id)

        # Sort for deterministic output
        for org in existing.orgs.values():
            for space in org.spaces.values():
                space.pages = dict(sorted(space.pages.items()))
            org.spaces = dict(sorted(org.spaces.items()))
        existing.orgs = dict(sorted(existing.orgs.items()))

        existing.last_export = datetime.now(timezone.utc).isoformat()
        json_str = json.dumps(existing.model_dump(), indent=2, ensure_ascii=False)

        # Write to a temporary file in the same directory, then move it into place.
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(
                mode="w",
                dir=lockfile_path.parent,
                suffix=".tmp",
                delete=False,
                encoding="utf-8",
            ) as fd:
                tmp_path = Path(fd.name)
                fd.write(json_str)
            try:
                tmp_path.replace(lockfile_path)
            except PermissionError:
                # Windows: MoveFileExW(MOVEFILE_REPLACE_EXISTING) can fail when
                # security software holds the destination. Fall back to non-atomic
                # unlink + rename.
                lockfile_path.unlink(missing_ok=True)
                tmp_path.rename(lockfile_path)
        except BaseException:
            # Do not leave the temporary file behind, whatever interrupted the write.
            if tmp_path is not None:
                tmp_path.unlink(missing_ok=True)
            raise

        # Update self to reflect merged state
        self.orgs = existing.orgs
        self.last_export = existing.last_export
None: return with cls._thread_lock: cls._lock.add_page(page, attachment_entries) cls._lock.save(cls._lockfile_path) cls._seen_page_ids.add(str(page.id)) PageTitleRegistry.register(int(page.id), page.title) @classmethod def mark_seen(cls, page_ids: list[int]) -> None: """Mark page IDs as seen in the current export run. This avoids unnecessary API existence checks during cleanup for pages that were encountered but skipped (e.g. unchanged pages). """ cls._seen_page_ids.update(str(pid) for pid in page_ids) @classmethod def should_export(cls, page: Page | Descendant) -> bool: """Check if a page should be exported based on lockfile state. Returns True if the page should be exported (not in lockfile or changed). """ if cls._lock is None: return True page_id = str(page.id) entry = cls._lock.get_page(page_id) if entry is None: logger.debug("Page id=%s not in lockfile — will export", page_id) return True if page.version is None: logger.debug("Page id=%s has no version info — will export", page_id) return True # Re-export if the output file is missing from disk if cls._output_path is not None and not (cls._output_path / entry.export_path).exists(): logger.debug("Page id=%s output file missing — will re-export", page_id) return True # Export if version or export_path has changed if entry.version != page.version.number or entry.export_path != str(page.export_path): logger.debug( "Page id=%s changed (v%s -> v%s) — will export", page_id, entry.version, page.version.number, ) return True logger.debug("Page id=%s unchanged (v%s) — skipping", page_id, entry.version) return False @classmethod def unseen_ids(cls) -> set[str]: """Return lockfile page IDs not encountered during the current export run.""" if cls._lock is None: return set() return set(cls._lock.all_pages().keys()) - cls._seen_page_ids @classmethod def remove_pages(cls, deleted_ids: set[str]) -> None: """Remove files and lockfile entries for moved or deleted pages. 
Args: deleted_ids: Page IDs confirmed as deleted from Confluence. """ if cls._lock is None or cls._lockfile_path is None or cls._output_path is None: return result_delete_ids: set[str] = set() # Handle moved pages: delete old file when export_path changed for page_id in cls._seen_page_ids: if page_id in cls._all_entries_snapshot: old_entry = cls._all_entries_snapshot[page_id] new_entry = cls._lock.get_page(page_id) if new_entry and old_entry.export_path != new_entry.export_path: (cls._output_path / old_entry.export_path).unlink(missing_ok=True) logger.info("Deleted old path for moved page: %s", old_entry.export_path) # Remove files and lockfile entries for pages deleted from Confluence for page_id in deleted_ids: entry = cls._lock.get_page(page_id) if entry: (cls._output_path / entry.export_path).unlink(missing_ok=True) logger.info("Deleted removed page: %s", entry.export_path) result_delete_ids.add(page_id) if result_delete_ids: with cls._thread_lock: cls._lock.save(cls._lockfile_path, delete_ids=result_delete_ids) stats = get_stats() for _ in result_delete_ids: stats.inc_removed() ================================================ FILE: confluence_markdown_exporter/utils/measure_time.py ================================================ import logging import time from collections.abc import Callable from collections.abc import Generator from contextlib import contextmanager from datetime import datetime from typing import ParamSpec from typing import TypeVar from dateutil.relativedelta import relativedelta from rich.rule import Rule from confluence_markdown_exporter.utils.rich_console import console T = TypeVar("T") P = ParamSpec("P") logger = logging.getLogger(__name__) def _format_duration(delta: relativedelta) -> str: """Return a human-readable duration string from a relativedelta. Args: delta: The duration as a relativedelta. Returns: A formatted string like "2m 3s" or "45s". 
""" parts = [] if delta.hours: parts.append(f"{delta.hours}h") if delta.minutes: parts.append(f"{delta.minutes}m") seconds = delta.seconds + round(delta.microseconds / 1_000_000) if seconds or not parts: parts.append(f"{seconds}s") return " ".join(parts) def measure_time(func: Callable[P, T]) -> Callable[P, T]: """Decorator to measure and print the execution time of a function.""" def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: start_time = time.time() result = func(*args, **kwargs) end_time = time.time() elapsed_time = end_time - start_time logger.info(f"Function '{func.__name__}' took {elapsed_time:.4f} seconds to execute.") return result return wrapper @contextmanager def measure(step: str) -> Generator[None, None, None]: """Measure and display the execution time of the encapsulated block. Prints a rich rule banner at start and a summary line at end. Args: step: The step name shown in the banner. Raises: e: Reraised exception from execution. """ start_time = datetime.now() console.print(Rule(f"[highlight]{step}[/highlight]", style="dim")) logger.debug("Started at %s", start_time.strftime("%Y-%m-%d %H:%M:%S")) state = "stopped" try: yield state = "ended" except Exception: state = "failed" raise finally: end_time = datetime.now() duration = relativedelta(end_time, start_time) duration_str = _format_duration(duration) if state == "ended": console.print( f"[success]✓[/success] [dim]{step}[/dim] " f"completed in [highlight]{duration_str}[/highlight]" ) elif state == "failed": console.print( f"[error]✗[/error] [dim]{step}[/dim] " f"failed after [highlight]{duration_str}[/highlight]" ) else: console.print( f"[warning]![/warning] [dim]{step}[/dim] " f"stopped after [highlight]{duration_str}[/highlight]" ) ================================================ FILE: confluence_markdown_exporter/utils/page_registry.py ================================================ """Cross-space page title registry for link disambiguation. 
class PageTitleRegistry:
    """Remember which title each page ID carries and how often each title occurs.

    A title that is registered for more than one page ID is ambiguous. All
    state lives on the class and is modified under a class-level lock.
    """

    _entries: ClassVar[dict[int, str]] = {}
    _title_counts: ClassVar[dict[str, int]] = {}
    _lock: ClassVar[threading.Lock] = threading.Lock()

    @classmethod
    def reset(cls) -> None:
        """Forget every registered page and title."""
        with cls._lock:
            cls._title_counts.clear()
            cls._entries.clear()

    @classmethod
    def register(cls, page_id: int, title: str) -> None:
        """Record that *page_id* has *title*, replacing any earlier title for it.

        Calls with a falsy page ID or a falsy title are ignored.
        """
        if not (page_id and title):
            return
        with cls._lock:
            previous = cls._entries.get(page_id)
            if previous == title:
                return
            if previous is not None:
                # The page was renamed: release its claim on the old title.
                remaining = cls._title_counts[previous] - 1
                if remaining > 0:
                    cls._title_counts[previous] = remaining
                else:
                    cls._title_counts.pop(previous, None)
            cls._entries[page_id] = title
            cls._title_counts[title] = cls._title_counts.get(title, 0) + 1

    @classmethod
    def is_ambiguous(cls, title: str) -> bool:
        """Return True when more than one registered page has *title*."""
        return cls.title_count(title) > 1

    @classmethod
    def title_count(cls, title: str) -> int:
        """Return how many registered pages have *title*."""
        return cls._title_counts.get(title, 0)
import Style from rich.theme import Theme _CME_THEME = Theme( { "none": Style.null(), "reset": Style( color="default", bgcolor="default", dim=False, bold=False, italic=False, underline=False, blink=False, blink2=False, reverse=False, conceal=False, strike=False, ), "dim": Style(dim=True), "bright": Style(dim=False), "bold": Style(bold=True), "strong": Style(bold=True), "code": Style(color="cyan"), "italic": Style(italic=True), "emphasize": Style(italic=True), "underline": Style(underline=True), "blink": Style(blink=True), "blink2": Style(blink2=True), "reverse": Style(reverse=True), "strike": Style(strike=True), "black": Style(color="black"), "red": Style(color="red"), "green": Style(color="green"), "yellow": Style(color="yellow"), "magenta": Style(color="magenta"), "cyan": Style(color="cyan"), "white": Style(color="white"), "inspect.attr": Style(color="yellow", italic=True), "inspect.attr.dunder": Style(color="yellow", italic=True, dim=True), "inspect.callable": Style(bold=True, color="red"), "inspect.async_def": Style(italic=True, color="bright_cyan"), "inspect.def": Style(italic=True, color="bright_cyan"), "inspect.class": Style(italic=True, color="bright_cyan"), "inspect.error": Style(bold=True, color="red"), "inspect.equals": Style(), "inspect.help": Style(color="cyan"), "inspect.doc": Style(dim=True), "inspect.value.border": Style(color="green"), "live.ellipsis": Style(bold=True, color="red"), "layout.tree.row": Style(dim=False, color="red"), "layout.tree.column": Style(dim=False, color="blue"), "logging.keyword": Style(bold=True, color="yellow"), "logging.level.notset": Style(dim=True), "logging.level.debug": Style(color="green"), "logging.level.info": Style(color="blue"), "logging.level.warning": Style(color="yellow"), "logging.level.error": Style(color="red", bold=True), "logging.level.critical": Style(color="red", bold=True, reverse=True), "log.level": Style.null(), "log.time": Style(color="cyan", dim=True), "log.message": Style.null(), "log.path": 
Style(dim=True), "repr.ellipsis": Style(color="yellow"), "repr.indent": Style(color="green", dim=True), "repr.error": Style(color="red", bold=True), "repr.str": Style(color="green", italic=False, bold=False), "repr.brace": Style(bold=True), "repr.comma": Style(bold=True), "repr.ipv4": Style(bold=True, color="bright_green"), "repr.ipv6": Style(bold=True, color="bright_green"), "repr.eui48": Style(bold=True, color="bright_green"), "repr.eui64": Style(bold=True, color="bright_green"), "repr.tag_start": Style(bold=True), "repr.tag_name": Style(color="bright_magenta", bold=True), "repr.tag_contents": Style(color="default"), "repr.tag_end": Style(bold=True), "repr.attrib_name": Style(color="yellow", italic=False), "repr.attrib_equal": Style(bold=True), "repr.attrib_value": Style(color="magenta", italic=False), "repr.number": Style(color="cyan", bold=True, italic=False), "repr.number_complex": Style(color="cyan", bold=True, italic=False), # same "repr.bool_true": Style(color="bright_green", italic=True), "repr.bool_false": Style(color="bright_red", italic=True), "repr.none": Style(color="magenta", italic=True), "repr.url": Style(underline=True, color="bright_blue", italic=False, bold=False), "repr.uuid": Style(color="bright_yellow", bold=False), "repr.call": Style(color="magenta", bold=True), "repr.path": Style(color="magenta"), "repr.filename": Style(color="bright_magenta"), "rule.line": Style(color="bright_green"), "rule.text": Style.null(), "json.brace": Style(bold=True), "json.bool_true": Style(color="bright_green", italic=True), "json.bool_false": Style(color="bright_red", italic=True), "json.null": Style(color="magenta", italic=True), "json.number": Style(color="cyan", bold=True, italic=False), "json.str": Style(color="green", italic=False, bold=False), "json.key": Style(color="blue", bold=True), "prompt": Style.null(), "prompt.choices": Style(color="magenta", bold=True), "prompt.default": Style(color="cyan", bold=True), "prompt.invalid": Style(color="red"), 
"prompt.invalid.choice": Style(color="red"), "pretty": Style.null(), "scope.border": Style(color="blue"), "scope.key": Style(color="yellow", italic=True), "scope.key.special": Style(color="yellow", italic=True, dim=True), "scope.equals": Style(color="red"), "table.header": Style(bold=True), "table.footer": Style(bold=True), "table.cell": Style.null(), "table.title": Style(italic=True), "table.caption": Style(italic=True, dim=True), "traceback.error": Style(color="red", italic=True), "traceback.border.syntax_error": Style(color="bright_red"), "traceback.border": Style(color="red"), "traceback.text": Style.null(), "traceback.title": Style(color="red", bold=True), "traceback.exc_type": Style(color="bright_red", bold=True), "traceback.exc_value": Style.null(), "traceback.offset": Style(color="bright_red", bold=True), "traceback.error_range": Style(underline=True, bold=True), "traceback.note": Style(color="green", bold=True), "traceback.group.border": Style(color="magenta"), "bar.back": Style(color="grey23"), "bar.complete": Style(color="rgb(249,38,114)"), "bar.finished": Style(color="rgb(114,156,31)"), "bar.pulse": Style(color="rgb(249,38,114)"), "progress.description": Style.null(), "progress.filesize": Style(color="green"), "progress.filesize.total": Style(color="green"), "progress.download": Style(color="green"), "progress.elapsed": Style(color="yellow"), "progress.percentage": Style(color="magenta"), "progress.remaining": Style(color="cyan"), "progress.data.speed": Style(color="red"), "progress.spinner": Style(color="green"), "status.spinner": Style(color="green"), "tree": Style(), "tree.line": Style(), "markdown.paragraph": Style(), "markdown.text": Style(), "markdown.em": Style(italic=True), "markdown.emph": Style(italic=True), # For commonmark backwards compatibility "markdown.strong": Style(bold=True), "markdown.code": Style(color="cyan"), "markdown.code_block": Style(color="cyan"), "markdown.block_quote": Style(color="magenta"), "markdown.list": 
def setup_logging(log_level: str = "INFO", log_file: Path | None = None) -> None:
    """Configure the root logger to use rich output.

    Any handlers already attached to the root logger are removed first, so
    calling this again replaces the previous configuration instead of
    duplicating output.

    Args:
        log_level: One of DEBUG, INFO, WARNING, ERROR (case-insensitive).
            Names that do not resolve to a logging level fall back to INFO.
        log_file: Optional path to also write log records to. The file uses a
            plain (non-rich) format so it is grep-friendly. Parent directories
            are created if missing.
    """
    level_name = log_level.upper()
    level = getattr(logging, level_name, logging.INFO)
    if not isinstance(level, int):
        # Some upper-case attributes of the logging module are not levels
        # (e.g. BASIC_FORMAT is a str); setLevel() would reject them.
        level = logging.INFO

    handler = RichHandler(
        console=console,
        rich_tracebacks=True,
        # Compare the normalised name so "debug" shows paths just like "DEBUG".
        show_path=level_name == "DEBUG",
        markup=False,
        log_time_format="[%X]",
    )
    handler.setLevel(level)

    root = logging.getLogger()
    root.setLevel(level)
    # Remove any existing handlers so we don't double-log
    root.handlers.clear()
    root.addHandler(handler)

    if log_file is not None:
        log_file.parent.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setLevel(level)
        file_handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
        )
        root.addHandler(file_handler)
def pad(rows: list[list[Tag]]) -> list[list[Tag]]:
    """Pad table rows to handle rowspan and colspan for markdown conversion.

    Every cell that spans several columns is followed by empty placeholder
    cells, and every cell that spans several rows reserves placeholder cells
    at the same columns in the rows below, so each returned row has one entry
    per grid column it covers.

    Args:
        rows: The cells of each table row, in document order.

    Returns:
        The padded rows. Rows that contain no cells are skipped.
    """
    padded: list[list[Tag]] = []
    # Placeholders reserved by a rowspan in an earlier row, keyed by (row, column).
    occ: dict[tuple[int, int], Tag] = {}

    for r, row in enumerate(rows):
        if not row:
            continue

        cur: list[Tag] = []
        c = 0

        for cell in row:
            # Columns already claimed by a rowspan from above come first.
            while (r, c) in occ:
                cur.append(occ.pop((r, c)))
                c += 1

            # A span below 1 (e.g. colspan="0" or a negative value) would stall
            # or rewind the column cursor and misplace every later placeholder.
            rs = max(1, _get_int_attr(cell, "rowspan", "1"))
            cs = max(1, _get_int_attr(cell, "colspan", "1"))

            cur.append(cell)
            # Extra cells for the remaining columns of a colspan.
            cur.extend(make_empty_cell() for _ in range(1, cs))

            # Reserve the covered columns in the following rows only; the
            # current row's extra columns were appended just above.
            for i in range(1, rs):
                for j in range(cs):
                    occ[(r + i, c + j)] = make_empty_cell()

            c += cs

        # Trailing columns claimed by a rowspan from above.
        while (r, c) in occ:
            cur.append(occ.pop((r, c)))
            c += 1

        padded.append(cur)

    return padded
"""Return an empty Tag.""" return Tag(name="td") def _normalize_table_cell_text(text: str) -> str: text = text.replace("|", "\\|").replace("\n", "
") text = _LEADING_BR_OR_WS.sub("", text) return _TRAILING_BR_OR_WS.sub("", text) class TableConverter(MarkdownConverter): """Custom MarkdownConverter for converting HTML tables to markdown tables.""" def convert_table(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: rows = [ cast("list[Tag]", tr.find_all(["td", "th"])) for tr in cast("list[Tag]", el.find_all("tr")) if tr ] if not rows: return "" padded_rows = pad(rows) converted = [[self.convert(str(cell)) for cell in row] for row in padded_rows] has_header = all(cell.name == "th" for cell in rows[0]) if has_header: return tabulate(converted[1:], headers=converted[0], tablefmt="pipe") return tabulate(converted, headers=[""] * len(converted[0]), tablefmt="pipe") def convert_th(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: """This method is empty because we want a No-Op for the tag.""" return _normalize_table_cell_text(text) def convert_tr(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: """This method is empty because we want a No-Op for the tag.""" return text def convert_td(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: """This method is empty because we want a No-Op for the tag.""" return _normalize_table_cell_text(text) def convert_thead(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: """This method is empty because we want a No-Op for the tag.""" return text def convert_tbody(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: """This method is empty because we want a No-Op for the tag.""" return text ParentTags = list[str] | set[str] @staticmethod def _normalize_parent_tags( parent_tags: "TableConverter.ParentTags | bool", ) -> "TableConverter.ParentTags": # markdownify 1.x passes set[str]; older versions passed bool (convert_as_inline) return parent_tags if isinstance(parent_tags, list | set) else set() def convert_ol( self, el: BeautifulSoup, text: str, parent_tags: "TableConverter.ParentTags 
| bool" ) -> str: tags = self._normalize_parent_tags(parent_tags) if "td" in tags: lines = text.splitlines() if not lines: return "" start = int(el.get("start") or 1) numbered = [ f"{start + i}. {item}".rstrip() if item.strip() else str(start + i) for i, item in enumerate(lines) ] return "
".join(n for n in numbered if n) return super().convert_ol(el, text, tags) def convert_li( self, el: BeautifulSoup, text: str, parent_tags: "TableConverter.ParentTags | bool" ) -> str: tags = self._normalize_parent_tags(parent_tags) if "td" in tags: return text.strip().removesuffix("
") + "\n" return MarkdownConverter.convert_li(self, el, text, tags) # type: ignore[attr-defined] def convert_ul( self, el: BeautifulSoup, text: str, parent_tags: "TableConverter.ParentTags | bool" ) -> str: tags = self._normalize_parent_tags(parent_tags) if "td" in tags: items = [item for item in text.splitlines() if item.strip()] if not items: return "" if len(items) == 1: return items[0] return "- " + "
- ".join(items) return super().convert_ul(el, text, tags) def convert_p( self, el: BeautifulSoup, text: str, parent_tags: "TableConverter.ParentTags | bool" ) -> str: tags = self._normalize_parent_tags(parent_tags) md = super().convert_p(el, text, tags) if "td" in tags: md = md.replace("\n", "") + "
" return md ================================================ FILE: confluence_markdown_exporter/utils/type_converter.py ================================================ def str_to_bool(value: str) -> bool: """Convert a string to boolean.""" true_set = {"true", "1", "yes", "on"} false_set = {"false", "0", "no", "off"} val = value.strip().lower() if val in true_set: return True if val in false_set: return False msg = f"Invalid boolean string: '{value}'" raise ValueError(msg) ================================================ FILE: docs/compatibility.md ================================================ --- id: compatibility title: Compatibility sidebar_position: 5 --- # Compatibility This package is not tested extensively. Please check all output and report any issue on the [issue tracker](https://github.com/Spenhouet/confluence-markdown-exporter/issues). It has generally been tested on: - **Confluence Cloud** 1000.0.0-b5426ab8524f (2025-05-28) - **Confluence Server** 8.5.20 If you successfully use the exporter with a different Confluence version, feel free to open a PR adding it to this list. ================================================ FILE: docs/configuration/authentication.md ================================================ --- id: authentication title: Authentication sidebar_position: 3 --- # Authentication :::note Auth credentials use URL-keyed nested dicts (e.g. `auth.confluence["https://company.atlassian.net"]`) and cannot be mapped to flat ENV var names. Use `cme config edit auth.confluence` or `cme config set` for auth configuration. ::: The fastest way to set credentials is the interactive menu: ```sh cme config edit auth.confluence cme config edit auth.jira ``` ## Confluence ### auth.confluence.url Confluence instance URL. - Default: `""` ### auth.confluence.username Confluence username/email. - Default: `""` ### auth.confluence.api_token Confluence API token. - Default: `""` ### auth.confluence.pat Confluence Personal Access Token. 
- Default: `""` ### auth.confluence.cloud_id Atlassian Cloud ID for the Confluence instance. When set, API calls are routed through the Atlassian API gateway (`https://api.atlassian.com/ex/confluence/{cloud_id}`), which enables the use of [scoped API tokens](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/). For Atlassian Cloud instances (`.atlassian.net`) this is fetched and stored **automatically** on first connection. You can also set it manually. See [How to retrieve your Atlassian Cloud ID](https://support.atlassian.com/jira/kb/retrieve-my-atlassian-sites-cloud-id/). - Default: `""` ## Jira ### auth.jira.url Jira instance URL. - Default: `""` ### auth.jira.username Jira username/email. - Default: `""` ### auth.jira.api_token Jira API token. - Default: `""` ### auth.jira.pat Jira Personal Access Token. - Default: `""` ### auth.jira.cloud_id Atlassian Cloud ID for the Jira instance. Works identically to `auth.confluence.cloud_id` above, routing API calls through `https://api.atlassian.com/ex/jira/{cloud_id}`. For Atlassian Cloud instances this is fetched and stored **automatically** on first connection. - Default: `""` ## Generating API tokens API tokens that are associated with Atlassian Cloud accounts can be generated [in your 'Account Settings'](https://id.atlassian.com/manage-profile/security/api-tokens) (in Jira/Confluence: profile picture in upper-right corner → _Account Settings_ → _Security_ → _Create and Manage API tokens_). 
Scoped API tokens **require 'classic' scopes**; these scopes have been tested (giving full read-only access): ```text read:confluence-content.all read:account read:confluence-content.permission read:confluence-content.summary read:confluence-groups read:confluence-props read:confluence-space.summary read:confluence-user read:me readonly:content.attachment:confluence search:confluence ``` ================================================ FILE: docs/configuration/ci.md ================================================ --- id: ci title: Running in CI sidebar_label: CI / non-interactive sidebar_position: 5 --- # Running in CI / non-interactive environments The exporter automatically detects CI environments and suppresses rich terminal formatting (colors, spinner animations, progress bar redraws) so that log output is clean and readable in CI logs. Detection is based on two standard environment variables: | Variable | Effect | | ------------ | ------------------------------------------------------------------------- | | `CI=true` | Disables ANSI color codes and live terminal output | | `NO_COLOR=1` | Same effect (follows the [no-color.org](https://no-color.org) convention) | Most CI platforms (GitHub Actions, GitLab CI, CircleCI, Jenkins, etc.) set `CI=true` automatically. ## Controlling log verbosity You can control output verbosity via the `CME_EXPORT__LOG_LEVEL` env var or the [`export.log_level`](./options.md#exportlog_level) config option: ```sh # Enable verbose debug logging for a single run (not persisted): CME_EXPORT__LOG_LEVEL=DEBUG cme pages # Reduce verbosity permanently: cme config set export.log_level=WARNING # Or for the current session only: CME_EXPORT__LOG_LEVEL=WARNING cme pages ``` This is useful for using different log levels for different environments or for scripting. ## Tips for CI pipelines - Use a dedicated config file via [`CME_CONFIG_PATH`](./index.md#custom-config-file-location) so CI runs don't share state with developer machines. 
- Provide credentials via secrets and set them with `cme config set` at the start of the run, or use ENV var overrides for non-auth options. - Pin the version using the version-specific installer URL; see [Installation](../installation.md#pinning-a-specific-version). ================================================ FILE: docs/configuration/index.md ================================================ --- id: index title: Configuration slug: /configuration/ sidebar_position: 1 --- # Configuration All configuration and authentication is stored in a single JSON file managed by the application. You do not need to manually edit this file; use the `cme config` commands instead. ## Config commands | Command | Description | | ------------------------------- | ---------------------------------------------- | | `cme config` | Open the interactive configuration menu | | `cme config list` | Print the full configuration as YAML | | `cme config get <key>` | Print the value of a single config key | | `cme config set <key=value>...` | Set one or more config values | | `cme config edit <key>` | Open the interactive editor for a specific key | | `cme config path` | Print the path to the config file | | `cme config reset` | Reset all configuration to defaults | ### Interactive menu ```sh cme config ``` Opens a full interactive menu where you can: - See all config options and their current values - Select any option to change it (including authentication) - Navigate into nested sections (e.g. `auth.confluence`) - Reset all config to defaults ### List current configuration ```sh cme config list # YAML (default) cme config list -o json # JSON ``` Prints the entire current configuration. Output format defaults to YAML; use `-o json` for JSON. ### Get a single value ```sh cme config get export.log_level cme config get connection_config.max_workers ``` Prints the current value of the specified key. Nested sections are printed as YAML.
### Set values ```sh cme config set export.log_level=DEBUG cme config set export.output_path=/tmp/export cme config set export.skip_unchanged=false ``` Sets one or more `key=value` pairs directly. Values are parsed as JSON where possible (so `true`, `false`, and numbers work as expected), falling back to a plain string. :::note For auth keys that contain a URL (e.g. `auth.confluence.https://...`), use `cme config edit auth.confluence` instead, which handles URL-based keys correctly. ::: ### Edit a specific key interactively ```sh cme config edit auth.confluence cme config edit export.log_level ``` Opens the interactive editor directly at the specified config section, skipping the top-level menu. ### Show config file path ```sh cme config path ``` Prints the absolute path to the configuration file. Useful when `CME_CONFIG_PATH` is set or when locating the file for backup/inspection. ### Reset to defaults ```sh cme config reset cme config reset --yes # skip confirmation ``` Resets the entire configuration to factory defaults after confirmation. ## ENV var overrides All options can be set via the config file (using `cme config set`) or overridden for the current session via environment variables. ENV vars **take precedence** over stored config and are **not** persisted. ENV var names use the `CME_` prefix and `__` (double underscore) as the nested delimiter, matching the key in uppercase. Example: `export.log_level` → `CME_EXPORT__LOG_LEVEL`. :::note Auth credentials use URL-keyed nested dicts (e.g. `auth.confluence["https://company.atlassian.net"]`) and cannot be mapped to flat ENV var names. Use `cme config edit auth.confluence` or `cme config set` for auth configuration. ::: ## Custom config file location By default, configuration is stored in a platform-specific application directory. 
You can override the config file location by setting the `CME_CONFIG_PATH` environment variable to the desired file path: ```sh export CME_CONFIG_PATH=/path/to/your/custom_config.json ``` If set, the application will read and write config from this file instead. ## Next - [Full option reference →](./options.md) - [Authentication →](./authentication.md) - [Target-system presets (Obsidian, ADO, …) →](./target-systems.md) - [Running in CI / non-interactive environments →](./ci.md) ================================================ FILE: docs/configuration/options.md ================================================ --- id: options title: Configuration options sidebar_label: Options reference sidebar_position: 2 --- # Configuration options Reference for every supported option. All options can be set via `cme config set <key>=<value>` or overridden per-session through the listed environment variable. ## export.\* ### export.log_level Controls output verbosity: `DEBUG` (every step), `INFO` (key milestones), `WARNING` (warnings/errors only), `ERROR` (errors only). - Default: `INFO` - ENV Var: `CME_EXPORT__LOG_LEVEL` ### export.output_path The directory where all exported files and folders will be written. Used as the base for relative and absolute links. - Default: `./` (current working directory) - ENV Var: `CME_EXPORT__OUTPUT_PATH` ### export.page_href How to generate links to pages in Markdown. Options: `relative` (default), `absolute`, or `wiki`. - Default: `relative` - ENV Var: `CME_EXPORT__PAGE_HREF` | Value | Output | | ---------- | -------------------------------------- | | `relative` | `[Page Title](../path/to/page.md)` | | `absolute` | `[Page Title](/space/path/to/page.md)` | | `wiki` | `[[Page Title]]` | ### export.page_path Path template for exported pages. - Default: `{space_name}/{homepage_title}/{ancestor_titles}/{page_title}.md` - ENV Var: `CME_EXPORT__PAGE_PATH` ### export.attachment_href How to generate links to attachments in Markdown.
Options: `relative` (default), `absolute`, or `wiki`. - Default: `relative` - ENV Var: `CME_EXPORT__ATTACHMENT_HREF` | Value | Output | | ---------- | ---------------------------------------------------------------------------------- | | `relative` | `[file.pdf](../path/to/file.pdf)` / `![alt](../path/to/image.png)` | | `absolute` | `[file.pdf](/space/attachments/file.pdf)` / `![alt](/space/attachments/image.png)` | | `wiki` | `[[file.pdf\|File Title]]` / `![[image.png]]` | ### export.attachment_path Path template for attachments. - Default: `{space_name}/attachments/{attachment_file_id}{attachment_extension}` - ENV Var: `CME_EXPORT__ATTACHMENT_PATH` On Confluence Data Center / Server, where the API does not provide `fileId`, `{attachment_file_id}` falls back to the content id, so the default template still produces unique filenames. ### export.attachments_export Which attachments to download to disk. | Value | Behaviour | | ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `referenced` | Only attachments whose ID/filename appears in the page body (default). | | `all` | Every attachment on the page. Large or numerous attachments increase export time. | | `disabled` | Skip downloads entirely: no files written, no lockfile entries, no lookup. Body image and file links still point at `attachment_path`, but the files will not exist locally. | - Default: `referenced` - ENV Var: `CME_EXPORT__ATTACHMENTS_EXPORT` ### export.image_captions Whether to export Confluence image captions in the exported Markdown. When enabled, the storage format of each page is fetched (via an additional API body expansion) and `ac:image` captions are extracted and rendered as an italic line directly below the image: ```markdown ![](image.png) _Caption text_ ``` When disabled, no caption is added. 
- Default: `False` - ENV Var: `CME_EXPORT__IMAGE_CAPTIONS` ### export.page_breadcrumbs Whether to include breadcrumb links at the top of the page. - Default: `True` - ENV Var: `CME_EXPORT__PAGE_BREADCRUMBS` ### export.page_properties_format Controls how Confluence Page Properties macros (key-value tables) are rendered. Duplicate property keys are automatically disambiguated by appending a counter (e.g. `status`, `status_2`, `status_3`). | Value | Description | | ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | | `frontmatter` | Extract to YAML front matter; table is removed from the page body | | `table` | Keep as a regular markdown table; no metadata is written | | `frontmatter_and_table` | Write to YAML front matter **and** keep the original table in the body (default) | | `dataview-inline-field` | Replace the table with [Dataview](https://blacksmithgu.github.io/obsidian-dataview/) `Key:: Value` inline fields | | `meta-bind-view-fields` | Write YAML front matter and a table using [Meta Bind](https://www.moritzjung.dev/obsidian-meta-bind-plugin-docs/) `VIEW[{key}][text]` fields | :::info Migration The legacy `page_properties_as_front_matter=true/false` is still accepted and maps to `frontmatter` / `table` respectively. ::: - Default: `frontmatter_and_table` - ENV Var: `CME_EXPORT__PAGE_PROPERTIES_FORMAT` ### export.page_properties_report_format Controls how Confluence Page Properties Report macros (dynamic cross-page property tables) are rendered. 
| Value | Description | | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `frozen` | Export the rendered table as a static markdown table snapshot (default) | | `dataview` | Translate the CQL query to an [Obsidian Dataview](https://blacksmithgu.github.io/obsidian-dataview/) DQL code block; requires the Dataview plugin and all referenced child pages to be exported with their page properties as front matter; falls back to a frozen table if the query cannot be translated | - Default: `frozen` - ENV Var: `CME_EXPORT__PAGE_PROPERTIES_REPORT_FORMAT` ### export.confluence_url_in_frontmatter Whether to include the original Confluence page URL in the YAML front matter of the exported file. | Value | Description | | -------- | --------------------------------------------------------------------------------------------------------- | | `none` | Do not include any URL (default) | | `webui` | Include `confluence_webui_url` (human-readable URL; may change when the page is renamed or moved) | | `tinyui` | Include `confluence_tinyui_url` (stable short permalink based on the page ID; survives renames and moves) | | `both` | Include both fields | If a Page Properties macro on the page already defines `confluence_webui_url` or `confluence_tinyui_url`, the value from the macro takes precedence over the URL extracted from the API. - Default: `none` - ENV Var: `CME_EXPORT__CONFLUENCE_URL_IN_FRONTMATTER` ### export.page_metadata_in_frontmatter Add eight Confluence page metadata fields to the YAML front matter of each exported page. 
| Field | Source | | ----- | ------ | | `confluence_page_id` | Page ID (string) | | `confluence_space_key` | Space key | | `confluence_type` | Content type (`page` or `blogpost`) | | `confluence_created` | ISO 8601 timestamp of when the page was first created (`history.createdDate`) | | `confluence_created_by` | Display name of the original author (`history.createdBy.displayName`) | | `confluence_last_modified` | ISO 8601 timestamp of the most recent version (`version.when`), including minor edits | | `confluence_last_modified_by` | Display name of the last editor | | `confluence_version` | Version number (integer) | Fields with empty or zero values are omitted. If a Page Properties macro on the page already defines a key with the same name, the macro value takes precedence. `confluence_page_id` is intentionally written as a quoted string (e.g. `'629839369'`) rather than an integer. Confluence Cloud page IDs can exceed JavaScript's safe-integer range (`2^53 − 1`), so JS-based static site generators (Hugo, Astro, …) parsing the front matter would silently truncate them. `confluence_created` and `confluence_last_modified` are also quoted because PyYAML wraps ISO-8601 timestamps with timezone offsets to prevent loaders from coercing the value into a `datetime` object. Example front matter with both `confluence_url_in_frontmatter: webui` and `page_metadata_in_frontmatter: true`: ```yaml --- tags: - team-foo confluence_webui_url: https://.../wiki/spaces/.../pages/123/Title confluence_page_id: '123' confluence_space_key: TEAM confluence_type: page confluence_created: "2024-08-15T08:34:12.000+02:00" confluence_created_by: Sam Creator confluence_last_modified: "2026-04-12T10:34:00.000+02:00" confluence_last_modified_by: Alex Johnson confluence_version: 7 --- ``` - Default: `false` - ENV Var: `CME_EXPORT__PAGE_METADATA_IN_FRONTMATTER` ### export.filename_encoding Character mapping for filename encoding. - Default: Default mappings for forbidden characters. 
- ENV Var: `CME_EXPORT__FILENAME_ENCODING` ### export.filename_length Maximum length of filenames. - Default: `255` - ENV Var: `CME_EXPORT__FILENAME_LENGTH` ### export.filename_lowercase Make all exported paths and filenames lowercase. By default the original casing from Confluence is retained. - Default: `False` - ENV Var: `CME_EXPORT__FILENAME_LOWERCASE` ### export.include_document_title Whether to include the document title in the exported markdown file. If enabled, the title will be added as a top-level heading. - Default: `True` - ENV Var: `CME_EXPORT__INCLUDE_DOCUMENT_TITLE` ### export.include_toc Whether to export the Confluence Table of Contents macro. When enabled, the TOC is converted to markdown. When disabled, the TOC macro is removed from the output. - Default: `True` - ENV Var: `CME_EXPORT__INCLUDE_TOC` ### export.include_macro Controls how Confluence `include` and `excerpt-include` macros are rendered. The `include` macro embeds the full content of another page; `excerpt-include` embeds a named excerpt from another page. | Value | Behaviour | | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `inline` | Expand the referenced page content inline at the point of inclusion (default). The body already rendered by Confluence is used, so no extra API calls are required. | | `transclusion` | Emit an Obsidian-style `![[Page Title]]` embed link. Obsidian renders the link as an inline preview of the target note. The referenced page must also be exported to resolve. | - Default: `inline` - ENV Var: `CME_EXPORT__INCLUDE_MACRO` ### export.enable_jira_enrichment Fetch Jira issue data to enrich Confluence pages. When enabled, Jira issue links include the issue summary. Requires Jira auth to be configured. 
- Default: `True` - ENV Var: `CME_EXPORT__ENABLE_JIRA_ENRICHMENT` ### export.comments_export Which comments to export to a sidecar `.comments.md` file placed next to the exported page file, using the same path stem. | Value | Behaviour | | ---------- | ---------------------------------------------------------------------------------------- | | `none` | No sidecar (default). | | `inline` | Open inline comments only (annotated text as blockquote, then author / date / body). | | `footer` | Open page-level (footer) comments only. | | `all` | Both, in a single sidecar with `## Inline comments` first, then `## Page comments`. | Only open comments are included; resolved comments are skipped. Replies are listed flat below their parent comment. Disabled by default; enabling adds one to two extra API calls per page. Sidecar example for `comments_export = "all"`: ```markdown --- confluence_page_id: '123' confluence_page_title: "Example Page" confluence_webui_url: "https://example.atlassian.net/wiki/spaces/TEAM/pages/123" --- ## Inline comments ### marked excerpt > marked excerpt **Alice** · 2026-04-01 Looks good to me. ## Page comments ### Discussion about the rollout **Bob** · 2026-04-02 Are we shipping this Friday? ``` The legacy boolean key `inline_comments` is migrated automatically: `true` becomes `"inline"`, `false` becomes `"none"`. - Default: `none` - ENV Var: `CME_EXPORT__COMMENTS_EXPORT` ### export.convert_status_badges Whether to convert Confluence status badge macros to HTML `` elements coloured with the badge's background colour. Each lozenge variant maps to an Atlassian design-system pastel: | Lozenge | Colour | Hex | | -------------- | --------------- | --------- | | Gray (default) | Gray | `#dfe1e6` | | Blue | Blue | `#cce0ff` | | Green | Green | `#baf3db` | | Yellow | Yellow / Orange | `#f8e6a0` | | Red | Red | `#ffd5d2` | | Purple | Purple / Violet | `#dfd8fd` | When disabled, only the badge label text is kept. 
- Default: `True` - ENV Var: `CME_EXPORT__CONVERT_STATUS_BADGES` ### export.convert_text_highlights Whether to convert Confluence text highlights (``) to HTML `` elements with a hex color value. When disabled, the highlight span is stripped and only the plain text is kept. - Default: `True` - ENV Var: `CME_EXPORT__CONVERT_TEXT_HIGHLIGHTS` ### export.convert_font_colors Whether to convert Confluence font colors to HTML `` elements with a hex color value. Handles both inline-style spans (``) and CSS-class-based spans (``) used in the Confluence export view. When disabled, the color span is stripped and only the plain text is kept. - Default: `True` - ENV Var: `CME_EXPORT__CONVERT_FONT_COLORS` ### export.skip_unchanged Skip exporting pages that have not changed since last export. Uses a lockfile to track page versions. - Default: `True` - ENV Var: `CME_EXPORT__SKIP_UNCHANGED` ### export.cleanup_stale After export, delete local files for pages removed from Confluence or whose export path has changed. - Default: `True` - ENV Var: `CME_EXPORT__CLEANUP_STALE` ### export.lockfile_name Name of the lock file used to track exported pages. - Default: `confluence-lock.json` - ENV Var: `CME_EXPORT__LOCKFILE_NAME` ### export.existence_check_batch_size Number of page IDs per batch when checking page existence during cleanup. Capped at 25 for self-hosted (CQL). - Default: `250` - ENV Var: `CME_EXPORT__EXISTENCE_CHECK_BATCH_SIZE` ## connection_config.\* ### connection_config.backoff_and_retry Enable or disable automatic retry with exponential backoff on network errors. - Default: `True` - ENV Var: `CME_CONNECTION_CONFIG__BACKOFF_AND_RETRY` ### connection_config.backoff_factor Multiplier for exponential backoff between retries. For example, `2` means each retry waits twice as long as the previous. - Default: `2` - ENV Var: `CME_CONNECTION_CONFIG__BACKOFF_FACTOR` ### connection_config.max_backoff_seconds Maximum seconds to wait between retries. 
- Default: `60` - ENV Var: `CME_CONNECTION_CONFIG__MAX_BACKOFF_SECONDS` ### connection_config.max_backoff_retries Maximum number of retry attempts before giving up. - Default: `5` - ENV Var: `CME_CONNECTION_CONFIG__MAX_BACKOFF_RETRIES` ### connection_config.retry_status_codes HTTP status codes that trigger a retry. - Default: `[413, 429, 502, 503, 504]` - ENV Var: `CME_CONNECTION_CONFIG__RETRY_STATUS_CODES` ### connection_config.timeout Timeout in seconds for API requests. Prevents hanging on slow or unresponsive servers. - Default: `30` - ENV Var: `CME_CONNECTION_CONFIG__TIMEOUT` ### connection_config.verify_ssl Whether to verify SSL certificates for HTTPS requests. Set to `False` only if you are sure about the security of your connection. - Default: `True` - ENV Var: `CME_CONNECTION_CONFIG__VERIFY_SSL` ### connection_config.use_v2_api Enable Confluence REST API v2 endpoints. Supported on Atlassian Cloud and Data Center 8+. Disable for self-hosted Server instances. - Default: `False` - ENV Var: `CME_CONNECTION_CONFIG__USE_V2_API` ### connection_config.max_workers Maximum number of parallel workers for page export. Set to `1` for serial/debug mode. Higher values improve performance but may hit API rate limits. - Default: `20` - ENV Var: `CME_CONNECTION_CONFIG__MAX_WORKERS` ================================================ FILE: docs/contributing.md ================================================ --- id: contributing title: Contributing sidebar_position: 7 --- # Contributing If you would like to contribute to `confluence-markdown-exporter`, please read the [contribution guideline](https://github.com/Spenhouet/confluence-markdown-exporter/blob/main/CONTRIBUTING.md) in the repository. ## Reporting issues Use the [GitHub issue tracker](https://github.com/Spenhouet/confluence-markdown-exporter/issues). When reporting, include: 1. Your Confluence flavour and version (Cloud, Server, Data Center) 2. The exact command you ran 3. 
The full output with `cme config set export.log_level=DEBUG` enabled 4. A minimal page (if possible) reproducing the issue ## Docs site The documentation site is built with [Docusaurus](https://docusaurus.io/) and deployed to GitHub Pages. - Sources live under `docs/` in the repository as plain Markdown / MDX. - Local preview: `npm ci && npm start` (serves `http://localhost:3000/confluence-markdown-exporter/`). - Production build with all versions: `npm run build:versioned` then `npm run serve`. ### Versioning Versioning is **driven by git release tags**. There are no `versioned_docs/` folders committed to the repo. At build time, `scripts/build-versions.mjs`: 1. Lists git tags matching `^\d+\.\d+\.\d+$` (the project's release pattern). 2. Filters to tags whose tree already contains a Docusaurus `docs/` + `sidebars.ts`. 3. Snapshots each eligible tag into `versioned_docs/version-<tag>/` by checking out that tag's docs and running `docusaurus docs:version`. 4. Builds with the newest tag set as the default version; HEAD becomes the `Next 🚧` (unreleased) version. That means: cutting a new release tag automatically produces a new docs version on the next site build. Old versions cannot be edited after the fact; they are sourced directly from their git tag. ## License This tool is an open source project released under the [MIT License](https://github.com/Spenhouet/confluence-markdown-exporter/blob/main/LICENSE). ================================================ FILE: docs/docker.md ================================================ --- id: docker title: Docker sidebar_position: 5 --- # Docker Prebuilt images are published to Docker Hub at [`spenhouet/confluence-markdown-exporter`](https://hub.docker.com/r/spenhouet/confluence-markdown-exporter). The Docker image is intended for **non-interactive / CI use**: you supply a pre-defined config (either as a mounted JSON file or as environment variables), and the container runs a single export command and exits.
:::note The interactive `cme config` menu is **not** supported in this mode. Edit the JSON config file directly or change the env vars instead. ::: ## Available tags - `latest`: the most recent release - `<version>` (e.g. `5.1.0`): pinned release version - `<major>` / `<major>.<minor>` (e.g. `5`, `5.1`): rolling tags following the latest release within that range ## Quick start ```bash docker pull spenhouet/confluence-markdown-exporter:latest docker run --rm spenhouet/confluence-markdown-exporter --help ``` The image pins `export.output_path` to `/data/output` (via the `CME_EXPORT__OUTPUT_PATH` env var baked into the image), overriding whatever value the mounted config file has. Bind-mount your host export directory there and exported files appear in it. ## Providing configuration The image reads its config from `/data/config/app_data.json` (set via `CME_CONFIG_PATH`). Generate this file once on a workstation by running `cme config` locally, then check it in to your CI repository or your secret store and mount it into the container, using the same pattern as a Kubernetes ConfigMap volume: ```bash docker run --rm \ -v "$PWD/app_data.json:/data/config/app_data.json:ro" \ -v "$PWD/output:/data/output" \ spenhouet/confluence-markdown-exporter \ pages <page-url> ``` The mounted file must be readable by UID `1000` (the non-root `cme` user inside the image). For a config file managed in a CI runner this is usually already the case; if not, `chmod 644 app_data.json` is enough.
In Docker Compose, the [`configs:`](https://docs.docker.com/reference/compose-file/configs/) top-level key expresses the same mount declaratively: ```yaml services: cme: image: spenhouet/confluence-markdown-exporter:latest command: ["pages", ""] configs: - source: cme_config target: /data/config/app_data.json volumes: - ./output:/data/output configs: cme_config: file: ./app_data.json ``` ## Overriding scalar settings via environment variables Scalar settings can be overridden at runtime with environment variables using the `CME_` prefix and `__` as the nested delimiter: ```bash docker run --rm \ -e CME_EXPORT__LOG_LEVEL=DEBUG \ -e CME_CONNECTION_CONFIG__MAX_WORKERS=5 \ -v "$PWD/app_data.json:/data/config/app_data.json:ro" \ -v "$PWD/output:/data/output" \ spenhouet/confluence-markdown-exporter \ pages ``` See the full [options reference](./configuration/options.md) for every supported `CME_*` env var. ## Auth credentials in environment variables :::warning The `auth.confluence` and `auth.jira` settings are dicts keyed by the instance base URL. That URL key cannot be expressed inside an environment variable name. ::: If you must inject auth credentials via env vars (e.g. to keep secrets out of the JSON file), supply the whole sub-dict as a single JSON-encoded value: ```bash docker run --rm \ -v "$PWD/app_data.json:/data/config/app_data.json:ro" \ -e CME_AUTH__CONFLUENCE="{\"https://company.atlassian.net\":{\"username\":\"$CONFLUENCE_USER\",\"api_token\":\"$CONFLUENCE_API_TOKEN\"}}" \ -v "$PWD/output:/data/output" \ spenhouet/confluence-markdown-exporter \ pages ``` For most CI setups it is simpler to template the JSON file from the CI secret store before running the container. 
## See also - [Authentication](./configuration/authentication.md): full credential setup and scoped-token notes - [CI / non-interactive](./configuration/ci.md): `CI=true`, `NO_COLOR`, log-level control - [Installation](./installation.md): pip / uv / curl / PowerShell installers ================================================ FILE: docs/features.md ================================================ --- id: features title: Features sidebar_position: 3 --- # Features Exports individual pages, pages with descendants, or entire spaces via the Atlassian API. Skips unchanged pages by default, re-exporting only what has changed since the last run. ## Supported Confluence features ### Content & formatting - **Rich text**: headings, paragraphs, bold, italic, underline, lists, tables, links, images, attachments, and image captions - **Code blocks**: language-aware fenced code blocks - **Task lists**: checkboxes with completion state - **Text highlights & font colours**: preserved with inline HTML colour styling - **Status badges**: converted to coloured inline highlights - **Info / note / tip / warning panels**: converted to Markdown alert blocks (`[!NOTE]`, `[!TIP]`, …) - **Comments**: open inline and/or page-level (footer) comments exported as sidecar files next to each page - **Include / excerpt-include macros**: embedded pages either inlined or exported as Obsidian transclusion links (`![[Page Title]]`) ### Page metadata - **Page properties**: Page Properties macro exported as YAML front matter, [Dataview](https://blacksmithgu.github.io/obsidian-dataview/) inline fields, or [Meta Bind](https://www.moritzjung.dev/obsidian-meta-bind-plugin-docs/) VIEW fields; duplicate keys are disambiguated automatically (configurable via [`export.page_properties_format`](./configuration/options.md#exportpage_properties_format)) - **Page Properties Report**: dynamic cross-page property tables exported as a static snapshot or a live 
[Dataview](https://blacksmithgu.github.io/obsidian-dataview/) DQL query (configurable via [`export.page_properties_report_format`](./configuration/options.md#exportpage_properties_report_format)) - **Page labels**: exported as `tags` in YAML front matter ### Diagrams & add-ons - **[draw.io](https://marketplace.atlassian.com/apps/1210933/draw-io-diagrams-uml-bpmn-aws-erd-flowcharts)**: diagram files saved as attachments; embedded Mermaid diagrams extracted as fenced Mermaid blocks - **[PlantUML](https://marketplace.atlassian.com/apps/1222993/flowchart-plantuml-diagrams-for-confluence)**: exported as fenced PlantUML code blocks - **[Markdown Extensions](https://marketplace.atlassian.com/apps/1215703/markdown-extensions-for-confluence)**: pass-through of raw Markdown macro content ================================================ FILE: docs/installation.md ================================================ --- id: installation title: Installation sidebar_position: 1 --- import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import { VerifyTabs } from '@site/src/components/quickstart'; # Installation Pick the install method that fits your environment. All methods produce the same `cme` / `confluence-markdown-exporter` CLI. ```bash curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh ``` Uses [uv](https://docs.astral.sh/uv/) under the hood to create an isolated, self-updating environment. No need to manage a virtualenv yourself. ```bash curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh ``` Uses [uv](https://docs.astral.sh/uv/) under the hood to create an isolated, self-updating environment. No need to manage a virtualenv yourself. ```powershell powershell -ExecutionPolicy ByPass -c "irm https://uvx.sh/confluence-markdown-exporter/install.ps1 | iex" ``` Uses [uv](https://docs.astral.sh/uv/) under the hood. Run from PowerShell. ```bash pip install confluence-markdown-exporter ``` Installs from PyPI into the active Python environment. 
Requires Python ≥ 3.10. If you don't already have a project virtualenv, prefer the **uv** or **Linux/macOS/Windows installer** tabs; they isolate the tool for you. ```bash # Install as an isolated, self-managed tool uv tool install confluence-markdown-exporter # …or run it once without installing uvx confluence-markdown-exporter --help ``` [`uv tool install`](https://docs.astral.sh/uv/concepts/tools/) puts the CLI on your PATH inside its own isolated environment. [`uvx`](https://docs.astral.sh/uv/guides/tools/) runs it ephemerally; handy for one-off exports or CI. ```bash docker pull spenhouet/confluence-markdown-exporter:latest docker run --rm spenhouet/confluence-markdown-exporter --help ``` The Docker image is intended for **non-interactive / CI use**: you supply a pre-defined config (mounted JSON file or env vars) and the container runs a single export command and exits. The interactive `cme config` menu is not available inside the container. Full setup (mounted config, Compose example, env-var auth) is on the [Docker page](./docker.md). ## Pinning a specific version ```bash curl -LsSf uvx.sh/confluence-markdown-exporter/5.1.1/install.sh | sh ``` ```bash curl -LsSf uvx.sh/confluence-markdown-exporter/5.1.1/install.sh | sh ``` ```powershell powershell -ExecutionPolicy ByPass -c "irm https://uvx.sh/confluence-markdown-exporter/5.1.1/install.ps1 | iex" ``` ```bash pip install confluence-markdown-exporter==5.1.1 ``` ```bash uv tool install confluence-markdown-exporter==5.1.1 ``` ```bash docker pull spenhouet/confluence-markdown-exporter:5.1.1 ``` Pinned tags are kept available indefinitely; rolling tags (`latest`, ``, `.`) advance with each release. See [Docker → Available tags](./docker.md#available-tags). ## Verify the install You should see the top-level commands: `pages`, `pages-with-descendants`, `spaces`, `orgs`, and `config`. 
## Next steps - [Authenticate and configure your first export →](./configuration/index.md#interactive-menu) (local install) - [Export pages or whole spaces →](./usage.md) (local install) - [Docker page](./docker.md): non-interactive setup (mounted config + env vars) ================================================ FILE: docs/intro.md ================================================ --- id: intro title: Introduction sidebar_position: 1 --- import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import { AuthenticateTabs, ExportTabs } from '@site/src/components/quickstart'; import Logo from '@site/static/img/logo.png';
confluence-markdown-exporter
> Export Confluence pages to Markdown for Obsidian, Gollum, Azure DevOps, Foam, Dendron and any other Markdown-based platform. Exports individual pages, pages with descendants, or entire Confluence spaces via the Atlassian API into clean Markdown. Skips unchanged pages by default, re-exporting only what has changed since the last run. ## What's in these docs - **[Installation](./installation.md)**: install and update the CLI in one command - **[Usage](./usage.md)**: export pages, descendants, spaces, or organisations - **[Features](./features.md)**: supported Confluence content, macros, and add-ons - **[Configuration](./configuration/index.md)**: every option with defaults and ENV vars - **[Target systems](./configuration/target-systems.md)**: Obsidian, Azure DevOps, … - **[Troubleshooting](./troubleshooting.md)**: known issues and how to report ## Get started in 60 seconds ### 1. Install ```bash curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh ``` ```bash curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh ``` ```powershell powershell -ExecutionPolicy ByPass -c "irm https://uvx.sh/confluence-markdown-exporter/install.ps1 | iex" ``` ```bash pip install confluence-markdown-exporter ``` ```bash uv tool install confluence-markdown-exporter # or, one-shot run without installing: uvx confluence-markdown-exporter --help ``` ```bash docker pull spenhouet/confluence-markdown-exporter:latest docker run --rm spenhouet/confluence-markdown-exporter --help ``` The Docker image is intended for non-interactive / CI use; see the [Docker page](./docker.md) for config-file mounts and environment variables. ### 2. Authenticate ### 3. Export Your Markdown lands in the configured `export.output_path` (current directory by default). 
================================================ FILE: docs/troubleshooting.md ================================================ --- id: troubleshooting title: Troubleshooting sidebar_position: 6 --- # Troubleshooting ## Known issues and limitations ### Missing attachment file ID on Server For some Confluence Server versions / configurations, the attachment file ID is not returned by the API ([#39](https://github.com/Spenhouet/confluence-markdown-exporter/issues/39)). In that case, `{attachment_file_id}` automatically falls back to the content ID, so the default [`export.attachment_path`](./configuration/options.md#exportattachment_path) template still produces unique filenames out of the box. If you prefer human-readable filenames over numeric IDs, set `export.attachment_path` to use `{attachment_title}{attachment_extension}`, e.g.: ```sh cme config set export.attachment_path='{space_name}/attachments/{attachment_title}{attachment_extension}' ``` ### Connection issues behind proxy or VPN There might be connection issues if your Confluence Server is behind a proxy or VPN ([#38](https://github.com/Spenhouet/confluence-markdown-exporter/issues/38)). If you run into this, contributions that help fix it are appreciated. ## Reporting bugs Open an issue on the [GitHub issue tracker](https://github.com/Spenhouet/confluence-markdown-exporter/issues) and include: 1. Your Confluence flavour and version (Cloud, Server, Data Center) 2. The exact command you ran 3. The full output, ideally with `cme config set export.log_level=DEBUG` enabled 4. A minimal example page (if possible) reproducing the issue ================================================ FILE: docs/usage.md ================================================ --- id: usage title: Usage sidebar_position: 2 --- # Usage Run the exporter with the desired Confluence page URL or space URL.
Execute the console application by typing `confluence-markdown-exporter` (or its shorter alias `cme`) followed by one of the commands `pages`, `pages-with-descendants`, `spaces`, `orgs`, or `config`. Add `--help` to any command for additional information. All export commands accept one or more URLs as space-separated arguments. Each command also has a singular alias (`page`, `page-with-descendants`, `space`, `org`) that behaves identically. ## Export pages Export one or more Confluence pages by URL: ```sh cme pages cme pages ... # Singular alias (identical behaviour): cme page ``` Supported page URL formats: - Confluence Cloud: `https://company.atlassian.net/wiki/spaces/SPACEKEY/pages/123456789/Page+Title` - Confluence Cloud (API gateway): `https://api.atlassian.com/ex/confluence/CLOUDID/wiki/spaces/SPACEKEY/pages/123456789/Page+Title` - Confluence Server (long): `https://wiki.company.com/display/SPACEKEY/Page+Title` - Confluence Server (short): `https://wiki.company.com/SPACEKEY/Page+Title` - Confluence Server (param): `https://wiki.company.com/pages/viewpage.action?pageId=123456789` ## Export pages with descendants Export one or more Confluence pages and all their descendant pages by URL: ```sh cme pages-with-descendants cme pages-with-descendants ... # Singular alias (identical behaviour): cme page-with-descendants ``` ## Export spaces Export all Confluence pages of one or more spaces by URL: ```sh cme spaces cme spaces ... # Singular alias (identical behaviour): cme space ``` Supported space URL formats: - Confluence Cloud: `https://company.atlassian.net/wiki/spaces/SPACEKEY` - Confluence Cloud (API gateway): `https://api.atlassian.com/ex/confluence/CLOUDID/wiki/spaces/SPACEKEY` - Confluence Server (long): `https://wiki.company.com/display/SPACEKEY` - Confluence Server (short): `https://wiki.company.com/SPACEKEY` ## Export all spaces of an organization Export all Confluence pages across all spaces of one or more organizations by URL: ```sh cme orgs cme orgs ... 
# Singular alias (identical behaviour): cme org ``` ## Output layout The exported Markdown file(s) will be saved in the configured output directory (see [`export.output_path`](./configuration/options.md#exportoutput_path)) e.g.: ```text output_path/ └── MYSPACE/ ├── MYSPACE.md └── MYSPACE/ ├── My Confluence Page.md └── My Confluence Page/ ├── My nested Confluence Page.md └── Another one.md ``` ================================================ FILE: docusaurus.config.ts ================================================ import { themes as prismThemes } from "prism-react-renderer"; import type { Config } from "@docusaurus/types"; import type * as Preset from "@docusaurus/preset-classic"; const config: Config = { title: "Confluence Markdown Exporter", tagline: "Export Confluence pages to Markdown for Obsidian, Gollum, Azure DevOps, Foam, Dendron and more.", favicon: "img/favicon.svg", url: "https://spenhouet.github.io", baseUrl: "/confluence-markdown-exporter/", organizationName: "Spenhouet", projectName: "confluence-markdown-exporter", trailingSlash: false, onBrokenLinks: "throw", i18n: { defaultLocale: "en", locales: ["en"], }, presets: [ [ "classic", { docs: { sidebarPath: "./sidebars.ts", routeBasePath: "/", editUrl: "https://github.com/Spenhouet/confluence-markdown-exporter/edit/main/", showLastUpdateAuthor: true, showLastUpdateTime: true, // Versioning is driven by git tags via scripts/build-versions.mjs. // The script writes versioned_docs/, versioned_sidebars/, versions.json // at build time and exports DOCS_LAST_VERSION pointing at the newest tag. lastVersion: process.env.DOCS_LAST_VERSION || "current", versions: { current: { label: process.env.DOCS_LAST_VERSION ? "Next 🚧" : "Current", path: process.env.DOCS_LAST_VERSION ? "next" : "", banner: process.env.DOCS_LAST_VERSION ? 
"unreleased" : "none", }, }, }, blog: false, theme: { customCss: "./src/css/custom.css", }, sitemap: { changefreq: "weekly", priority: 0.5, }, } satisfies Preset.Options, ], ], themeConfig: { image: "img/logo.png", colorMode: { defaultMode: "dark", respectPrefersColorScheme: true, }, announcementBar: { id: "github_star", content: '⭐ If you like confluence-markdown-exporter, star it on GitHub!', backgroundColor: "var(--ifm-color-primary-darker)", textColor: "#ffffff", isCloseable: true, }, navbar: { title: "Confluence Markdown Exporter", logo: { alt: "confluence-markdown-exporter logo", src: "img/favicon.svg", }, items: [ { type: "docSidebar", sidebarId: "docsSidebar", position: "left", label: "Docs", }, { type: "docsVersionDropdown", position: "right", dropdownActiveClassDisabled: true, }, { href: "https://pypi.org/project/confluence-markdown-exporter/", label: "PyPI", position: "right", }, { href: "https://github.com/Spenhouet/confluence-markdown-exporter", position: "right", className: "header-github-link", "aria-label": "GitHub repository", }, ], }, footer: { style: "dark", links: [ { title: "Docs", items: [ { label: "Installation", to: "/installation" }, { label: "Usage", to: "/usage" }, { label: "Configuration", to: "/configuration/" }, { label: "Features", to: "/features" }, ], }, { title: "Community", items: [ { label: "Issues", href: "https://github.com/Spenhouet/confluence-markdown-exporter/issues", }, { label: "Discussions", href: "https://github.com/Spenhouet/confluence-markdown-exporter/discussions", }, ], }, { title: "More", items: [ { label: "Contributing", to: "/contributing" }, { label: "GitHub", href: "https://github.com/Spenhouet/confluence-markdown-exporter", }, { label: "PyPI", href: "https://pypi.org/project/confluence-markdown-exporter/", }, ], }, ], copyright: `Copyright © ${new Date().getFullYear()} Sebastian Penhouet. 
Built with Docusaurus.`, }, prism: { theme: prismThemes.github, darkTheme: prismThemes.dracula, additionalLanguages: ["bash", "powershell", "yaml", "json", "toml", "diff"], }, docs: { sidebar: { hideable: true, autoCollapseCategories: false, }, }, tableOfContents: { minHeadingLevel: 2, maxHeadingLevel: 4, }, } satisfies Preset.ThemeConfig, plugins: [ [ require.resolve("@easyops-cn/docusaurus-search-local"), { hashed: true, indexBlog: false, docsRouteBasePath: "/", highlightSearchTermsOnTargetPage: true, explicitSearchResultPath: true, }, ], ], markdown: { mermaid: false, hooks: { onBrokenMarkdownLinks: "warn", }, }, }; export default config; ================================================ FILE: package.json ================================================ { "name": "confluence-markdown-exporter-docs", "version": "0.0.0", "private": true, "scripts": { "docusaurus": "docusaurus", "start": "docusaurus start", "build": "docusaurus build", "build:versioned": "node scripts/build-versions.mjs", "swizzle": "docusaurus swizzle", "deploy": "docusaurus deploy", "clear": "docusaurus clear", "serve": "docusaurus serve", "write-translations": "docusaurus write-translations", "write-heading-ids": "docusaurus write-heading-ids", "typecheck": "tsc" }, "dependencies": { "@docusaurus/core": "^3.10.1", "@docusaurus/preset-classic": "^3.10.1", "@easyops-cn/docusaurus-search-local": "^0.46.1", "@mdx-js/react": "^3.0.0", "clsx": "^2.0.0", "prism-react-renderer": "^2.3.0", "react": "^18.0.0", "react-dom": "^18.0.0" }, "devDependencies": { "@docusaurus/module-type-aliases": "^3.10.1", "@docusaurus/tsconfig": "^3.10.1", "@docusaurus/types": "^3.10.1", "typescript": "~5.5.2" }, "browserslist": { "production": [ ">0.5%", "not dead", "not op_mini all" ], "development": [ "last 3 chrome version", "last 3 firefox version", "last 5 safari version" ] }, "engines": { "node": ">=18.0" } } ================================================ FILE: pyproject.toml 
================================================ [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [project] name = "confluence-markdown-exporter" version = "5.1.1" description = "A tool to export Confluence pages to Markdown" keywords = ["confluence", "atlassian", "markdown", "export", "conversion", "download"] readme = "README.md" license = { text = "MIT" } authors = [ { name = "Sebastian Penhouet" } ] requires-python = ">= 3.10" dependencies = [ 'atlassian-python-api', 'jmespath', 'markdownify', 'pydantic-settings', 'pyyaml', 'questionary', 'rich', 'tabulate', 'typer', 'python-dateutil', "lxml>=6.0.2", ] [dependency-groups] dev = [ "pytest>=8.4.1", "ruff>=0.11.13", ] [project.urls] Homepage = "https://github.com/Spenhouet/confluence-markdown-exporter" Documentation = "https://spenhouet.github.io/confluence-markdown-exporter/" Source = "https://github.com/Spenhouet/confluence-markdown-exporter" Tracker = "https://github.com/Spenhouet/confluence-markdown-exporter/issues" [project.scripts] confluence-markdown-exporter = "confluence_markdown_exporter.main:app" cme = "confluence_markdown_exporter.main:app" [tool.hatch.build.targets.wheel] packages = ["confluence_markdown_exporter"] [tool.ruff] # Exclude a variety of commonly ignored directories. This means Ruff will not lint or format files with these names exclude = [ ".bzr", ".direnv", ".eggs", ".git", ".git-rewrite", ".hg", ".ipynb_checkpoints", ".mypy_cache", ".nox", ".pants.d", ".pyenv", ".pytest_cache", ".pytype", ".ruff_cache", ".svn", ".tox", ".venv", ".vscode", "__pypackages__", "_build", "buck-out", "build", "dist", "node_modules", "site-packages", "venv", ] indent-width = 4 # each indent is 4 spaces, equivalent to using "tab" line-length = 100 # max no of characters in a line.
Black default is 88 characters target-version = "py310" # Assumes Python 3.10 and above [tool.ruff.lint] select = [ "A", # flake8-builtins "B", # flake8-bugbear "D", # pydocstyle "E", # pycodestyle errors "F", # pyflakes "G", # flake8-logging-format "I", # isort "N", # pep8-naming "S", # flake8-bandit "W", # pycodestyle warnings "C4", # flake8-comprehensions "EM", # flake8-errmsg "PD", # pandas-vet "PL", # Pylint "UP", # pyupgrade - auto-upgrade syntax for current version of Python "ANN", # flake8-annotations "BLE", # flake8-blind-except "C90", # McCabe complexity checker "ERA", # eradicate - removes commented out code "FBT", # flake8-boolean-trap "FLY", # flynt "ICN", # flake8-import-conventions "LOG", # flake8-logger "NPY", # numpy-specific rules "PGH", # pygrep-hooks "PIE", # flake8-pie "RET", # flake8-return "RSE", # flake8-raise "SIM", # flake8-simplify "RUF", # ruff-specific rules "TCH", # flake8-type-checking "TID", # flake8-tidy-imports "TRY", # tryceratops "ASYNC", # flake8-async "PT", # flake8-pytest-style "FAST", # FastAPI, "T20", # flake8-print "ARG", # flake8-unused-arguments "PTH", # flake8-use-pathlib "PERF", # Perflint "FURB", # refurb ] ignore = [ "W191", # lint rule that may clash with Ruff Formatter: tab-indentation "E111", # lint rule that may clash with Ruff Formatter: indentation-with-invalid-multiple "E114", # lint rule that may clash with Ruff Formatter: indentation-with-invalid-multiple-comment "E117", # lint rule that may clash with Ruff Formatter: over-indented "D206", # lint rule that may clash with Ruff Formatter: indent-with-spaces "D300", # lint rule that may clash with Ruff Formatter: triple-single-quotes "D1", # ignore this to match google docstring convention "G004", # ignore this to allow f-strings in logging "UP015", # ignore this to allow "with open" statements to have modes explicitly stated "SIM102", # ignore this to avoid changing nested if statements to single if statements, potentially confusing "ERA001", # ignore this to 
keep commented out lines while functionality is not implemented (configs/logos) "PERF203", # ignore this as this often is intentional. "ARG002", # Many methods in this project share the same signature, independent of variable usage. "PLC0415", # Allow lazy loading of imports ] fixable = ["ALL"] # Allow fix for all enabled rules (when using "Fix all" or when `--fix` is provided to ruff check in CLI) unfixable = ["F401"] # disable autofix for unused-imports dummy-variable-rgx = "^(_+\\w*)$" # Allow unused variables when underscore-prefixed flake8-bugbear.extend-immutable-calls = [ "fastapi.Depends", "fastapi.Query", ] # Allow default arguments like, e.g., `data: List[str] = fastapi.Query(None)` pycodestyle.max-doc-length = 100 # max line-length for docstrings pydocstyle.convention = "google" # docstring convention. Options: "google", "numpy", or "pep257" pylint.max-args = 10 # max no of args in a function [tool.ruff.lint.isort] known-first-party = ["airamed", "main"] force-single-line = true # force each import to be in its own line [tool.ruff.format] docstring-code-format = true # Enable auto-formatting of code examples in docstrings. Markdown, reStructuredText code/literal blocks and doctests are all supported docstring-code-line-length = "dynamic" # Set line length limit used when formatting code snippets in docstrings.
This only has an effect when the `docstring-code-format` setting is enabled indent-style = "space" # indent with spaces, rather than "tab" line-ending = "lf" # options: "auto", "lf", "cr-lf", "native" quote-style = "double" # Use double quotes as voted by majority skip-magic-trailing-comma = false # respects magic trailing commas # Ignore S101 (assert) in all test files [tool.ruff.lint.per-file-ignores] "tests/**/*.py" = [ "S101", # Assert in tests is expected "S110", # try-except-pass detected "FBT001", # Often conflicts in tests "PLR2004", # Magic numbers are acceptable in tests ] ================================================ FILE: scripts/build-versions.mjs ================================================ #!/usr/bin/env node /** * Build the Docusaurus site with per-tag versioned docs derived from git history. * * Strategy: * 1. List git tags matching the project release pattern (e.g. "5.1.0"). * 2. Keep only the tags whose tree already contains a Docusaurus-style * docs/ tree and sidebars.ts (i.e. tags cut after docs migration). * 3. For each eligible tag (newest -> oldest): * a. Copy docs/ + sidebars.ts from that tag into the working tree. * b. Run `docusaurus docs:version ` to snapshot it. * c. Restore HEAD docs/ + sidebars.ts. * 4. Set DOCS_LAST_VERSION env var to the newest tag and invoke `npm run build`. * * versioned_docs/, versioned_sidebars/, and versions.json are NOT committed to * the repo (see .gitignore). They are regenerated on every build. 
/**
 * Snapshot the docs of one release tag as a Docusaurus version.
 *
 * Extracts `docs/` and `sidebars.ts` from `tag` into a temp dir, swaps them
 * into the working tree, runs `docusaurus docs:version` for the tag, and then
 * puts the HEAD versions of both paths back.
 *
 * @param {string} tag Release tag to snapshot (callers pass tags that already
 *   matched the numeric `X.Y.Z` filter in `listTags`).
 * @throws If any of the underlying git / tar / docusaurus commands fails; the
 *   working tree is still restored before the error propagates.
 */
function snapshotTag(tag) {
  log(`Snapshotting ${tag}`);
  const work = mkdtempSync(join(tmpdir(), `docs-${tag}-`));
  // Set once we start overwriting the working tree, so the cleanup below only
  // wipes docs/ when it actually holds the tag's files.
  let swapped = false;
  try {
    // Extract docs/ and sidebars.ts from the tag into a temp dir.
    // The temp path is quoted: tmpdir() may contain spaces.
    sh(`git archive ${tag} docs sidebars.ts | tar -x -C "${work}"`, {
      shell: "/bin/bash",
    });

    // Swap into the working tree, then snapshot, then restore HEAD.
    swapped = true;
    rmSync("docs", { recursive: true, force: true });
    cpSync(join(work, "docs"), "docs", { recursive: true });
    cpSync(join(work, "sidebars.ts"), "sidebars.ts");

    execSync(`npx --no-install docusaurus docs:version ${tag}`, {
      stdio: "inherit",
    });
  } finally {
    rmSync(work, { recursive: true, force: true });
    // `git checkout HEAD -- docs` only rewrites paths tracked at HEAD; files
    // that exist in the tag but were later deleted would survive as untracked
    // leftovers and end up in the "Current" build. Drop the swapped-in tree
    // first so the checkout recreates exactly HEAD's docs/.
    if (swapped) {
      rmSync("docs", { recursive: true, force: true });
    }
    // Restore HEAD versions of docs/ and sidebars.ts.
    execSync("git checkout HEAD -- docs sidebars.ts", { stdio: "ignore" });
  }
}
#!/usr/bin/env bash
# Bump every version-pinning reference in README and the documentation tree.
#
# Patterns rewritten (VERSION is the pinned release):
#   uvx.sh/confluence-markdown-exporter/VERSION/install.sh
#   uvx.sh/confluence-markdown-exporter/VERSION/install.ps1
#   confluence-markdown-exporter==VERSION
#   spenhouet/confluence-markdown-exporter:VERSION   (Docker pin; :latest left alone)
#
# Auto-discovers any file under README.md, docs/, or src/ that contains one of
# the patterns above, so no explicit file list needs to be maintained when new
# docs pages adopt version-pinning snippets.
#
# NOTE: relies on `mapfile` (bash 4+) and on GNU-style `sed -i` / `xargs -r`.
#
# Usage: scripts/bump-docs-version.sh VERSION

set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "usage: $0 VERSION" >&2
  exit 1
fi

NEW="$1"

# Validate: tolerate "1.2.3", "1.2.3a4", "1.2.3rc1", "1.2.3.post1", "1.2.3-beta.2".
# The separator in front of a suffix must be optional: PEP 440 pre-releases
# such as "rc1" attach directly to the release number. The regex lives in a
# variable so bash treats it as a pattern rather than re-parsing it as shell
# syntax.
version_re='^[0-9]+\.[0-9]+\.[0-9]+([.-]?[0-9A-Za-z]+)*$'
if [[ ! "$NEW" =~ $version_re ]]; then
  echo "error: '$NEW' does not look like a valid version" >&2
  exit 1
fi

PATTERN='(uvx\.sh/confluence-markdown-exporter/[^/[:space:]]+/install\.(sh|ps1)|confluence-markdown-exporter==[0-9]|spenhouet/confluence-markdown-exporter:[0-9])'

mapfile -t files < <(
  # Search README.md, docs/, src/ if they exist. Candidate paths are passed
  # NUL-delimited so names containing spaces reach grep intact; grep then
  # prints one matching path per line for mapfile. stderr is silenced to
  # suppress "No such file" noise.
  for root in README.md docs src; do
    [[ -e "$root" ]] || continue
    if [[ -f "$root" ]]; then
      printf '%s\0' "$root"
    else
      find "$root" -type f \( -name '*.md' -o -name '*.mdx' -o -name '*.tsx' -o -name '*.ts' \) -print0
    fi
  done | xargs -0 -r grep -lE "$PATTERN" 2>/dev/null
)

if [[ ${#files[@]} -eq 0 ]]; then
  echo "No files contain version-pin patterns; nothing to update."
  exit 0
fi

# Rewrite every pin in place. "|" is the sed delimiter because the patterns
# contain "/"; NEW cannot contain "|" after the validation above.
for f in "${files[@]}"; do
  sed -i \
    -e "s|uvx\.sh/confluence-markdown-exporter/[^/[:space:]]*/install\.sh|uvx.sh/confluence-markdown-exporter/${NEW}/install.sh|g" \
    -e "s|uvx\.sh/confluence-markdown-exporter/[^/[:space:]]*/install\.ps1|uvx.sh/confluence-markdown-exporter/${NEW}/install.ps1|g" \
    -e "s|confluence-markdown-exporter==[0-9A-Za-z.\\-]*|confluence-markdown-exporter==${NEW}|g" \
    -e "s|spenhouet/confluence-markdown-exporter:[0-9][0-9A-Za-z.\\-]*|spenhouet/confluence-markdown-exporter:${NEW}|g" \
    "$f"
  echo "updated: $f"
done
================================================ FILE: src/components/HomepageFeatures/index.tsx ================================================ import React, { type ReactNode } from "react"; import clsx from "clsx"; import Link from "@docusaurus/Link"; import styles from "./styles.module.css"; type Feature = { icon: string; title: string; description: ReactNode; href: string; }; const FEATURES: Feature[] = [ { icon: "🚀", title: "One-command install", href: "/installation", description: ( <> A single curl/PowerShell line installs an isolated, self-updating CLI via uv. No virtualenv juggling. ), }, { icon: "📚", title: "Pages, spaces, orgs", href: "/usage", description: ( <> Export a single page, a page subtree, an entire space, or every space in your Atlassian organisation. ), }, { icon: "⚡", title: "Incremental by default", href: "/features", description: ( <> Skips unchanged pages using a lockfile. Re-runs export only what actually moved since last time. ), }, { icon: "🎯", title: "Target presets", href: "/configuration/target-systems", description: ( <> Pre-baked configurations for Obsidian (wiki links, Dataview, Meta Bind) and Azure DevOps wikis (sanitized filenames, attachments folder). ), }, { icon: "🧩", title: "Macros & add-ons", href: "/features", description: ( <> Status badges, panels, page properties, draw.io, PlantUML, Mermaid, include/excerpt: all converted to portable Markdown. ), }, { icon: "🔐", title: "Cloud & Server", href: "/configuration/authentication", description: ( <> Works against Confluence Cloud, the Atlassian API gateway, and on-premise Server / Data Center. API tokens, PATs, scoped tokens: all supported. ), }, ]; function FeatureCard({ icon, title, description, href }: Feature) { return (

{title}

{description}

); } export default function HomepageFeatures(): ReactNode { return (
{FEATURES.map((f) => ( ))}
); } ================================================ FILE: src/components/HomepageFeatures/styles.module.css ================================================ .features { padding: 4rem 0; width: 100%; } .featureCol { margin-bottom: 1.5rem; text-decoration: none !important; } .featureCol:hover { text-decoration: none !important; } .featureCard { display: flex; flex-direction: column; gap: 0.25rem; } @media (max-width: 768px) { .features { padding: 2rem 0; } } ================================================ FILE: src/components/quickstart/index.tsx ================================================ import React, { type ReactNode } from "react"; import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; import CodeBlock from "@theme/CodeBlock"; import Link from "@docusaurus/Link"; /** * Build a six-tab group keyed by the install-method groupId, so it stays in * sync with the install tabs on landing / intro / installation pages. * * The five non-docker tabs share the same `local` content; the docker tab * shows the container equivalent. */ function makeStepTabs(local: ReactNode, docker: ReactNode) { return ( {local} {local} {local} {local} {local} {docker} ); } /** Step 2: Authenticate. Interactive `cme config` locally, JSON config for Docker. */ export function AuthenticateTabs() { return makeStepTabs( {`cme config edit auth.confluence`}, <>

The container has no interactive menu. Generate the JSON config on a workstation first, then mount it (or pass credentials via{" "} CME_AUTH__* env vars):

{`# Writes ~/.config/confluence-markdown-exporter/app_data.json cme config edit auth.confluence`}

Copy that app_data.json to your CI repo or secret store, then mount it on every container run (next step). See the{" "} Docker page for the env-var alternative.

, ); } /** Step 3: Export. `cme pages …` locally, `docker run … pages …` for Docker. */ export function ExportTabs() { return makeStepTabs( {`# A page, a subtree, an entire space, or every space of an org: cme pages https://example.atlassian.net/wiki/spaces/SPACE/pages/123/Title cme spaces https://example.atlassian.net/wiki/spaces/SPACE cme orgs https://example.atlassian.net`} , {`docker run --rm \\ -v "$PWD/app_data.json:/data/config/app_data.json:ro" \\ -v "$PWD/output:/data/output" \\ spenhouet/confluence-markdown-exporter \\ pages https://example.atlassian.net/wiki/spaces/SPACE/pages/123/Title`} , ); } /** "Verify the install" tab variants for the installation page. */ export function VerifyTabs() { return makeStepTabs( {`cme --help`}, {`docker run --rm spenhouet/confluence-markdown-exporter --help`} , ); } ================================================ FILE: src/css/custom.css ================================================ /** * Theme overrides for Docusaurus Infima. * Primary palette tuned for a modern docs look. 
*/ :root { --ifm-color-primary: #5b6cff; --ifm-color-primary-dark: #3c50ff; --ifm-color-primary-darker: #2c41ff; --ifm-color-primary-darkest: #0026e6; --ifm-color-primary-light: #7a88ff; --ifm-color-primary-lighter: #8a96ff; --ifm-color-primary-lightest: #b5bdff; --ifm-code-font-size: 90%; --ifm-font-family-base: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; --ifm-font-family-monospace: "JetBrains Mono", ui-monospace, SFMono-Regular, "SF Mono", Consolas, "Liberation Mono", monospace; --ifm-heading-font-weight: 700; --ifm-h1-font-size: 2.5rem; --ifm-h2-font-size: 1.75rem; --ifm-h3-font-size: 1.25rem; --ifm-navbar-shadow: 0 1px 0 0 rgb(0 0 0 / 5%); --ifm-navbar-background-color: rgba(255, 255, 255, 0.85); --ifm-navbar-link-hover-color: var(--ifm-color-primary); --ifm-toc-border-color: transparent; --ifm-table-stripe-background: rgba(0, 0, 0, 0.02); --ifm-table-border-color: rgba(0, 0, 0, 0.08); --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.08); } [data-theme="dark"] { --ifm-color-primary: #8a96ff; --ifm-color-primary-dark: #6a7aff; --ifm-color-primary-darker: #5b6cff; --ifm-color-primary-darkest: #3c50ff; --ifm-color-primary-light: #a4adff; --ifm-color-primary-lighter: #b5bdff; --ifm-color-primary-lightest: #d4d9ff; --ifm-background-color: #0d1117; --ifm-background-surface-color: #161b22; --ifm-navbar-background-color: rgba(13, 17, 23, 0.85); --ifm-table-stripe-background: rgba(255, 255, 255, 0.03); --ifm-table-border-color: rgba(255, 255, 255, 0.08); --docusaurus-highlighted-code-line-bg: rgba(255, 255, 255, 0.08); } @font-face { font-family: "Inter"; font-style: normal; font-weight: 100 900; font-display: swap; src: url("https://rsms.me/inter/font-files/InterVariable.woff2") format("woff2"); } html { scroll-padding-top: var(--ifm-navbar-height); } .navbar { backdrop-filter: saturate(180%) blur(20px); -webkit-backdrop-filter: saturate(180%) blur(20px); } .navbar__title { font-weight: 
700; } .header-github-link::before { content: ""; display: inline-block; width: 24px; height: 24px; background-color: var(--ifm-navbar-link-color); mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'%3E%3Cpath d='M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12'/%3E%3C/svg%3E"); mask-repeat: no-repeat; mask-size: contain; vertical-align: middle; } .header-github-link:hover::before { background-color: var(--ifm-color-primary); } .header-github-link { font-size: 0; padding: 0.5rem !important; } /* Hero */ .hero { background: linear-gradient( 135deg, var(--ifm-color-primary-darkest) 0%, var(--ifm-color-primary) 50%, var(--ifm-color-primary-light) 100% ); color: #fff; padding: 4rem 0 5rem; position: relative; overflow: hidden; } .hero::before { content: ""; position: absolute; inset: 0; background: radial-gradient(circle at 20% 20%, rgba(255, 255, 255, 0.15), transparent 50%), radial-gradient(circle at 80% 80%, rgba(255, 255, 255, 0.1), transparent 50%); pointer-events: none; } .hero > .container { position: relative; z-index: 1; } .hero__title { font-size: 3rem; font-weight: 800; letter-spacing: -0.02em; } .hero__subtitle { font-size: 1.25rem; opacity: 0.92; max-width: 36rem; margin: 1rem auto 0; } .hero-logo { max-width: 480px; width: 80%; margin-bottom: 1.5rem; filter: 
drop-shadow(0 12px 32px rgba(0, 0, 0, 0.25)); } @media (max-width: 768px) { .hero-logo { max-width: 320px; width: 90%; } } .button--hero { background: #fff; color: var(--ifm-color-primary-darkest); border: none; font-weight: 600; transition: transform 0.15s ease, box-shadow 0.15s ease; } .button--hero:hover { background: #fff; color: var(--ifm-color-primary-darker); transform: translateY(-1px); box-shadow: 0 6px 20px rgba(0, 0, 0, 0.2); } .button--hero-secondary { background: transparent; color: #fff; border: 1px solid rgba(255, 255, 255, 0.5); font-weight: 600; } .button--hero-secondary:hover { background: rgba(255, 255, 255, 0.1); color: #fff; border-color: #fff; } /* Feature cards */ .feature-card { height: 100%; padding: 1.75rem; border-radius: 12px; background: var(--ifm-background-surface-color); border: 1px solid var(--ifm-color-emphasis-200); transition: transform 0.15s ease, border-color 0.15s ease, box-shadow 0.15s ease; } .feature-card:hover { transform: translateY(-2px); border-color: var(--ifm-color-primary); box-shadow: 0 12px 24px -8px rgba(91, 108, 255, 0.25); } .feature-card h3 { margin: 0.75rem 0 0.5rem; font-size: 1.15rem; } .feature-card p { color: var(--ifm-color-emphasis-700); margin: 0; font-size: 0.95rem; } .feature-icon { display: inline-flex; align-items: center; justify-content: center; width: 48px; height: 48px; border-radius: 12px; background: linear-gradient( 135deg, var(--ifm-color-primary) 0%, var(--ifm-color-primary-light) 100% ); font-size: 1.5rem; } /* Code blocks polish */ .theme-code-block { border-radius: 10px; } /* Admonition tweaks */ .alert--info, .alert--note, .alert--tip, .alert--warning, .alert--danger { border-left-width: 4px; } /* Table polish */ table { border-radius: 8px; overflow: hidden; } ================================================ FILE: src/pages/index.module.css ================================================ .heroBanner { text-align: center; } .buttons { display: flex; align-items: center; justify-content: 
center; gap: 1rem; margin-top: 2rem; flex-wrap: wrap; } .quickstart { padding: 3rem 0 5rem; } .quickstartTitle { text-align: center; font-size: 2rem; letter-spacing: -0.01em; margin-bottom: 0.5rem; } .quickstartLead { text-align: center; color: var(--ifm-color-emphasis-700); margin-bottom: 2rem; font-size: 1.05rem; } .quickstartFooter { text-align: center; margin-top: 2rem; color: var(--ifm-color-emphasis-600); font-size: 0.9rem; } .stepTitle { font-size: 1.15rem; font-weight: 600; margin: 2rem 0 0.75rem; color: var(--ifm-color-emphasis-900); letter-spacing: -0.005em; } .stepTitle:first-of-type { margin-top: 1rem; } @media (max-width: 768px) { .quickstart { padding: 2rem 0; } } ================================================ FILE: src/pages/index.tsx ================================================ import React, { type ReactNode } from "react"; import clsx from "clsx"; import Link from "@docusaurus/Link"; import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; import Layout from "@theme/Layout"; import CodeBlock from "@theme/CodeBlock"; import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; import HomepageFeatures from "@site/src/components/HomepageFeatures"; import { AuthenticateTabs, ExportTabs, } from "@site/src/components/quickstart"; import styles from "./index.module.css"; function HomepageHeader() { const { siteConfig } = useDocusaurusContext(); return (
{siteConfig.title}

{siteConfig.tagline}

Get started → View on GitHub
); } const INSTALL_SNIPPETS = { linux: `# Installs an isolated, self-updating CLI via uv. curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh`, macos: `# Installs an isolated, self-updating CLI via uv. curl -LsSf uvx.sh/confluence-markdown-exporter/install.sh | sh`, windows: `powershell -ExecutionPolicy ByPass -c "irm https://uvx.sh/confluence-markdown-exporter/install.ps1 | iex"`, pip: `pip install confluence-markdown-exporter`, uv: `# Install as an isolated tool… uv tool install confluence-markdown-exporter # …or run it once without installing: uvx confluence-markdown-exporter --help`, docker: `# Pull and run the prebuilt image (non-interactive / CI use). docker pull spenhouet/confluence-markdown-exporter:latest docker run --rm spenhouet/confluence-markdown-exporter --help`, }; function InstallTabs() { return ( {INSTALL_SNIPPETS.linux} {INSTALL_SNIPPETS.macos} {INSTALL_SNIPPETS.windows} {INSTALL_SNIPPETS.pip} {INSTALL_SNIPPETS.uv} {INSTALL_SNIPPETS.docker} ); } function QuickstartSection() { return (

Get going in 60 seconds

Install, authenticate, export. That's the whole flow.

1. Install

2. Authenticate

3. Export

Detailed setup and per-target presets in the{" "} installation docs.

); } export default function Home(): ReactNode { const { siteConfig } = useDocusaurusContext(); return (
"""Shared test fixtures and configuration for confluence-markdown-exporter tests."""

import atexit
import importlib
import os
import shutil
import sys
import tempfile
from collections.abc import Generator
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock

# Isolate tests from the developer's user config. The package binds APP_CONFIG_PATH
# at import time from CME_CONFIG_PATH (or, when unset, typer.get_app_dir() which
# resolves to ~/.config/confluence-markdown-exporter/app_data.json on Linux).
# Without this, local settings like `page_href="wiki"` leak into tests that rely
# on the schema defaults.
# This must run BEFORE the package imports below, hence the E402 suppressions.
_test_config_dir = tempfile.mkdtemp(prefix="cme-test-config-")
os.environ["CME_CONFIG_PATH"] = str(Path(_test_config_dir) / "app_data.json")
# mkdtemp() never cleans up after itself; remove the directory when the
# interpreter exits so repeated test runs do not pile up temp directories.
atexit.register(shutil.rmtree, _test_config_dir, ignore_errors=True)

import pytest  # noqa: E402
from pydantic import SecretStr  # noqa: E402

from confluence_markdown_exporter.utils.app_data_store import ApiDetails  # noqa: E402
from confluence_markdown_exporter.utils.app_data_store import AuthConfig  # noqa: E402
from confluence_markdown_exporter.utils.app_data_store import ConfigModel  # noqa: E402
from confluence_markdown_exporter.utils.app_data_store import ConnectionConfig  # noqa: E402
from confluence_markdown_exporter.utils.app_data_store import ExportConfig  # noqa: E402

# Store original functions before any patching. Both stay None until
# pytest_configure saves the real factory functions into them.
_original_get_confluence = None
_original_get_jira = None
@pytest.fixture(autouse=True)
def restore_api_functions_for_specific_tests(
    request: pytest.FixtureRequest,
) -> Generator[None, None, None]:
    """Restore original API functions for api_clients tests that test those functions.

    For tests in ``test_api_clients.py`` whose node id contains
    ``TestGetConfluenceInstance`` or ``TestGetJiraInstance``, the real
    ``get_confluence_instance`` / ``get_jira_instance`` saved in the module-level
    ``_original_*`` globals are put back for the duration of the test, and fresh
    mocks are installed again afterwards. All other tests are left untouched.

    Args:
        request: The pytest request for the test being set up; used to identify
            the test file and node id.

    Yields:
        None. Control returns to the test between setup and teardown.
    """
    import confluence_markdown_exporter.api_clients

    api_clients = confluence_markdown_exporter.api_clients

    # Check if this is a test that needs the original functions.
    # `request.path` replaces the deprecated `request.fspath`, which is only
    # available while pytest's legacypath plugin is loaded.
    node_id = request.node.nodeid
    is_api_client_function_test = "test_api_clients.py" in str(request.path) and any(
        class_name in node_id
        for class_name in ("TestGetConfluenceInstance", "TestGetJiraInstance")
    )

    if is_api_client_function_test and _original_get_confluence and _original_get_jira:
        # Temporarily restore original functions
        api_clients.get_confluence_instance = _original_get_confluence
        api_clients.get_jira_instance = _original_get_jira

        # Force reimport in the test module to pick up the restored functions
        # This is needed because the test module imported the mocked versions at collection time
        test_module = sys.modules.get("tests.unit.test_api_clients")
        if test_module is not None:
            importlib.reload(test_module)

    yield

    # Re-apply mocks after the test
    if is_api_client_function_test:
        mock_confluence = MagicMock()
        mock_confluence.get_all_spaces.return_value = []
        mock_jira = MagicMock()

        api_clients.get_confluence_instance = lambda _url: mock_confluence
        api_clients.get_jira_instance = lambda _url: mock_jira

Test content

"}}, "space": {"key": "TEST"}, "version": {"number": 1}, } return mock_client @pytest.fixture def mock_jira_client() -> MagicMock: """Create a mock Jira client for testing.""" mock_client = MagicMock() mock_client.get_all_projects.return_value = [ {"key": "TEST", "name": "Test Project", "id": "10000"} ] mock_client.get_issue.return_value = { "key": "TEST-123", "fields": { "summary": "Test Issue", "description": "Test description", "status": {"name": "Open"}, }, } return mock_client SAMPLE_CONFLUENCE_URL = "https://test.atlassian.net" @pytest.fixture def sample_api_details() -> ApiDetails: """Create sample API details for testing.""" return ApiDetails( username=SecretStr("test@example.com"), api_token=SecretStr("test-token"), pat=SecretStr("test-pat"), ) @pytest.fixture def sample_connection_config() -> ConnectionConfig: """Create sample connection configuration for testing.""" return ConnectionConfig( backoff_and_retry=True, backoff_factor=2, max_backoff_seconds=60, max_backoff_retries=5, retry_status_codes=[413, 429, 502, 503, 504], verify_ssl=True, ) @pytest.fixture def sample_config_model( sample_api_details: ApiDetails, sample_connection_config: ConnectionConfig, temp_config_dir: Path, ) -> ConfigModel: """Create sample configuration for testing.""" auth_config = AuthConfig( confluence={SAMPLE_CONFLUENCE_URL: sample_api_details}, jira={SAMPLE_CONFLUENCE_URL: sample_api_details}, ) export_config = ExportConfig( output_path=temp_config_dir / "output", ) return ConfigModel( auth=auth_config, export=export_config, connection_config=sample_connection_config, ) @pytest.fixture def confluence_page_response() -> dict[str, Any]: """Sample Confluence page response for testing.""" return { "id": "123456", "type": "page", "status": "current", "title": "Test Page", "space": {"key": "TEST", "name": "Test Space", "id": "123"}, "version": { "number": 1, "when": "2023-01-01T00:00:00.000Z", "by": {"displayName": "Test User", "username": "testuser"}, }, "ancestors": [], 
"children": {"page": {"results": [], "size": 0}}, "descendants": {"page": {"results": [], "size": 0}}, "body": { "storage": { "value": ( "

Test Heading

Test content with bold text.

" ), "representation": "storage", } }, "_links": { "webui": "/spaces/TEST/pages/123456/Test+Page", "base": "https://test.atlassian.net/wiki", }, } @pytest.fixture def confluence_space_response() -> dict[str, Any]: """Sample Confluence space response for testing.""" return { "id": "123", "key": "TEST", "name": "Test Space", "description": {"plain": {"value": "A test space"}}, "homepage": {"id": "123456"}, "_links": { "webui": "/spaces/TEST", "base": "https://test.atlassian.net/wiki", }, } @pytest.fixture def jira_issue_response() -> dict[str, Any]: """Sample Jira issue response for testing.""" return { "id": "10000", "key": "TEST-123", "fields": { "summary": "Test Issue Summary", "description": "This is a test issue description", "status": {"name": "Open", "id": "1"}, "priority": {"name": "Medium", "id": "3"}, "issuetype": {"name": "Bug", "id": "1"}, "created": "2023-01-01T00:00:00.000+0000", "updated": "2023-01-01T12:00:00.000+0000", }, } ================================================ FILE: tests/integration/__init__.py ================================================ """Integration tests for confluence-markdown-exporter.""" ================================================ FILE: tests/integration/test_cli_integration.py ================================================ """Basic tests for confluence-markdown-exporter package.""" import subprocess import sys import pytest import confluence_markdown_exporter.main as main_module from confluence_markdown_exporter import __version__ def test_package_has_version() -> None: """Test that package has a version attribute.""" assert __version__ is not None assert isinstance(__version__, str) assert len(__version__) > 0 def test_version_command() -> None: """Test that the version command works correctly.""" try: # Test the version command result = subprocess.run( [sys.executable, "-m", "confluence_markdown_exporter.main", "version"], capture_output=True, text=True, check=True, timeout=10, ) # Check that version output contains 
def test_config_list_command() -> None:
    """Run ``config list`` in a subprocess and validate the YAML it prints.

    The command must exit with status 0 and print a YAML mapping containing
    the ``auth``, ``export`` and ``connection_config`` sections.
    """
    import yaml

    # check=False on purpose: the exit status is asserted explicitly below so
    # a failure reports the command's stderr. Assertions are no longer wrapped
    # in a blanket ``except Exception`` that re-labelled them as "unexpected
    # error" and hid the original traceback.
    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "confluence_markdown_exporter.main",
            "config",
            "list",
        ],
        capture_output=True,
        text=True,
        check=False,
        timeout=10,
    )
    assert result.returncode == 0, f"Config list command failed: {result.stderr}"

    expected_sections = ("auth", "export", "connection_config")
    for section in expected_sections:
        assert f"{section}:" in result.stdout

    # Verify it's valid YAML
    config_data = yaml.safe_load(result.stdout)
    assert isinstance(config_data, dict)
    for section in expected_sections:
        assert section in config_data


def test_cli_entry_points() -> None:
    """Check that the main module exposes the ``app`` object (typer app).

    ``main_module`` is imported at the top of this file, so an import problem
    surfaces when the test module is loaded, before this test runs.
    """
    # Deliberately no try/except: the previous ``except Exception: pass``
    # also caught AssertionError, which made this test unable to fail.
    assert main_module is not None
    assert hasattr(main_module, "app")

body text

' out = converter.convert(html) assert "> [!NOTE]" in out assert "body text" in out def test_warning_renders_as_caution_alert(self, converter: Page.Converter) -> None: html = '

danger

' out = converter.convert(html) assert "> [!CAUTION]" in out class TestAlertInsideTableCell: def test_panel_in_td_emits_emoji_no_blockquote(self, converter: Page.Converter) -> None: html = ( "
" '

Klinische Abteilung

' "
" ) out = converter.convert(html) assert "[!NOTE]" not in out assert ">" not in out.replace("", "") assert "\U0001f4dd Klinische Abteilung" in out def test_info_in_td_emits_important_emoji(self, converter: Page.Converter) -> None: html = ( "
" '

info text

' "
" ) out = converter.convert(html) assert "[!IMPORTANT]" not in out assert "❗ info text" in out def test_warning_in_td_emits_caution_emoji(self, converter: Page.Converter) -> None: html = ( "
" '

danger

' "
" ) out = converter.convert(html) assert "[!CAUTION]" not in out assert "\U0001f6d1 danger" in out def test_tip_in_td_emits_tip_emoji(self, converter: Page.Converter) -> None: html = ( "
" '

helpful

' "
" ) out = converter.convert(html) assert "[!TIP]" not in out assert "\U0001f4a1 helpful" in out def test_note_in_td_emits_warning_emoji(self, converter: Page.Converter) -> None: html = ( "
" '

watch out

' "
" ) out = converter.convert(html) assert "[!WARNING]" not in out assert "⚠️ watch out" in out def test_panel_in_th_emits_emoji_no_blockquote(self, converter: Page.Converter) -> None: html = ( "
" '

header note

' "
" ) out = converter.convert(html) assert "[!NOTE]" not in out assert "\U0001f4dd header note" in out class TestCustomPanelEmoji: def test_custom_panel_icon_text_used_in_table_cell(self) -> None: editor2 = ( '' '1f6e0' ':tools:' '\U0001f6e0️' "

Klinische Abteilung

" "
" ) converter = _make_converter(editor2) html = ( "
" '

Klinische Abteilung

' "
" ) out = converter.convert(html) assert "\U0001f6e0️ Klinische Abteilung" in out assert "\U0001f4dd" not in out def test_custom_panel_icon_id_decoded_when_no_text(self) -> None: editor2 = ( '' '1f6e0' "

x

" "
" ) converter = _make_converter(editor2) html = ( "
" '

x

' "
" ) out = converter.convert(html) assert "\U0001f6e0 x" in out def test_panel_without_custom_icon_falls_back_to_default(self) -> None: editor2 = ( '' "

plain

" "
" ) converter = _make_converter(editor2) html = ( "
" '

plain

' "
" ) out = converter.convert(html) assert "\U0001f4dd plain" in out def test_unknown_macro_id_falls_back_to_default(self) -> None: converter = _make_converter("") html = ( "
" '

y

' "
" ) out = converter.convert(html) assert "\U0001f4dd y" in out ================================================ FILE: tests/unit/test_api_clients.py ================================================ """Unit tests for api_clients module.""" import urllib.parse from unittest.mock import MagicMock from unittest.mock import patch import pytest import requests from atlassian.errors import ApiError from pydantic import SecretStr from confluence_markdown_exporter.api_clients import ApiClientFactory from confluence_markdown_exporter.api_clients import AuthNotConfiguredError from confluence_markdown_exporter.api_clients import ConfluenceRef from confluence_markdown_exporter.api_clients import get_confluence_instance from confluence_markdown_exporter.api_clients import parse_confluence_path from confluence_markdown_exporter.api_clients import response_hook from confluence_markdown_exporter.utils.app_data_store import ApiDetails from confluence_markdown_exporter.utils.app_data_store import AtlassianSdkConnectionConfig from confluence_markdown_exporter.utils.app_data_store import AuthConfig from confluence_markdown_exporter.utils.app_data_store import ConfigModel from tests.conftest import SAMPLE_CONFLUENCE_URL _PARSE_CONFLUENCE_PATH_CASES = [ ( "https://company.atlassian.net/wiki/spaces/SPACEKEY", ConfluenceRef(space_key="SPACEKEY"), ), ( "https://company.atlassian.net/wiki/spaces/SPACEKEY/pages/123456789/Page+Title", ConfluenceRef(space_key="SPACEKEY", page_id=123456789, page_title="Page Title"), ), ( "https://company.atlassian.net/wiki/spaces/SPACEKEY/pages/sddssd/Page+Title", None, ), ( "https://company.atlassian.net/wiki/spaces/SPACEKEY/overview", ConfluenceRef(space_key="SPACEKEY"), ), ( "https://api.atlassian.com/ex/confluence/CLOUDID/wiki/spaces/SPACEKEY/pages/123456789/Page+Title", ConfluenceRef(space_key="SPACEKEY", page_id=123456789, page_title="Page Title"), ), ( "https://api.atlassian.com/ex/confluence/1232132-12312312-21321332/wiki/spaces/SPACEKEY", 
ConfluenceRef(space_key="SPACEKEY"), ), ( "https://api.atlassian.com/ex/confluence/1232132-12312312-21321332/wiki/spaces/SPACEKEY/pages/123456789", ConfluenceRef(space_key="SPACEKEY", page_id=123456789), ), ( "/wiki/spaces/SPACEKEY/", ConfluenceRef(space_key="SPACEKEY"), ), ( "/wiki/spaces/SPACEKEY/overview", ConfluenceRef(space_key="SPACEKEY"), ), ( "/wiki/spaces/SPACEKEY/pages/123456789/Page+Title", ConfluenceRef(space_key="SPACEKEY", page_id=123456789, page_title="Page Title"), ), ( "/ex/confluence/CLOUDID/wiki/spaces/SPACEKEY/pages/123456789/Page+Title", ConfluenceRef(space_key="SPACEKEY", page_id=123456789, page_title="Page Title"), ), ( "/ex/confluence/1232132-12312312-21321332/wiki/spaces/SPACEKEY", ConfluenceRef(space_key="SPACEKEY"), ), ( "/ex/confluence/1232132-12312312-21321332/wiki/spaces/SPACEKEY/pages/123456789", ConfluenceRef(space_key="SPACEKEY", page_id=123456789), ), ( "https://confluence.company.com/display/SPACEKEY", ConfluenceRef(space_key="SPACEKEY"), ), ( "https://confluence.company.com/display/SPACEKEY/Page+Title", ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"), ), ( "https://confluence.company.com/SPACEKEY", ConfluenceRef(space_key="SPACEKEY"), ), ( "https://confluence.company.com/SPACEKEY/Page+Title", ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"), ), ( "https://company.atlassian.net/display/SPACEKEY/Page+Title", ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"), ), ( "https://company.atlassian.net/SPACEKEY/Page+Title", ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"), ), ( "/display/SPACEKEY", ConfluenceRef(space_key="SPACEKEY"), ), ( "/display/SPACEKEY/Page+Title", ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"), ), ( "/SPACEKEY", ConfluenceRef(space_key="SPACEKEY"), ), ( "/SPACEKEY/Page+Title", ConfluenceRef(space_key="SPACEKEY", page_title="Page Title"), ), ( "https://wiki.aaa.aaa/spaces/SPACEKEY/pages/123456789/Page+Title", ConfluenceRef(space_key="SPACEKEY", 
class TestParseConfluencePath:
    """Table-driven checks for ``parse_confluence_path``."""

    @pytest.mark.parametrize(("url", "expected"), _PARSE_CONFLUENCE_PATH_CASES)
    def test_parse_confluence_path(self, url: str, expected: ConfluenceRef | None) -> None:
        """Parse the path part of *url* and compare it with the expected reference."""
        # Absolute URLs are reduced to their path; bare paths are used as given.
        if "://" in url:
            path = urllib.parse.urlparse(url).path
        else:
            path = url

        parsed = parse_confluence_path(path)

        if expected is not None:
            assert parsed is not None
            assert parsed.model_dump() == expected.model_dump()
            return
        assert parsed is None


class TestResponseHook:
    """Checks for the logging behaviour of ``response_hook``."""

    def test_successful_response(self, caplog: pytest.LogCaptureFixture) -> None:
        """A successful response is returned unchanged and nothing is logged."""
        ok_response = MagicMock(spec=requests.Response, ok=True, status_code=200)

        returned = response_hook(ok_response)

        assert returned == ok_response
        assert not caplog.records

    def test_failed_response(self, caplog: pytest.LogCaptureFixture) -> None:
        """A failed response is returned unchanged and produces one log record."""
        failed_response = MagicMock(
            spec=requests.Response,
            ok=False,
            status_code=404,
            url="https://test.atlassian.net/api/test",
            headers={"Content-Type": "application/json"},
        )

        returned = response_hook(failed_response)

        assert returned == failed_response
        records = caplog.records
        assert len(records) == 1
        message = records[0].message
        expected_msg = "Request to https://test.atlassian.net/api/test failed with status 404"
        assert expected_msg in message
        assert "Response headers: {'Content-Type': 'application/json'}" in message
@patch("confluence_markdown_exporter.api_clients.ConfluenceApiSdk")
def test_create_confluence_success(
    self, mock_confluence_sdk: MagicMock, sample_api_details: ApiDetails
) -> None:
    """The factory returns the SDK client and probes it with one space lookup."""
    client = mock_confluence_sdk.return_value
    client.get_all_spaces.return_value = [{"key": "TEST"}]
    connection_config = AtlassianSdkConnectionConfig()

    created = ApiClientFactory(connection_config).create_confluence(
        SAMPLE_CONFLUENCE_URL, sample_api_details
    )

    assert created == client
    mock_confluence_sdk.assert_called_once_with(
        url=SAMPLE_CONFLUENCE_URL,
        username=sample_api_details.username.get_secret_value(),
        password=sample_api_details.api_token.get_secret_value(),
        token=sample_api_details.pat.get_secret_value(),
        **connection_config.model_dump(),
    )
    client.get_all_spaces.assert_called_once_with(limit=1)


@patch("confluence_markdown_exporter.api_clients.ConfluenceApiSdk")
def test_create_confluence_connection_failure(
    self, mock_confluence_sdk: MagicMock, sample_api_details: ApiDetails
) -> None:
    """An ``ApiError`` from the space lookup is reported as ``ConnectionError``."""
    client = mock_confluence_sdk.return_value
    client.get_all_spaces.side_effect = ApiError("Connection failed")
    api_factory = ApiClientFactory(AtlassianSdkConnectionConfig())

    with pytest.raises(ConnectionError, match="Confluence connection failed"):
        api_factory.create_confluence(SAMPLE_CONFLUENCE_URL, sample_api_details)


@patch("confluence_markdown_exporter.api_clients.JiraApiSdk")
def test_create_jira_success(
    self, mock_jira_sdk: MagicMock, sample_api_details: ApiDetails
) -> None:
    """The factory returns the Jira SDK client and probes it with a project lookup."""
    client = mock_jira_sdk.return_value
    client.get_all_projects.return_value = [{"key": "TEST"}]
    connection_config = AtlassianSdkConnectionConfig()

    created = ApiClientFactory(connection_config).create_jira(
        SAMPLE_CONFLUENCE_URL, sample_api_details
    )

    assert created == client
    mock_jira_sdk.assert_called_once_with(
        url=SAMPLE_CONFLUENCE_URL,
        username=sample_api_details.username.get_secret_value(),
        password=sample_api_details.api_token.get_secret_value(),
        token=sample_api_details.pat.get_secret_value(),
        **connection_config.model_dump(),
    )
    client.get_all_projects.assert_called_once()


@patch("confluence_markdown_exporter.api_clients.JiraApiSdk")
def test_create_jira_connection_failure(
    self, mock_jira_sdk: MagicMock, sample_api_details: ApiDetails
) -> None:
    """An ``ApiError`` from the project lookup is reported as ``ConnectionError``."""
    client = mock_jira_sdk.return_value
    client.get_all_projects.side_effect = ApiError("Connection failed")
    api_factory = ApiClientFactory(AtlassianSdkConnectionConfig())

    with pytest.raises(ConnectionError, match="Jira connection failed"):
        api_factory.create_jira(SAMPLE_CONFLUENCE_URL, sample_api_details)
@patch("confluence_markdown_exporter.api_clients._confluence_clients", {})
@patch("confluence_markdown_exporter.api_clients.get_settings")
@patch("confluence_markdown_exporter.api_clients.ApiClientFactory")
def test_connection_failure_raises(
    self,
    mock_factory_class: MagicMock,
    mock_get_settings: MagicMock,
    sample_config_model: ConfigModel,
) -> None:
    """A ``ConnectionError`` from the factory surfaces as ``AuthNotConfiguredError``."""
    mock_get_settings.return_value = sample_config_model
    failing_factory = MagicMock()
    failing_factory.create_confluence.side_effect = ConnectionError("Connection failed")
    mock_factory_class.return_value = failing_factory

    with pytest.raises(AuthNotConfiguredError) as exc_info:
        get_confluence_instance(SAMPLE_CONFLUENCE_URL)

    error = exc_info.value
    assert error.url == SAMPLE_CONFLUENCE_URL
    assert error.service == "Confluence"
    # The factory was asked for a client exactly once.
    assert failing_factory.create_confluence.call_count == 1
def _make_config(self, key: str) -> AuthConfig:
    """Build an ``AuthConfig`` with one Confluence entry stored under *key*."""
    return AuthConfig(
        confluence={
            key: ApiDetails(username=SecretStr("user"), api_token=SecretStr("token")),
        }
    )


@pytest.mark.parametrize(
    ("stored_key", "lookup_url"),
    [
        # Stored key has no context path; the looked-up URL has one
        ("https://host.example.com", "https://host.example.com/confluence"),
        ("https://host.example.com", "https://host.example.com/confluence/spaces/KEY"),
        ("https://host.example.com", "https://host.example.com/confluence/display/KEY/Title"),
        # Stored key and looked-up URL both carry the context path
        ("https://host.example.com/confluence", "https://host.example.com/confluence"),
        (
            "https://host.example.com/confluence",
            "https://host.example.com/confluence/spaces/KEY/pages/123",
        ),
        # Non-standard port
        ("https://host.example.com:8443", "https://host.example.com:8443/confluence"),
    ],
)
def test_get_instance_matches_context_path_url(
    self, stored_key: str, lookup_url: str
) -> None:
    """``get_instance`` finds the stored entry for each of these URLs."""
    instance = self._make_config(stored_key).get_instance(lookup_url)
    assert instance is not None


@pytest.mark.parametrize(
    ("stored_key", "lookup_url"),
    [
        # Different host: must not match
        ("https://other.example.com", "https://host.example.com/confluence"),
        # Different port: must not match
        ("https://host.example.com:8080", "https://host.example.com:9090/confluence"),
        # Gateway URL: must not match by host fallback
        (
            "https://api.atlassian.com/ex/confluence/CLOUD1",
            "https://api.atlassian.com/ex/confluence/CLOUD2/wiki/spaces/KEY",
        ),
    ],
)
def test_get_instance_no_false_match(self, stored_key: str, lookup_url: str) -> None:
    """``get_instance`` returns ``None`` for each of these URLs."""
    instance = self._make_config(stored_key).get_instance(lookup_url)
    assert instance is None
class MockPage:
    """Bare-bones stand-in for a page, used by the Converter tests."""

    def __init__(self) -> None:
        # Plain scalar and list attributes.
        self.id = "test-page"
        self.title = "Test Page"
        self.type = ""
        self.html = ""
        self.body_storage = ""
        self.web_url = ""
        self.tiny_url = ""
        self.labels = []
        self.ancestors = []
        # Nested objects are mocks preconfigured with the attributes set here.
        self.space = MagicMock(key="TEST")
        self.version = MagicMock(
            number=1,
            when="",
            by=MagicMock(display_name=""),
        )
        self.history = MagicMock(
            created="",
            created_by=MagicMock(display_name=""),
        )

    def get_attachment_by_file_id(self, file_id: str) -> None:
        """Return ``None`` for every *file_id*."""
        return None

test [R1] test

" result = converter.convert(html).strip() assert result == r"test \[R1\] test" def test_bracket_at_start(self, converter: Page.Converter) -> None: html = "

[R1] test

" result = converter.convert(html).strip() assert result == r"\[R1\] test" def test_bracket_at_end(self, converter: Page.Converter) -> None: html = "

test [R1]

" result = converter.convert(html).strip() assert result == r"test \[R1\]" def test_multiple_bracket_groups(self, converter: Page.Converter) -> None: html = "

[A1] and [B2]

" result = converter.convert(html).strip() assert result == r"\[A1\] and \[B2\]" def test_bracket_in_code_not_escaped(self, converter: Page.Converter) -> None: html = "[R1]" result = converter.convert(html).strip() assert result == "`[R1]`" def test_real_link_not_affected(self, converter: Page.Converter) -> None: html = 'click here' result = converter.convert(html).strip() assert result == "[click here](https://example.com)" class TestAnchorLinkConversion: """Internal anchor links must use the href value for slug, not link text.""" def test_anchor_uses_href_not_link_text(self, converter: Page.Converter) -> None: """Anchor slug derived from href, not display text.""" html = 'request service' result = converter.convert(html).strip() assert result == "[request service](#1-request-service)" def test_anchor_plain_heading(self, converter: Page.Converter) -> None: """Simple heading anchor round-trips correctly.""" html = 'My Heading' result = converter.convert(html).strip() assert result == "[My Heading](#my-heading)" def test_anchor_with_numbers_and_punctuation(self, converter: Page.Converter) -> None: """Numbered heading anchors match GitHub markdown anchor format.""" html = 'setup steps' result = converter.convert(html).strip() assert result == "[setup steps](#2-setup-steps)" def test_wiki_anchor_uses_link_text(self, converter: Page.Converter) -> None: """Wiki links use link text for slug, not href.""" from unittest.mock import patch with patch("confluence_markdown_exporter.confluence.settings") as mock_settings: mock_settings.export.page_href = "wiki" html = 'Request Service' result = converter.convert(html).strip() assert result == "[[#Request Service]]" def _make_attachment( att_id: str, file_id: str, title: str = "file.png", media_type: str = "image/png", ) -> Attachment: space = Space(base_url="https://example.com", key="TS", name="Test", description="", homepage=0) version = Version( number=1, by=User(account_id="u1", display_name="User", username="user", 
def _make_page(
    body: str,
    body_export: str,
    attachments: list[Attachment],
    body_storage: str = "",
) -> Page:
    """Build a ``Page`` with fixed metadata around the given bodies and attachments."""
    base_url = "https://example.com"
    author = User(account_id="u1", display_name="User", username="user", public_name="", email="")
    return Page(
        base_url=base_url,
        id=1,
        title="Test Page",
        space=Space(base_url=base_url, key="TS", name="Test", description="", homepage=0),
        ancestors=[],
        version=Version(
            number=1,
            by=author,
            when="2024-01-01T00:00:00Z",
            friendly_when="Jan 1",
        ),
        body=body,
        body_export=body_export,
        editor2="",
        body_storage=body_storage,
        labels=[],
        attachments=attachments,
    )
MP4) must be exported.""" att = _make_attachment("88888", "xyz-guid-88", title="video.mp4", media_type="video/mp4") page = _make_page( body="", body_export='video.mp4', attachments=[att], ) with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.attachments_export = "referenced" result = page._attachments_for_export() assert att in result def test_title_in_body_src_url_included(self) -> None: """SVG referenced only by filename in src URL (no data attributes) must be exported.""" att = _make_attachment( "66666", "xyz-guid-66", title="MEP-Symbol_CH-REP.svg", media_type="image/svg+xml" ) page = _make_page( body='', body_export="", attachments=[att], ) with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.attachments_export = "referenced" result = page._attachments_for_export() assert att in result def test_title_with_spaces_url_encoded_in_body_export_included(self) -> None: att = _make_attachment("55555", "xyz-guid-55", title="my video.mp4", media_type="video/mp4") page = _make_page( body="", body_export='', attachments=[att], ) with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.attachments_export = "referenced" result = page._attachments_for_export() assert att in result def test_unreferenced_attachment_excluded(self) -> None: att = _make_attachment("77777", "xyz-guid-77", title="unused.png") page = _make_page(body="no references here", body_export="", attachments=[att]) with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.attachments_export = "referenced" result = page._attachments_for_export() assert att not in result def test_attachments_export_all_returns_all(self) -> None: att1 = _make_attachment("111", "aaa") att2 = _make_attachment("222", "bbb", title="other.svg", media_type="image/svg+xml") page = _make_page(body="", body_export="", attachments=[att1, att2]) with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.attachments_export = "all" 
class TestAttachmentsExportFlag:
    """Checks how ``export.attachments_export`` affects attachment handling."""

    def _make_attachment_mock(self, att_id: str = "att-1", version: int = 3) -> MagicMock:
        """Return a mock attachment with an id, a version number and an export path."""
        attachment = MagicMock(id=att_id, export_path=Path(f"attachments/{att_id}.bin"))
        attachment.version.number = version
        return attachment

    def _make_page_mock(self, attachments: list) -> MagicMock:
        """Return a mock page whose ``_attachments_for_export`` yields *attachments*."""
        page = MagicMock(id=42)
        page._attachments_for_export.return_value = attachments
        return page

    def test_referenced_default_exports_attachments(self, tmp_path: Path) -> None:
        """With attachments_export='referenced' (default), attachments are downloaded."""
        attachment = self._make_attachment_mock()
        page = self._make_page_mock([attachment])

        with (
            patch("confluence_markdown_exporter.confluence.settings") as mock_settings,
            patch("confluence_markdown_exporter.confluence.LockfileManager") as mock_lockfile,
            patch("confluence_markdown_exporter.confluence.get_stats"),
        ):
            export_settings = mock_settings.export
            export_settings.attachments_export = "referenced"
            export_settings.output_path = tmp_path
            mock_lockfile.get_page_attachment_entries.return_value = {}

            exported = Page.export_attachments(page)

            attachment.export.assert_called_once()
            assert "att-1" in exported

    def test_disabled_skips_download_and_lockfile(self) -> None:
        """With attachments_export='disabled', no download and no lockfile lookup."""
        attachment = self._make_attachment_mock()
        page = self._make_page_mock([attachment])

        with (
            patch("confluence_markdown_exporter.confluence.settings") as mock_settings,
            patch("confluence_markdown_exporter.confluence.LockfileManager") as mock_lockfile,
            patch("confluence_markdown_exporter.confluence.get_stats"),
        ):
            mock_settings.export.attachments_export = "disabled"

            exported = Page.export_attachments(page)

            assert exported == {}
            attachment.export.assert_not_called()
            mock_lockfile.get_page_attachment_entries.assert_not_called()

    def test_metadata_still_populated_when_disabled(self) -> None:
        """Page.from_json populates Page.attachments even when downloads are disabled.

        Guards against future scope creep that would gate metadata loading on
        the same flag — body image and file links must keep resolving.
        """
        base_url = "https://example.atlassian.net"
        fake_space = Space(
            base_url=base_url, key="K", name="Space", description="", homepage=None
        )
        fake_version = Version(
            number=1,
            by=User(account_id="", username="", display_name="", public_name="", email=""),
            when="",
            friendly_when="",
        )
        fake_attachment = Attachment(
            base_url=base_url,
            id="att-1",
            title="file.png",
            space=fake_space,
            ancestors=[],
            version=fake_version,
            file_size=10,
            media_type="image/png",
            media_type_description="",
            file_id="file-id-1",
            collection_name="",
            download_link="",
            comment="",
        )
        empty_body = {
            "view": {"value": ""},
            "export_view": {"value": ""},
            "editor2": {"value": ""},
        }
        page_data = {
            "id": 42,
            "title": "Test",
            "_expandable": {"space": "/rest/api/space/K"},
            "body": empty_body,
            "metadata": {"labels": {"results": []}},
            "ancestors": [],
            "version": {},
        }

        with (
            patch(
                "confluence_markdown_exporter.confluence.Attachment.from_page_id",
                return_value=[fake_attachment],
            ),
            patch(
                "confluence_markdown_exporter.confluence.Space.from_key",
                return_value=fake_space,
            ),
            patch("confluence_markdown_exporter.confluence.settings") as mock_settings,
        ):
            mock_settings.export.attachments_export = "disabled"

            page = Page.from_json(page_data, base_url)

            loaded = page.attachments
            assert len(loaded) == 1
            assert loaded[0].id == "att-1"
Path("TEST/Instructions for Use.md") def get_attachment_by_file_id(self, _fid: str) -> None: return None def get_attachment_by_id(self, _aid: str) -> None: return None def get_attachments_by_title(self, title: str) -> list: if title == "MEP-Symbol_CH-REP.svg": return [MockAttachment()] return [] encoded = quote('') html = ( f'' ) with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.attachment_href = "relative" s.export.page_href = "relative" conv = Page.Converter(MockPageWithSvg()) # type: ignore[arg-type] result = conv.convert(html).strip() assert "placeholder/error" not in result assert "MEP-Symbol_CH-REP.svg" in result or "guid123.svg" in result class TestParseImageCaptions: """_parse_image_captions extracts captions from Confluence storage XML.""" def test_cdata_caption_extracted(self) -> None: from confluence_markdown_exporter.confluence import _parse_image_captions storage = ( '' '' "" "" "" "" ) assert _parse_image_captions(storage) == {"testbild.jpeg": "My Caption"} def test_plain_text_caption_extracted(self) -> None: from confluence_markdown_exporter.confluence import _parse_image_captions storage = ( "" '' "" "Plain Caption" "" "" ) assert _parse_image_captions(storage) == {"photo.png": "Plain Caption"} def test_paragraph_caption_extracted(self) -> None: from confluence_markdown_exporter.confluence import _parse_image_captions storage = ( '' '' "

Dialog in VS Code to create a new branch

" "
" ) result = _parse_image_captions(storage) assert result == {"screenshot.png": "Dialog in VS Code to create a new branch"} def test_caption_with_attributes_extracted(self) -> None: from confluence_markdown_exporter.confluence import _parse_image_captions storage = ( '' '' '' "

Exemplary Tissue Map

" "
" "
" ) result = _parse_image_captions(storage) assert result == {"TissueMap.png": "Exemplary Tissue Map"} def test_image_without_caption_excluded(self) -> None: from confluence_markdown_exporter.confluence import _parse_image_captions storage = ( "" '' "" ) assert _parse_image_captions(storage) == {} def test_multiple_images_mixed(self) -> None: from confluence_markdown_exporter.confluence import _parse_image_captions storage = ( "" '' "" "" "" "" '' "" "" '' "" "" "" ) result = _parse_image_captions(storage) assert result == {"a.png": "Caption A", "c.jpg": "Caption C"} def test_empty_storage_returns_empty(self) -> None: from confluence_markdown_exporter.confluence import _parse_image_captions assert _parse_image_captions("") == {} class TestImageCaptionsInConvertImg: """convert_img renders captions as italics below the image when image_captions is enabled.""" def test_caption_rendered_as_italic_below_image(self) -> None: att = _make_attachment("111", "abc-guid-111", title="testbild.jpeg") storage = ( "" '' "" "" "" "" ) page = _make_page( body='', body_export="", attachments=[att], body_storage=storage, ) _att_path = "{space_name}/attachments/{attachment_file_id}{attachment_extension}" with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.attachment_href = "relative" s.export.attachment_path = _att_path s.export.page_href = "relative" s.export.page_path = "{space_name}/{page_title}.md" s.export.image_captions = True s.export.include_document_title = False s.export.page_breadcrumbs = False conv = Page.Converter(page) result = conv.convert(page.body).strip() assert "![](" in result # image with empty alt assert "*My Caption*" in result lines = result.splitlines() img_line = next(i for i, line in enumerate(lines) if "![](" in line) assert lines[img_line + 1] == "*My Caption*" def test_caption_disabled_preserves_original_alt(self) -> None: att = _make_attachment("111", "abc-guid-111", title="testbild.jpeg") storage = ( "" '' "" "" "" "" ) page = 
_make_page( body='', body_export="", attachments=[att], body_storage=storage, ) _att_path = "{space_name}/attachments/{attachment_file_id}{attachment_extension}" with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.attachment_href = "relative" s.export.attachment_path = _att_path s.export.page_href = "relative" s.export.page_path = "{space_name}/{page_title}.md" s.export.image_captions = False s.export.include_document_title = False s.export.page_breadcrumbs = False conv = Page.Converter(page) result = conv.convert(page.body).strip() assert "My Caption" not in result class TestPageFromUrl: """Test cases for Page.from_url.""" def test_from_url_prefers_page_id_query_parameter_for_legacy_server_url(self) -> None: """Legacy Server/DC viewpage.action links should resolve by pageId.""" page_url = ( "https://wiki.example.com/pages/viewpage.action" "?pageId=317425825&src=contextnavpagetreemode" ) with ( patch("confluence_markdown_exporter.confluence.get_confluence_instance"), patch("confluence_markdown_exporter.confluence.Page.from_id") as mock_from_id, patch("confluence_markdown_exporter.confluence.get_thread_confluence") as mock_client, ): mock_from_id.return_value = "page" result = Page.from_url(page_url) assert result == "page" mock_from_id.assert_called_once_with(317425825, "https://wiki.example.com") mock_client.assert_not_called() class TestSpanHighlightConversion: """Background-color spans must become elements when enabled.""" def test_background_color_rgb_converted_to_mark(self, converter: Page.Converter) -> None: html = '

hello

' result = converter.convert(html).strip() assert 'hello' in result def test_multiple_channels_converted_correctly(self, converter: Page.Converter) -> None: html = '

text

' result = converter.convert(html).strip() assert 'text' in result def test_highlight_disabled_returns_plain_text(self, converter: Page.Converter) -> None: html = '

hello

' with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.convert_text_highlights = False s.export.convert_font_colors = True result = converter.convert(html).strip() assert "/ must become wrappers.""" def test_td_hex_attribute_wraps_in_mark(self, converter: Page.Converter) -> None: html = ( '' '' '

2

' ) result = converter.convert(html) assert '2' in result def test_th_hex_attribute_wraps_in_mark(self, converter: Page.Converter) -> None: html = ( '' '' '

P / S

' ) result = converter.convert(html) assert '**P / S**' in result def test_default_header_gray_not_wrapped(self, converter: Page.Converter) -> None: """Confluence's default background (#f4f5f7) is not user-chosen — skip.""" html = ( '' '' '' '

P / S

P5

' ) result = converter.convert(html) assert " None: html = ( '' '' '

plain

' ) result = converter.convert(html) assert " None: html = ( '

plain

' ) result = converter.convert(html) assert " None: html = ( '' '' '

x

' ) result = converter.convert(html) assert " None: html = ( '' '' '
' ) result = converter.convert(html) assert ' ' in result def test_setting_disabled_returns_plain_text(self, converter: Page.Converter) -> None: html = ( '' '' '

2

' ) with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.convert_text_highlights = False s.export.convert_font_colors = True s.export.convert_status_badges = True result = converter.convert(html) assert " elements when enabled.""" def test_inline_color_rgb_converted_to_font(self, converter: Page.Converter) -> None: html = '

blue text

' result = converter.convert(html).strip() assert 'blue text' in result def test_background_color_not_matched_as_font_color(self, converter: Page.Converter) -> None: html = '

hi

' result = converter.convert(html).strip() assert "hi
' in result def test_data_colorid_resolved_from_style_tag(self) -> None: page = MockPage() page.html = ( '' ) conv = Page.Converter(page) # type: ignore[arg-type] html = '

colored

' result = conv.convert(html).strip() assert 'colored' in result def test_data_colorid_unknown_falls_through(self, converter: Page.Converter) -> None: html = '

text

' result = converter.convert(html).strip() assert " None: html = '

red text

' with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.convert_text_highlights = True s.export.convert_font_colors = False result = converter.convert(html).strip() assert " elements when enabled.""" def _badge(self, extra_class: str, label: str) -> str: classes = f"status-macro aui-lozenge aui-lozenge-visual-refresh {extra_class}".strip() return ( f'

{label}

' ) def test_gray_badge(self, converter: Page.Converter) -> None: html = self._badge("", "IN PROGRESS") result = converter.convert(html).strip() assert 'IN PROGRESS' in result def test_blue_badge(self, converter: Page.Converter) -> None: html = self._badge("aui-lozenge-complete", "DONE") result = converter.convert(html).strip() assert 'DONE' in result def test_green_badge(self, converter: Page.Converter) -> None: html = self._badge("aui-lozenge-success", "SUCCESS") result = converter.convert(html).strip() assert 'SUCCESS' in result def test_yellow_badge(self, converter: Page.Converter) -> None: html = self._badge("aui-lozenge-current", "ORANGE") result = converter.convert(html).strip() assert 'ORANGE' in result def test_red_badge(self, converter: Page.Converter) -> None: html = self._badge("aui-lozenge-error", "BLOCKED") result = converter.convert(html).strip() assert 'BLOCKED' in result def test_purple_badge(self, converter: Page.Converter) -> None: html = self._badge("aui-lozenge-progress", "VIOLET") result = converter.convert(html).strip() assert 'VIOLET' in result def test_badge_disabled_returns_plain_text(self, converter: Page.Converter) -> None: html = self._badge("aui-lozenge-error", "BLOCKED") with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.convert_status_badges = False s.export.convert_font_colors = True s.export.convert_text_highlights = True result = converter.convert(html).strip() assert "
AuthorJohn Doe
StatusActive
""" class TestPagePropertiesFormat: """Page Properties macro renders according to page_properties_format setting.""" def _converter(self) -> Page.Converter: return Page.Converter(MockPage()) def test_frontmatter_removes_table(self) -> None: converter = self._converter() with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.page_properties_format = "frontmatter" result = converter.convert(_DETAILS_HTML) assert "Author" not in result assert "author" in converter.page_properties assert converter.page_properties["author"] == "John Doe" def test_table_keeps_table_no_properties(self) -> None: converter = self._converter() with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.page_properties_format = "table" result = converter.convert(_DETAILS_HTML) assert "Author" in result assert converter.page_properties == {} def test_frontmatter_and_table_keeps_both(self) -> None: converter = self._converter() with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.page_properties_format = "frontmatter_and_table" result = converter.convert(_DETAILS_HTML) assert "Author" in result assert "author" in converter.page_properties assert converter.page_properties["author"] == "John Doe" def test_dataview_inline_field(self) -> None: converter = self._converter() with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.page_properties_format = "dataview-inline-field" result = converter.convert(_DETAILS_HTML) assert "Author:: John Doe" in result assert "Status:: Active" in result assert "|" not in result assert converter.page_properties == {} def test_meta_bind_view_fields(self) -> None: converter = self._converter() with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.page_properties_format = "meta-bind-view-fields" result = converter.convert(_DETAILS_HTML) assert "| **Author** | `VIEW[{author}][text(renderMarkdown)]` |" in result assert "| **Status** | 
`VIEW[{status}][text(renderMarkdown)]` |" in result assert "author" in converter.page_properties assert "status" in converter.page_properties def test_duplicate_keys_get_numeric_suffix(self) -> None: html = """
StatusDraft
StatusReview
StatusFinal
""" converter = self._converter() with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.page_properties_format = "frontmatter" converter.convert(html) assert converter.page_properties["status"] == "Draft" assert converter.page_properties["status_2"] == "Review" assert converter.page_properties["status_3"] == "Final" def test_duplicate_keys_in_inline_fields(self) -> None: html = """
Tagfoo
Tagbar
""" converter = self._converter() with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.page_properties_format = "dataview-inline-field" result = converter.convert(html) assert "Tag:: foo" in result assert "Tag 2:: bar" in result class TestPagePropertiesMigration: """Legacy page_properties_as_front_matter bool migrates to page_properties_format.""" def test_old_true_maps_to_frontmatter(self) -> None: from confluence_markdown_exporter.utils.app_data_store import ExportConfig config = ExportConfig.model_validate({"page_properties_as_front_matter": True}) assert config.page_properties_format == "frontmatter" def test_old_false_maps_to_table(self) -> None: from confluence_markdown_exporter.utils.app_data_store import ExportConfig config = ExportConfig.model_validate({"page_properties_as_front_matter": False}) assert config.page_properties_format == "table" def test_new_field_takes_precedence_over_old(self) -> None: from confluence_markdown_exporter.utils.app_data_store import ExportConfig config = ExportConfig.model_validate( {"page_properties_as_front_matter": True, "page_properties_format": "table"} ) assert config.page_properties_format == "table" def test_default_is_frontmatter_and_table(self) -> None: from confluence_markdown_exporter.utils.app_data_store import ExportConfig config = ExportConfig() assert config.page_properties_format == "frontmatter_and_table" class TestConfluenceUrlInFrontmatter: """Confluence page URLs render to YAML front matter according to the setting.""" _WEBUI = "https://example.atlassian.net/wiki/spaces/TEST/pages/123/Test+Page" _TINYUI = "https://example.atlassian.net/wiki/x/AbCdEf" def _converter(self, *, with_urls: bool = True) -> Page.Converter: page = MockPage() if with_urls: page.web_url = self._WEBUI page.tiny_url = self._TINYUI return Page.Converter(page) def test_get_web_url_combines_base_and_webui(self) -> None: from confluence_markdown_exporter.confluence import _get_web_url data = { "_links": { "base": 
"https://example.atlassian.net/wiki", "webui": "/spaces/TEST/pages/123/Test+Page", } } assert _get_web_url(data) == self._WEBUI def test_get_tiny_url_combines_base_and_tinyui(self) -> None: from confluence_markdown_exporter.confluence import _get_tiny_url data = { "_links": { "base": "https://example.atlassian.net/wiki", "tinyui": "/x/AbCdEf", } } assert _get_tiny_url(data) == self._TINYUI def test_helpers_strip_redundant_separators(self) -> None: from confluence_markdown_exporter.confluence import _get_web_url data = { "_links": { "base": "https://example.atlassian.net/wiki/", "webui": "/spaces/TEST/pages/123/Test+Page", } } assert _get_web_url(data) == self._WEBUI def test_helpers_return_empty_when_links_missing(self) -> None: from confluence_markdown_exporter.confluence import _get_tiny_url from confluence_markdown_exporter.confluence import _get_web_url assert _get_web_url({}) == "" assert _get_tiny_url({}) == "" def test_helpers_return_empty_when_links_not_dict(self) -> None: from confluence_markdown_exporter.confluence import _get_tiny_url from confluence_markdown_exporter.confluence import _get_web_url assert _get_web_url({"_links": "broken"}) == "" assert _get_tiny_url({"_links": None}) == "" def test_helpers_return_empty_when_base_or_rel_missing(self) -> None: from confluence_markdown_exporter.confluence import _get_web_url assert _get_web_url({"_links": {"base": "https://example.com"}}) == "" assert _get_web_url({"_links": {"webui": "/spaces/TEST"}}) == "" def test_frontmatter_contains_webui_url_when_mode_webui(self) -> None: converter = self._converter() with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.confluence_url_in_frontmatter = "webui" result = converter.front_matter assert f"confluence_webui_url: {self._WEBUI}" in result assert "confluence_tinyui_url" not in result def test_frontmatter_contains_tinyui_url_when_mode_tinyui(self) -> None: converter = self._converter() with 
patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "tinyui"
            result = converter.front_matter

        assert f"confluence_tinyui_url: {self._TINYUI}" in result
        assert "confluence_webui_url" not in result

    def test_frontmatter_contains_both_when_mode_both(self) -> None:
        """Mode ``"both"`` writes the webui key and the tinyui key."""
        converter = self._converter()
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "both"
            result = converter.front_matter

        assert f"confluence_webui_url: {self._WEBUI}" in result
        assert f"confluence_tinyui_url: {self._TINYUI}" in result

    def test_frontmatter_omits_urls_when_mode_none(self) -> None:
        """Mode ``"none"`` writes neither URL key, even though the page has both URLs."""
        converter = self._converter()
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "none"
            result = converter.front_matter

        assert "confluence_webui_url" not in result
        assert "confluence_tinyui_url" not in result

    def test_frontmatter_skips_when_url_value_is_empty(self) -> None:
        """A page built without URLs yields no URL keys, even in mode ``"both"``."""
        converter = self._converter(with_urls=False)
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "both"
            result = converter.front_matter

        assert "confluence_webui_url" not in result
        assert "confluence_tinyui_url" not in result

    def test_macro_value_takes_precedence_over_extracted_url(self) -> None:
        """A pre-existing ``confluence_webui_url`` page property is not overwritten."""
        converter = self._converter()
        # Seed the property the way a Page Properties macro value would appear.
        converter.page_properties["confluence_webui_url"] = "manual-override"
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "webui"
            result = converter.front_matter

        assert "confluence_webui_url: manual-override" in result
        assert self._WEBUI not in result


class TestPageMetadataInFrontmatter:
    """Page metadata fields render to YAML front matter according to the setting."""

    def _make_page(
        self,
        *,
        display_name: str = "Alex Johnson",
        page_type: str = "page",
        created: str = "2024-08-15T08:34:12.000+02:00",
        created_by: str = "Sam Creator",
    ) -> MockPage:
        """Build a ``MockPage`` carrying the metadata these tests assert on.

        Fixed values: id ``123``, space key ``"TEAM"``, version number ``7`` and
        version timestamp ``"2026-04-12T10:34:00.000+02:00"``.

        Args:
            display_name: Stored as ``page.version.by.display_name``.
            page_type: Stored as ``page.type``.
            created: Stored as ``page.history.created``.
            created_by: Stored as ``page.history.created_by.display_name``.

        Returns:
            The populated mock page; space, version and history are ``MagicMock`` objects.
        """
        page = MockPage()
        page.id = 123
        page.type = page_type

        space = MagicMock()
        space.key = "TEAM"
        page.space = space

        version = MagicMock()
        version.when = "2026-04-12T10:34:00.000+02:00"
        version.number = 7
        version.by = MagicMock()
        version.by.display_name = display_name
        page.version = version

        history = MagicMock()
        history.created = created
        history.created_by = MagicMock()
        history.created_by.display_name = created_by
        page.history = history

        return page

    def _converter(self, **kwargs: object) -> Page.Converter:
        """Return a ``Page.Converter`` for a page built by ``_make_page(**kwargs)``."""
        return Page.Converter(self._make_page(**kwargs))

    def test_default_disabled_writes_no_metadata(self) -> None:
        """With ``page_metadata_in_frontmatter`` off, none of the eight keys appear."""
        converter = self._converter()
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "none"
            s.export.page_metadata_in_frontmatter = False
            result = converter.front_matter

        assert "confluence_page_id" not in result
        assert "confluence_space_key" not in result
        assert "confluence_type" not in result
        assert "confluence_created" not in result
        assert "confluence_created_by" not in result
        assert "confluence_last_modified" not in result
        assert "confluence_last_modified_by" not in result
        assert "confluence_version" not in result

    def test_enabled_writes_all_eight_keys(self) -> None:
        """With the flag on, all eight ``confluence_*`` keys are written.

        The page id and timestamps are expected as quoted strings, while the
        version must be rendered unquoted (``7``, not ``'7'``).
        """
        converter = self._converter()
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "none"
            s.export.page_metadata_in_frontmatter = True
            result = converter.front_matter

        assert "confluence_page_id: '123'" in result
        assert "confluence_space_key: TEAM" in result
        assert "confluence_type: page" in result
        assert "confluence_created: '2024-08-15T08:34:12.000+02:00'" in result
        assert "confluence_created_by: Sam Creator" in result
        assert "confluence_last_modified: '2026-04-12T10:34:00.000+02:00'" in result
        assert "confluence_last_modified_by: Alex Johnson" in result
        assert "confluence_version: 7" in result
        assert "confluence_version: '7'" not in result

    def test_blogpost_type_renders(self) -> None:
        """A page whose type is ``"blogpost"`` is written as ``confluence_type: blogpost``."""
        converter = self._converter(page_type="blogpost")
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "none"
            s.export.page_metadata_in_frontmatter = True
            result = converter.front_matter

        assert "confluence_type: blogpost" in result

    def test_macro_precedence_for_page_id(self) -> None:
        """A pre-existing ``confluence_page_id`` page property wins over the page's id."""
        converter = self._converter()
        converter.page_properties["confluence_page_id"] = "macro-override"
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "none"
            s.export.page_metadata_in_frontmatter = True
            result = converter.front_matter

        assert "confluence_page_id: macro-override" in result
        assert "confluence_page_id: '123'" not in result

    @pytest.mark.parametrize(
        ("key", "macro_value", "api_substring"),
        [
            ("confluence_type", "macro-type", "confluence_type: page"),
            ("confluence_created", "macro-created", "2024-08-15T08:34:12.000+02:00"),
            ("confluence_created_by", "macro-author", "Sam Creator"),
        ],
    )
    def test_macro_precedence_for_history_fields(
        self, key: str, macro_value: str, api_substring: str
    ) -> None:
        """A pre-existing page property for ``key`` wins over the page-derived value.

        ``key: macro_value`` must be present and ``api_substring`` (the value the
        mock page would otherwise contribute) must be absent.
        """
        converter = self._converter()
        converter.page_properties[key] = macro_value
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "none"
            s.export.page_metadata_in_frontmatter = True
            result = converter.front_matter

        assert f"{key}: {macro_value}" in result
        assert api_substring not in result

    def test_empty_display_name_skipped(self) -> None:
        """An empty modifier display name omits only ``confluence_last_modified_by``."""
        converter = self._converter(display_name="")
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.confluence_url_in_frontmatter = "none"
            s.export.page_metadata_in_frontmatter = True
            result = converter.front_matter

        assert "confluence_last_modified_by" not in result
        # The remaining metadata keys are still written.
        assert "confluence_page_id: '123'" in result
        assert "confluence_space_key: TEAM" in result
        assert "confluence_last_modified" in result
        assert "confluence_version: 7" in result

    def
test_empty_creator_skipped(self) -> None: converter = self._converter(created_by="") with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.confluence_url_in_frontmatter = "none" s.export.page_metadata_in_frontmatter = True result = converter.front_matter assert "confluence_created_by" not in result assert "confluence_created: '2024-08-15T08:34:12.000+02:00'" in result assert "confluence_type: page" in result def test_empty_type_skipped(self) -> None: converter = self._converter(page_type="") with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.confluence_url_in_frontmatter = "none" s.export.page_metadata_in_frontmatter = True result = converter.front_matter assert "confluence_type" not in result assert "confluence_page_id: '123'" in result assert "confluence_created_by: Sam Creator" in result class TestInlineCommentsFrontMatter: """Pin the YAML front matter keys written into *.comments.md sidecars.""" def test_front_matter_uses_confluence_prefix(self) -> None: page = MockPage() page.id = 123 page.title = "My Page" page.space = MagicMock() page.space.key = "TEAM" page.base_url = "https://example.atlassian.net" page.export_path = Path("TEAM/My Page.md") page._marked_texts = {"ref-1": "marked excerpt"} page._COMMENT_TITLE_MAX_LEN = Page._COMMENT_TITLE_MAX_LEN.default page._fetch_inline_comments = lambda: [ { "id": "c1", "extensions": {"inlineProperties": {"markerRef": "ref-1"}}, "history": { "createdBy": {"displayName": "Alice"}, "createdDate": "2026-04-01T10:00:00Z", }, "body": {"view": {"value": "

nice

"}}, } ] page._fetch_page_comments = list page._fetch_comment_replies = lambda _cid: [] page._render_inline_comments = types.MethodType(Page._render_inline_comments, page) page._render_page_comments = types.MethodType(Page._render_page_comments, page) with ( patch("confluence_markdown_exporter.confluence.save_file") as mock_save, patch("confluence_markdown_exporter.confluence.settings") as s, ): s.export.output_path = Path("out") s.export.comments_export = "inline" Page.export_comments_sidecar(page) assert mock_save.called content = mock_save.call_args[0][1] # New keys with correct YAML form assert "confluence_page_id: '123'" in content assert 'confluence_page_title: "My Page"' in content assert ( 'confluence_webui_url: "https://example.atlassian.net' '/wiki/spaces/TEAM/pages/123"' in content ) # Regression guard: old keys must not reappear assert "\npage_id:" not in content assert "\npage_title:" not in content assert "\nsource:" not in content def _make_comments_page( *, inline_comments: list[dict] | None = None, page_comments: list[dict] | None = None, replies: dict[str, list[dict]] | None = None, marked_texts: dict[str, str] | None = None, ) -> MockPage: page = MockPage() page.id = 123 page.title = "My Page" page.space = MagicMock() page.space.key = "TEAM" page.base_url = "https://example.atlassian.net" page.export_path = Path("TEAM/My Page.md") page._marked_texts = marked_texts or {} page._COMMENT_TITLE_MAX_LEN = Page._COMMENT_TITLE_MAX_LEN.default page._fetch_inline_comments = lambda: list(inline_comments or []) page._fetch_page_comments = lambda: list(page_comments or []) replies_map = replies or {} page._fetch_comment_replies = lambda cid: list(replies_map.get(cid, [])) page._render_inline_comments = types.MethodType(Page._render_inline_comments, page) page._render_page_comments = types.MethodType(Page._render_page_comments, page) return page def _run_export_capturing_save(page: MockPage, mode: str) -> MagicMock: with ( 
patch("confluence_markdown_exporter.confluence.save_file") as mock_save, patch("confluence_markdown_exporter.confluence.settings") as s, ): s.export.output_path = Path("out") s.export.comments_export = mode Page.export_comments_sidecar(page) return mock_save def _inline_comment( ref: str = "ref-1", body: str = "

nice

", cid: str = "c1", author: str = "Alice", ) -> dict: return { "id": cid, "extensions": {"inlineProperties": {"markerRef": ref}}, "history": { "createdBy": {"displayName": author}, "createdDate": "2026-04-01T10:00:00Z", }, "body": {"view": {"value": body}}, } def _page_comment( cid: str = "p1", body: str = "

discussion body

", author: str = "Bob", *, resolved: bool = False, ) -> dict: return { "id": cid, "extensions": {"resolution": {"status": "resolved" if resolved else "open"}}, "history": { "createdBy": {"displayName": author}, "createdDate": "2026-04-02T11:00:00Z", }, "body": {"view": {"value": body}}, } class TestPageCommentsSidecarBody: """Sidecar rendering for page-level (footer) and combined comments.""" def test_only_footer_writes_only_page_section(self) -> None: page = _make_comments_page(page_comments=[_page_comment()]) save = _run_export_capturing_save(page, "footer") assert save.called content = save.call_args[0][1] assert "## Page comments" in content assert "## Inline comments" not in content assert "discussion body" in content assert "**Bob** · 2026-04-02" in content def test_all_writes_both_sections_inline_first(self) -> None: page = _make_comments_page( inline_comments=[_inline_comment()], page_comments=[_page_comment()], marked_texts={"ref-1": "marked excerpt"}, ) save = _run_export_capturing_save(page, "all") assert save.called content = save.call_args[0][1] assert "## Inline comments" in content assert "## Page comments" in content assert content.index("## Inline comments") < content.index("## Page comments") def test_none_writes_no_file(self) -> None: page = _make_comments_page( inline_comments=[_inline_comment()], page_comments=[_page_comment()], ) save = _run_export_capturing_save(page, "none") assert save.called is False def test_inline_only_omits_page_section(self) -> None: page = _make_comments_page( inline_comments=[_inline_comment()], page_comments=[_page_comment()], marked_texts={"ref-1": "marked excerpt"}, ) save = _run_export_capturing_save(page, "inline") assert save.called content = save.call_args[0][1] assert "## Inline comments" in content assert "## Page comments" not in content def test_page_comment_title_falls_back_to_comment_id(self) -> None: page = _make_comments_page( page_comments=[_page_comment(cid="abcdef1234567", body="")], ) save = 
_run_export_capturing_save(page, "footer") assert save.called content = save.call_args[0][1] assert "### Comment abcdef12" in content def test_page_comment_replies_render_under_parent(self) -> None: replies = { "p1": [ { "id": "r1", "history": { "createdBy": {"displayName": "Carol"}, "createdDate": "2026-04-03T11:00:00Z", }, "body": {"view": {"value": "

reply one

"}}, }, { "id": "r2", "history": { "createdBy": {"displayName": "Dave"}, "createdDate": "2026-04-03T12:00:00Z", }, "body": {"view": {"value": "

reply two

"}}, }, ] } page = _make_comments_page( page_comments=[_page_comment(cid="p1", body="

parent body

", author="Bob")], replies=replies, ) save = _run_export_capturing_save(page, "footer") assert save.called content = save.call_args[0][1] assert content.index("Bob") < content.index("Carol") < content.index("Dave") assert "reply one" in content assert "reply two" in content def test_fetch_page_comments_filters_resolved(self) -> None: page = MockPage() page.id = 123 page.base_url = "https://example.atlassian.net" client = MagicMock() client.get_page_comments.return_value = { "results": [ _page_comment(cid="open1", body="

open one

"), _page_comment(cid="resolved1", body="

resolved one

", resolved=True), _page_comment(cid="open2", body="

open two

"), ], "_links": {}, } with patch( "confluence_markdown_exporter.confluence.get_thread_confluence", return_value=client, ): results = Page._fetch_page_comments(page) ids = [c["id"] for c in results] assert ids == ["open1", "open2"] class TestPagePropertiesReportDataview: """Page Properties Report macro can be exported as a Dataview DQL query.""" _REPORT_HTML = ( '' "" ) _BODY_EXPORT = ( '" "" "" "" ) class _MockPageWithExport: def __init__(self, body_export: str = "") -> None: from pathlib import Path self.id = 42 self.title = "Test Page" self.html = "" self.labels: list = [] self.ancestors: list = [] self.body_export = body_export self.export_path = Path("Test Space/Test Page/Test Page.md") def get_attachment_by_file_id(self, file_id: str) -> None: return None def test_dataview_output_contains_table_clause(self) -> None: page = self._MockPageWithExport(body_export=self._BODY_EXPORT) converter = Page.Converter(page) with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.page_properties_report_format = "dataview" result = converter.convert(self._REPORT_HTML) assert "```dataview" in result expected_cols = 'tool_version AS "Tool Version", approved_for_use AS "Approved for Use"' assert f"TABLE {expected_cols}" in result def test_dataview_output_contains_from_clause(self) -> None: page = self._MockPageWithExport(body_export=self._BODY_EXPORT) converter = Page.Converter(page) with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.page_properties_report_format = "dataview" result = converter.convert(self._REPORT_HTML) assert 'FROM "Test Space/Test Page"' in result def test_dataview_from_clause_with_current_content_ancestor(self) -> None: html = ( '' "" ) page = self._MockPageWithExport(body_export="") converter = Page.Converter(page) with patch("confluence_markdown_exporter.confluence.settings") as s: s.export.page_properties_report_format = "dataview" result = converter.convert(html) assert 'FROM "Test Space/Test Page"' in 
result

    def test_dataview_output_contains_label_in_from_clause(self) -> None:
        """In ``"dataview"`` mode the converted report contains the ``#tool-validation`` tag."""
        page = self._MockPageWithExport(body_export=self._BODY_EXPORT)
        converter = Page.Converter(page)
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.page_properties_report_format = "dataview"
            result = converter.convert(self._REPORT_HTML)

        assert "#tool-validation" in result

    def test_dataview_output_contains_sort_clause(self) -> None:
        """In ``"dataview"`` mode the converted report contains ``SORT title ASC``."""
        page = self._MockPageWithExport(body_export=self._BODY_EXPORT)
        converter = Page.Converter(page)
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.page_properties_report_format = "dataview"
            result = converter.convert(self._REPORT_HTML)

        assert "SORT title ASC" in result

    def test_frozen_table_when_format_is_frozen(self) -> None:
        """In ``"frozen"`` mode no dataview block is emitted and ``Page A`` appears in the output."""
        page = self._MockPageWithExport(body_export=self._BODY_EXPORT)
        converter = Page.Converter(page)
        with patch("confluence_markdown_exporter.confluence.settings") as s:
            s.export.page_properties_report_format = "frozen"
            result = converter.convert(self._REPORT_HTML)

        assert "```dataview" not in result
        assert "Page A" in result


class TestAttachmentTemplateVars:
    """`attachment_file_id` falls back to the content id when fileId is empty."""

    def test_cloud_style_keeps_file_id(self) -> None:
        """Cloud attachments expose the GUID fileId verbatim."""
        attachment = _make_attachment("content-456", "cloud-guid-123")
        assert attachment._template_vars["attachment_file_id"] == "cloud-guid-123"

    def test_dc_style_falls_back_to_content_id(self) -> None:
        """Data Center / Server attachments fall back to the content id."""
        attachment = _make_attachment("content-456", "")
        assert attachment._template_vars["attachment_file_id"] == "content-456"

    def test_two_dc_attachments_get_distinct_paths(self) -> None:
        """Two DC attachments with the same extension must not collide."""
        att1 = _make_attachment("123", "")
        att2 = _make_attachment("124", "")
        with patch("confluence_markdown_exporter.confluence.settings") as mock_settings:
            # The path template keys on attachment_file_id, so the empty-fileId
            # fallback is what has to keep the two export paths apart.
            mock_settings.export.attachment_path = (
                "{space_name}/attachments/{attachment_file_id}{attachment_extension}"
            )
            path1 = att1.export_path
            path2 = att2.export_path

        assert path1 != path2


class TestWikiLinkDisambiguation:
    """Wiki page links use a vault-relative path when titles collide across spaces."""

    def _make_target_page(self, page_id: int, title: str, space_key: str) -> Page:
        """Build a minimal real ``Page`` to serve as a link target.

        Args:
            page_id: Id of the page.
            title: Title of the page.
            space_key: Used as both the key and the name of the page's ``Space``.

        Returns:
            A ``Page`` on ``https://example.com`` with empty bodies, no labels,
            no attachments and no ancestors.
        """
        space = Space(
            base_url="https://example.com",
            key=space_key,
            name=space_key,
            description="",
            homepage=0,
        )
        version = Version(
            number=1,
            by=User(
                account_id="u1",
                display_name="User",
                username="user",
                public_name="",
                email="",
            ),
            when="2024-01-01T00:00:00Z",
            friendly_when="Jan 1",
        )
        return Page(
            base_url="https://example.com",
            id=page_id,
            title=title,
            space=space,
            ancestors=[],
            version=version,
            body="",
            body_export="",
            editor2="",
            body_storage="",
            labels=[],
            attachments=[],
        )

    def test_unique_title_emits_short_wiki_link(self) -> None:
        """With a single registered page of that title, the link is ``[[Unique Page]]``."""
        from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry

        # Start from an empty registry so only this test's page is registered.
        PageTitleRegistry.reset()
        target = self._make_target_page(101, "Unique Page", "ALPHA")
        PageTitleRegistry.register(target.id, target.title)

        source = _make_page(body="", body_export="", attachments=[])
        with (
            patch("confluence_markdown_exporter.confluence.Page.from_id", return_value=target),
            patch("confluence_markdown_exporter.confluence.settings") as s,
        ):
            s.export.page_href = "wiki"
            s.export.page_path = "{space_name}/{page_title}.md"
            conv = Page.Converter(source)
            html = 'x'
            result = conv.convert(html).strip()

        # Clear the registry again before asserting.
        PageTitleRegistry.reset()
        assert result == "[[Unique Page]]"

    def test_colliding_title_emits_path_qualified_wiki_link(self) -> None:
        from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry

        PageTitleRegistry.reset()
        # Two pages share one title but live in different spaces.
        target_alpha = self._make_target_page(201, "Shared Title", "ALPHA")
        target_beta = self._make_target_page(202, "Shared Title", "BETA")
        PageTitleRegistry.register(target_alpha.id, target_alpha.title)
        PageTitleRegistry.register(target_beta.id,
target_beta.title) source = _make_page(body="", body_export="", attachments=[]) with ( patch( "confluence_markdown_exporter.confluence.Page.from_id", return_value=target_alpha, ), patch("confluence_markdown_exporter.confluence.settings") as s, ): s.export.page_href = "wiki" s.export.page_path = "{space_name}/{page_title}.md" conv = Page.Converter(source) html = 'x' result = conv.convert(html).strip() PageTitleRegistry.reset() assert result == "[[ALPHA/Shared Title|Shared Title]]" def test_relative_link_unaffected(self) -> None: from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry PageTitleRegistry.reset() target_alpha = self._make_target_page(201, "Shared Title", "ALPHA") target_beta = self._make_target_page(202, "Shared Title", "BETA") PageTitleRegistry.register(target_alpha.id, target_alpha.title) PageTitleRegistry.register(target_beta.id, target_beta.title) source = _make_page(body="", body_export="", attachments=[]) with ( patch( "confluence_markdown_exporter.confluence.Page.from_id", return_value=target_alpha, ), patch("confluence_markdown_exporter.confluence.settings") as s, ): s.export.page_href = "relative" s.export.page_path = "{space_name}/{page_title}.md" conv = Page.Converter(source) html = 'x' result = conv.convert(html).strip() PageTitleRegistry.reset() assert "Shared%20Title.md" in result assert result.startswith("[Shared Title](") class TestAbsoluteUrlPageLinks: """Absolute Confluence URLs in href must resolve to page links, not pass through.""" def _make_target_page(self, page_id: int, title: str, space_key: str) -> Page: space = Space( base_url="https://example.com", key=space_key, name=space_key, description="", homepage=0, ) version = Version( number=1, by=User( account_id="u1", display_name="User", username="user", public_name="", email="", ), when="2024-01-01T00:00:00Z", friendly_when="Jan 1", ) return Page( base_url="https://example.com", id=page_id, title=title, space=space, ancestors=[], version=version, body="", 
body_export="", editor2="", body_storage="", labels=[], attachments=[], ) def test_absolute_url_same_host_resolves_page(self) -> None: from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry PageTitleRegistry.reset() target = self._make_target_page(1437663233, "Linked Page", "STRUCT") source = _make_page(body="", body_export="", attachments=[]) with ( patch( "confluence_markdown_exporter.confluence.Page.from_id", return_value=target, ), patch("confluence_markdown_exporter.confluence.settings") as s, ): s.export.page_href = "wiki" s.export.page_path = "{space_name}/{page_title}.md" conv = Page.Converter(source) html = ( '' "https://example.com/wiki/spaces/STRUCT/pages/1437663233" ) result = conv.convert(html).strip() PageTitleRegistry.reset() assert result == "[[Linked Page]]" def test_absolute_url_different_host_left_alone(self) -> None: source = _make_page(body="", body_export="", attachments=[]) conv = Page.Converter(source) html = ( '' "https://other.atlassian.net/wiki/spaces/X/pages/9/T" ) result = conv.convert(html).strip() assert result == "" def test_legacy_pageid_query_resolves_page(self) -> None: from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry PageTitleRegistry.reset() target = self._make_target_page(555, "Legacy Page", "OLD") source = _make_page(body="", body_export="", attachments=[]) with ( patch( "confluence_markdown_exporter.confluence.Page.from_id", return_value=target, ), patch("confluence_markdown_exporter.confluence.settings") as s, ): s.export.page_href = "wiki" s.export.page_path = "{space_name}/{page_title}.md" conv = Page.Converter(source) html = 'x' result = conv.convert(html).strip() PageTitleRegistry.reset() assert result == "[[Legacy Page]]" ================================================ FILE: tests/unit/test_emoticon_conversion.py ================================================ """Test that Confluence emoticon img tags are converted to unicode emoji.""" from __future__ import 
annotations from typing import TYPE_CHECKING import pytest if TYPE_CHECKING: from confluence_markdown_exporter.confluence import Page @pytest.fixture def converter() -> Page.Converter: from confluence_markdown_exporter.confluence import Page class MockPage: def __init__(self) -> None: self.id = "test-page" self.title = "Test Page" self.html = "" self.labels = [] self.ancestors = [] def get_attachment_by_file_id(self, file_id: str) -> None: return None return Page.Converter(MockPage()) class TestEmoticonConversion: def test_atlassian_check_mark(self, converter: Page.Converter) -> None: html = ( '(tick)' ) assert converter.convert(html).strip() == "✅" def test_atlassian_cross_mark(self, converter: Page.Converter) -> None: html = ( '(error)' ) assert converter.convert(html).strip() == "❌" def test_unicode_emoji_by_hex_id(self, converter: Page.Converter) -> None: html = ( '(blue star)' ) assert converter.convert(html).strip() == "\U0001f6e0️" def test_unicode_emoji_fallback_direct(self, converter: Page.Converter) -> None: html = ( 'smile' ) assert converter.convert(html).strip() == "\U0001f600" def test_custom_emoji_uuid_falls_back_to_shortname(self, converter: Page.Converter) -> None: html = ( '(blue star)' ) assert converter.convert(html).strip() == ":alert-1:" def test_non_emoticon_img_unchanged(self, converter: Page.Converter) -> None: html = 'photo' result = converter.convert(html).strip() assert "emoticon" not in result assert "example.com" in result def test_emoticon_inline_in_text(self, converter: Page.Converter) -> None: html = ( 'Status: (tick) Done' ) result = converter.convert(html).strip() assert "✅" in result assert "Done" in result ================================================ FILE: tests/unit/test_include_macro_conversion.py ================================================ """Unit tests for `include` / `excerpt-include` macro conversion.""" from unittest.mock import MagicMock from unittest.mock import patch from bs4 import BeautifulSoup from 
confluence_markdown_exporter.confluence import Page def _make_page(editor2: str) -> MagicMock: page = MagicMock(spec=Page) page.id = 12345 page.title = "Test Page" page.html = "

Test Page

" page.labels = [] page.ancestors = [] page.attachments = [] page.editor2 = editor2 return page INCLUDE_EDITOR2 = """ """ EXCERPT_INCLUDE_EDITOR2 = """ Named Excerpt """ @patch("confluence_markdown_exporter.confluence.settings") def test_include_macro_transclusion_mode(mock_settings: MagicMock) -> None: mock_settings.export.include_document_title = False mock_settings.export.page_breadcrumbs = False mock_settings.export.include_macro = "transclusion" converter = Page.Converter(_make_page(INCLUDE_EDITOR2)) html = ( '
' "

fallback inline text

" ) el = BeautifulSoup(html, "html.parser").find("div") result = converter.convert_include(el, "fallback inline text", []) assert result.strip() == "![[Shared Reference Page]]" @patch("confluence_markdown_exporter.confluence.settings") def test_excerpt_include_macro_transclusion_mode(mock_settings: MagicMock) -> None: mock_settings.export.include_document_title = False mock_settings.export.page_breadcrumbs = False mock_settings.export.include_macro = "transclusion" converter = Page.Converter(_make_page(EXCERPT_INCLUDE_EDITOR2)) html = ( '
' "

resolved excerpt body

" ) el = BeautifulSoup(html, "html.parser").find("div") result = converter.convert_include(el, "resolved excerpt body", []) assert result.strip() == "![[Source Page]]" @patch("confluence_markdown_exporter.confluence.settings") def test_include_macro_inline_mode(mock_settings: MagicMock) -> None: mock_settings.export.include_document_title = False mock_settings.export.page_breadcrumbs = False mock_settings.export.include_macro = "inline" converter = Page.Converter(_make_page(INCLUDE_EDITOR2)) html = ( '
' "

inlined content

" ) el = BeautifulSoup(html, "html.parser").find("div") result = converter.convert_include(el, "inlined content", []) assert "![[" not in result @patch("confluence_markdown_exporter.confluence.settings") def test_excerpt_include_inline_strips_source_page_title_panel( mock_settings: MagicMock, ) -> None: mock_settings.export.include_document_title = False mock_settings.export.page_breadcrumbs = False mock_settings.export.include_macro = "inline" converter = Page.Converter(_make_page(EXCERPT_INCLUDE_EDITOR2)) html = ( '
' '
Source Page
' '
body cell
' "
" ) stripped = converter._strip_excerpt_include_panel_titles(html) assert "Source Page" not in stripped assert "panelHeader" not in stripped assert "panelContent" not in stripped assert "body cell" in stripped @patch("confluence_markdown_exporter.confluence.settings") def test_excerpt_include_inline_keeps_body_when_no_panel( mock_settings: MagicMock, ) -> None: mock_settings.export.include_document_title = False mock_settings.export.page_breadcrumbs = False mock_settings.export.include_macro = "inline" converter = Page.Converter(_make_page(EXCERPT_INCLUDE_EDITOR2)) html = ( 'actual excerpt body' ) stripped = converter._strip_excerpt_include_panel_titles(html) assert "actual excerpt body" in stripped @patch("confluence_markdown_exporter.confluence.settings") def test_include_macro_transclusion_falls_back_when_target_unresolvable( mock_settings: MagicMock, ) -> None: mock_settings.export.include_document_title = False mock_settings.export.page_breadcrumbs = False mock_settings.export.include_macro = "transclusion" # editor2 has a different macro-id → lookup fails converter = Page.Converter(_make_page(INCLUDE_EDITOR2)) html = '

inlined content

' el = BeautifulSoup(html, "html.parser").find("div") result = converter.convert_include(el, "inlined content", []) assert "![[" not in result ================================================ FILE: tests/unit/test_main.py ================================================ """Unit tests for main module.""" import pytest import typer from confluence_markdown_exporter.main import app from confluence_markdown_exporter.main import version class TestVersionCommand: """Test cases for version command.""" def test_version_output(self, capsys: pytest.CaptureFixture[str]) -> None: """Test that version command outputs correct format.""" version() captured = capsys.readouterr() assert "confluence-markdown-exporter" in captured.out # Should contain version information assert len(captured.out.strip()) > len("confluence-markdown-exporter") class TestAppConfiguration: """Test cases for the Typer app configuration.""" def test_app_is_typer_instance(self) -> None: """Test that app is a Typer instance.""" assert isinstance(app, typer.Typer) def test_app_has_commands(self) -> None: """Test that app has expected top-level commands.""" commands = [ callback.callback.__name__.replace("_", "-") for callback in app.registered_commands if callback.callback is not None ] expected_commands = ["pages", "pages-with-descendants", "spaces", "orgs", "version"] for expected_command in expected_commands: assert expected_command in commands def test_app_has_config_group(self) -> None: """Test that the config sub-app is registered as a command group.""" group_names = [group.name for group in app.registered_groups] assert "config" in group_names ================================================ FILE: tests/unit/test_nbsp_fix.py ================================================ """Test that Unicode whitespace (especially  ) is preserved in inline formatting.""" from __future__ import annotations from typing import TYPE_CHECKING import pytest if TYPE_CHECKING: from confluence_markdown_exporter.confluence 
import Page class TestNbspPreservation: """Test that non-breaking spaces and other Unicode whitespace are preserved.""" @pytest.fixture def converter(self) -> Page.Converter: """Create a minimal Page object with a Converter for testing.""" from confluence_markdown_exporter.confluence import Page # Create a minimal page object for testing class MockPage: def __init__(self) -> None: self.id = "test-page" self.title = "Test Page" self.html = "" self.labels = [] self.ancestors = [] def get_attachment_by_file_id(self, file_id: str) -> None: return None page = MockPage() return Page.Converter(page) def test_em_with_leading_nbsp(self, converter: Page.Converter) -> None: """Test  text converts to ' *text*' (space before asterisk).""" html = " text" result = converter.convert(html).strip() assert result == "*text*", f"Expected '*text*' but got '{result}'" # The space is preserved in the conversion html_with_context = "word text" result_with_context = converter.convert(html_with_context).strip() assert "word *text*" in result_with_context or "word *text*" in result_with_context def test_em_with_trailing_nbsp(self, converter: Page.Converter) -> None: """Test text  converts to '*text* ' (space after asterisk).""" html = "text " result = converter.convert(html).strip() assert result == "*text*", f"Expected '*text*' but got '{result}'" # The space is preserved in the conversion html_with_context = "text word" result_with_context = converter.convert(html_with_context).strip() assert "*text* word" in result_with_context or "*text* word" in result_with_context def test_em_with_both_nbsp(self, converter: Page.Converter) -> None: """Test  text  preserves both spaces.""" html = "word text end" result = converter.convert(html).strip() # Should have spaces around the emphasis assert "*text*" in result # Check that there's space before and after assert "word *text* end" in result or "word *text* end" in result def test_strong_with_leading_nbsp(self, converter: Page.Converter) -> None: 
"""Test  text converts to ' **text**'.""" html = "word text" result = converter.convert(html).strip() assert "**text**" in result assert "word **text**" in result or "word **text**" in result def test_strong_with_trailing_nbsp(self, converter: Page.Converter) -> None: """Test text  converts to '**text** '.""" html = "text word" result = converter.convert(html).strip() assert "**text**" in result assert "**text** word" in result or "**text** word" in result def test_code_with_leading_nbsp(self, converter: Page.Converter) -> None: """Test  text converts to ' `text`'.""" html = "word text" result = converter.convert(html).strip() assert "`text`" in result assert "word `text`" in result or "word `text`" in result def test_code_with_trailing_nbsp(self, converter: Page.Converter) -> None: """Test text  converts to '`text` '.""" html = "text word" result = converter.convert(html).strip() assert "`text`" in result assert "`text` word" in result or "`text` word" in result def test_i_tag_with_nbsp(self, converter: Page.Converter) -> None: """Test  text (italic alias) preserves space.""" html = "word text" result = converter.convert(html).strip() assert "*text*" in result assert "word *text*" in result or "word *text*" in result def test_b_tag_with_nbsp(self, converter: Page.Converter) -> None: """Test  text (bold alias) preserves space.""" html = "word text" result = converter.convert(html).strip() assert "**text**" in result assert "word **text**" in result or "word **text**" in result def test_real_world_confluence_example(self, converter: Page.Converter) -> None: """Test the actual example from MOSART Audio.md.""" html = "property JungerRoot ." result = converter.convert(html).strip() # Should NOT be "property*JungerRoot*" (missing space) assert "property*JungerRoot*" not in result, "Space was lost!" 
# Should be "property *JungerRoot*" or "property *JungerRoot*" assert "*JungerRoot*" in result assert "property" in result def test_multiple_nbsp_in_sequence(self, converter: Page.Converter) -> None: """Test multiple   entities in a row.""" html = "word  text" result = converter.convert(html).strip() # Multiple nbsp should become multiple spaces assert "*text*" in result or "* text*" in result def test_mixed_whitespace(self, converter: Page.Converter) -> None: """Test normal spaces work alongside nbsp.""" html = "see figure 1 below" result = converter.convert(html).strip() assert "see *figure 1* below" in result def test_normalize_helper_function(self, converter: Page.Converter) -> None: """Test the _normalize_unicode_whitespace helper directly.""" # Test with various Unicode whitespace characters test_text = "\xa0text\xa0" # \xa0 is nbsp # Before normalization assert "\xa0" in test_text # Normalize normalized_text = converter._normalize_unicode_whitespace(test_text) # After normalization - nbsp should be replaced with regular space assert "\xa0" not in normalized_text, "nbsp should be replaced" assert normalized_text.strip() == "text", "Text should be preserved" # Spaces should now be regular spaces assert normalized_text.startswith(" "), "Leading space should be preserved" assert normalized_text.endswith(" "), "Trailing space should be preserved" def test_unicode_em_space(self, converter: Page.Converter) -> None: """Test that EM SPACE (\u2003) is also normalized.""" test_text = "\u2003text" # EM SPACE normalized_text = converter._normalize_unicode_whitespace(test_text) assert "\u2003" not in normalized_text, "EM SPACE should be replaced" assert normalized_text.strip() == "text" assert normalized_text.startswith(" "), "Space should be preserved as regular space" def test_unicode_thin_space(self, converter: Page.Converter) -> None: """Test that THIN SPACE (\u2009) is normalized.""" test_text = "text\u2009end" # THIN SPACE normalized_text = 
converter._normalize_unicode_whitespace(test_text) assert "\u2009" not in normalized_text, "THIN SPACE should be replaced" assert normalized_text == "text end", "Space should be preserved as regular space" def test_preserves_newlines_and_tabs(self, converter: Page.Converter) -> None: """Test that normal whitespace (newlines, tabs) are NOT affected.""" test_text = "text\nwith\nnewlines" normalized_text = converter._normalize_unicode_whitespace(test_text) # Newlines should be preserved assert "\n" in normalized_text assert normalized_text == test_text, "Regular whitespace should not be touched" def test_no_modification_when_no_unicode_whitespace(self, converter: Page.Converter) -> None: """Test that text without Unicode whitespace is not modified.""" test_text = "normal text" normalized_text = converter._normalize_unicode_whitespace(test_text) assert normalized_text == test_text, "Normal text should not be modified" ================================================ FILE: tests/unit/test_plantuml_code_block_detection.py ================================================ """Unit tests for PlantUML auto-detection in code blocks.""" from unittest.mock import MagicMock from unittest.mock import patch from bs4 import BeautifulSoup from confluence_markdown_exporter.confluence import Page class TestPlantUMLCodeBlockDetection: """Test cases for @startuml auto-detection in
 code blocks."""

    # Helper shared by the tests of this class: returns a MagicMock restricted to
    # the Page spec, with the attributes assigned below (id, title, html, labels,
    # ancestors, attachments, editor2, body_storage) set to fixed values.
    def _make_page(self) -> MagicMock:
        page = MagicMock(spec=Page)
        page.id = 12345
        page.title = "Test Page"
        page.html = "

Test Page

" page.labels = [] page.ancestors = [] page.attachments = [] page.editor2 = "" page.body_storage = "" return page @patch("confluence_markdown_exporter.confluence.settings") def test_pre_with_startuml_uses_plantuml_fence(self, mock_settings: MagicMock) -> None: """Code block containing @startuml should be fenced as plantuml.""" mock_settings.export.include_document_title = False converter = Page.Converter(self._make_page()) html = ( '
'
            "@startuml\nA -> B\n@enduml
" ) el = BeautifulSoup(html, "html.parser").find("pre") result = converter.convert_pre(el, "@startuml\nA -> B\n@enduml", []) assert "```plantuml" in result assert "```java" not in result @patch("confluence_markdown_exporter.confluence.settings") def test_pre_without_startuml_keeps_original_language(self, mock_settings: MagicMock) -> None: """Regular code blocks should keep their original language.""" mock_settings.export.include_document_title = False converter = Page.Converter(self._make_page()) html = ( '
'
            "public class Foo {}
" ) el = BeautifulSoup(html, "html.parser").find("pre") result = converter.convert_pre(el, "public class Foo {}", []) assert "```java" in result assert "```plantuml" not in result @patch("confluence_markdown_exporter.confluence.settings") def test_pre_empty_text_returns_empty(self, mock_settings: MagicMock) -> None: """Empty pre block should return empty string.""" mock_settings.export.include_document_title = False converter = Page.Converter(self._make_page()) html = "
"
        # Parse the markup and select its first <pre> element as the conversion target.
        el = BeautifulSoup(html, "html.parser").find("pre")

        # Invoke the converter method directly, passing an empty text argument.
        result = converter.convert_pre(el, "", [])

        # An empty block is expected to produce no output at all (no empty fence).
        assert result == ""

    @patch("confluence_markdown_exporter.confluence.settings")
    def test_pre_no_language_with_startuml(self, mock_settings: MagicMock) -> None:
        """Pre block without brush param but containing @startuml gets plantuml fence."""
        mock_settings.export.include_document_title = False

        converter = Page.Converter(self._make_page())

        html = "
@startuml\nBob -> Alice\n@enduml
" el = BeautifulSoup(html, "html.parser").find("pre") result = converter.convert_pre(el, "@startuml\nBob -> Alice\n@enduml", []) assert "```plantuml" in result ================================================ FILE: tests/unit/test_plantuml_conversion.py ================================================ """Unit tests for PlantUML diagram conversion.""" from unittest.mock import MagicMock from unittest.mock import patch import pytest from bs4 import BeautifulSoup from confluence_markdown_exporter.confluence import Page class TestPlantUMLConversion: """Test cases for PlantUML diagram conversion.""" @pytest.fixture def mock_page(self) -> MagicMock: """Create a mock page with PlantUML content in editor2 (Cloud format).""" page = MagicMock(spec=Page) page.id = 12345 page.title = "Test Page" page.html = "

Test Page

" page.labels = [] page.ancestors = [] page.attachments = [] page.body_storage = "" # Sample editor2 XML with PlantUML macro uml_data = '{"umlDefinition":"@startuml\\nAlice -> Bob: Hello\\n@enduml"}' page.editor2 = f''' plantuml_test ''' return page @pytest.fixture def mock_server_page(self) -> MagicMock: """Create a mock page with PlantUML content in body.storage (Server format).""" page = MagicMock(spec=Page) page.id = 67890 page.title = "Server Page" page.html = "

Server Page

" page.labels = [] page.ancestors = [] page.attachments = [] page.editor2 = "" page.body_storage = ( '' "" " Bob: Hello\n@enduml]]>" "" "" ) return page @patch("confluence_markdown_exporter.confluence.settings") def test_convert_plantuml_cloud_editor2( self, mock_settings: MagicMock, mock_page: MagicMock ) -> None: """Test PlantUML conversion from editor2 XML (Cloud format).""" mock_settings.export.include_document_title = False mock_settings.export.page_breadcrumbs = False converter = Page.Converter(mock_page) html = '
' el = BeautifulSoup(html, "html.parser").find("div") result = converter.convert_plantuml(el, "", []) assert "```plantuml" in result assert "@startuml" in result assert "Alice -> Bob: Hello" in result assert "@enduml" in result @patch("confluence_markdown_exporter.confluence.settings") def test_convert_plantuml_server_storage( self, mock_settings: MagicMock, mock_server_page: MagicMock ) -> None: """Test PlantUML conversion from body.storage (Server/DC format).""" mock_settings.export.include_document_title = False converter = Page.Converter(mock_server_page) # Server renders PlantUML as without macro-id html = '' el = BeautifulSoup(html, "html.parser").find("span") result = converter.convert_plantuml(el, "", []) assert "```plantuml" in result assert "@startuml" in result assert "Alice -> Bob: Hello" in result assert "@enduml" in result @patch("confluence_markdown_exporter.confluence.settings") def test_convert_plantuml_server_multiple_diagrams( self, mock_settings: MagicMock ) -> None: """Test positional matching of multiple PlantUML diagrams on Server.""" mock_settings.export.include_document_title = False page = MagicMock(spec=Page) page.id = 11111 page.title = "Multi-Diagram Page" page.html = "

Multi-Diagram Page

" page.labels = [] page.ancestors = [] page.attachments = [] page.editor2 = "" page.body_storage = ( '' "" " Bob: First\n@enduml]]>" "" "" "

Some text between diagrams

" '' "" " Carol: Second\n@enduml]]>" "" "" ) converter = Page.Converter(page) html1 = '' el1 = BeautifulSoup(html1, "html.parser").find("span") result1 = converter.convert_plantuml(el1, "", []) html2 = '' el2 = BeautifulSoup(html2, "html.parser").find("span") result2 = converter.convert_plantuml(el2, "", []) assert "Alice -> Bob: First" in result1 assert "Bob -> Carol: Second" in result2 @patch("confluence_markdown_exporter.confluence.settings") def test_convert_plantuml_no_source_available( self, mock_settings: MagicMock ) -> None: """Test PlantUML conversion when neither editor2 nor storage has content.""" mock_settings.export.include_document_title = False page = MagicMock(spec=Page) page.id = 99999 page.title = "Empty Page" page.html = "

Empty Page

" page.labels = [] page.ancestors = [] page.attachments = [] page.editor2 = "" page.body_storage = "" converter = Page.Converter(page) html = '
' el = BeautifulSoup(html, "html.parser").find("div") result = converter.convert_plantuml(el, "", []) assert " B"}' /> """ result = extract_mermaid_data(xml_content) assert result is not None assert "graph TB" in result def test_extract_no_mermaid_data(self) -> None: """Test extraction when no mermaid data exists.""" xml_content = """ """ result = extract_mermaid_data(xml_content) assert result is None def test_extract_invalid_xml(self) -> None: """Test extraction with invalid XML returns None.""" xml_content = "xml" result = extract_mermaid_data(xml_content) assert result is None class TestParseMermaidJson: """Test mermaid JSON parsing.""" def test_parse_json_with_data_field(self) -> None: """Test parsing JSON with 'data' field.""" json_data = '{"data": "graph TB\\n A --> B"}' result = parse_mermaid_json(json_data) assert result == "graph TB\n A --> B" def test_parse_plain_diagram(self) -> None: """Test parsing plain diagram string.""" diagram = "graph TB\n A --> B" result = parse_mermaid_json(diagram) assert result == diagram def test_parse_malformed_json(self) -> None: """Test parsing malformed JSON returns input as-is.""" malformed = '{"incomplete": ' result = parse_mermaid_json(malformed) assert result == malformed class TestFormatMermaidMarkdown: """Test mermaid markdown formatting.""" def test_format_diagram(self) -> None: """Test formatting a diagram as markdown.""" diagram = "graph TB\n A --> B" result = format_mermaid_markdown(diagram) assert result == "```mermaid\ngraph TB\n A --> B\n```" class TestLoadAndParseDrawio: """Integration tests for full DrawIO parsing.""" def test_full_pipeline(self, tmp_path: Path) -> None: """Test full pipeline from file to markdown.""" # XML parser preserves case, so use UserObject and mermaidData mermaid_data = '{"data": "graph TB\\n A[Start]\\n B[End]\\n A --> B"}' xml_content = f""" """ test_file = tmp_path / "test.drawio" test_file.write_text(xml_content) result = load_and_parse_drawio(test_file) assert result is not 
None assert "```mermaid" in result assert "graph TB" in result assert "A[Start]" in result assert "B[End]" in result def test_nonexistent_file(self, tmp_path: Path) -> None: """Test with nonexistent file returns None.""" result = load_and_parse_drawio(tmp_path / "nonexistent.drawio") assert result is None def test_file_without_mermaid_data(self, tmp_path: Path) -> None: """Test file without mermaid data returns None.""" xml_content = """ """ test_file = tmp_path / "test.drawio" test_file.write_text(xml_content) result = load_and_parse_drawio(test_file) assert result is None ================================================ FILE: tests/unit/utils/test_export.py ================================================ """Unit tests for export module.""" import tempfile from pathlib import Path from unittest.mock import MagicMock from unittest.mock import patch import pytest from confluence_markdown_exporter.utils.export import escape_character_class from confluence_markdown_exporter.utils.export import github_heading_slug from confluence_markdown_exporter.utils.export import parse_encode_setting from confluence_markdown_exporter.utils.export import sanitize_filename from confluence_markdown_exporter.utils.export import sanitize_key from confluence_markdown_exporter.utils.export import save_file class TestParseEncodeSetting: """Test cases for parse_encode_setting function.""" def test_empty_string(self) -> None: """Test parsing empty string returns empty dict.""" result = parse_encode_setting("") assert result == {} def test_simple_mapping(self) -> None: """Test parsing simple character mapping.""" result = parse_encode_setting('" ":"%2D","-":"%2D"') expected = {" ": "%2D", "-": "%2D"} assert result == expected def test_mixed_mapping(self) -> None: """Test parsing mixed character mapping.""" result = parse_encode_setting('" ":"dash","-":"%2D"') expected = {" ": "dash", "-": "%2D"} assert result == expected def test_equals_mapping(self) -> None: """Test parsing equals sign 
mapping.""" result = parse_encode_setting('"=":" equals "') expected = {"=": " equals "} assert result == expected def test_special_characters(self) -> None: """Test parsing special characters.""" result = parse_encode_setting('"\\"":" quote ","\\\\":" backslash "') expected = {'"': " quote ", "\\": " backslash "} assert result == expected def test_invalid_json(self) -> None: """Test that invalid JSON returns empty dict.""" result = parse_encode_setting("invalid json") assert result == {} def test_non_dict_json(self) -> None: """Test that non-dict JSON returns empty dict.""" result = parse_encode_setting('"this is a string"') assert result == {} def test_malformed_json(self) -> None: """Test that malformed JSON returns empty dict.""" result = parse_encode_setting('"key":"value",') assert result == {} class TestSaveFile: """Test cases for save_file function.""" def test_save_string_content(self) -> None: """Test saving string content to file.""" with tempfile.TemporaryDirectory() as temp_dir: file_path = Path(temp_dir) / "test.txt" content = "Hello, World!" 
save_file(file_path, content) assert file_path.exists() assert file_path.read_text(encoding="utf-8") == content def test_save_bytes_content(self) -> None: """Test saving bytes content to file.""" with tempfile.TemporaryDirectory() as temp_dir: file_path = Path(temp_dir) / "test.bin" content = b"Binary content" save_file(file_path, content) assert file_path.exists() assert file_path.read_bytes() == content def test_create_parent_directories(self) -> None: """Test that parent directories are created when needed.""" with tempfile.TemporaryDirectory() as temp_dir: file_path = Path(temp_dir) / "subdir" / "nested" / "test.txt" content = "Test content" save_file(file_path, content) assert file_path.exists() assert file_path.read_text(encoding="utf-8") == content def test_overwrite_existing_file(self) -> None: """Test overwriting an existing file.""" with tempfile.TemporaryDirectory() as temp_dir: file_path = Path(temp_dir) / "test.txt" original_content = "Original content" new_content = "New content" save_file(file_path, original_content) save_file(file_path, new_content) assert file_path.read_text(encoding="utf-8") == new_content def test_invalid_content_type(self) -> None: """Test that invalid content type raises TypeError.""" with tempfile.TemporaryDirectory() as temp_dir: file_path = Path(temp_dir) / "test.txt" with pytest.raises(TypeError, match=r"Content must be either a string or bytes\."): save_file(file_path, 123) # type: ignore[arg-type] class TestSanitizeFilename: """Test cases for sanitize_filename function.""" @patch("confluence_markdown_exporter.utils.export.export_options") def test_no_encoding_specified(self, mock_export_options: MagicMock) -> None: """Test sanitizing filename with no encoding specified.""" mock_export_options.filename_encoding = "" mock_export_options.filename_length = 255 mock_export_options.filename_lowercase = False result = sanitize_filename("Test File.txt") assert result == "Test File.txt" 
@patch("confluence_markdown_exporter.utils.export.export_options")
def test_with_encoding_mapping_lowercase(self, mock_export_options: MagicMock) -> None:
    """Test sanitizing filename with encoding mapping and lowercasing enabled."""
    mock_export_options.configure_mock(
        filename_encoding='" ":"_",":":"_"',
        filename_length=255,
        filename_lowercase=True,
    )

    sanitized = sanitize_filename("Test File: Name.txt")

    assert sanitized == "test_file__name.txt"
@patch("confluence_markdown_exporter.utils.export.export_options")
def test_filename_length_limit(self, mock_export_options: MagicMock) -> None:
    """Test that filename length is limited.

    With ``filename_length`` set to 10, a longer name is expected to come back
    as exactly its first 10 characters.
    """
    mock_export_options.filename_encoding = ""
    mock_export_options.filename_length = 10
    # Pin lowercasing off explicitly, as the other sanitize_filename tests do.
    # Left unset, the attribute is an auto-created (truthy) MagicMock and the
    # test only passes because the input happens to be lowercase already.
    mock_export_options.filename_lowercase = False

    long_filename = "very_long_filename_that_exceeds_limit"
    result = sanitize_filename(long_filename)

    assert len(result) == 10
    assert result == long_filename[:10]
@patch("confluence_markdown_exporter.utils.export.export_options")
def test_multiple_control_characters(self, mock_export_options: MagicMock) -> None:
    """Multiple control characters should all be stripped."""
    mock_export_options.filename_encoding = ""
    mock_export_options.filename_length = 255
    # Pin lowercasing off explicitly, as the other sanitize_filename tests do.
    # Left unset, the attribute is an auto-created (truthy) MagicMock, so the
    # outcome would silently depend on the input being lowercase.
    mock_export_options.filename_lowercase = False

    # NUL, backspace and unit separator sit between "test" and "name".
    result = sanitize_filename("test\x00\x08\x1fname")

    assert result == "testname"
class TestGithubHeadingSlug:
    """Test cases for github_heading_slug function."""

    def test_leading_hyphen_preserved(self) -> None:
        """Heading starting with hyphen keeps it — the reported bug."""
        assert github_heading_slug("- Final State") == "-final-state"

    def test_plain_heading(self) -> None:
        """A plain two-word heading is lowercased and joined with a hyphen."""
        assert github_heading_slug("Final State") == "final-state"

    def test_uppercase(self) -> None:
        """Capitalised words come out lowercase in the slug."""
        assert github_heading_slug("Hello World") == "hello-world"

    def test_special_chars_removed(self) -> None:
        """The comma and exclamation mark do not appear in the slug."""
        assert github_heading_slug("Hello, World!") == "hello-world"

    def test_multiple_spaces_collapsed(self) -> None:
        """Several spaces between words are expected to yield a single hyphen."""
        # NOTE(review): the input as seen here has one space between the words,
        # which makes this identical to test_uppercase — confirm the literal
        # really contains consecutive spaces.
        assert github_heading_slug("Hello World") == "hello-world"

    def test_trailing_hyphen(self) -> None:
        """A heading ending in a hyphen yields a slug ending in a hyphen."""
        assert github_heading_slug("Hello -") == "hello-"

    def test_empty_string(self) -> None:
        """An empty heading gives an empty slug."""
        assert github_heading_slug("") == ""
def _make_mock_page(
    page_id: int,
    version_number: int,
    export_path: str,
    *,
    base_url: str = _TEST_BASE_URL,
    space_key: str = _TEST_SPACE_KEY,
) -> MagicMock:
    """Return a MagicMock page carrying the attributes LockfileManager reads.

    The mock exposes ``id``, ``version.number``, ``export_path`` (as a Path),
    ``title`` ("Page <id>"), ``base_url`` and ``space.key``.
    """
    attributes = {
        "id": page_id,
        "title": f"Page {page_id}",
        "export_path": Path(export_path),
        "base_url": base_url,
        # Dotted keys configure attributes on the child mocks.
        "version.number": version_number,
        "space.key": space_key,
    }
    mock_page = MagicMock()
    mock_page.configure_mock(**attributes)
    return mock_page
def _lock_data(
    pages: dict,
    *,
    base_url: str = _TEST_BASE_URL,
    space_key: str = _TEST_SPACE_KEY,
) -> dict:
    """Return a JSON-serialisable version-2 lockfile dict holding ``pages`` under one org/space."""
    space_entry = {"pages": pages}
    org_entry = {"spaces": {space_key: space_entry}}
    return {
        "lockfile_version": 2,
        "last_export": "2025-01-01T00:00:00+00:00",
        "orgs": {base_url: org_entry},
    }
@patch("confluence_markdown_exporter.utils.app_data_store.get_settings")
def test_init_snapshots_all_entries(
    self,
    mock_get_settings: MagicMock,
) -> None:
    """Init copies every lockfile entry into the snapshot and starts with nothing seen."""
    stored_pages = {
        "100": {"title": "A", "version": 1, "export_path": "a.md"},
        "200": {"title": "B", "version": 2, "export_path": "b.md"},
    }
    with tempfile.TemporaryDirectory() as tmp:
        out_dir = Path(tmp)
        export_settings = mock_get_settings.return_value.export
        export_settings.output_path = out_dir
        export_settings.lockfile_name = LOCKFILE_FILENAME

        # Seed a lockfile on disk for init() to pick up.
        serialized = json.dumps(_lock_data(stored_pages))
        (out_dir / LOCKFILE_FILENAME).write_text(serialized, encoding="utf-8")

        LockfileManager.init()

        snapshot_ids = set(LockfileManager._all_entries_snapshot.keys())
        assert snapshot_ids == {"100", "200"}
        assert LockfileManager._seen_page_ids == set()
def test_record_page_updates_existing_entry(self) -> None:
    """Recording a page already in the lock overwrites its stored version."""
    relative_path = "space/Page A.md"
    with tempfile.TemporaryDirectory() as tmp:
        target = Path(tmp) / LOCKFILE_FILENAME
        LockfileManager._lockfile_path = target
        LockfileManager._lock = _lock_with_pages(
            {"100": PageEntry(title="Page A", version=1, export_path=relative_path)}
        )

        # Same page, same path, newer version.
        LockfileManager.record_page(
            _make_mock_page(page_id=100, version_number=2, export_path=relative_path)
        )

        on_disk = json.loads(target.read_text(encoding="utf-8"))
        recorded = on_disk["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
        assert recorded["100"]["version"] == 2
def test_record_page_across_multiple_orgs_and_spaces(self) -> None:
    """Pages recorded for different orgs and spaces land under their own org/space keys."""
    # org URL -> (space key, page id, export path)
    layout = {
        "https://org-a.atlassian.net": ("AAA", 100, "a.md"),
        "https://org-b.atlassian.net": ("BBB", 200, "b.md"),
    }
    with tempfile.TemporaryDirectory() as tmp:
        target = Path(tmp) / LOCKFILE_FILENAME
        LockfileManager._lockfile_path = target
        LockfileManager._lock = ConfluenceLock()

        for org_url, (space, page_id, path) in layout.items():
            LockfileManager.record_page(
                _make_mock_page(page_id, 1, path, base_url=org_url, space_key=space)
            )

        on_disk = json.loads(target.read_text(encoding="utf-8"))
        for org_url, (space, page_id, _path) in layout.items():
            assert str(page_id) in on_disk["orgs"][org_url]["spaces"][space]["pages"]
def test_missing_output_file_should_export(self) -> None:
    """A locked page whose file is absent from the output directory is exported again."""
    relative_path = "space/Page A.md"
    with tempfile.TemporaryDirectory() as tmp:
        LockfileManager._output_path = Path(tmp)
        LockfileManager._lock = _lock_with_pages(
            {"123": PageEntry(title="Page A", version=5, export_path=relative_path)}
        )

        # Version and path match the lock entry, but nothing was written to disk.
        candidate = _make_mock_page(
            page_id=123, version_number=5, export_path=relative_path
        )

        assert LockfileManager.should_export(candidate) is True
class TestLockfileManagerMarkSeen:
    """Test cases for LockfileManager.mark_seen."""

    def test_mark_seen_adds_page_ids(self) -> None:
        """Integer page IDs passed to mark_seen appear in the seen set as strings."""
        page_ids = [100, 200, 300]

        LockfileManager.mark_seen(page_ids)

        assert LockfileManager._seen_page_ids == {"100", "200", "300"}

    def test_mark_seen_accumulates(self) -> None:
        """Successive mark_seen calls add to the seen set instead of replacing it."""
        for page_id in (100, 200):
            LockfileManager.mark_seen([page_id])

        assert LockfileManager._seen_page_ids == {"100", "200"}
def test_cleanup_keeps_unchanged_seen_pages(self) -> None:
    """Pages that were seen during export and did not move are left untouched.

    The page is in the seen set and its snapshot path equals its current
    path, and no ID is reported as deleted. Both the exported file and the
    lock entry must therefore survive the call.
    """
    with tempfile.TemporaryDirectory() as tmp:
        output = Path(tmp)
        # Put the exported file on disk so an unwanted deletion is observable.
        md_file = output / "a.md"
        md_file.write_text("content", encoding="utf-8")
        lockfile_path = output / LOCKFILE_FILENAME
        LockfileManager._output_path = output
        LockfileManager._lockfile_path = lockfile_path
        LockfileManager._lock = _lock_with_pages({
            "100": PageEntry(title="Seen", version=1, export_path="a.md"),
        })
        LockfileManager._all_entries_snapshot = dict(LockfileManager._lock.all_pages())
        LockfileManager._seen_page_ids = {"100"}

        # Empty set: nothing is reported as deleted.
        # NOTE(review): the original comment said fetch_deleted_page_ids is never
        # called because all pages were seen; that cannot be checked from here.
        LockfileManager.remove_pages(set())

        assert md_file.exists()
        assert LockfileManager._lock.get_page("100") is not None
class TestFetchDeletedPageIds:
    """Test cases for fetch_deleted_page_ids."""

    @staticmethod
    def _wire_v2_client(mock_get_client: MagicMock, mock_settings: MagicMock) -> MagicMock:
        """Turn on the v2 API with batch size 250 and return the client the patched factory hands out."""
        mock_settings.connection_config.use_v2_api = True
        mock_settings.export.existence_check_batch_size = 250
        client = MagicMock()
        mock_get_client.return_value = client
        return client

    def test_empty_input_returns_empty(self) -> None:
        """Empty list returns empty set."""
        from confluence_markdown_exporter.confluence import fetch_deleted_page_ids

        assert fetch_deleted_page_ids([], _TEST_BASE_URL) == set()

    @patch("confluence_markdown_exporter.confluence.settings")
    @patch("confluence_markdown_exporter.confluence.get_thread_confluence")
    def test_returns_deleted_ids(
        self, mock_get_client: MagicMock, mock_settings: MagicMock
    ) -> None:
        """IDs missing from the API results are reported as deleted."""
        client = self._wire_v2_client(mock_get_client, mock_settings)
        # The API only knows about pages 100 and 300.
        client.get.return_value = {
            "results": [{"id": "100"}, {"id": "300"}],
        }

        from confluence_markdown_exporter.confluence import fetch_deleted_page_ids

        deleted = fetch_deleted_page_ids(["100", "200", "300"], _TEST_BASE_URL)

        assert deleted == {"200"}

    @patch("confluence_markdown_exporter.confluence.settings")
    @patch("confluence_markdown_exporter.confluence.get_thread_confluence")
    def test_api_error_returns_no_deleted_ids(
        self, mock_get_client: MagicMock, mock_settings: MagicMock
    ) -> None:
        """On API error, returns empty set (safe: don't delete anything)."""
        client = self._wire_v2_client(mock_get_client, mock_settings)
        client.get.side_effect = Exception("Network error")

        from confluence_markdown_exporter.confluence import fetch_deleted_page_ids

        deleted = fetch_deleted_page_ids(["100", "200"], _TEST_BASE_URL)

        assert deleted == set()

    @patch("confluence_markdown_exporter.confluence.settings")
    @patch("confluence_markdown_exporter.confluence.get_thread_confluence")
    def test_batches_large_sets(
        self, mock_get_client: MagicMock, mock_settings: MagicMock
    ) -> None:
        """300 IDs are split into 2 v2-API batches of 250."""
        client = self._wire_v2_client(mock_get_client, mock_settings)
        client.get.return_value = {"results": []}
        many_ids = [str(number) for number in range(300)]

        from confluence_markdown_exporter.confluence import fetch_deleted_page_ids

        fetch_deleted_page_ids(many_ids, _TEST_BASE_URL)

        assert client.get.call_count == 2
def test_save_with_delete_ids(self) -> None:
    """Entries named in delete_ids are absent from the saved file; the rest stay."""
    entries = {
        "100": PageEntry(title="A", version=1, export_path="a.md"),
        "200": PageEntry(title="B", version=1, export_path="b.md"),
    }
    with tempfile.TemporaryDirectory() as tmp:
        target = Path(tmp) / "confluence-lock.json"

        _lock_with_pages(entries).save(target, delete_ids={"100"})

        on_disk = json.loads(target.read_text(encoding="utf-8"))
        remaining = on_disk["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"]
        assert "100" not in remaining
        assert "200" in remaining
def test_save_preserves_model_field_order(self) -> None:
    """The saved JSON lists its top-level keys as lockfile_version, last_export, orgs."""
    expected_order = ["lockfile_version", "last_export", "orgs"]
    with tempfile.TemporaryDirectory() as tmp:
        target = Path(tmp) / "confluence-lock.json"
        single_page = {
            "100": PageEntry(title="Page A", version=1, export_path="a.md"),
        }

        _lock_with_pages(single_page).save(target)

        # json.loads keeps the key order found in the file.
        parsed = json.loads(target.read_text(encoding="utf-8"))
        assert list(parsed) == expected_order
attachment tracking in the lock file.""" def test_page_entry_stores_attachments(self) -> None: """PageEntry persists attachment entries keyed by attachment ID.""" entry = PageEntry( title="Page", version=1, export_path="a.md", attachments={ "att1": AttachmentEntry(version=3, path="space/attachments/uuid-a.png"), }, ) assert entry.attachments["att1"].version == 3 assert entry.attachments["att1"].path == "space/attachments/uuid-a.png" def test_page_entry_attachments_default_empty(self) -> None: """PageEntry.attachments defaults to empty dict (backward-compatible).""" entry = PageEntry(title="Page", version=1, export_path="a.md") assert entry.attachments == {} def test_lock_file_roundtrip_with_attachments(self) -> None: """Attachment entries survive a JSON save/load cycle.""" with tempfile.TemporaryDirectory() as tmp: lockfile_path = Path(tmp) / "confluence-lock.json" lock = _lock_with_pages({ "100": PageEntry( title="Page A", version=1, export_path="a.md", attachments={ "att1": AttachmentEntry(version=2, path="space/attachments/file.png"), }, ), }) lock.save(lockfile_path) saved = json.loads(lockfile_path.read_text(encoding="utf-8")) org = saved["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY] att = org["pages"]["100"]["attachments"]["att1"] assert att["version"] == 2 assert att["path"] == "space/attachments/file.png" def test_lock_file_missing_attachments_field_loads_as_empty(self) -> None: """Old lock files without 'attachments' field load without error.""" with tempfile.TemporaryDirectory() as tmp: lockfile_path = Path(tmp) / "confluence-lock.json" old_format = _lock_data({ "100": {"title": "Page A", "version": 3, "export_path": "a.md"}, }) lockfile_path.write_text(json.dumps(old_format), encoding="utf-8") lock = ConfluenceLock.load(lockfile_path) entry = lock.get_page("100") assert entry is not None assert entry.attachments == {} def test_record_page_stores_attachment_entries(self) -> None: """record_page persists attachment entries to the lock file.""" with 
tempfile.TemporaryDirectory() as tmp: lockfile_path = Path(tmp) / LOCKFILE_FILENAME LockfileManager._lockfile_path = lockfile_path LockfileManager._lock = ConfluenceLock() page = _make_mock_page(page_id=100, version_number=1, export_path="a.md") attachment_entries = { "att42": AttachmentEntry(version=5, path="space/attachments/abc.png"), } LockfileManager.record_page(page, attachment_entries) saved = json.loads(lockfile_path.read_text(encoding="utf-8")) pages = saved["orgs"][_TEST_BASE_URL]["spaces"][_TEST_SPACE_KEY]["pages"] att = pages["100"]["attachments"]["att42"] assert att["version"] == 5 assert att["path"] == "space/attachments/abc.png" def test_get_page_attachment_entries_returns_entries(self) -> None: """get_page_attachment_entries returns the stored attachment dict for a page.""" LockfileManager._lock = _lock_with_pages({ "100": PageEntry( title="Page", version=1, export_path="a.md", attachments={ "att1": AttachmentEntry(version=2, path="space/attachments/x.png"), }, ), }) entries = LockfileManager.get_page_attachment_entries("100") assert "att1" in entries assert entries["att1"].version == 2 def test_get_page_attachment_entries_returns_empty_for_unknown_page(self) -> None: """get_page_attachment_entries returns {} for a page not in the lock.""" LockfileManager._lock = _lock_with_pages({}) assert LockfileManager.get_page_attachment_entries("999") == {} def test_get_page_attachment_entries_returns_empty_when_not_initialized(self) -> None: """get_page_attachment_entries returns {} when the manager is not initialized.""" assert LockfileManager._lock is None assert LockfileManager.get_page_attachment_entries("100") == {} ================================================ FILE: tests/unit/utils/test_measure_time.py ================================================ """Unit tests for the measure_time module.""" import logging import time from datetime import datetime from unittest.mock import patch import pytest from confluence_markdown_exporter.utils.measure_time 
import measure from confluence_markdown_exporter.utils.measure_time import measure_time class TestMeasureTime: """Test cases for measure_time decorator.""" def test_measure_time_decorator_logs(self, caplog: pytest.LogCaptureFixture) -> None: """Test that measure_time decorator logs execution time.""" logger_name = "confluence_markdown_exporter.utils.measure_time" caplog.set_level(logging.INFO, logger=logger_name) @measure_time def test_function(x: int, y: int) -> int: time.sleep(0.01) return x + y result = test_function(2, 3) assert result == 5 log_messages = [record.message for record in caplog.records] assert len(log_messages) == 1 assert "Function 'test_function' took" in log_messages[0] assert "seconds to execute" in log_messages[0] def test_measure_time_with_exception(self, caplog: pytest.LogCaptureFixture) -> None: """Test that measure_time decorator handles exceptions properly.""" logger_name = "confluence_markdown_exporter.utils.measure_time" caplog.set_level(logging.INFO, logger=logger_name) @measure_time def failing_function() -> None: msg = "Test error" raise ValueError(msg) with pytest.raises(ValueError, match="Test error"): failing_function() # The decorator should not log on exception (it only logs on success) log_messages = [record.message for record in caplog.records] assert len(log_messages) == 0 def test_measure_time_with_return_value(self) -> None: """Test that measure_time decorator preserves return values.""" @measure_time def function_with_return() -> str: return "test_result" result = function_with_return() assert result == "test_result" def test_measure_time_with_args_kwargs(self) -> None: """Test that measure_time decorator works with args and kwargs.""" @measure_time def function_with_params(a: int, b: int, c: int = 3) -> int: return a + b + c result = function_with_params(1, 2, c=4) assert result == 7 class TestMeasureContextManager: """Test cases for measure context manager.""" def test_measure_success(self) -> None: """Test measure 
context manager completes successfully.""" with measure("Test Operation"): time.sleep(0.01) def test_measure_with_exception(self) -> None: """Test measure context manager re-raises exceptions.""" def failing_operation() -> None: msg = "Test error" raise ValueError(msg) with pytest.raises(ValueError, match="Test error"), measure("Failing Operation"): failing_operation() def test_measure_debug_logs_start(self, caplog: pytest.LogCaptureFixture) -> None: """Test that measure logs the start time at DEBUG level.""" logger_name = "confluence_markdown_exporter.utils.measure_time" caplog.set_level(logging.DEBUG, logger=logger_name) with measure("Debug Operation"): pass debug_messages = [r.message for r in caplog.records if r.levelno == logging.DEBUG] assert any("Started at" in m for m in debug_messages) @patch("confluence_markdown_exporter.utils.measure_time.datetime") def test_measure_timing_calculation(self, mock_datetime: pytest.MonkeyPatch) -> None: """Test that measure context manager does not suppress exceptions on timing.""" start_time = datetime(2023, 1, 1, 12, 0, 0) end_time = datetime(2023, 1, 1, 12, 0, 5) mock_datetime.now.side_effect = [start_time, end_time] with measure("Timed Operation"): pass def test_measure_no_exception_propagation(self) -> None: """Test that measure context manager doesn't suppress exceptions.""" class CustomError(Exception): pass def raise_error() -> None: msg = "Custom error message" raise CustomError(msg) with pytest.raises(CustomError), measure("Exception Test"): raise_error() ================================================ FILE: tests/unit/utils/test_page_registry.py ================================================ """Tests for PageTitleRegistry collision detection.""" from __future__ import annotations import pytest from confluence_markdown_exporter.utils.page_registry import PageTitleRegistry @pytest.fixture(autouse=True) def _clean_registry() -> None: PageTitleRegistry.reset() yield PageTitleRegistry.reset() def 
test_unique_title_not_ambiguous() -> None: PageTitleRegistry.register(1, "Shared Title") assert PageTitleRegistry.is_ambiguous("Shared Title") is False def test_two_pages_same_title_ambiguous() -> None: PageTitleRegistry.register(1, "Shared Title") PageTitleRegistry.register(2, "Shared Title") assert PageTitleRegistry.is_ambiguous("Shared Title") is True def test_unknown_title_not_ambiguous() -> None: assert PageTitleRegistry.is_ambiguous("Never Seen") is False def test_re_register_same_id_does_not_inflate_count() -> None: PageTitleRegistry.register(1, "Shared Title") PageTitleRegistry.register(1, "Shared Title") PageTitleRegistry.register(1, "Shared Title") assert PageTitleRegistry.is_ambiguous("Shared Title") is False assert PageTitleRegistry.title_count("Shared Title") == 1 def test_renaming_page_updates_counts() -> None: PageTitleRegistry.register(1, "Old Title") PageTitleRegistry.register(2, "Old Title") assert PageTitleRegistry.is_ambiguous("Old Title") is True PageTitleRegistry.register(1, "New Title") assert PageTitleRegistry.is_ambiguous("Old Title") is False assert PageTitleRegistry.title_count("Old Title") == 1 assert PageTitleRegistry.title_count("New Title") == 1 def test_reset_clears_state() -> None: PageTitleRegistry.register(1, "X") PageTitleRegistry.register(2, "X") PageTitleRegistry.reset() assert PageTitleRegistry.is_ambiguous("X") is False assert PageTitleRegistry.title_count("X") == 0 def test_blank_inputs_ignored() -> None: PageTitleRegistry.register(0, "X") PageTitleRegistry.register(1, "") assert PageTitleRegistry.title_count("X") == 0 assert PageTitleRegistry.title_count("") == 0 ================================================ FILE: tests/unit/utils/test_rich_console.py ================================================ """Tests for the logging helpers in rich_console.""" import logging from pathlib import Path from confluence_markdown_exporter.utils.rich_console import setup_logging def test_setup_logging_writes_to_file(tmp_path: Path) -> 
None: """When a log_file is given, log records are also written to that file.""" log_file = tmp_path / "cme.log" setup_logging("DEBUG", log_file=log_file) logger = logging.getLogger("cme.test") logger.debug("a debug message") logger.info("an info message") for handler in logging.getLogger().handlers: handler.flush() contents = log_file.read_text(encoding="utf-8") assert "a debug message" in contents assert "an info message" in contents def test_setup_logging_without_file_does_not_create_one(tmp_path: Path) -> None: """Default invocation does not create a log file.""" log_file = tmp_path / "cme.log" setup_logging("INFO") logging.getLogger("cme.test").info("hello") assert not log_file.exists() ================================================ FILE: tests/unit/utils/test_table_converter.py ================================================ """Tests for the table_converter module.""" from bs4 import BeautifulSoup from confluence_markdown_exporter.utils.table_converter import TableConverter class TestTableConverter: """Test TableConverter class.""" def test_pipe_character_in_cell(self) -> None: """Test that pipe characters are escaped in table cells.""" html = """
Column 1 Column 2
Value with | pipe Normal value
""" BeautifulSoup(html, "html.parser") converter = TableConverter() result = converter.convert(html) # The pipe character should be escaped assert "\\|" in result # The result should still have proper table structure assert "Column 1" in result assert "Column 2" in result assert "Value with" in result assert "pipe" in result def test_multiple_pipes_in_cell(self) -> None: """Test that multiple pipe characters are escaped in table cells.""" html = """
Header
Value | with | multiple | pipes
""" BeautifulSoup(html, "html.parser") converter = TableConverter() result = converter.convert(html) # All pipe characters should be escaped (3 pipes in the content) assert result.count("\\|") == 3 assert "Value" in result assert "with" in result assert "multiple" in result assert "pipes" in result def test_pipe_character_in_header(self) -> None: """Test that pipe characters are escaped in table header cells.""" html = """
Column | 1 Column | 2
Value 1 Value 2
""" converter = TableConverter() result = converter.convert(html) # The pipe characters in headers should be escaped (2 pipes) assert result.count("\\|") == 2 assert "Column" in result assert "Value 1" in result assert "Value 2" in result def test_table_without_pipes(self) -> None: """Test normal table conversion without pipe characters.""" html = """
Name Age
John 30
""" converter = TableConverter() result = converter.convert(html) assert "Name" in result assert "Age" in result assert "John" in result assert "30" in result # Should have proper table structure assert "|" in result assert "---" in result # Should have no escaped pipes assert "\\|" not in result def test_convert_p_bool_parent_tags_no_crash(self) -> None: """convert_p must not crash when markdownify passes bool instead of set.""" converter = TableConverter() el = BeautifulSoup("

text.

", "html.parser").p assert el is not None result = converter.convert_p(el, "text.", parent_tags=False) # type: ignore[arg-type] assert "text." in result def test_convert_ol_bool_parent_tags_no_crash(self) -> None: """convert_ol must not crash when markdownify passes bool instead of set.""" converter = TableConverter() el = BeautifulSoup("
  1. item
", "html.parser").ol assert el is not None result = converter.convert_ol(el, "item", parent_tags=False) # type: ignore[arg-type] assert "item" in result def test_convert_ul_bool_parent_tags_no_crash(self) -> None: """convert_ul must not crash when markdownify passes bool instead of set.""" converter = TableConverter() el = BeautifulSoup("
  • item
", "html.parser").ul assert el is not None result = converter.convert_ul(el, "item", parent_tags=False) # type: ignore[arg-type] assert "item" in result def test_single_item_ul_in_cell_strips_list_symbol(self) -> None: """Single-item ul in a table cell should not render a leading '- '.""" html = """
Header
  • Only item
""" converter = TableConverter() result = converter.convert(html) assert "Only item" in result assert "- Only item" not in result def test_multi_item_ul_in_cell_keeps_list_symbols(self) -> None: """Multi-item ul in a table cell should still render with '- ' prefixes.""" html = """
Header
  • First
  • Second
""" converter = TableConverter() result = converter.convert(html) assert "- First" in result assert "- Second" in result def test_ol_in_cell_with_empty_paragraph_shows_number(self) -> None: """Ol with empty

in a table cell should show the CSS-implicit number.""" html = """
Header
""" converter = TableConverter() result = converter.convert(html) assert "1" in result def test_ol_in_cell_with_empty_paragraph_respects_start(self) -> None: """Ol with start attribute and empty

should use the start number.""" html = """
Header
""" converter = TableConverter() result = converter.convert(html) assert "3" in result def test_ol_in_cell_with_content(self) -> None: """Ol with text content in a table cell should number each item.""" html = """
Header
  1. alpha

  2. beta

""" converter = TableConverter() result = converter.convert(html) assert "1. alpha" in result assert "2. beta" in result assert "
" in result def test_ul_in_cell_with_paragraph_items(self) -> None: """Ul with

-wrapped items in a table cell should use '- ' bullet syntax.""" html = """
Header
  • First

  • Second

  • Third

""" converter = TableConverter() result = converter.convert(html) assert "- First" in result assert "
- Second" in result assert "
- Third" in result def test_td_detection_still_works_with_set_parent_tags(self) -> None: """set-based parent_tags (markdownify 1.x) must still trigger td-specific behaviour.""" converter = TableConverter() el = BeautifulSoup("

text.

", "html.parser").p assert el is not None result = converter.convert_p(el, "text.", {"td", "_inline"}) # type: ignore[arg-type] assert result.endswith("
") ================================================ FILE: tests/unit/utils/test_type_converter.py ================================================ """Unit tests for type_converter module.""" import pytest from confluence_markdown_exporter.utils.type_converter import str_to_bool class TestStrToBool: """Test cases for str_to_bool function.""" def test_true_values(self) -> None: """Test that various true values are converted correctly.""" true_values = ["true", "True", "TRUE", "1", "yes", "Yes", "YES", "on", "On", "ON"] for value in true_values: assert str_to_bool(value) is True, f"Failed for value: {value}" def test_false_values(self) -> None: """Test that various false values are converted correctly.""" false_values = [ "false", "False", "FALSE", "0", "no", "No", "NO", "off", "Off", "OFF", ] for value in false_values: assert str_to_bool(value) is False, f"Failed for value: {value}" def test_whitespace_handling(self) -> None: """Test that whitespace is properly stripped.""" assert str_to_bool(" true ") is True assert str_to_bool("\tfalse\t") is False assert str_to_bool("\n1\n") is True assert str_to_bool(" 0 ") is False def test_invalid_values(self) -> None: """Test that invalid values raise ValueError.""" invalid_values = ["maybe", "2", "invalid", "", "true false", "truthy"] for value in invalid_values: with pytest.raises(ValueError, match=f"Invalid boolean string: '{value}'"): str_to_bool(value) def test_empty_string(self) -> None: """Test that empty string raises ValueError.""" with pytest.raises(ValueError, match="Invalid boolean string: ''"): str_to_bool("") def test_none_handling(self) -> None: """Test behavior with None (should raise AttributeError for strip method).""" with pytest.raises(AttributeError): str_to_bool(None) # type: ignore[arg-type] ================================================ FILE: tsconfig.json ================================================ { "extends": "@docusaurus/tsconfig", "compilerOptions": { "baseUrl": "." 
}, "exclude": ["build", ".docusaurus", "node_modules"] }