Repository: Anionex/banana-slides Branch: main Commit: 2a0b45540be5 Files: 333 Total size: 2.3 MB Directory structure: gitextract_oydoioei/ ├── .dockerignore ├── .githooks/ │ └── pre-commit.disabled ├── .github/ │ ├── CI_SETUP.md │ ├── ISSUE_TEMPLATE/ │ │ └── bug_report.yml │ ├── PULL_REQUEST_TEMPLATE.md │ └── workflows/ │ ├── build-sha-image.yml │ ├── ci-test.yml │ ├── docker-publish.yml │ ├── nightly.yml │ ├── pr-quick-check.yml │ └── translate-readme.yml ├── .gitignore ├── CLA.md ├── CONTRIBUTING.md ├── Dockerfile.allinone ├── LICENSE ├── README.md ├── backend/ │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── alembic.ini │ ├── app.py │ ├── config.py │ ├── controllers/ │ │ ├── __init__.py │ │ ├── export_controller.py │ │ ├── file_controller.py │ │ ├── material_controller.py │ │ ├── page_controller.py │ │ ├── project_controller.py │ │ ├── reference_file_controller.py │ │ ├── settings_controller.py │ │ └── template_controller.py │ ├── migrations/ │ │ ├── env.py │ │ ├── script.py.mako │ │ └── versions/ │ │ ├── 001_baseline_schema.py │ │ ├── 002_create_settings_table.py │ │ ├── 003_add_model_and_mineru_settings.py │ │ ├── 004_add_template_style_to_projects.py │ │ ├── 005_add_pdf_image_path.py │ │ ├── 006_add_export_settings_to_projects.py │ │ ├── 007_add_enable_reasoning_to_settings.py │ │ ├── 008_add_baidu_ocr_api_key_to_settings.py │ │ ├── 009_split_reasoning_config.py │ │ ├── 010_add_cached_image_path.py │ │ ├── 011_add_user_template_thumb.py │ │ ├── 012_add_export_allow_partial_to_projects.py │ │ ├── 013_add_lazyllm_source_fields.py │ │ ├── 014_add_per_model_provider_config.py │ │ ├── 015_rename_baidu_ocr_api_key.py │ │ ├── 38292967f3ca_add_output_language_to_settings_table.py │ │ ├── 64ecc9f34de0_add_description_generation_mode_to_.py │ │ ├── 7acf21d5e41d_make_settings_columns_nullable_for_env_.py │ │ ├── 88054bda1ece_add_outline_and_description_.py │ │ ├── 9439faddcdd5_add_description_extra_fields_to_settings.py │ │ ├── 
9ad736fec43d_add_image_prompt_extra_fields_to_.py │ │ ├── a912a64b7a86_add_mineru_token_to_settings_table.py │ │ └── ee22f1512027_add_image_aspect_ratio_to_project.py │ ├── models/ │ │ ├── __init__.py │ │ ├── material.py │ │ ├── page.py │ │ ├── page_image_version.py │ │ ├── project.py │ │ ├── reference_file.py │ │ ├── settings.py │ │ ├── task.py │ │ └── user_template.py │ ├── run.bat │ ├── run.sh │ ├── server.log │ ├── server_running.log │ ├── services/ │ │ ├── __init__.py │ │ ├── ai_providers/ │ │ │ ├── __init__.py │ │ │ ├── genai_client.py │ │ │ ├── image/ │ │ │ │ ├── __init__.py │ │ │ │ ├── baidu_inpainting_provider.py │ │ │ │ ├── base.py │ │ │ │ ├── gemini_inpainting_provider.py │ │ │ │ ├── genai_provider.py │ │ │ │ ├── lazyllm_provider.py │ │ │ │ ├── openai_provider.py │ │ │ │ └── volcengine_inpainting_provider.py │ │ │ ├── lazyllm_env.py │ │ │ ├── ocr/ │ │ │ │ ├── __init__.py │ │ │ │ ├── baidu_accurate_ocr_provider.py │ │ │ │ └── baidu_table_ocr_provider.py │ │ │ └── text/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── genai_provider.py │ │ │ ├── lazyllm_provider.py │ │ │ └── openai_provider.py │ │ ├── ai_service.py │ │ ├── ai_service_manager.py │ │ ├── export_service.py │ │ ├── file_parser_service.py │ │ ├── file_service.py │ │ ├── image_editability/ │ │ │ ├── __init__.py │ │ │ ├── coordinate_mapper.py │ │ │ ├── data_models.py │ │ │ ├── extractors.py │ │ │ ├── factories.py │ │ │ ├── helpers.py │ │ │ ├── hybrid_extractor.py │ │ │ ├── inpaint_providers.py │ │ │ ├── service.py │ │ │ └── text_attribute_extractors.py │ │ ├── inpainting_service.py │ │ ├── pdf_service.py │ │ ├── prompts.py │ │ └── task_manager.py │ ├── tests/ │ │ ├── conftest.py │ │ ├── integration/ │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── test_api_full_flow.py │ │ │ └── test_full_workflow.py │ │ ├── pytest.ini │ │ └── unit/ │ │ ├── __init__.py │ │ ├── test_ai_mock.py │ │ ├── test_api_health.py │ │ ├── test_api_material.py │ │ ├── test_api_project.py │ │ ├── 
test_api_settings_provider.py │ │ ├── test_editable_pptx_style_extraction.py │ │ ├── test_file_parser_service.py │ │ ├── test_image_prompt_ratio.py │ │ ├── test_lazyllm_image_content_type.py │ │ └── test_smart_merge.py │ └── utils/ │ ├── __init__.py │ ├── image_utils.py │ ├── latex_utils.py │ ├── mask_utils.py │ ├── page_utils.py │ ├── path_utils.py │ ├── pptx_builder.py │ ├── response.py │ └── validators.py ├── create-test-data.mjs ├── create-test-data.sh ├── docker/ │ ├── nginx-allinone.conf │ ├── start-backend.sh │ └── supervisord.conf ├── docker-compose.allinone.yml ├── docker-compose.prod.yml ├── docker-compose.yml ├── docs/ │ ├── configuration.mdx │ ├── docs.json │ ├── faq.mdx │ ├── features/ │ │ ├── creation.mdx │ │ ├── descriptions.mdx │ │ ├── editing.mdx │ │ ├── export.mdx │ │ ├── images.mdx │ │ ├── import-export.mdx │ │ ├── materials.mdx │ │ ├── outline.mdx │ │ └── overview.mdx │ ├── history.mdx │ ├── index.mdx │ ├── logo/ │ │ └── .gitkeep │ ├── quickstart.mdx │ └── zh/ │ ├── configuration.mdx │ ├── faq.mdx │ ├── features/ │ │ ├── creation.mdx │ │ ├── descriptions.mdx │ │ ├── editing.mdx │ │ ├── export.mdx │ │ ├── images.mdx │ │ ├── import-export.mdx │ │ ├── materials.mdx │ │ ├── outline.mdx │ │ └── overview.mdx │ ├── history.mdx │ ├── index.mdx │ └── quickstart.mdx ├── frontend/ │ ├── .eslintrc.cjs │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── e2e/ │ │ ├── README.md │ │ ├── access-code.spec.ts │ │ ├── aspect-ratio-lock-integration.spec.ts │ │ ├── aspect-ratio-lock.spec.ts │ │ ├── attachment-sort-filter.spec.ts │ │ ├── badge-status-after-generation.spec.ts │ │ ├── desc-regeneration-skeleton.spec.ts │ │ ├── description-detail-level.spec.ts │ │ ├── description-no-flicker.spec.ts │ │ ├── editable-export-failure.spec.ts │ │ ├── export-aspect-ratio.spec.ts │ │ ├── export-images.spec.ts │ │ ├── extract-style-caption.spec.ts │ │ ├── failed-file-reselect.spec.ts │ │ ├── file-preview-scrollbar.spec.ts │ │ ├── generation-fail.spec.ts │ │ ├── 
generation-requirements.spec.ts │ │ ├── helpers/ │ │ │ └── seed-project.ts │ │ ├── history-pagination.spec.ts │ │ ├── image-prompt-ratio.spec.ts │ │ ├── image-queued-status.spec.ts │ │ ├── import-markdown.spec.ts │ │ ├── lazyllm-global-vendor.spec.ts │ │ ├── lazyllm-image-content-type.spec.ts │ │ ├── markdown-card-style.spec.ts │ │ ├── material-aspect-ratio.spec.ts │ │ ├── outline-autosave-blur.spec.ts │ │ ├── outline-null-crash.spec.ts │ │ ├── parsing-preview-toast.spec.ts │ │ ├── pdf-export-metadata.spec.ts │ │ ├── per-model-startup-creds.spec.ts │ │ ├── preset-capsules.spec.ts │ │ ├── preview-text-style-template.spec.ts │ │ ├── renovation-aspect-ratio.spec.ts │ │ ├── settings-api-clarity.spec.ts │ │ ├── settings-api-links.spec.ts │ │ ├── settings-back-to-top.spec.ts │ │ ├── settings-backfill.spec.ts │ │ ├── settings-env-fallback.spec.ts │ │ ├── settings-per-model-provider-integration.spec.ts │ │ ├── settings-per-model-provider.spec.ts │ │ ├── settings-read-only.spec.ts │ │ ├── settings-reset-fallback.spec.ts │ │ ├── settings-test-vendor-format.spec.ts │ │ ├── smart-merge.spec.ts │ │ ├── streaming-descriptions.spec.ts │ │ ├── streaming-outline.spec.ts │ │ ├── ui-full-flow-mocked.spec.ts │ │ ├── ui-full-flow.spec.ts │ │ ├── upload-folder-path.spec.ts │ │ ├── ux-polish-i18n.spec.ts │ │ └── visual-regression.spec.ts │ ├── index.html │ ├── nginx.conf │ ├── package.json │ ├── playwright.config.ts │ ├── postcss.config.js │ ├── src/ │ │ ├── App.tsx │ │ ├── api/ │ │ │ ├── client.ts │ │ │ └── endpoints.ts │ │ ├── components/ │ │ │ ├── history/ │ │ │ │ └── ProjectCard.tsx │ │ │ ├── outline/ │ │ │ │ └── OutlineCard.tsx │ │ │ ├── preview/ │ │ │ │ ├── DescriptionCard.tsx │ │ │ │ └── SlideCard.tsx │ │ │ └── shared/ │ │ │ ├── AccessCodeGuard.tsx │ │ │ ├── AiRefineInput.tsx │ │ │ ├── Button.tsx │ │ │ ├── Card.tsx │ │ │ ├── ConfirmDialog.tsx │ │ │ ├── ContextualStatusBadge.tsx │ │ │ ├── ExportTasksPanel.tsx │ │ │ ├── FilePreviewModal.tsx │ │ │ ├── Footer.tsx │ │ │ ├── 
GithubBadge.tsx │ │ │ ├── GithubRepoCard.tsx │ │ │ ├── HelpModal.tsx │ │ │ ├── ImagePreviewList.tsx │ │ │ ├── Input.tsx │ │ │ ├── Loading.tsx │ │ │ ├── Markdown.tsx │ │ │ ├── MarkdownTextarea.tsx │ │ │ ├── MaterialCenterModal.tsx │ │ │ ├── MaterialGeneratorModal.tsx │ │ │ ├── MaterialSelector.tsx │ │ │ ├── Modal.tsx │ │ │ ├── Pagination.tsx │ │ │ ├── PresetCapsules.tsx │ │ │ ├── ProjectResourcesList.tsx │ │ │ ├── ProjectSettingsModal.tsx │ │ │ ├── ReferenceFileCard.tsx │ │ │ ├── ReferenceFileList.tsx │ │ │ ├── ReferenceFileSelector.tsx │ │ │ ├── ShimmerOverlay.tsx │ │ │ ├── StatusBadge.tsx │ │ │ ├── TemplateSelector.tsx │ │ │ ├── TextStyleSelector.tsx │ │ │ ├── Textarea.tsx │ │ │ ├── Toast.tsx │ │ │ └── index.ts │ │ ├── config/ │ │ │ ├── aspectRatio.ts │ │ │ ├── presetStyles.ts │ │ │ └── presetStylesI18n.ts │ │ ├── hooks/ │ │ │ ├── useGeneratingState.ts │ │ │ ├── useImagePaste.ts │ │ │ ├── usePageStatus.ts │ │ │ ├── useT.ts │ │ │ └── useTheme.ts │ │ ├── i18n.ts │ │ ├── index.css │ │ ├── locales/ │ │ │ ├── en.json │ │ │ └── zh.json │ │ ├── main.tsx │ │ ├── pages/ │ │ │ ├── DetailEditor.tsx │ │ │ ├── History.tsx │ │ │ ├── Home.tsx │ │ │ ├── Landing.tsx │ │ │ ├── OutlineEditor.tsx │ │ │ ├── Settings.tsx │ │ │ └── SlidePreview.tsx │ │ ├── store/ │ │ │ ├── useExportTasksStore.ts │ │ │ └── useProjectStore.ts │ │ ├── tests/ │ │ │ ├── components/ │ │ │ │ ├── Button.test.tsx │ │ │ │ ├── DescriptionCard.test.tsx │ │ │ │ └── Markdown.test.tsx │ │ │ ├── setup.ts │ │ │ ├── store/ │ │ │ │ ├── useProjectStore.initializeProject.test.ts │ │ │ │ └── useProjectStore.test.ts │ │ │ └── utils.normalizeErrorMessage.test.ts │ │ ├── types/ │ │ │ └── index.ts │ │ ├── utils/ │ │ │ ├── i18nHelper.ts │ │ │ ├── index.ts │ │ │ ├── logger.ts │ │ │ └── projectUtils.ts │ │ └── vite-env.d.ts │ ├── start.bat │ ├── start.sh │ ├── tailwind.config.js │ ├── tsconfig.json │ ├── tsconfig.node.json │ └── vite.config.ts ├── package.json ├── pyproject.toml ├── scripts/ │ ├── export_editable_pptx.py │ ├── 
pre-push-check.sh │ ├── run-local-ci.sh │ ├── setup-env-from-secrets.sh │ ├── setup_git_hooks.sh │ ├── test_docker_environment.sh │ ├── translate_readme.py │ ├── translate_readme_incremental.py │ ├── verify-e2e-refactoring.sh │ └── wait-for-health.sh ├── tests/ │ └── docker/ │ └── test_docker_environment.sh └── v0_demo/ ├── demo.py ├── gemini_genai.py └── lazyllm_genai.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ # Python __pycache__/ *.py[cod] *$py.class *.so .Python *.egg-info/ dist/ build/ *.egg .venv/ venv/ env/ ENV/ # Node **/node_modules/ node_modules/ npm-debug.log* yarn-debug.log* yarn-error.log* .pnpm-debug.log* .npm .eslintcache frontend/node_modules/ frontend/dist/ frontend/.vite/ # IDE .vscode/ .idea/ *.swp *.swo *~ .DS_Store # Project specific instance/*.db instance/*.db-journal uploads/ *.log server.log server_running.log # Test files test_*.py *_test.py tests/ # Docs and examples *.md !README.md docs/ demo.py gemini_genai.py generate-example.py !assets/** *.png *.jpg *.jpeg *.pptx *.pdf output/ res.png template*.png page*.png # Git .git/ .gitignore # Docker Dockerfile* docker-compose*.yml .dockerignore # Environment variables .env .env.local .env.*.local # Others *.lock uv.lock LICENSE PRD.md # NOTE: do not repeat *.md in this section; the last matching rule wins, so a later *.md would cancel the !README.md exception above # Windows Thumbs.db Desktop.ini *.lnk # Build output dist/ build/ *.pptx *.pdf ================================================ FILE: .githooks/pre-commit.disabled ================================================ #!/bin/bash # Pre-commit hook: automatically translate README.md into README_EN.md # Only triggers when README.md has been modified set -e # Check whether README.md is modified in this commit if git diff --cached --name-only | grep -q "^README\.md$"; then echo "检测到README.md变更,正在自动翻译到README_EN.md..." # Check that we are in the project root if [ ! -f "README.md" ]; then echo "错误: 未找到README.md" exit 1 fi # Check for the .env file if [ ! 
-f ".env" ]; then echo "警告: 未找到.env文件,跳过翻译" echo "如需自动翻译,请确保.env文件包含必要的API密钥配置" exit 0 fi # Load environment variables set -a source .env 2>/dev/null || true set +a # Check required environment variables if [ -z "$GOOGLE_API_KEY" ]; then echo "警告: GOOGLE_API_KEY未设置,跳过翻译" echo "如需自动翻译,请在.env文件中设置GOOGLE_API_KEY" exit 0 fi # Check that uv is available if ! command -v uv &> /dev/null; then echo "警告: uv未安装,跳过翻译" exit 0 fi # Run the translation script echo "开始翻译..." if uv run python scripts/translate_readme.py; then echo "✅ 翻译成功!" # Add the translated README_EN.md to this commit git add README_EN.md echo "README_EN.md已自动添加到本次提交" else # The retry hint names the same command this hook runs; the repository has no scripts/test_translation.sh echo "❌ 翻译失败,但不阻止提交" echo "你可以稍后手动运行: uv run python scripts/translate_readme.py" # Do not block the commit; let it continue exit 0 fi else # README.md not modified; nothing to translate exit 0 fi ================================================ FILE: .github/CI_SETUP.md ================================================ # CI/CD 配置说明 本项目使用GitHub Actions实现自动化CI/CD,包含**Light检查**和**Full测试**两个层级。 ## 📋 CI架构概览 ### 🚀 Light检查 - PR快速反馈 **触发时机**: 提交PR时自动运行 **耗时**: 2-5分钟 **工作流**: `.github/workflows/pr-quick-check.yml` 包含: - ✅ 代码语法检查(flake8, ESLint) - ✅ 代码格式检查(black, prettier) - ✅ TypeScript构建检查 - ✅ 后端冒烟测试(健康检查) - ✅ PR自动评论 ### 🎯 Full测试 - 完整验证 **触发时机**: 1. **PR添加`ready-for-test`标签时** 👈 推荐方式 2. 直接Push到`main`或`develop`分支(不通过PR) **注意**:PR合并后**不会**再次运行完整测试,避免重复浪费资源 **耗时**: 15-30分钟 **工作流**: `.github/workflows/ci-test.yml` 包含: - ✅ 后端单元测试(pytest + coverage) - ✅ 后端集成测试(使用 mock AI) - ✅ 前端测试(Vitest + coverage) - ✅ Docker 环境测试(容器构建、启动、健康检查) - ✅ **E2E 测试(从创建到导出 PPT)** - 需要真实 Google Gemini API key - 测试完整的 AI 生成流程 - 如果未配置 API key,会自动跳过并显示说明 - ✅ 安全扫描(依赖漏洞检查) --- ## 🔧 配置步骤 ### 1. 配置GitHub Secrets(必需) 为了运行完整的E2E测试(包含真实AI生成),需要配置以下Secrets: #### 步骤: 1. 进入GitHub仓库页面 2. 点击 `Settings` → `Secrets and variables` → `Actions` 3. 点击 `New repository secret` 4. 
添加以下Secret: | Secret名称 | 必需 | 说明 | 获取方式 | |-----------|------|------|---------| | `GOOGLE_API_KEY` | ✅ 必需 | Google Gemini API密钥(用于完整E2E测试) | [https://aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey) | | `OPENAI_API_KEY` | ⚪ 可选 | OpenAI API密钥(用于集成测试验证兼容性) | [https://platform.openai.com/api-keys](https://platform.openai.com/api-keys) | | `SECRET_KEY` | ⚪ 可选 | Flask应用密钥(生产环境建议配置) | 随机生成,建议使用:`python -c "import secrets; print(secrets.token_hex(32))"` | | `MINERU_TOKEN` | ⚪ 可选 | MinerU服务Token(如果使用MinerU解析) | 从MinerU服务获取 | **关于 E2E 测试策略**: - 💡 **单一 E2E 测试**:使用 Gemini 格式测试完整流程(创建→大纲→描述→图片→导出) - 💰 **成本优化**:只运行一次完整 E2E,避免重复测试 - ⚠️ **条件运行**:只在配置了真实 `GOOGLE_API_KEY` 时运行 **注意**: - ⚠️ **没有配置 `GOOGLE_API_KEY` 时,E2E 测试会被跳过** - ✅ 其他测试(单元、集成、Docker)仍会运行,覆盖大部分功能 - 💰 真实 API 调用会消耗配额(约 $0.01-0.05/次),建议使用测试专用账号 - 🔧 CI 会自动将 Secrets 替换到 `.env` 文件中对应的占位符 **CI如何处理Secrets**: CI配置会自动处理以下逻辑: 1. **复制`.env.example`到`.env`**(保持所有默认配置) 2. **自动检测并替换Secrets**: - 如果GitHub Secrets中配置了某个Secret → 自动替换`.env`中对应的占位符 - 如果没有配置 → 保持`.env.example`中的默认值 **支持的Secrets列表**: CI配置会自动检测并替换以下Secrets(如果配置了的话): - ✅ `GOOGLE_API_KEY` - 必需,如果没有配置则使用`mock-api-key` - ⚪ `OPENAI_API_KEY` - 可选,如果配置了则替换 - ⚪ `SECRET_KEY` - 可选,生产环境建议配置 - ⚪ `MINERU_TOKEN` - 可选,如果使用MinerU服务则配置 **添加新的Secret支持**: 如果需要支持其他配置项的Secret替换,只需在`.github/workflows/ci-test.yml`中添加对应的检查逻辑: ```yaml # 在"设置环境变量"步骤中添加 if [ -n "${{ secrets.YOUR_NEW_SECRET }}" ]; then sed -i '/^YOUR_ENV_VAR=/s/placeholder/${{ secrets.YOUR_NEW_SECRET }}/' .env echo "✓ 已替换 YOUR_ENV_VAR" fi ``` ### 2. (可选)配置CodeCov 如果需要代码覆盖率报告和徽章: 1. 访问 [codecov.io](https://codecov.io) 2. 关联GitHub账号并授权仓库 3. 获取Upload Token(通常不需要,公开仓库自动识别) 4. 
如需手动配置,添加Secret:`CODECOV_TOKEN` --- ## 🏷️ 如何触发Full测试 ### 方法1:PR添加标签触发(✅ 推荐) 当你认为PR已经准备好进行完整测试时: ```bash # 在PR页面右侧,点击 "Labels" # 添加 "ready-for-test" 标签 ``` 这会立即触发完整测试套件,包括: - ✅ 所有单元和集成测试 - ✅ Docker 环境测试 - ✅ **E2E 测试(如果配置了真实 API key)** **测试通过后,直接合并即可!合并后不会重复运行测试。** **E2E 测试说明**: - 如果配置了 `GOOGLE_API_KEY`:运行完整 E2E(额外 10-15 分钟) - 如果未配置:跳过 E2E,显示友好说明(其他测试已覆盖大部分功能) ### 方法2:手动触发(✅ 新增) 在GitHub Actions页面手动运行Full Test: 1. 进入仓库页面 2. 点击 **Actions** 标签 3. 在左侧选择 **CI Tests** 4. 点击右侧的 **Run workflow** 按钮 5. 选择分支(通常是`main`或`develop`) 6. 点击 **Run workflow** **适用场景**: - ✅ 想在任何时候验证代码 - ✅ 调试CI问题 - ✅ 验证main分支的当前状态 ### 方法3:直接Push到main 如果你直接push到`main`或`develop`分支(不通过PR),会自动运行完整测试。 **注意**: - ⚠️ **PR合并不会触发Full测试**(避免重复) - ✅ 请确保PR在合并前已通过`ready-for-test`测试 - 🔒 建议在仓库设置中启用分支保护,要求`ready-for-test`状态通过才能合并 --- ## 🔒 建议:启用分支保护规则 为了确保所有PR在合并前都经过完整测试,建议配置GitHub分支保护: ### 配置步骤 1. 进入仓库 → `Settings` → `Branches` 2. 在 `Branch protection rules` 下点击 `Add rule` 3. 配置如下: - **Branch name pattern**: `main` - ✅ **Require status checks to pass before merging** - 搜索并勾选 `Backend Tests`(或其他关键测试) - ✅ **Require branches to be up to date before merging** - 可选:**Require pull request reviews before merging** ### 效果 配置后,PR只有在以下条件满足时才能合并: - ✅ Light检查通过(自动运行) - ✅ Full测试通过(通过`ready-for-test`标签触发) - ✅ 代码review通过(如果启用) 这样可以完全避免未测试代码进入`main`分支! --- ## 🧪 测试文件说明 ### Light检查测试 - **前端Lint**: `frontend/src/**/*.{ts,tsx}` - **后端语法**: `backend/**/*.py` - **冒烟测试**: 启动后端并检查`/health`端点 ### Full测试文件 ``` backend/tests/ ├── unit/ # 后端单元测试 │ ├── test_ai_service.py │ ├── test_file_service.py │ └── ... ├── integration/ # 后端集成测试 │ ├── test_api.py │ └── ... 
frontend/src/ ├── **/*.test.tsx # 前端组件测试 └── **/*.spec.tsx # 前端功能测试 e2e/ ├── home.spec.ts # 首页UI测试 ├── create-ppt.spec.ts # PPT创建基础测试 └── full-flow.spec.ts # 🎯 完整流程测试(创建→大纲→描述→图片→导出) ``` --- ## 📊 测试结果查看 ### CI状态检查 - PR页面底部会显示所有检查状态 - 点击 `Details` 查看详细日志 - Light检查会在PR评论中自动发布结果 ### 测试报告和覆盖率 - **代码覆盖率**: 自动上传到CodeCov(如果配置) - **E2E测试报告**: 失败时会上传Playwright报告和截图 - 在Actions页面 → 对应的workflow run → `Artifacts` 下载 - `playwright-report`: HTML测试报告 - `playwright-screenshots`: 失败时的截图和视频 ### 查看日志 ```bash # 本地查看Actions日志 gh run list gh run view --log ``` --- ## 🚨 常见问题 ### Q1: E2E测试超时失败 **原因**: AI生成需要较长时间 **解决**: - 检查API key是否有效 - 检查API配额是否用尽 - 本地运行测试验证:`npx playwright test full-flow.spec.ts` ### Q2: Docker测试失败 **原因**: 容器启动超时或端口冲突 **解决**: - 检查`docker-compose.yml`配置 - 查看容器日志(CI会在失败时自动显示) - 本地测试:`./scripts/test_docker_environment.sh` ### Q3: 前端构建失败 **原因**: TypeScript类型错误或依赖问题 **解决**: - 本地运行:`cd frontend && npm run build:check` - 检查`frontend/package.json`依赖版本 - 确保`package-lock.json`已提交 ### Q4: "ready-for-test"标签不触发测试 **原因**: Workflow权限或配置问题 **解决**: - 确认标签名称完全匹配(小写,带连字符) - 检查仓库Settings → Actions → General → Workflow permissions - 查看Actions页面确认workflow是否被触发 --- ## 📝 本地测试 ### 🚀 快速开始 ```bash # Light检查(2-3分钟)- 提交前快速检查 ./scripts/run-local-ci.sh light # Full测试(10-20分钟)- PR合并前完整测试 ./scripts/run-local-ci.sh full ``` ### 🔧 前置依赖 ```bash # Python环境 (>= 3.10) python3 --version # Node.js环境 (>= 18) node --version # UV包管理器 curl -LsSf https://astral.sh/uv/install.sh | sh # Docker docker --version docker compose --version # 安装依赖 uv sync --extra test cd frontend && npm ci npx playwright install --with-deps chromium ``` ### 🧪 运行特定测试 ```bash # 后端单元测试 cd backend uv run pytest tests/unit -v --cov=. 
--cov-report=html # 前端测试 cd frontend npm test -- --coverage # E2E测试(需要真实API key) cp .env.example .env # 编辑.env填入真实API密钥 docker compose up -d npx playwright test full-flow.spec.ts # Docker环境测试 ./scripts/test_docker_environment.sh ``` ### 🐛 调试失败的测试 ```bash # E2E UI模式调试 npx playwright test --ui # 后端调试模式 cd backend uv run pytest tests/unit/test_xxx.py --pdb # 查看Docker日志 docker compose logs backend docker compose logs frontend ``` --- ## 🎯 最佳实践 ### 开发流程建议 1. **开发阶段**: - 频繁提交小改动 - 依赖Light检查快速反馈 - 修复lint和构建错误 2. **功能完成后**: - 自测主要功能 - 运行本地测试套件 - 提交PR 3. **准备合并前**: - 添加`ready-for-test`标签 👈 **关键步骤** - 等待Full测试通过 - Code review通过后合并 - 合并后**不会重复运行测试**,节省资源 ✅ 4. **合并后**: - 代码直接进入`main`分支 - 无需等待额外的CI运行 - 节省时间和成本 ### CI优化建议 - ✅ 保持测试快速(单元测试 < 5分钟) - ✅ E2E测试只验证关键流程 - ✅ 使用缓存加速依赖安装 - ✅ 并行运行独立测试 - ✅ 失败快速反馈(fail-fast) --- ## 📚 相关文档 - [GitHub Actions文档](https://docs.github.com/en/actions) - [Playwright测试文档](https://playwright.dev) - [pytest文档](https://docs.pytest.org) - [Vitest文档](https://vitest.dev) --- ## 🆘 需要帮助? 如果遇到CI问题: 1. 查看Actions日志详细错误信息 2. 参考本文档常见问题部分 3. 在issue中提问并附上错误日志 4. 联系维护者 --- **最后更新**: 2025-01-20 **维护者**: Banana Slides Team ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: Bug Report / 问题反馈 description: Report a bug or issue / 报告错误或问题 labels: ["bug"] body: - type: dropdown id: deployment attributes: label: Deployment Method / 部署方式 description: Where did you encounter this issue? / 你在哪里遇到了这个问题? options: - Demo Website / 在线 Demo (bananaslides.online) - Docker Compose (docker-compose.yml) - Docker Compose with Pre-built Images (docker-compose.prod.yml) - Local Development / 本地开发 (uv + npm) - Cloud Platform / 云平台部署 (雨云等) - Other / 其他 validations: required: true - type: textarea id: description attributes: label: Issue Description / 问题描述 description: Describe the issue you encountered / 描述你遇到的问题 placeholder: What happened? / 发生了什么? 
validations: required: true - type: textarea id: steps attributes: label: Steps to Reproduce / 复现步骤 description: How can we reproduce this issue? / 如何复现这个问题? placeholder: | 1. Go to... / 进入... 2. Click on... / 点击... 3. See error / 看到错误 validations: required: false - type: textarea id: expected attributes: label: Expected Behavior / 期望行为 description: What did you expect to happen? / 你期望发生什么? validations: required: false - type: textarea id: logs attributes: label: Logs / 日志 description: If applicable, paste relevant logs (docker logs, browser console, etc.) / 如果适用,粘贴相关日志 render: shell validations: required: false - type: input id: version attributes: label: Version / 版本 description: Which version are you using? / 你使用的是哪个版本? placeholder: v0.4.0 or commit hash / v0.4.0 或 commit hash validations: required: false ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ ## Summary ## Changed Files ## Test Plan ## CLA - [ ] I have read the [Contributor License Agreement](CLA.md) and [Contributing Guidelines](CONTRIBUTING.md), and I agree to the CLA ================================================ FILE: .github/workflows/build-sha-image.yml ================================================ name: Build SHA Image on: workflow_dispatch: inputs: sha: description: 'Git SHA to build (full or short)' required: true type: string image_type: description: 'Which images to build' required: true type: choice options: - allinone - split - both default: allinone tag: description: 'Additional Docker tag (e.g. latest). Leave empty to only tag with SHA.' 
required: false type: string run_tests: description: 'Run unit tests before building' required: false type: boolean default: false concurrency: group: build-sha-${{ inputs.sha }} cancel-in-progress: false jobs: build-and-push: name: Build ${{ inputs.image_type }} image(s) at SHA runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 # Workflow inputs are handed to the shell through env instead of being interpolated into the script text, so a crafted value cannot inject commands - name: Checkout target SHA env: TARGET_SHA: ${{ inputs.sha }} run: git checkout "$TARGET_SHA" - name: Resolve SHA id: resolve run: | echo "full_sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT echo "short_sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT - name: Overlay Dockerfiles from main env: IMAGE_TYPE: ${{ inputs.image_type }} run: | git fetch origin main if [[ "$IMAGE_TYPE" != "split" ]]; then git checkout origin/main -- Dockerfile.allinone docker/ fi if [[ "$IMAGE_TYPE" != "allinone" ]]; then git checkout origin/main -- backend/Dockerfile frontend/Dockerfile frontend/nginx.conf fi - name: Install uv if: ${{ inputs.run_tests }} run: | curl -LsSf https://astral.sh/uv/install.sh | sh echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Run backend unit tests if: ${{ inputs.run_tests }} run: | uv sync --extra test uv run pytest backend/tests/unit -v - name: Run frontend lint + tests if: ${{ inputs.run_tests }} run: | cd frontend && npm ci npm run lint npm test -- --run - uses: docker/setup-qemu-action@v3 - uses: docker/setup-buildx-action@v3 - name: Login to Docker Hub uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Compute tags id: tags env: USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} SHORT_SHA: ${{ steps.resolve.outputs.short_sha }} CUSTOM_TAG: ${{ inputs.tag }} IMAGE_TYPE: ${{ inputs.image_type }} run: | compute_tags() { local image="$1" local tags="${USERNAME}/${image}:sha-${SHORT_SHA}" if [ -n "$CUSTOM_TAG" ]; then tags="${tags},${USERNAME}/${image}:${CUSTOM_TAG}" fi echo "$tags" } if [[ "$IMAGE_TYPE" != "split" ]]; then ALLINONE_TAGS=$(compute_tags "banana-slides") echo 
"allinone=$ALLINONE_TAGS" >> $GITHUB_OUTPUT echo "All-in-one tags: $ALLINONE_TAGS" fi if [[ "$IMAGE_TYPE" != "allinone" ]]; then BACKEND_TAGS=$(compute_tags "banana-slides-backend") FRONTEND_TAGS=$(compute_tags "banana-slides-frontend") echo "backend=$BACKEND_TAGS" >> $GITHUB_OUTPUT echo "frontend=$FRONTEND_TAGS" >> $GITHUB_OUTPUT echo "Backend tags: $BACKEND_TAGS" echo "Frontend tags: $FRONTEND_TAGS" fi - name: Build and push all-in-one image if: inputs.image_type == 'allinone' || inputs.image_type == 'both' uses: docker/build-push-action@v5 with: context: . file: ./Dockerfile.allinone push: true platforms: linux/amd64,linux/arm64 tags: ${{ steps.tags.outputs.allinone }} cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides:buildcache cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides:buildcache,mode=max build-args: | DOCKER_REGISTRY=${{ secrets.DOCKER_REGISTRY }} GHCR_REGISTRY=${{ secrets.GHCR_REGISTRY || 'ghcr.io/' }} APT_MIRROR=${{ secrets.APT_MIRROR }} PYPI_INDEX_URL=${{ secrets.PYPI_INDEX_URL }} NPM_REGISTRY=${{ secrets.NPM_REGISTRY }} - name: Build and push backend image if: inputs.image_type == 'split' || inputs.image_type == 'both' uses: docker/build-push-action@v5 with: context: . file: ./backend/Dockerfile push: true platforms: linux/amd64,linux/arm64 tags: ${{ steps.tags.outputs.backend }} cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-backend:buildcache cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-backend:buildcache,mode=max build-args: | DOCKER_REGISTRY=${{ secrets.DOCKER_REGISTRY }} GHCR_REGISTRY=${{ secrets.GHCR_REGISTRY || 'ghcr.io/' }} APT_MIRROR=${{ secrets.APT_MIRROR }} PYPI_INDEX_URL=${{ secrets.PYPI_INDEX_URL }} - name: Build and push frontend image if: inputs.image_type == 'split' || inputs.image_type == 'both' uses: docker/build-push-action@v5 with: context: . 
file: ./frontend/Dockerfile push: true platforms: linux/amd64,linux/arm64 tags: ${{ steps.tags.outputs.frontend }} cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-frontend:buildcache cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-frontend:buildcache,mode=max build-args: | DOCKER_BUILDKIT=1 DOCKER_REGISTRY=${{ secrets.DOCKER_REGISTRY }} NPM_REGISTRY=${{ secrets.NPM_REGISTRY }} ================================================ FILE: .github/workflows/ci-test.yml ================================================ name: CI Tests # Push to main/develop (excluding docs-only changes) or manual trigger on: push: branches: [ main, develop ] paths-ignore: - '**/*.md' - 'docs/**' workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true env: UV_INDEX_URL: https://pypi.org/simple jobs: backend-test: name: Backend Tests runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: '3.10' - name: Install uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Install dependencies run: uv sync --extra test - name: Unit tests run: uv run pytest backend/tests/unit -v --cov=backend --cov-report=xml - name: Integration tests run: uv run pytest backend/tests/integration -v -m "not requires_service" env: TESTING: true GOOGLE_API_KEY: mock-api-key-for-testing - name: Upload coverage uses: codecov/codecov-action@v4 with: file: ./backend/coverage.xml flags: backend frontend-test: name: Frontend Tests runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 with: node-version: '18' cache: 'npm' cache-dependency-path: frontend/package-lock.json - run: cd frontend && npm ci - name: Lint run: cd frontend && npm run lint - name: Unit tests run: cd frontend && npm test -- --run --coverage - name: Build check run: cd frontend && npm run build - name: Upload coverage 
uses: codecov/codecov-action@v4 with: file: ./frontend/coverage/coverage-final.json flags: frontend ================================================ FILE: .github/workflows/docker-publish.yml ================================================ name: Release # 手动触发,输入版本号 on: workflow_dispatch: inputs: version: description: 'Release version (e.g. v0.3.1)' required: true type: string permissions: contents: write concurrency: group: ${{ github.workflow }} cancel-in-progress: false env: UV_INDEX_URL: https://pypi.org/simple jobs: validate: name: Validate Version Format runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Check version format env: VERSION: ${{ inputs.version }} run: | if [[ ! "$VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then echo "::error::Version must match vX.Y.Z format (got: $VERSION)" exit 1 fi - name: Check tag does not exist env: VERSION: ${{ inputs.version }} run: | if git rev-parse "$VERSION" >/dev/null 2>&1; then echo "::error::Tag $VERSION already exists. Aborting to prevent duplicate release." 
exit 1 fi test: name: Pre-release Tests needs: validate runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: '3.10' - name: Install uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Install backend dependencies run: uv sync --extra test - name: Backend unit tests run: uv run pytest backend/tests/unit -v - name: Backend integration tests run: uv run pytest backend/tests/integration -v -m "not requires_service" env: TESTING: true GOOGLE_API_KEY: mock-api-key-for-testing - uses: actions/setup-node@v4 with: node-version: '18' cache: 'npm' cache-dependency-path: frontend/package-lock.json - name: Install frontend dependencies run: cd frontend && npm ci - name: Frontend lint run: cd frontend && npm run lint - name: Frontend tests run: cd frontend && npm test -- --run - name: Frontend build check run: cd frontend && npm run build e2e-test: name: E2E Tests needs: test runs-on: ubuntu-latest env: GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} steps: - uses: actions/checkout@v4 - name: Setup environment run: | chmod +x scripts/setup-env-from-secrets.sh ./scripts/setup-env-from-secrets.sh sed -i 's/^AI_PROVIDER_FORMAT=.*/AI_PROVIDER_FORMAT=gemini/' .env || echo "AI_PROVIDER_FORMAT=gemini" >> .env env: AI_PROVIDER_FORMAT: gemini GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} GOOGLE_API_BASE: ${{ secrets.GOOGLE_API_BASE }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} OPENAI_TIMEOUT: ${{ secrets.OPENAI_TIMEOUT }} OPENAI_MAX_RETRIES: ${{ secrets.OPENAI_MAX_RETRIES }} TEXT_MODEL: ${{ secrets.TEXT_MODEL }} IMAGE_MODEL: ${{ secrets.IMAGE_MODEL }} LOG_LEVEL: ${{ secrets.LOG_LEVEL }} FLASK_ENV: ${{ secrets.FLASK_ENV }} SECRET_KEY: ${{ secrets.SECRET_KEY }} BACKEND_PORT: ${{ secrets.BACKEND_PORT }} CORS_ORIGINS: ${{ secrets.CORS_ORIGINS }} MAX_DESCRIPTION_WORKERS: ${{ secrets.MAX_DESCRIPTION_WORKERS }} MAX_IMAGE_WORKERS: ${{ 
secrets.MAX_IMAGE_WORKERS }} MINERU_TOKEN: ${{ secrets.MINERU_TOKEN }} MINERU_API_BASE: ${{ secrets.MINERU_API_BASE }} IMAGE_CAPTION_MODEL: ${{ secrets.IMAGE_CAPTION_MODEL }} OUTPUT_LANGUAGE: ${{ secrets.OUTPUT_LANGUAGE }} - name: Build and start Docker services run: | docker compose build --no-cache docker compose up -d - name: Wait for services run: | chmod +x scripts/wait-for-health.sh ./scripts/wait-for-health.sh http://localhost:5000/health 60 2 ./scripts/wait-for-health.sh http://localhost:3000 60 2 - name: Docker environment tests run: | chmod +x scripts/test_docker_environment.sh AUTO_CLEANUP=false ./scripts/test_docker_environment.sh - name: Setup Node.js if: env.GOOGLE_API_KEY != '' && env.GOOGLE_API_KEY != 'mock-api-key-for-testing' uses: actions/setup-node@v4 with: node-version: '18' cache: 'npm' cache-dependency-path: frontend/package-lock.json - name: Install Playwright if: env.GOOGLE_API_KEY != '' && env.GOOGLE_API_KEY != 'mock-api-key-for-testing' run: | cd frontend npm ci npx playwright install --with-deps chromium - name: Run E2E tests if: env.GOOGLE_API_KEY != '' && env.GOOGLE_API_KEY != 'mock-api-key-for-testing' run: cd frontend && npx playwright test ui-full-flow.spec.ts --workers=1 env: CI: true timeout-minutes: 25 - name: Upload E2E reports if: always() uses: actions/upload-artifact@v4 with: name: release-e2e-report path: frontend/playwright-report/ retention-days: 7 - name: View logs on failure if: failure() run: | docker compose logs backend docker compose logs frontend - name: Cleanup if: always() run: | docker compose down -v docker system prune -f sync-version: name: Sync Version to Source Files needs: e2e-test runs-on: ubuntu-latest environment: release steps: - uses: actions/checkout@v4 with: ref: main - name: Update version files env: VERSION: ${{ inputs.version }} run: | V="${VERSION#v}" jq --arg v "$V" '.version = $v' package.json > tmp.json && mv tmp.json package.json sed -i "s/^version = .*/version = \"$V\"/" pyproject.toml - 
name: Commit and push env: VERSION: ${{ inputs.version }} run: | git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" git add package.json pyproject.toml git diff --cached --quiet && exit 0 git commit -m "chore: bump version to $VERSION" git pull --rebase git push create-release: name: Create GitHub Release needs: sync-version runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: ref: main - name: Pull latest (includes version bump) run: git pull origin main - name: Create tag and release env: VERSION: ${{ inputs.version }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | git tag "$VERSION" git push origin "$VERSION" gh release create "$VERSION" --generate-notes build-and-push: name: Build and Push Docker Images needs: create-release runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: ref: ${{ inputs.version }} - uses: docker/setup-qemu-action@v3 - uses: docker/setup-buildx-action@v3 - name: Login to Docker Hub uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Extract metadata for backend id: meta-backend uses: docker/metadata-action@v5 with: images: ${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-backend tags: | type=raw,value=${{ inputs.version }} type=raw,value=latest type=sha - name: Extract metadata for frontend id: meta-frontend uses: docker/metadata-action@v5 with: images: ${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-frontend tags: | type=raw,value=${{ inputs.version }} type=raw,value=latest type=sha - name: Build and push backend image uses: docker/build-push-action@v5 with: context: . 
file: ./backend/Dockerfile push: true platforms: linux/amd64,linux/arm64 tags: ${{ steps.meta-backend.outputs.tags }} labels: ${{ steps.meta-backend.outputs.labels }} cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-backend:buildcache cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-backend:buildcache,mode=max build-args: | DOCKER_REGISTRY=${{ secrets.DOCKER_REGISTRY }} GHCR_REGISTRY=${{ secrets.GHCR_REGISTRY || 'ghcr.io/' }} APT_MIRROR=${{ secrets.APT_MIRROR }} PYPI_INDEX_URL=${{ secrets.PYPI_INDEX_URL }} - name: Build and push frontend image uses: docker/build-push-action@v5 with: context: . file: ./frontend/Dockerfile push: true platforms: linux/amd64,linux/arm64 tags: ${{ steps.meta-frontend.outputs.tags }} labels: ${{ steps.meta-frontend.outputs.labels }} cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-frontend:buildcache cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-frontend:buildcache,mode=max build-args: | DOCKER_BUILDKIT=1 DOCKER_REGISTRY=${{ secrets.DOCKER_REGISTRY }} NPM_REGISTRY=${{ secrets.NPM_REGISTRY }} # build-binaries: # name: Build Binary Artifacts # needs: create-release # runs-on: ubuntu-latest # steps: # - uses: actions/checkout@v4 # - name: Build # run: echo "TODO: add build steps" # - name: Upload to Release # env: # VERSION: ${{ inputs.version }} # GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} # run: gh release upload "$VERSION" ./dist/* ================================================ FILE: .github/workflows/nightly.yml ================================================ name: Nightly # Every day at 03:00 UTC, or manual trigger on: schedule: - cron: '0 3 * * *' workflow_dispatch: concurrency: group: ${{ github.workflow }} cancel-in-progress: false jobs: e2e-test: name: Docker E2E Tests runs-on: ubuntu-latest env: GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} steps: - uses: actions/checkout@v4 - name: Setup environment run: | chmod +x 
scripts/setup-env-from-secrets.sh ./scripts/setup-env-from-secrets.sh sed -i 's/^AI_PROVIDER_FORMAT=.*/AI_PROVIDER_FORMAT=gemini/' .env || echo "AI_PROVIDER_FORMAT=gemini" >> .env env: AI_PROVIDER_FORMAT: gemini GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} GOOGLE_API_BASE: ${{ secrets.GOOGLE_API_BASE }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} OPENAI_TIMEOUT: ${{ secrets.OPENAI_TIMEOUT }} OPENAI_MAX_RETRIES: ${{ secrets.OPENAI_MAX_RETRIES }} TEXT_MODEL: ${{ secrets.TEXT_MODEL }} IMAGE_MODEL: ${{ secrets.IMAGE_MODEL }} LOG_LEVEL: ${{ secrets.LOG_LEVEL }} FLASK_ENV: ${{ secrets.FLASK_ENV }} SECRET_KEY: ${{ secrets.SECRET_KEY }} BACKEND_PORT: ${{ secrets.BACKEND_PORT }} CORS_ORIGINS: ${{ secrets.CORS_ORIGINS }} MAX_DESCRIPTION_WORKERS: ${{ secrets.MAX_DESCRIPTION_WORKERS }} MAX_IMAGE_WORKERS: ${{ secrets.MAX_IMAGE_WORKERS }} MINERU_TOKEN: ${{ secrets.MINERU_TOKEN }} MINERU_API_BASE: ${{ secrets.MINERU_API_BASE }} IMAGE_CAPTION_MODEL: ${{ secrets.IMAGE_CAPTION_MODEL }} OUTPUT_LANGUAGE: ${{ secrets.OUTPUT_LANGUAGE }} - name: Build Docker images run: docker compose build --no-cache - name: Start services run: docker compose up -d - name: Wait for services run: | chmod +x scripts/wait-for-health.sh ./scripts/wait-for-health.sh http://localhost:5000/health 60 2 ./scripts/wait-for-health.sh http://localhost:3000 60 2 - name: Docker environment tests run: | chmod +x scripts/test_docker_environment.sh AUTO_CLEANUP=false ./scripts/test_docker_environment.sh - name: Setup Node.js if: env.GOOGLE_API_KEY != '' && env.GOOGLE_API_KEY != 'mock-api-key-for-testing' uses: actions/setup-node@v4 with: node-version: '18' cache: 'npm' cache-dependency-path: frontend/package-lock.json - name: Install Playwright if: env.GOOGLE_API_KEY != '' && env.GOOGLE_API_KEY != 'mock-api-key-for-testing' run: | cd frontend npm ci npx playwright install --with-deps chromium - name: Run E2E tests if: env.GOOGLE_API_KEY != '' && env.GOOGLE_API_KEY != 
'mock-api-key-for-testing' run: cd frontend && npx playwright test ui-full-flow.spec.ts --workers=1 env: CI: true timeout-minutes: 25 - name: Upload E2E reports if: always() uses: actions/upload-artifact@v4 with: name: playwright-report path: frontend/playwright-report/ retention-days: 7 - name: View logs on failure if: failure() run: | docker compose logs backend docker compose logs frontend - name: Cleanup if: always() run: | docker compose down -v docker system prune -f security-scan: name: Security Scan runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Backend dependency scan run: | uv pip install safety uv run safety check --json || echo "Vulnerabilities found (warning)" continue-on-error: true - name: Frontend dependency scan run: cd frontend && npm audit --audit-level=moderate || true continue-on-error: true - name: Dockerfile scan uses: hadolint/hadolint-action@v3.1.0 with: dockerfile: backend/Dockerfile continue-on-error: true push-nightly: name: Push Nightly Images needs: e2e-test runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: docker/setup-qemu-action@v3 - uses: docker/setup-buildx-action@v3 - name: Login to Docker Hub uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push backend uses: docker/build-push-action@v5 with: context: . 
file: ./backend/Dockerfile push: true platforms: linux/amd64,linux/arm64 tags: ${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-backend:nightly cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-backend:buildcache cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-backend:buildcache,mode=max build-args: | DOCKER_REGISTRY=${{ secrets.DOCKER_REGISTRY }} GHCR_REGISTRY=${{ secrets.GHCR_REGISTRY || 'ghcr.io/' }} APT_MIRROR=${{ secrets.APT_MIRROR }} PYPI_INDEX_URL=${{ secrets.PYPI_INDEX_URL }} - name: Build and push frontend uses: docker/build-push-action@v5 with: context: . file: ./frontend/Dockerfile push: true platforms: linux/amd64,linux/arm64 tags: ${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-frontend:nightly cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-frontend:buildcache cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/banana-slides-frontend:buildcache,mode=max build-args: | DOCKER_BUILDKIT=1 DOCKER_REGISTRY=${{ secrets.DOCKER_REGISTRY }} NPM_REGISTRY=${{ secrets.NPM_REGISTRY }} ================================================ FILE: .github/workflows/pr-quick-check.yml ================================================ name: PR Quick Check on: pull_request: branches: [ main, develop ] # Quick check: lint + unit tests + build + smoke env: UV_INDEX_URL: https://pypi.org/simple jobs: quick-check: name: Quick Check (Lint + Unit Tests + Build) runs-on: ubuntu-latest timeout-minutes: 8 steps: - name: Checkout code uses: actions/checkout@v4 # Backend checks - name: Setup Python uses: actions/setup-python@v5 with: python-version: '3.10' - name: Install uv package manager run: | curl -LsSf https://astral.sh/uv/install.sh | sh echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Install backend dependencies run: uv sync --extra test - name: Backend syntax check run: | cd backend uv run flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics || true continue-on-error: true - name: Backend unit tests run: uv run pytest backend/tests/unit -v # Frontend checks - name: Setup Node.js uses: actions/setup-node@v4 with: node-version: '18' cache: 'npm' cache-dependency-path: frontend/package-lock.json - name: Install frontend dependencies run: cd frontend && npm ci - name: Frontend lint check run: cd frontend && npm run lint - name: Frontend unit tests run: cd frontend && npm test -- --run - name: Frontend build check run: cd frontend && npm run build docs-check: name: Docs Link Check runs-on: ubuntu-latest timeout-minutes: 3 steps: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 with: node-version: '18' - name: Check docs links run: cd docs && npx mintlify@latest broken-links # Simple API smoke test smoke-test: name: Smoke Test runs-on: ubuntu-latest timeout-minutes: 3 needs: quick-check steps: - name: Checkout code uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v5 with: python-version: '3.10' - name: Install uv package manager run: | curl -LsSf https://astral.sh/uv/install.sh | sh echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Install dependencies run: | uv sync - name: Start backend and test run: | cp .env.example .env cd backend # Start in background uv run python app.py & SERVER_PID=$! # Poll until backend is ready (up to 30s) echo "Waiting for backend to start..." for i in $(seq 1 30); do if curl -sf http://localhost:5000/health; then echo "" echo "Backend smoke test passed (ready after ${i}s)" kill $SERVER_PID exit 0 fi if ! 
kill -0 $SERVER_PID 2>/dev/null; then echo "Backend process exited unexpectedly" cat ../instance/app.log 2>/dev/null || echo "No log file found" exit 1 fi sleep 1 done echo "Backend failed to start within 30s" cat ../instance/app.log 2>/dev/null || echo "No log file found" kill $SERVER_PID 2>/dev/null exit 1 env: GOOGLE_API_KEY: mock-key-for-testing TESTING: true ================================================ FILE: .github/workflows/translate-readme.yml ================================================ name: Auto Translate README # 当主分支的 README.md 改动时自动翻译到 README_EN.md on: push: branches: - main paths: - 'README.md' workflow_dispatch: # 允许手动触发 # 防止多个翻译任务同时运行 concurrency: group: translate-readme cancel-in-progress: true # 授予工作流推送权限 permissions: contents: write jobs: translate: name: Translate README to English runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 with: fetch-depth: 0 # 使用 GitHub Token 推送改动 token: ${{ secrets.GITHUB_TOKEN }} - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.10' - name: Install uv uses: astral-sh/setup-uv@v5 with: enable-cache: true - name: Install dependencies run: | uv sync - name: Translate README env: # 从仓库 secrets 读取 API 密钥 GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} GOOGLE_API_BASE: ${{ secrets.GOOGLE_API_BASE }} AI_PROVIDER_FORMAT: gemini TEXT_MODEL: gemini-3-flash-preview run: | echo "开始增量翻译 README.md(仅翻译修改的部分)..." uv run python scripts/translate_readme_incremental.py echo "翻译完成!" 
- name: Check for changes id: check_changes run: | if git diff --quiet README_EN.md; then echo "changed=false" >> $GITHUB_OUTPUT echo "README_EN.md 无变化,跳过提交" else echo "changed=true" >> $GITHUB_OUTPUT echo "README_EN.md 已更新" fi - name: Commit and push changes if: steps.check_changes.outputs.changed == 'true' run: | git config --local user.email "github-actions[bot]@users.noreply.github.com" git config --local user.name "github-actions[bot]" git add README_EN.md git commit -m "docs: auto translate README to English [skip ci]" git push - name: Summary run: | if [ "${{ steps.check_changes.outputs.changed }}" == "true" ]; then echo "✅ README_EN.md 已自动更新并推送" else echo "ℹ️ README_EN.md 无变化" fi ================================================ FILE: .gitignore ================================================ *.png *.jpg *.jpeg *.gif *.bmp *.svg *.webp *.ico *.tiff *.tif *.heic *.heif *.avif !frontend/public/**/*.png !frontend/public/**/*.jpg !frontend/public/**/*.jpeg !frontend/public/**/*.gif !frontend/public/**/*.bmp !frontend/public/**/*.svg !frontend/public/**/*.webp !frontend/public/**/*.ico !frontend/public/**/*.tiff !frontend/public/**/*.tif !frontend/public/**/*.heic !frontend/public/**/*.heif !frontend/public/**/*.avif # 保留测试fixtures !e2e/fixtures/*.png # 保留项目模板图片 !template*.png # 忽略临时文档,但保留项目文档 *.md !README.md # !docs/**/*.md !.github/**/*.md *.pyc .env # GCP service-account key (sensitive — never commit) gcp-service-account.json *.ppt *.pptx generate-example.py *.mdc *.pdf .cursor/worktrees.json uploads/ !assets/* # 镜像源配置脚本的备份文件 *.orig # 本地备份目录(保存测试文件等) _local_backup/ !CLA.md !CONTRIBUTING.md .venv ================================================ FILE: CLA.md ================================================ # Banana-slides Contributor License Agreement Thank you for your interest in contributing to Banana-slides ("Project"). 
By signing this Contributor License Agreement ("CLA"), you accept and agree to the following terms and conditions for your present and future Contributions submitted to the Project. ## 0. Acceptance (How this CLA becomes effective) By submitting a Contribution to the Project (including via a Pull Request or any other form of submission intended for inclusion), you acknowledge that you have read and agree to be bound by the terms of this CLA. If you do not agree to these terms, do not submit any Contribution. ## 1. Definitions - **"Contribution"** means any code, documentation, or other original work of authorship, including any modifications or additions to existing work, that is intentionally submitted by you to the Project for inclusion in the Project. - **"You" (or "Your")** means the individual or legal entity on behalf of whom a Contribution is submitted. - **"Project Maintainer"** means the original author and primary maintainer of the Project (Anionex). ## 2. Grant of Copyright License You retain all right, title, and interest in your Contributions. You hereby grant to the Project Maintainer a perpetual, worldwide, non-exclusive, royalty-free, irrevocable copyright license to: - Reproduce, prepare derivative works of, publicly display, publicly perform, and distribute your Contributions and such derivative works. - Sublicense and relicense your Contributions under any license, **including for commercial purposes**. ## 3. Grant of Patent License You hereby grant to the Project Maintainer a perpetual, worldwide, non-exclusive, royalty-free, irrevocable patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer your Contributions, where such license applies only to those patent claims licensable by you that are necessarily infringed by your Contribution(s) alone or by combination of your Contribution(s) with the Project. ## 4. 
Representations and Warranties You represent and warrant that: - You are legally entitled to grant the above licenses. - Each of your Contributions is your original creation. - Your Contribution submissions include complete details of any third-party license or other restriction of which you are aware and which are associated with any part of your Contributions. - Your Contribution does not violate any third-party's intellectual property rights. - If you are employed, your employer has waived any rights to your Contributions, or you have obtained permission from your employer to submit Contributions. ## 5. No Support Obligation You are not expected to provide support for your Contributions, except to the extent you desire to provide support. You may provide support for free, for a fee, or not at all. ## 6. Open Source Availability The Project Maintainer will continue to make an open-source edition of the Project publicly available, under an OSI-approved open-source license. For clarity: - The Maintainer may also distribute separate commercial/proprietary editions and/or offer alternative licenses for Contributions. - The code will be published in a publicly accessible source repository (not necessarily GitHub). ## 7. No Warranty Your Contributions are provided "AS IS" without warranty of any kind, express or implied. --- ## How to Agree When you open a Pull Request, the PR template includes a CLA checkbox. Check the box to indicate that you have read and agree to this CLA. Your agreement will remain publicly visible in the Pull Request description and be associated with your GitHub account. --- *Last updated: February 2026* ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to Banana-slides Thank you for your interest in contributing to Banana-slides! We welcome contributions from the community. 
## Before You Start ### Contributor License Agreement (CLA) By submitting a Pull Request (or any other Contribution intended for inclusion) to this repository, you confirm that you have read and agree to the terms of our [Contributor License Agreement (CLA)](CLA.md). If you do not agree to the CLA, please do not submit a Pull Request. **Why do we need a CLA?** - To ensure we have the necessary rights to use, modify, and distribute contributions - To allow the project to explore sustainable commercial models while keeping the open-source edition available - To protect both contributors and the project legally **How to agree:** When you open a Pull Request, the PR template includes a CLA checkbox. Simply check it to indicate your agreement. PRs without the CLA checkbox checked may be delayed or closed. ## How to Contribute ### Reporting Bugs 1. Check if the bug has already been reported in [Issues](https://github.com/Anionex/banana-slides/issues) 2. If not, create a new issue with: - A clear, descriptive title - Steps to reproduce the bug - Expected behavior vs actual behavior - Screenshots if applicable - Your environment (OS, browser, etc.) ### Suggesting Features 1. Check existing issues for similar suggestions 2. Create a new issue with the "feature request" label 3. Describe the feature and its use case ### Submitting Code 1. Fork the repository 2. Create a new branch for your feature/fix: `git checkout -b feature/your-feature-name` 3. Make your changes 4. Test your changes thoroughly 5. Commit with clear, descriptive messages 6. Push to your fork 7. 
Open a Pull Request with: - A clear description of the changes - Reference to any related issues - **CLA checkbox checked** (PRs without this may be delayed/closed) ## Development Setup ### 环境要求 / Requirements - Python 3.10+ - [uv](https://github.com/astral-sh/uv) - Python 包管理器 - Node.js 16+ 和 npm - 有效的 API 密钥(详见 `.env.example`) ### 安装步骤 / Installation ```bash # 克隆代码仓库 git clone https://github.com/Anionex/banana-slides.git cd banana-slides # 安装 uv(如果尚未安装) curl -LsSf https://astral.sh/uv/install.sh | sh # 安装后端依赖(在项目根目录运行) uv sync # 配置环境变量 cp .env.example .env # 编辑 .env 文件,配置你的 API 密钥 # 安装前端依赖 cd frontend npm install ``` ### 启动开发服务器 / Start Development Server ```bash # 启动后端(在项目根目录) cd backend uv run alembic upgrade head && uv run python app.py # 后端运行在 http://localhost:5000 # 启动前端(新开一个终端) cd frontend npm run dev # 前端运行在 http://localhost:3000 ``` ## Code Style - Follow the existing code style in the project - Write clear, self-documenting code - Add comments for complex logic - Include tests for new features when applicable ## Questions? If you have questions, feel free to open an issue or reach out to the maintainers. --- Thank you for contributing to Banana-slides!
🍌 ================================================ FILE: Dockerfile.allinone ================================================ # 镜像源配置参数(可通过 build args 覆盖) ARG DOCKER_REGISTRY= ARG GHCR_REGISTRY=ghcr.io/ ARG NPM_REGISTRY= ARG APT_MIRROR= ARG PYPI_INDEX_URL= # ── Stage 1: 构建前端 ────────────────────────────────────────── FROM ${DOCKER_REGISTRY:-}node:18-alpine AS frontend-builder ARG NPM_REGISTRY= WORKDIR /app COPY frontend/package.json frontend/package-lock.json* ./ RUN if [ -n "$NPM_REGISTRY" ]; then \ npm config set registry "$NPM_REGISTRY"; \ fi && \ (npm install --frozen-lockfile || npm install) COPY frontend/ ./ RUN npm run build # ── Stage 2: 获取 uv ────────────────────────────────────────── FROM ${GHCR_REGISTRY}astral-sh/uv:latest AS uv # ── Stage 3: 最终一体镜像 ────────────────────────────────────── FROM ${DOCKER_REGISTRY:-}python:3.10-slim ARG APT_MIRROR= ARG PYPI_INDEX_URL= WORKDIR /app # 安装系统依赖:nginx + supervisor + curl RUN if [ -n "$APT_MIRROR" ]; then \ if [ -f /etc/apt/sources.list.d/debian.sources ]; then \ sed -i "s@deb.debian.org@$APT_MIRROR@g" /etc/apt/sources.list.d/debian.sources; \ fi; \ fi && \ apt-get update && apt-get install -y \ curl \ nginx \ supervisor \ && rm -rf /var/lib/apt/lists/* # 复制 uv COPY --from=uv /uv /usr/local/bin/uv RUN chmod +x /usr/local/bin/uv # 安装 Python 依赖 COPY pyproject.toml uv.lock* ./ ENV UV_INDEX_URL=${PYPI_INDEX_URL} ENV UV_HTTP_TIMEOUT=300 RUN if [ -f uv.lock ]; then uv sync --frozen; else uv sync; fi # 复制后端代码和资源 COPY backend/ ./backend/ COPY assets/ ./assets/ COPY docker/ ./docker/ # 复制前端构建产物 COPY --from=frontend-builder /app/dist /usr/share/nginx/html # 配置 nginx COPY docker/nginx-allinone.conf /etc/nginx/conf.d/default.conf RUN rm -f /etc/nginx/sites-enabled/default # 启动脚本可执行 RUN chmod +x /app/docker/start-backend.sh # 创建必要目录 RUN mkdir -p /app/backend/instance /app/uploads ENV PYTHONPATH=/app ENV FLASK_APP=backend/app.py ENV IN_DOCKER=1 EXPOSE 80 HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \ 
CMD curl -f http://localhost/health || exit 1 CMD ["/usr/bin/supervisord", "-c", "/app/docker/supervisord.conf"] ================================================ FILE: LICENSE ================================================ GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software. A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. 
However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public. The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version. An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU Affero General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. 
To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. 
A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. 
You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. 
You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. 
You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. 
Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. 
If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. 
When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. 
If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. 
If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. 
For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. 
You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Remote Network Interaction; Use with the GNU General Public License. 
Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code.
There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements. You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see <https://www.gnu.org/licenses/>. ================================================ FILE: README.md ================================================
banana-slides Anionex%2Fbanana-slides | Trendshift
一个基于nano banana pro🍌的原生AI PPT生成应用
在几分钟内从想法到演示文稿,无需繁琐排版,口头即可提出修改,迈向真正的"Vibe PPT"

🚀 在线 Demo  •  📚 文档  •  English

[![GitHub Stars](https://img.shields.io/github/stars/Anionex/banana-slides?style=flat-square&color=FFD700)](https://github.com/Anionex/banana-slides/stargazers) [![GitHub Forks](https://img.shields.io/github/forks/Anionex/banana-slides?style=flat-square&color=FFD700)](https://github.com/Anionex/banana-slides/network) [![GitHub Watchers](https://img.shields.io/github/watchers/Anionex/banana-slides?style=flat-square&color=FFD700)](https://github.com/Anionex/banana-slides/watchers) [![Version](https://img.shields.io/badge/version-v0.4.0-44cc11?style=flat-square)](https://github.com/Anionex/banana-slides) ![Docker](https://img.shields.io/badge/Docker-Build-4A90D9?logo=docker&logoColor=white&style=flat-square) [![License](https://img.shields.io/github/license/Anionex/banana-slides?color=0055aa&style=flat-square)](https://github.com/Anionex/banana-slides/blob/main/LICENSE)

如果该项目对你有用,欢迎 Star 🌟 & Fork 🍴

## ✨ 项目缘起 你是否也曾陷入这样的困境:明天就要汇报,但PPT还是一片空白;脑中有无数精彩的想法,却被繁琐的排版和设计消磨掉所有热情? 我(们)渴望能快速创作出既专业又具设计感的演示文稿,传统的AI PPT生成app,虽然大体满足“快”这一需求,却还存在以下问题: - 1️⃣只能选择预设模板,无法灵活调整风格 - 2️⃣自由度低,多轮改动难以进行 - 3️⃣成品观感相似,同质化严重 - 4️⃣素材质量较低,缺乏针对性 - 5️⃣图文排版割裂,设计感差 以上这些缺陷,让传统的AI PPT生成器难以同时满足我们“快”和“美”的两大PPT制作需求。即使它们自称Vibe PPT,在我眼中也还远不够“Vibe”。 但是,nano banana🍌模型的出现让一切有了转机。我尝试使用🍌pro进行PPT页面生成,发现生成的结果无论是质量、美感还是一致性,都做得非常好,且几乎能精确渲染prompt要求的所有文字+遵循参考图的风格。那为什么不基于🍌pro,做一个原生的"Vibe PPT"应用呢? ## 👨‍💻 适用场景 1. **小白**:零门槛快速生成美观PPT,无需设计经验,减少模板选择烦恼 2. **PPT专业人士**:参考AI生成的布局和图文元素组合,快速获取设计灵感 3. **教育工作者**:将教学内容快速转换为配图教案PPT,提升课堂效果 4. **学生**:快速完成作业Pre,把精力专注于内容而非排版美化 5. **职场人士**:商业提案、产品介绍快速可视化,多场景快速适配

🎯目标: 降低 PPT 制作门槛,让每个人都能快速创作出美观专业的演示文稿

## 🎨 结果案例
| | | |:---:|:---:| | 案例3 | 案例2 | | **软件开发最佳实践** | **DeepSeek-V3.2技术展示** | | 案例4 | 案例1 | | **预制菜智能产线装备研发和产业化** | **钱的演变:从贝壳到纸币的旅程** |
更多可见 使用案例 ## 🎯 功能介绍 ### 1. 灵活多样的创作路径 支持**想法**、**大纲**、**页面描述**三种起步方式,满足不同创作习惯。 - **一句话生成**:输入一个主题,AI 自动生成结构清晰的大纲和逐页内容描述。 - **自然语言编辑**:支持以 Vibe 形式口头修改大纲或描述(如"把第三页改成案例分析"),AI 实时响应调整。 - **大纲/描述模式**:既可一键批量生成,也可手动调整细节。 image ### 2. 强大的素材解析能力 - **多格式支持**:上传 PDF/Docx/MD/Txt 等文件,后台自动解析内容。 - **智能提取**:自动识别文本中的关键点、图片链接和图表信息,为生成提供丰富素材。 - **风格参考**:支持上传参考图片或模板,定制 PPT 风格。 文件解析与素材处理 ### 3. "Vibe" 式自然语言修改 不再受限于复杂的菜单按钮,直接通过**自然语言**下达修改指令。 - **局部重绘**:对不满意的区域进行口头式修改(如"把这个图换成饼图")。 - **整页优化**:基于 nano banana pro🍌 生成高清、风格统一的页面。 image ### 4. 开箱即用的格式导出 - **多格式支持**:一键导出标准 **PPTX** 或 **PDF** 文件。 - **完美适配**:默认 16:9 比例,排版无需二次调整,直接演示。 image PPT与PDF导出 ### 5. 可自由编辑的pptx导出(Beta迭代中) - **导出图像为高还原度、背景干净的、可自由编辑图像和文字的PPT页面** - 相关更新见 https://github.com/Anionex/banana-slides/issues/121 image
**🌟和notebooklm slide deck功能对比** | 功能 | notebooklm | 本项目 | | --- | --- | --- | | 页数上限 | 15页 | **无限制** | | 二次编辑 | 提示词修改 | **框选编辑+口头编辑** | | 素材添加 | 生成后无法添加 | **生成后自由添加** | | 导出格式 | 支持导出为 PDF、(不可编辑图片)pptx | **导出为PDF、(图片or可编辑)pptx** | | 水印 | 免费版有水印 | **无水印,自由增删元素** | > 注:随着新功能添加,对比可能过时 ## 🔥 近期更新 - 【2-9】: * 新功能 * 支持在首页、大纲、描述卡片里面粘贴图片并立即识别,并提供更好的交互体验 * 大纲章节手动编辑:支持手动调整页面所属章节(part)。 * Docker 多架构:镜像支持 amd64 / arm64 构建。 * 国际化 + 暗黑模式:新增中英文切换;支持亮色/暗色/跟随系统主题;全组件适配暗黑模式。 * 修复与体验优化 * 修复导出相关 500、参考文件关联时序、outline/page 数据错位、任务轮询错误项目、描述生成无限轮询、图片预览内存泄漏、批量删除部分失败处理。 * 优化格式示例提示、HTTP 错误提示文案、Modal 关闭体验、清理旧项目 localStorage、移除首次创建项目冗余提示。 * 若干其他优化和修复 - 【1-4】 : v0.4.0发布:可编辑pptx导出全面升级: * 支持最大程度还原图片中文字的字号、颜色、加粗等样式; * 支持了识别表格中的文字内容; * 更精确的文字大小和文字位置还原逻辑 * 优化导出工作流,大大减少了导出后背景图残留文字的现象; * 支持页面多选逻辑,灵活选择需要生成和导出的具体页面。 * **详细效果和使用方法见 https://github.com/Anionex/banana-slides/issues/121** - 【12-27】: 加入了对无图片模板模式的支持和较高质量的文字预设,现在可以通过纯文字描述的方式来控制ppt页面风格 ## 🗺️ 开发计划 | 状态 | 里程碑 | | --- | --- | | ✅ 已完成 | 从想法、大纲、页面描述三种路径创建 PPT | | ✅ 已完成 | 解析文本中的 Markdown 格式图片 | | ✅ 已完成 | PPT 单页添加更多素材 | | ✅ 已完成 | PPT 单页框选区域Vibe口头编辑 | | ✅ 已完成 | 素材模块: 素材生成、上传等 | | ✅ 已完成 | 支持多种文件的上传+解析 | | ✅ 已完成 | 支持Vibe口头调整大纲和描述 | | ✅ 已完成 | 初步支持可编辑版本pptx文件导出 | | 🔄 进行中 | 支持多层次、精确抠图的可编辑pptx导出 | | 🔄 进行中 | 网络搜索 | | 🔄 进行中 | Agent 模式 | | 🚍 部分 | 优化前端加载速度 | | 🧭 规划中 | 在线播放功能 | | 🧭 规划中 | 简单的动画和页面切换效果 | | 🚍 部分 | 多语种支持 | | 🏢商业版功能 | 用户系统 | ## 📦 使用方法 ### (新)使用应用模板一键部署 这是最简单的方式,无需安装docker或下载项目,创建后可直接进入应用 1. 通过雨云一键部署和启动本应用 (新用户有15天免费使用+首充双倍政策) [![通过雨云一键部署](https://rainyun-apps.cn-nb1.rains3.com/materials/deploy-on-rainyun-cn.svg)](https://app.rainyun.com/apps/rca/store/7549/anionex_) 2. 敬请期待 ### 使用 Docker Compose🐳 通过docker compose快速启动前后端服务。
📒 Windows/Mac用户说明 如果你使用 **Windows 或 macOS**,请先安装 **Docker Desktop**,并确保 Docker 正在运行(Windows 可检查系统托盘图标;macOS 可检查菜单栏图标),然后按文档中的相同步骤操作。 > **提示**:如果遇到问题,Windows 用户请在 Docker Desktop 设置中启用 **WSL 2 后端**(推荐);同时确保端口 **3000** 和 **5000** 未被占用。
0. **克隆代码仓库** ```bash git clone https://github.com/Anionex/banana-slides cd banana-slides ``` 1. **配置环境变量** 创建 `.env` 文件(参考 `.env.example`): ```bash cp .env.example .env ``` 编辑 `.env` 文件,配置必要的环境变量: > **项目中大模型接口以AIHubMix平台格式为标准,推荐使用 [AIHubMix(点击此处可直接访问)](https://aihubmix.com/?aff=17EC) 获取API密钥,减小迁移成本**
> **友情提示:谷歌nano banana pro模型接口费用较高,请注意调用成本** ```env # AI Provider格式配置 (gemini / openai / vertex / lazyllm) AI_PROVIDER_FORMAT=gemini # Gemini 格式配置(当 AI_PROVIDER_FORMAT=gemini 时使用) GOOGLE_API_KEY=your-api-key-here GOOGLE_API_BASE=https://generativelanguage.googleapis.com # 代理示例: https://aihubmix.com/gemini # OpenAI 格式配置(当 AI_PROVIDER_FORMAT=openai 时使用) OPENAI_API_KEY=your-api-key-here OPENAI_API_BASE=https://api.openai.com/v1 # 代理示例: https://aihubmix.com/v1 # Vertex AI 配置(当 AI_PROVIDER_FORMAT=vertex 时使用) # 需要 GCP 项目和服务账户密钥 # VERTEX_PROJECT_ID=your-gcp-project-id # VERTEX_LOCATION=global # GOOGLE_APPLICATION_CREDENTIALS=./gcp-service-account.json # Lazyllm 格式配置(当 AI_PROVIDER_FORMAT=lazyllm 时使用) # 选择文本生成和图片生成使用的厂商 TEXT_MODEL_SOURCE=deepseek # 文本生成模型厂商 IMAGE_MODEL_SOURCE=doubao # 图片编辑模型厂商 IMAGE_CAPTION_MODEL_SOURCE=qwen # 图片描述模型厂商 # 各厂商 API Key(只需配置你要使用的厂商) DOUBAO_API_KEY=your-doubao-api-key # 火山引擎/豆包 DEEPSEEK_API_KEY=your-deepseek-api-key # DeepSeek QWEN_API_KEY=your-qwen-api-key # 阿里云/通义千问 GLM_API_KEY=your-glm-api-key # 智谱 GLM SILICONFLOW_API_KEY=your-siliconflow-api-key # 硅基流动 SENSENOVA_API_KEY=your-sensenova-api-key # 商汤日日新 MINIMAX_API_KEY=your-minimax-api-key # MiniMax ... ``` **使用新版可编辑导出配置方法,获得更好的可编辑导出效果**: 需在[百度智能云平台](https://console.bce.baidu.com/iam/#/iam/apikey/list)(点击此处进入)中获取API KEY,填写在.env文件中的BAIDU_API_KEY字段(有充足的免费使用额度)。详见 https://github.com/Anionex/banana-slides/issues/121 中的说明
📒 Vertex AI 配置指南(适用于 GCP 用户) Google Cloud Vertex AI 允许通过 GCP 服务账户调用 Gemini 模型,新用户可使用赠金额度。配置步骤: 1. 前往 [GCP Console](https://console.cloud.google.com/),创建一个服务账户并下载 JSON 格式的密钥文件 2. 将密钥文件保存为项目根目录下的 `gcp-service-account.json` 3. 在 `.env` 中设置: ```env AI_PROVIDER_FORMAT=vertex VERTEX_PROJECT_ID=your-gcp-project-id VERTEX_LOCATION=global ``` 4. 如果使用 Docker 部署,还需要在 `docker-compose.yml` 中取消相关注释,将密钥文件挂载到容器内并设置 `GOOGLE_APPLICATION_CREDENTIALS` 环境变量。 > `gemini-3-*` 系列模型要求 `VERTEX_LOCATION=global`
2. **启动服务** **⚡ 使用预构建镜像(推荐)** 项目在 Docker Hub 提供了构建好的前端和后端镜像(同步主分支最新版本),可以跳过本地构建步骤,实现快速部署: ```bash # 使用预构建镜像启动(无需从头构建) docker compose -f docker-compose.prod.yml up -d ``` 镜像名称: - `anoinex/banana-slides-frontend:latest` - `anoinex/banana-slides-backend:latest` **从头构建镜像** ```bash docker compose up -d ``` > [!TIP] > 如遇网络问题,可在 `.env` 文件中取消镜像源配置的注释, 再重新运行启动命令: > ```env > # 在 .env 文件中取消以下注释即可使用国内镜像源 > DOCKER_REGISTRY=docker.1ms.run/ > GHCR_REGISTRY=ghcr.nju.edu.cn/ > APT_MIRROR=mirrors.aliyun.com > PYPI_INDEX_URL=https://mirrors.cloud.tencent.com/pypi/simple > NPM_REGISTRY=https://registry.npmmirror.com/ > ``` 3. **访问应用** - 前端:http://localhost:3000 - 后端 API:http://localhost:5000 4. **查看日志** ```bash # 查看后端日志(最后 200 行) docker logs --tail 200 banana-slides-backend # 实时查看后端日志(最后 100 行) docker logs -f --tail 100 banana-slides-backend # 查看前端日志(最后 100 行) docker logs --tail 100 banana-slides-frontend ``` 5. **停止服务** ```bash docker compose down ``` 6. **更新项目** **使用预构建镜像(docker-compose.prod.yml)** ```bash docker compose -f docker-compose.prod.yml pull docker compose -f docker-compose.prod.yml up -d ``` **使用本地构建(docker-compose.yml)** ```bash git pull docker compose down docker compose build --no-cache docker compose up -d ``` **注:感谢优秀开发者朋友 [@ShellMonster](https://github.com/ShellMonster/) 提供了[新人部署教程](https://github.com/ShellMonster/banana-slides/blob/docs-deploy-tutorial/docs/NEWBIE_DEPLOYMENT.md),专为没有任何服务器部署经验的新手设计,可[点击链接](https://github.com/ShellMonster/banana-slides/blob/docs-deploy-tutorial/docs/NEWBIE_DEPLOYMENT.md)查看。** ### 从源码部署 #### 环境要求 - Python 3.10 或更高版本 - [uv](https://github.com/astral-sh/uv) - Python 包管理器 - Node.js 16+ 和 npm - 有效的 Google Gemini API 密钥 - (可选)[LibreOffice](https://www.libreoffice.org/) - 使用「PPT 翻新」功能上传 PPTX 文件时需要,用于将 PPTX 转换为 PDF。**推荐先在本地将 PPTX 转为 PDF 后再上传**,原因:LibreOffice 在服务端渲染时可能因缺少字体(如微软雅黑、Calibri 等)导致排版错位,且无法完整还原部分特效。上传 PDF 文件则不需要 LibreOffice。Docker 用户如仍需在容器内支持 PPTX 上传,可执行: ```bash docker exec -it banana-slides-backend bash -c "apt-get update && 
apt-get install -y libreoffice-impress && rm -rf /var/lib/apt/lists/*" ``` > 注意:此方式安装的 LibreOffice 在容器重建后会丢失,需重新安装。 #### 后端安装 0. **克隆代码仓库** ```bash git clone https://github.com/Anionex/banana-slides cd banana-slides ``` 1. **安装 uv(如果尚未安装)** ```bash curl -LsSf https://astral.sh/uv/install.sh | sh ``` 2. **安装依赖** 在项目根目录下运行: ```bash uv sync ``` 这将根据 `pyproject.toml` 自动安装所有依赖。 3. **配置环境变量** 复制环境变量模板: ```bash cp .env.example .env ``` 编辑 `.env` 文件,配置你的 API 密钥: > **项目中大模型接口以AIHubMix平台格式为标准,推荐使用 [AIHubMix](https://aihubmix.com/?aff=17EC) 获取API密钥,减小迁移成本** ```env # AI Provider格式配置 (gemini / openai / vertex) AI_PROVIDER_FORMAT=gemini # Gemini 格式配置(当 AI_PROVIDER_FORMAT=gemini 时使用) GOOGLE_API_KEY=your-api-key-here GOOGLE_API_BASE=https://generativelanguage.googleapis.com # 代理示例: https://aihubmix.com/gemini # OpenAI 格式配置(当 AI_PROVIDER_FORMAT=openai 时使用) OPENAI_API_KEY=your-api-key-here OPENAI_API_BASE=https://api.openai.com/v1 # 代理示例: https://aihubmix.com/v1 # Vertex AI 配置(AI_PROVIDER_FORMAT=vertex) # 需要 GCP 项目和服务账户密钥 # VERTEX_PROJECT_ID=your-gcp-project-id # VERTEX_LOCATION=global # GOOGLE_APPLICATION_CREDENTIALS=./gcp-service-account.json # 可修改此变量来控制后端服务端口 BACKEND_PORT=5000 ... ``` #### 前端安装 1. **进入前端目录** ```bash cd frontend ``` 2. **安装依赖** ```bash npm install ``` 3. 
**配置API地址** 前端会自动连接到 `http://localhost:5000` 的后端服务。如需修改,请编辑 `src/api/client.ts`。 #### 启动后端服务 > (可选)如果本地已有重要数据,升级前建议先备份数据库: > `cp backend/instance/database.db backend/instance/database.db.bak` ```bash cd backend uv run alembic upgrade head && uv run python app.py ``` 后端服务将在 `http://localhost:5000` 启动。 访问 `http://localhost:5000/health` 验证服务是否正常运行。 #### 启动前端开发服务器 ```bash cd frontend npm run dev ``` 前端开发服务器将在 `http://localhost:3000` 启动。 打开浏览器访问即可使用应用。 ## 🛠️ 技术架构 ### 前端技术栈 - **框架**:React 18 + TypeScript - **构建工具**:Vite 5 - **状态管理**:Zustand - **路由**:React Router v6 - **UI组件**:Tailwind CSS - **拖拽功能**:@dnd-kit - **图标**:Lucide React - **HTTP客户端**:Axios ### 后端技术栈 - **语言**:Python 3.10+ - **框架**:Flask 3.0 - **包管理**:uv - **数据库**:SQLite + Flask-SQLAlchemy - **AI能力**:Google Gemini API - **PPT处理**:python-pptx - **图片处理**:Pillow - **并发处理**:ThreadPoolExecutor - **跨域支持**:Flask-CORS ## 📁 项目结构 ``` banana-slides/ ├── frontend/ # React前端应用 │ ├── src/ │ │ ├── pages/ # 页面组件 │ │ │ ├── Home.tsx # 首页(创建项目) │ │ │ ├── OutlineEditor.tsx # 大纲编辑页 │ │ │ ├── DetailEditor.tsx # 详细描述编辑页 │ │ │ ├── SlidePreview.tsx # 幻灯片预览页 │ │ │ └── History.tsx # 历史版本管理页 │ │ ├── components/ # UI组件 │ │ │ ├── outline/ # 大纲相关组件 │ │ │ │ └── OutlineCard.tsx │ │ │ ├── preview/ # 预览相关组件 │ │ │ │ ├── SlideCard.tsx │ │ │ │ └── DescriptionCard.tsx │ │ │ ├── shared/ # 共享组件 │ │ │ │ ├── Button.tsx │ │ │ │ ├── Card.tsx │ │ │ │ ├── Input.tsx │ │ │ │ ├── Textarea.tsx │ │ │ │ ├── Modal.tsx │ │ │ │ ├── Loading.tsx │ │ │ │ ├── Toast.tsx │ │ │ │ ├── Markdown.tsx │ │ │ │ ├── MaterialSelector.tsx │ │ │ │ ├── MaterialGeneratorModal.tsx │ │ │ │ ├── TemplateSelector.tsx │ │ │ │ ├── ReferenceFileSelector.tsx │ │ │ │ └── ... 
│ │ │ ├── layout/ # 布局组件 │ │ │ └── history/ # 历史版本组件 │ │ ├── store/ # Zustand状态管理 │ │ │ └── useProjectStore.ts │ │ ├── api/ # API接口 │ │ │ ├── client.ts # Axios客户端配置 │ │ │ └── endpoints.ts # API端点定义 │ │ ├── types/ # TypeScript类型定义 │ │ ├── utils/ # 工具函数 │ │ ├── constants/ # 常量定义 │ │ └── styles/ # 样式文件 │ ├── public/ # 静态资源 │ ├── package.json │ ├── vite.config.ts │ ├── tailwind.config.js # Tailwind CSS配置 │ ├── Dockerfile │ └── nginx.conf # Nginx配置 │ ├── backend/ # Flask后端应用 │ ├── app.py # Flask应用入口 │ ├── config.py # 配置文件 │ ├── models/ # 数据库模型 │ │ ├── project.py # Project模型 │ │ ├── page.py # Page模型(幻灯片页) │ │ ├── task.py # Task模型(异步任务) │ │ ├── material.py # Material模型(参考素材) │ │ ├── user_template.py # UserTemplate模型(用户模板) │ │ ├── reference_file.py # ReferenceFile模型(参考文件) │ │ ├── page_image_version.py # PageImageVersion模型(页面版本) │ ├── services/ # 服务层 │ │ ├── ai_service.py # AI生成服务(Gemini集成) │ │ ├── file_service.py # 文件管理服务 │ │ ├── file_parser_service.py # 文件解析服务 │ │ ├── export_service.py # PPTX/PDF导出服务 │ │ ├── task_manager.py # 异步任务管理 │ │ ├── prompts.py # AI提示词模板 │ ├── controllers/ # API控制器 │ │ ├── project_controller.py # 项目管理 │ │ ├── page_controller.py # 页面管理 │ │ ├── material_controller.py # 素材管理 │ │ ├── template_controller.py # 模板管理 │ │ ├── reference_file_controller.py # 参考文件管理 │ │ ├── export_controller.py # 导出功能 │ │ └── file_controller.py # 文件上传 │ ├── utils/ # 工具函数 │ │ ├── response.py # 统一响应格式 │ │ ├── validators.py # 数据验证 │ │ └── path_utils.py # 路径处理 │ ├── instance/ # SQLite数据库(自动生成) │ ├── exports/ # 导出文件目录 │ ├── Dockerfile │ └── README.md │ ├── tests/ # 测试文件目录 ├── v0_demo/ # 早期演示版本 ├── output/ # 输出文件目录 │ ├── pyproject.toml # Python项目配置(uv管理) ├── uv.lock # uv依赖锁定文件 ├── docker-compose.yml # Docker Compose配置 ├── .env.example # 环境变量示例 ├── LICENSE # 许可证 └── README.md # 本文件 ``` ## 交流群 为了方便大家沟通互助,建此微信交流群. 欢迎提出新功能建议或反馈,本人也会~~佛系~~回答大家问题 ## **🔧 常见问题** 1. 
**生成页面文字有乱码,文字不清晰** - 可选择更高分辨率的输出(openai 格式可能不支持调高分辨率,建议使用gemini格式)。根据测试,生成页面前将 1k 分辨率调整至 2k 后,文字渲染质量会显著提升。 - 请确保在页面描述中包含具体要渲染的文字内容。 2. **导出可编辑 ppt 效果不佳,如文字重叠、无样式等** - 90% 情况为 API 配置出现问题。可以参考 [issue 121](https://github.com/Anionex/banana-slides/issues/121) 中的排查与解决方案。 3. **支持免费层级的 Gemini API Key 吗?** - 免费层级只支持文本生成,不支持图片生成。 4. **生成内容时提示 503 错误或 Retry Error** - 可以根据 README 中的命令查看 Docker 后端日志,定位 503 问题的详细报错,一般是模型配置不正确导致。 5. **.env 中设置了 API Key 之后,为什么不生效?** - 运行时编辑 `.env` 后需要重启 Docker 容器以应用更改。 - 如果曾在网页设置页中配置参数,会覆盖 `.env` 中的参数,可通过"还原默认设置"恢复为 `.env` 设置。 ## 🤝 贡献指南 欢迎通过 [Issue](https://github.com/Anionex/banana-slides/issues) 和 [Pull Request](https://github.com/Anionex/banana-slides/pulls) 为本项目贡献力量! > **重要:** 贡献前请阅读 [CONTRIBUTING.md](CONTRIBUTING.md) ## 📄 许可证 本项目采用 **GNU Affero General Public License v3.0(AGPL-3.0)** 开源, 可自由用于个人学习、研究、试验、教育或非营利科研活动等非商业用途;
详情:如需商业许可证(Commercial License)(例如:希望闭源使用、私有化部署交付、将本项目集成进闭源产品,或在不公开对应源代码的前提下提供服务),请联系作者:anionex@qq.com
- 联系方式:anionex@qq.com

🚀 Sponsor / 赞助


AIHubMix

感谢AIHubMix对本项目的赞助


image
感谢AI火宝对本项目的赞助 “聚合全球多模型API服务商。更低价格享受安全、稳定且72小时链接全球最新模型的服务。”
## 致谢 - 项目贡献者们: [![Contributors](https://contrib.rocks/image?repo=Anionex/banana-slides)](https://github.com/Anionex/banana-slides/graphs/contributors) - [Linux.do](https://linux.do/): 新的理想型社区 ## 赞赏 开源不易🙏如果本项目对你有价值,欢迎请开发者喝杯咖啡☕️ image 感谢以下朋友对项目的无偿赞助支持: > @雅俗共赏、@曹峥、@以年观日、@John、@胡yun星Ethan, @azazo1、@刘聪NLP、@🍟、@苍何、@万瑾、@biubiu、@law、@方源、@寒松Falcon > 如对赞助列表有疑问,可联系作者 ## 📈 项目统计 Star History Chart
================================================ FILE: backend/.gitignore ================================================ # Python __pycache__/ *.py[cod] *$py.class *.so .Python env/ venv/ ENV/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # Flask instance/ .webassets-cache # Environment .env .env.local # Uploads uploads/ *.db # IDE .vscode/ .idea/ *.swp *.swo *~ # OS .DS_Store Thumbs.db ================================================ FILE: backend/Dockerfile ================================================ # 镜像源配置参数(可通过 build args 覆盖) ARG DOCKER_REGISTRY= ARG GHCR_REGISTRY=ghcr.io/ ARG APT_MIRROR= ARG PYPI_INDEX_URL= # 安装 uv(使用中间阶段避免 COPY --from 的变量展开问题) FROM ${GHCR_REGISTRY}astral-sh/uv:latest AS uv # 使用 Python 3.10 作为基础镜像 # 如果指定了 DOCKER_REGISTRY,使用镜像源;否则使用官方源 FROM ${DOCKER_REGISTRY:-}python:3.10-slim # 重新声明ARG(FROM之后ARG作用域失效,需要重新声明) ARG APT_MIRROR= ARG PYPI_INDEX_URL= # 设置工作目录 WORKDIR /app # 安装系统依赖(如果配置了 APT_MIRROR,先替换镜像源) RUN if [ -n "$APT_MIRROR" ]; then \ if [ -f /etc/apt/sources.list.d/debian.sources ]; then \ sed -i "s@deb.debian.org@$APT_MIRROR@g" /etc/apt/sources.list.d/debian.sources; \ else \ echo "Warning: /etc/apt/sources.list.d/debian.sources not found, skipping mirror setup." 
>&2; \ fi; \ fi && \ apt-get update && apt-get install -y \ curl \ && rm -rf /var/lib/apt/lists/* # 从 uv 阶段复制二进制文件 COPY --from=uv /uv /usr/local/bin/uv RUN chmod +x /usr/local/bin/uv # 复制项目配置文件 COPY pyproject.toml ./ COPY uv.lock* ./ # 配置 PyPI 镜像源(如果指定) ENV UV_INDEX_URL=${PYPI_INDEX_URL} # 配置 uv 网络超时 ENV UV_HTTP_TIMEOUT=300 # 安装 Python 依赖 # 如果有 uv.lock 文件则使用 --frozen,否则生成新的锁定文件 RUN if [ -f uv.lock ]; then \ uv sync --frozen; \ else \ uv sync; \ fi # 复制后端代码 COPY backend/ ./backend/ # 复制测试资源 COPY assets/ ./assets/ # 创建必要的目录 RUN mkdir -p /app/backend/instance /app/uploads ENV PYTHONPATH=/app ENV FLASK_APP=backend/app.py # 容器内固定监听 5000;宿主机端口由 docker-compose 的 BACKEND_PORT 控制 ENV IN_DOCKER=1 # 暴露端口 EXPOSE 5000 # 容器内部固定使用 5000 端口 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ CMD ["sh", "-c", "curl -f http://localhost:5000/health || exit 1"] # 启动应用 CMD ["sh", "-c", "uv run --directory backend alembic upgrade head && uv run --directory backend python app.py"] ================================================ FILE: backend/README.md ================================================ # Banana Slides Backend 蕉幻(Banana Slides)后端服务 - AI驱动的PPT生成系统 ## 技术栈 - **框架**: Flask 3.0 - **数据库**: SQLite + SQLAlchemy ORM - **AI服务**: Google Gemini API - **PPT处理**: python-pptx - **并发处理**: ThreadPoolExecutor - **包管理**: uv ## 项目结构 ``` backend/ ├── app.py # Flask应用入口 ├── config.py # 配置文件 ├── models/ # 数据库模型 │ ├── __init__.py │ ├── project.py # Project模型 │ ├── page.py # Page模型 │ └── task.py # Task模型 ├── services/ # 服务层 │ ├── __init__.py │ ├── ai_service.py # AI相关服务 │ ├── file_service.py # 文件管理服务 │ ├── export_service.py # 导出服务 │ └── task_manager.py # 异步任务管理 ├── controllers/ # 控制器层 │ ├── __init__.py │ ├── project_controller.py │ ├── page_controller.py │ ├── template_controller.py │ ├── export_controller.py │ └── file_controller.py ├── utils/ # 工具函数 │ ├── __init__.py │ ├── response.py # 统一响应格式 │ └── validators.py # 数据验证 ├── instance/ # 数据库文件目录(自动创建) ├── uploads/ # 文件上传目录(自动创建) 
├── .env.example # 环境变量示例 └── README.md # 本文件 ``` ## 快速开始 ### 1. 安装依赖 本项目使用 [uv](https://github.com/astral-sh/uv) 管理 Python 依赖。所有依赖定义在项目根目录的 `pyproject.toml` 文件中。 在项目根目录下运行: ```bash uv sync ``` 这将自动安装所有必需的依赖包。 ### 2. 配置环境变量 复制 `.env.example` 为 `.env` 并填写配置: ```bash cp .env.example .env ``` 编辑 `.env` 文件: ```env GOOGLE_API_KEY=your-google-api-key GOOGLE_API_BASE=https://generativelanguage.googleapis.com # 火山引擎配置(可选,用于 Inpainting 图像消除功能) VOLCENGINE_ACCESS_KEY=your-volcengine-access-key VOLCENGINE_SECRET_KEY=your-volcengine-secret-key VOLCENGINE_INPAINTING_TIMEOUT=60 VOLCENGINE_INPAINTING_MAX_RETRIES=3 ``` ### 3. 初始化 / 升级数据库结构(Alembic 迁移) 从当前版本开始,后端使用 Alembic 管理数据库结构变更。 ```bash cd backend uv run alembic upgrade head ``` > 注意: > - 首次运行时会自动创建 `alembic_version` 表并将数据库迁移到最新结构; > - 后续新增模型字段时,只需要更新 `models/`,然后使用 `alembic revision --autogenerate` 生成迁移,再执行 `alembic upgrade head`。 ### 4. 运行服务 使用 uv 运行: ```bash cd backend uv run python app.py ``` 服务将在 `http://localhost:5000` 启动。 ## API文档 完整的API文档请参考项目根目录的 `API设计文档.md`。 ### 主要端点 #### 项目管理 - `POST /api/projects` - 创建项目 - `GET /api/projects/{project_id}` - 获取项目详情 - `PUT /api/projects/{project_id}` - 更新项目 - `DELETE /api/projects/{project_id}` - 删除项目 #### 大纲生成 - `POST /api/projects/{project_id}/generate/outline` - 生成大纲 #### 描述生成 - `POST /api/projects/{project_id}/generate/descriptions` - 批量生成描述(异步) - `POST /api/projects/{project_id}/pages/{page_id}/generate/description` - 单页生成 #### 图片生成 - `POST /api/projects/{project_id}/generate/images` - 批量生成图片(异步) - `POST /api/projects/{project_id}/pages/{page_id}/generate/image` - 单页生成 - `POST /api/projects/{project_id}/pages/{page_id}/edit/image` - 编辑图片 #### 模板管理 - `POST /api/projects/{project_id}/template` - 上传模板 - `DELETE /api/projects/{project_id}/template` - 删除模板 #### 导出 - `GET /api/projects/{project_id}/export/pptx` - 导出PPTX - `GET /api/projects/{project_id}/export/pdf` - 导出PDF #### 静态文件 - `GET /files/{project_id}/{type}/{filename}` - 获取文件 ## 核心功能 ### 1. 
AI驱动的内容生成 基于 Google Gemini API,支持: - 自动生成PPT大纲 - 并行生成页面描述 - 根据参考模板生成图片 - 自然语言编辑图片 ### 2. 异步任务处理 使用 `ThreadPoolExecutor` 实现简单但高效的异步任务处理: - 并行生成多个页面描述 - 并行生成多个页面图片 - 实时任务进度跟踪 ### 3. 文件管理 完整的文件管理系统: - 项目级文件隔离 - 模板图片管理 - 生成图片管理 - 自动清理机制 ### 4. Inpainting 图像消除(可选) 基于火山引擎的 Inpainting 服务,支持: - 根据边界框(bbox)精确消除图像区域 - 自动生成掩码图像 - 重新生成背景(保留前景,消除其他区域) - 支持批量处理和重试机制 使用方法: ```python from services.inpainting_service import InpaintingService, remove_regions from PIL import Image # 方式1:使用服务类 service = InpaintingService() image = Image.open('original.png') bboxes = [(100, 100, 200, 200), (300, 150, 400, 250)] # 要消除的区域 result = service.remove_regions_by_bboxes(image, bboxes) # 方式2:使用便捷函数 result = remove_regions(image, bboxes, expand_pixels=5) ``` ### 5. 数据持久化 使用 SQLite + SQLAlchemy: - 轻量级,无需额外配置 - 支持关系型数据操作 - 事务保证数据一致性 ## 开发说明 ### 数据模型 #### Project(项目) - 项目基本信息 - 模板图片路径 - 项目状态 - 关联的页面和任务 #### Page(页面) - 页面顺序 - 大纲内容(JSON) - 描述内容(JSON) - 生成的图片路径 - 页面状态 #### Task(任务) - 任务类型(生成描述/生成图片) - 任务状态 - 进度信息(JSON) - 错误信息 ### 状态机 #### 项目状态 ``` DRAFT → OUTLINE_GENERATED → DESCRIPTIONS_GENERATED → GENERATING_IMAGES → COMPLETED ``` #### 页面状态 ``` DRAFT → DESCRIPTION_GENERATED → GENERATING → COMPLETED | FAILED ``` #### 任务状态 ``` PENDING → PROCESSING → COMPLETED | FAILED ``` ### 扩展开发 #### 添加新的AI模型 在 `services/ai_service.py` 中添加新的模型支持: ```python class AIService: def __init__(self, api_key: str, model_type: str = 'gemini'): if model_type == 'gemini': # Gemini implementation elif model_type == 'openai': # OpenAI implementation # ... ``` #### 自定义提示词模板 修改 `services/ai_service.py` 中的提示词生成逻辑: ```python def generate_image_prompt(self, ...): prompt = dedent(f""" # 自定义提示词模板 ... 
""") return prompt ``` #### 添加新的导出格式 在 `services/export_service.py` 中添加新的导出方法: ```python class ExportService: @staticmethod def create_custom_format(image_paths, output_file): # 实现自定义格式导出 pass ``` ## 测试 ### 健康检查 ```bash curl http://localhost:5000/health ``` ### 创建项目 ```bash curl -X POST http://localhost:5000/api/projects \ -H "Content-Type: application/json" \ -d '{"creation_type":"idea","idea_prompt":"生成环保主题ppt"}' ``` ### 上传模板 ```bash curl -X POST http://localhost:5000/api/projects/{project_id}/template \ -F "template_image=@template.png" ``` ### 生成大纲 ```bash curl -X POST http://localhost:5000/api/projects/{project_id}/generate/outline \ -H "Content-Type: application/json" \ -d '{"idea_prompt":"生成环保主题ppt"}' ``` ## 常见问题 ### Q: 数据库文件在哪里? A: 在 `backend/instance/database.db`,会自动创建。 ### Q: 上传的文件存在哪里? A: 在 `uploads/{project_id}/` 目录下,按项目隔离。 ### Q: 如何修改并发数? A: 推荐通过前端设置页修改(会同步到数据库并覆盖 `.env` 值);也可以在 `.env` 文件中修改 `MAX_DESCRIPTION_WORKERS` 和 `MAX_IMAGE_WORKERS` 作为默认值,然后在设置页点击“重置为默认值”同步到 DB。 ### Q: 如何切换到其他AI模型 / 修改 MinerU 地址? A: 从当前版本开始,推荐通过前端“系统设置”页面修改: - 大模型提供商格式 / API Base / API Key - 文本模型 (`TEXT_MODEL`) / 图片模型 (`IMAGE_MODEL`) - MinerU 地址 (`MINERU_API_BASE`) / 图片识别模型 (`IMAGE_CAPTION_MODEL`) 这些值会保存到 `settings` 表并覆盖 `.env` 中对应配置,点击“重置为默认值”会回到 `.env` 的默认值。 ### Q: 支持哪些图片格式? 
A: PNG, JPG, JPEG, GIF, WEBP。在 `config.py` 中的 `ALLOWED_EXTENSIONS` 配置。 ## 开源字体说明 本项目包含 **Noto Sans CJK SC**(思源黑体简体中文)字体文件,用于 PPT 导出时的精确文本测量。 - **字体文件**: `fonts/NotoSansSC-Regular.ttf` - **来源**: [Google Noto CJK Fonts](https://github.com/googlefonts/noto-cjk) - **许可证**: [SIL Open Font License 1.1 (OFL)](https://scripts.sil.org/OFL) OFL 许可证允许自由使用、修改和分发该字体。 ## 联系方式 如有问题或建议,请通过 GitHub Issues 反馈。 ================================================ FILE: backend/alembic.ini ================================================ [alembic] script_location = migrations sqlalchemy.url = sqlite:///placeholder.db [loggers] keys = root,sqlalchemy,alembic [handlers] keys = console [formatters] keys = generic [logger_root] level = WARN handlers = console [logger_sqlalchemy] level = WARN handlers = qualname = sqlalchemy.engine [logger_alembic] level = INFO handlers = qualname = alembic [handler_console] class = StreamHandler args = (sys.stderr,) level = NOTSET formatter = generic [formatter_generic] format = %(levelname)-5.5s [%(name)s] %(message)s datefmt = %H:%M:%S ================================================ FILE: backend/app.py ================================================ """ Simplified Flask Application Entry Point """ import os import sys import hmac import logging from pathlib import Path from dotenv import load_dotenv from sqlalchemy import event from sqlalchemy.engine import Engine import sqlite3 from sqlalchemy.exc import SQLAlchemyError from flask_migrate import Migrate # Load environment variables from project root .env file _project_root = Path(__file__).parent.parent _env_file = _project_root / '.env' load_dotenv(dotenv_path=_env_file, override=True) from flask import Flask from flask_cors import CORS from models import db from config import Config from controllers.material_controller import material_bp, material_global_bp from controllers.reference_file_controller import reference_file_bp from controllers.settings_controller import settings_bp from controllers import 
project_bp, page_bp, template_bp, user_template_bp, export_bp, file_bp, style_bp # Enable SQLite WAL mode for all connections @event.listens_for(Engine, "connect") def set_sqlite_pragma(dbapi_conn, connection_record): """ Enable WAL mode and related PRAGMAs for each SQLite connection. Registered once at import time to avoid duplicate handlers when create_app() is called multiple times. """ # Only apply to SQLite connections if not isinstance(dbapi_conn, sqlite3.Connection): return cursor = dbapi_conn.cursor() try: cursor.execute("PRAGMA journal_mode=WAL") cursor.execute("PRAGMA synchronous=NORMAL") cursor.execute("PRAGMA busy_timeout=30000") # 30 seconds timeout finally: cursor.close() def create_app(): """Application factory""" app = Flask(__name__) # Load configuration from Config class app.config.from_object(Config) # Override with environment-specific paths (use absolute path) backend_dir = os.path.dirname(os.path.abspath(__file__)) instance_dir = os.path.join(backend_dir, 'instance') os.makedirs(instance_dir, exist_ok=True) db_path = os.path.join(instance_dir, 'database.db') app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:///{db_path}' # Ensure upload folder exists project_root = os.path.dirname(backend_dir) upload_folder = os.path.join(project_root, 'uploads') os.makedirs(upload_folder, exist_ok=True) app.config['UPLOAD_FOLDER'] = upload_folder # CORS configuration (parse from environment) raw_cors = os.getenv('CORS_ORIGINS', 'http://localhost:3000') if raw_cors.strip() == '*': cors_origins = '*' else: cors_origins = [o.strip() for o in raw_cors.split(',') if o.strip()] app.config['CORS_ORIGINS'] = cors_origins # Initialize logging (log to stdout so Docker can capture it) log_level = getattr(logging, app.config['LOG_LEVEL'], logging.INFO) logging.basicConfig( level=log_level, format="%(asctime)s [%(levelname)s] %(name)s - %(message)s", handlers=[logging.StreamHandler(sys.stdout)], ) # 设置第三方库的日志级别,避免过多的DEBUG日志 
logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING) logging.getLogger('httpcore').setLevel(logging.WARNING) logging.getLogger('httpx').setLevel(logging.WARNING) logging.getLogger('urllib3').setLevel(logging.WARNING) logging.getLogger('werkzeug').setLevel(logging.INFO) # Flask开发服务器日志保持INFO logging.getLogger('volcenginesdkarkruntime').setLevel(logging.WARNING) # Initialize extensions db.init_app(app) CORS(app, origins=cors_origins) # Database migrations (Alembic via Flask-Migrate) Migrate(app, db) # Register blueprints app.register_blueprint(project_bp) app.register_blueprint(page_bp) app.register_blueprint(template_bp) app.register_blueprint(user_template_bp) app.register_blueprint(export_bp) app.register_blueprint(file_bp) app.register_blueprint(material_bp) app.register_blueprint(material_global_bp) app.register_blueprint(reference_file_bp, url_prefix='/api/reference-files') app.register_blueprint(settings_bp) app.register_blueprint(style_bp) with app.app_context(): # Load settings from database and sync to app.config _load_settings_to_config(app) # Access code enforcement on all /api/ routes @app.before_request def _enforce_access_code(): from flask import request, jsonify expected = os.getenv('ACCESS_CODE', '').strip() if not expected: return # not enabled if not request.path.startswith('/api/'): return # non-API routes (health, static, etc.) 
if request.path.startswith('/api/access-code/'): return # allow check/verify endpoints code = request.headers.get('X-Access-Code', '') if hmac.compare_digest(code, expected): return return jsonify({'error': 'Access code required'}), 403 # Health check endpoint @app.route('/health') def health_check(): return {'status': 'ok', 'message': 'Banana Slides API is running'} # Access code verification @app.route('/api/access-code/check', methods=['GET']) def check_access_code(): """Check if access code protection is enabled""" enabled = bool(os.getenv('ACCESS_CODE', '').strip()) return {'data': {'enabled': enabled}} @app.route('/api/access-code/verify', methods=['POST']) def verify_access_code(): """Verify the provided access code""" from flask import request, jsonify expected = os.getenv('ACCESS_CODE', '').strip() if not expected: return {'data': {'valid': True}} code = (request.json or {}).get('code', '') if hmac.compare_digest(code, expected): return {'data': {'valid': True}} return jsonify({'error': 'Invalid access code'}), 403 # Output language endpoint @app.route('/api/output-language', methods=['GET']) def get_output_language(): """ 获取用户的输出语言偏好(从数据库 Settings 读取) 返回: zh, ja, en, auto """ from models import Settings try: settings = Settings.get_settings() return {'data': {'language': settings.output_language or Config.OUTPUT_LANGUAGE}} except SQLAlchemyError as db_error: logging.warning(f"Failed to load output language from settings: {db_error}") return {'data': {'language': Config.OUTPUT_LANGUAGE}} # 默认中文 # Root endpoint @app.route('/') def index(): return { 'name': 'Banana Slides API', 'version': '1.0.0', 'description': 'AI-powered PPT generation service', 'endpoints': { 'health': '/health', 'api_docs': '/api', 'projects': '/api/projects' } } return app def _load_settings_to_config(app): """Load settings from database and apply to app.config on startup""" from models import Settings try: settings = Settings.get_settings() # Load AI provider format (always sync, has 
default value) if settings.ai_provider_format: app.config['AI_PROVIDER_FORMAT'] = settings.ai_provider_format logging.info(f"Loaded AI_PROVIDER_FORMAT from settings: {settings.ai_provider_format}") # Load API configuration # Note: We load even if value is None/empty to allow clearing settings # But we only log if there's an actual value if settings.api_base_url is not None: # 将数据库中的统一 API Base 同步到 Google/OpenAI 两个配置,确保覆盖环境变量 app.config['GOOGLE_API_BASE'] = settings.api_base_url app.config['OPENAI_API_BASE'] = settings.api_base_url if settings.api_base_url: logging.info(f"Loaded API_BASE from settings: {settings.api_base_url}") else: logging.info("API_BASE is empty in settings, using env var or default") if settings.api_key is not None: # 同步到两个提供商的 key,数据库优先于环境变量 app.config['GOOGLE_API_KEY'] = settings.api_key app.config['OPENAI_API_KEY'] = settings.api_key if settings.api_key: logging.info("Loaded API key from settings") else: logging.info("API key is empty in settings, using env var or default") # Load image generation settings (fall back to .env/Config when NULL) resolution = settings.image_resolution or Config.DEFAULT_RESOLUTION aspect_ratio = settings.image_aspect_ratio or Config.DEFAULT_ASPECT_RATIO app.config['DEFAULT_RESOLUTION'] = resolution app.config['DEFAULT_ASPECT_RATIO'] = aspect_ratio logging.info(f"Loaded image settings: {resolution}, {aspect_ratio}") # Load worker settings (fall back to .env/Config when NULL) desc_workers = settings.max_description_workers or Config.MAX_DESCRIPTION_WORKERS img_workers = settings.max_image_workers or Config.MAX_IMAGE_WORKERS app.config['MAX_DESCRIPTION_WORKERS'] = desc_workers app.config['MAX_IMAGE_WORKERS'] = img_workers logging.info(f"Loaded worker settings: desc={desc_workers}, img={img_workers}") # Load model settings (FIX for Issue #136: these were missing before) if settings.text_model: app.config['TEXT_MODEL'] = settings.text_model logging.info(f"Loaded TEXT_MODEL from settings: {settings.text_model}") if 
settings.image_model: app.config['IMAGE_MODEL'] = settings.image_model logging.info(f"Loaded IMAGE_MODEL from settings: {settings.image_model}") # Load MinerU settings if settings.mineru_api_base: app.config['MINERU_API_BASE'] = settings.mineru_api_base logging.info(f"Loaded MINERU_API_BASE from settings: {settings.mineru_api_base}") if settings.mineru_token: app.config['MINERU_TOKEN'] = settings.mineru_token logging.info("Loaded MINERU_TOKEN from settings") # Load image caption model if settings.image_caption_model: app.config['IMAGE_CAPTION_MODEL'] = settings.image_caption_model logging.info(f"Loaded IMAGE_CAPTION_MODEL from settings: {settings.image_caption_model}") # Load output language if settings.output_language: app.config['OUTPUT_LANGUAGE'] = settings.output_language logging.info(f"Loaded OUTPUT_LANGUAGE from settings: {settings.output_language}") # Load reasoning mode settings (separate for text and image) app.config['ENABLE_TEXT_REASONING'] = settings.enable_text_reasoning app.config['TEXT_THINKING_BUDGET'] = settings.text_thinking_budget app.config['ENABLE_IMAGE_REASONING'] = settings.enable_image_reasoning app.config['IMAGE_THINKING_BUDGET'] = settings.image_thinking_budget logging.info(f"Loaded reasoning config: text={settings.enable_text_reasoning}(budget={settings.text_thinking_budget}), image={settings.enable_image_reasoning}(budget={settings.image_thinking_budget})") # Load Baidu API settings if settings.baidu_api_key: app.config['BAIDU_API_KEY'] = settings.baidu_api_key logging.info("Loaded BAIDU_API_KEY from settings") # Load LazyLLM source settings if settings.text_model_source: app.config['TEXT_MODEL_SOURCE'] = settings.text_model_source logging.info(f"Loaded TEXT_MODEL_SOURCE from settings: {settings.text_model_source}") if settings.image_model_source: app.config['IMAGE_MODEL_SOURCE'] = settings.image_model_source logging.info(f"Loaded IMAGE_MODEL_SOURCE from settings: {settings.image_model_source}") if settings.image_caption_model_source: 
app.config['IMAGE_CAPTION_MODEL_SOURCE'] = settings.image_caption_model_source logging.info(f"Loaded IMAGE_CAPTION_MODEL_SOURCE from settings: {settings.image_caption_model_source}") # Load per-model API credentials (for gemini/openai per-model overrides) for model_type in ('text', 'image', 'image_caption'): prefix = model_type.upper() for suffix, setting_suffix in [('_API_KEY', '_api_key'), ('_API_BASE', '_api_base_url')]: config_key = f'{prefix}{suffix}' val = getattr(settings, f'{model_type}{setting_suffix}', None) if val: app.config[config_key] = val if suffix == '_API_BASE': logging.info(f"Loaded {config_key} from settings: {val}") else: logging.info(f"Loaded {config_key} from settings") # Sync LazyLLM vendor API keys to environment variables # Only allow known vendor names to prevent environment variable injection from services.ai_providers.lazyllm_env import ALLOWED_LAZYLLM_VENDORS if settings.lazyllm_api_keys: import json try: keys = json.loads(settings.lazyllm_api_keys) for vendor, key in keys.items(): if key and vendor.lower() in ALLOWED_LAZYLLM_VENDORS: os.environ[f"{vendor.upper()}_API_KEY"] = key elif key: logging.warning(f"Ignoring unknown lazyllm vendor: {vendor}") logging.info(f"Loaded LazyLLM API keys for vendors: {[v for v, k in keys.items() if k and v.lower() in ALLOWED_LAZYLLM_VENDORS]}") except (json.JSONDecodeError, TypeError): logging.warning("Failed to parse lazyllm_api_keys from settings") except Exception as e: if isinstance(e, SQLAlchemyError) and "no such table: settings" in str(e): logging.debug(f"Settings table not yet created (expected on first boot): {e}") else: logging.warning(f"Could not load settings from database: {e}") # Create app instance app = create_app() def _compute_worktree_port(base_port: int) -> int: """Compute a deterministic port from the worktree directory name. Uses MD5 of the project root basename so each worktree gets a unique, stable port pair (backend 5xxx, frontend 3xxx) without manual config. 
""" import hashlib basename = _project_root.name offset = int(hashlib.md5(basename.encode()).hexdigest()[:8], 16) % 500 return base_port + offset if __name__ == '__main__': # Run development server if os.getenv("IN_DOCKER", "0") == "1": port = 5000 # Docker 容器内部固定使用 5000 端口 elif os.getenv('BACKEND_PORT'): port = int(os.getenv('BACKEND_PORT')) else: port = _compute_worktree_port(5000) debug = os.getenv('FLASK_ENV', 'development') == 'development' logging.info( "\n" "╔══════════════════════════════════════╗\n" "║ 🍌 Banana Slides API Server 🍌 ║\n" "╚══════════════════════════════════════╝\n" f"Server starting on: http://localhost:{port}\n" f"Output Language: {Config.OUTPUT_LANGUAGE}\n" f"Environment: {os.getenv('FLASK_ENV', 'development')}\n" f"Debug mode: {debug}\n" f"API Base URL: http://localhost:{port}/api\n" f"Database: {app.config['SQLALCHEMY_DATABASE_URI']}\n" f"Uploads: {app.config['UPLOAD_FOLDER']}" ) # Using absolute paths for database, so WSL path issues should not occur app.run(host='0.0.0.0', port=port, debug=debug, use_reloader=debug) ================================================ FILE: backend/config.py ================================================ """ Backend configuration file """ import os import sys from datetime import timedelta # 基础配置 - 使用更可靠的路径计算方式 # 在模块加载时立即计算并固定路径 _current_file = os.path.realpath(__file__) # 使用realpath解析所有符号链接 BASE_DIR = os.path.dirname(_current_file) PROJECT_ROOT = os.path.dirname(BASE_DIR) # Flask配置 class Config: """Base configuration""" SECRET_KEY = os.getenv('SECRET_KEY', 'your-secret-key-change-this') # 数据库配置 # Use absolute path to avoid WSL path issues db_path = os.path.join(BASE_DIR, 'instance', 'database.db') SQLALCHEMY_DATABASE_URI = os.getenv( 'DATABASE_URL', f'sqlite:///{db_path}' ) SQLALCHEMY_TRACK_MODIFICATIONS = False # SQLite线程安全配置 - 关键修复 SQLALCHEMY_ENGINE_OPTIONS = { 'connect_args': { 'check_same_thread': False, # 允许跨线程使用(仅SQLite) 'timeout': 30 # 增加超时时间 }, 'pool_pre_ping': True, # 连接前检查 'pool_recycle': 
3600, # 1小时回收连接 } # 文件存储配置 UPLOAD_FOLDER = os.path.join(PROJECT_ROOT, 'uploads') MAX_CONTENT_LENGTH = 200 * 1024 * 1024 # 200MB max file size ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'webp'} ALLOWED_REFERENCE_FILE_EXTENSIONS = {'pdf', 'docx', 'pptx', 'doc', 'ppt', 'xlsx', 'xls', 'csv', 'txt', 'md'} # AI服务配置 GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', '') GOOGLE_API_BASE = os.getenv('GOOGLE_API_BASE', '') # Provider format: gemini | openai | vertex | lazyllm AI_PROVIDER_FORMAT = os.getenv('AI_PROVIDER_FORMAT', 'gemini') # Google Cloud Vertex AI (requires AI_PROVIDER_FORMAT=vertex) VERTEX_PROJECT_ID = os.getenv('VERTEX_PROJECT_ID', '') VERTEX_LOCATION = os.getenv('VERTEX_LOCATION', 'us-central1') # GenAI (Gemini) 格式专用配置 GENAI_TIMEOUT = float(os.getenv('GENAI_TIMEOUT', '300.0')) # Gemini 超时时间(秒) GENAI_MAX_RETRIES = int(os.getenv('GENAI_MAX_RETRIES', '2')) # Gemini 最大重试次数(应用层实现) # OpenAI 格式专用配置(当 AI_PROVIDER_FORMAT=openai 时使用) OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '') # 当 AI_PROVIDER_FORMAT=openai 时必须设置 OPENAI_API_BASE = os.getenv('OPENAI_API_BASE', 'https://aihubmix.com/v1') OPENAI_TIMEOUT = float(os.getenv('OPENAI_TIMEOUT', '300.0')) # 增加到 5 分钟(生成清洁背景图需要很长时间) OPENAI_MAX_RETRIES = int(os.getenv('OPENAI_MAX_RETRIES', '2')) # 减少重试次数,避免过多重试导致累积超时 # Lazyllm 格式专用配置(当 AI_PROVIDER_FORMAT=lazyllm 时使用) TEXT_MODEL_SOURCE = os.getenv('TEXT_MODEL_SOURCE', '') # 文本生成模型厂商(留空则跟随全局 AI_PROVIDER_FORMAT) IMAGE_MODEL_SOURCE = os.getenv('IMAGE_MODEL_SOURCE', '') # 图片生成模型厂商(留空则跟随全局 AI_PROVIDER_FORMAT) IMAGE_CAPTION_MODEL_SOURCE = os.getenv('IMAGE_CAPTION_MODEL_SOURCE', '') # 图片识别模型厂商(留空则跟随全局 AI_PROVIDER_FORMAT) # AI 模型配置 TEXT_MODEL = os.getenv('TEXT_MODEL', 'gemini-3-flash-preview') IMAGE_MODEL = os.getenv('IMAGE_MODEL', 'gemini-3-pro-image-preview') # MinerU 文件解析服务配置 MINERU_TOKEN = os.getenv('MINERU_TOKEN', '') MINERU_API_BASE = os.getenv('MINERU_API_BASE', 'https://mineru.net') # 图片识别模型配置 IMAGE_CAPTION_MODEL = os.getenv('IMAGE_CAPTION_MODEL', 'gemini-3-flash-preview') # 
并发配置 MAX_DESCRIPTION_WORKERS = int(os.getenv('MAX_DESCRIPTION_WORKERS', '5')) MAX_IMAGE_WORKERS = int(os.getenv('MAX_IMAGE_WORKERS', '8')) # 图片生成配置 DEFAULT_ASPECT_RATIO = "16:9" DEFAULT_RESOLUTION = "2K" # 日志配置 LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO').upper() # CORS配置 CORS_ORIGINS = os.getenv('CORS_ORIGINS', 'http://localhost:3000').split(',') # 输出语言配置 # 可选值: 'zh' (中文), 'ja' (日本語), 'en' (English), 'auto' (自动) OUTPUT_LANGUAGE = os.getenv('OUTPUT_LANGUAGE', 'zh') # 火山引擎配置 VOLCENGINE_ACCESS_KEY = os.getenv('VOLCENGINE_ACCESS_KEY', '') VOLCENGINE_SECRET_KEY = os.getenv('VOLCENGINE_SECRET_KEY', '') VOLCENGINE_INPAINTING_TIMEOUT = int(os.getenv('VOLCENGINE_INPAINTING_TIMEOUT', '60')) # Inpainting 超时时间(秒) VOLCENGINE_INPAINTING_MAX_RETRIES = int(os.getenv('VOLCENGINE_INPAINTING_MAX_RETRIES', '3')) # 最大重试次数 # Inpainting Provider 配置(用于 InpaintingService 的单张图片修复) # 可选值: 'volcengine' (火山引擎), 'gemini' (Google Gemini) # 注意: 可编辑PPTX导出功能使用 ImageEditabilityService,其中 HybridInpaintProvider 会结合百度重绘和生成式质量增强 INPAINTING_PROVIDER = os.getenv('INPAINTING_PROVIDER', 'gemini') # 默认使用 Gemini # 百度 API 配置(用于 OCR 和图像修复) BAIDU_API_KEY = os.getenv('BAIDU_API_KEY', '') or os.getenv('BAIDU_OCR_API_KEY', '') class DevelopmentConfig(Config): """Development configuration""" DEBUG = True class ProductionConfig(Config): """Production configuration""" DEBUG = False # 根据环境变量选择配置 config_map = { 'development': DevelopmentConfig, 'production': ProductionConfig, 'default': DevelopmentConfig } def get_config(): """Get configuration based on environment""" env = os.getenv('FLASK_ENV', 'development') return config_map.get(env, DevelopmentConfig) ================================================ FILE: backend/controllers/__init__.py ================================================ """Controllers package""" from .project_controller import project_bp, style_bp from .page_controller import page_bp from .template_controller import template_bp, user_template_bp from .export_controller import export_bp from 
.file_controller import file_bp from .material_controller import material_bp from .settings_controller import settings_bp __all__ = ['project_bp', 'style_bp', 'page_bp', 'template_bp', 'user_template_bp', 'export_bp', 'file_bp', 'material_bp', 'settings_bp'] ================================================ FILE: backend/controllers/export_controller.py ================================================ """ Export Controller - handles file export endpoints """ import logging import os import io import shutil import time import zipfile from flask import Blueprint, request, current_app from werkzeug.utils import secure_filename from models import db, Project, Page, Task from utils import ( error_response, not_found, bad_request, success_response, parse_page_ids_from_query, parse_page_ids_from_body, get_filtered_pages ) from services import ExportService, FileService from services.ai_service_manager import get_ai_service logger = logging.getLogger(__name__) export_bp = Blueprint('export', __name__, url_prefix='/api/projects') @export_bp.route('//export/pptx', methods=['GET']) def export_pptx(project_id): """ GET /api/projects/{project_id}/export/pptx?filename=...&page_ids=id1,id2,id3 - Export PPTX Query params: - filename: optional custom filename - page_ids: optional comma-separated page IDs to export (if not provided, exports all pages) Returns: JSON with download URL, e.g. 
{ "success": true, "data": { "download_url": "/files/{project_id}/exports/xxx.pptx", "download_url_absolute": "http://host:port/files/{project_id}/exports/xxx.pptx" } } """ try: project = Project.query.get(project_id) if not project: return not_found('Project') # Get page_ids from query params and fetch filtered pages selected_page_ids = parse_page_ids_from_query(request) logger.debug(f"[export_pptx] selected_page_ids: {selected_page_ids}") pages = get_filtered_pages(project_id, selected_page_ids if selected_page_ids else None) logger.debug(f"[export_pptx] Exporting {len(pages)} pages") if not pages: return bad_request("No pages found for project") # Get image paths file_service = FileService(current_app.config['UPLOAD_FOLDER']) image_paths = [] for page in pages: if page.generated_image_path: abs_path = file_service.get_absolute_path(page.generated_image_path) image_paths.append(abs_path) if not image_paths: return bad_request("No generated images found for project") # Determine export directory and filename exports_dir = file_service._get_exports_dir(project_id) # Get filename from query params or use default filename = secure_filename(request.args.get('filename', f'presentation_{project_id}.pptx')) if not filename.endswith('.pptx'): filename += '.pptx' output_path = os.path.join(exports_dir, filename) # Generate PPTX file on disk ExportService.create_pptx_from_images(image_paths, output_file=output_path, aspect_ratio=project.image_aspect_ratio) # Build download URLs download_path = f"/files/{project_id}/exports/{filename}" base_url = request.url_root.rstrip("/") download_url_absolute = f"{base_url}{download_path}" return success_response( data={ "download_url": download_path, "download_url_absolute": download_url_absolute, }, message="Export PPTX task created" ) except Exception as e: return error_response('SERVER_ERROR', str(e), 500) @export_bp.route('//export/pdf', methods=['GET']) def export_pdf(project_id): """ GET 
/api/projects/{project_id}/export/pdf?filename=...&page_ids=id1,id2,id3 - Export PDF Query params: - filename: optional custom filename - page_ids: optional comma-separated page IDs to export (if not provided, exports all pages) Returns: JSON with download URL, e.g. { "success": true, "data": { "download_url": "/files/{project_id}/exports/xxx.pdf", "download_url_absolute": "http://host:port/files/{project_id}/exports/xxx.pdf" } } """ try: project = Project.query.get(project_id) if not project: return not_found('Project') # Get page_ids from query params and fetch filtered pages selected_page_ids = parse_page_ids_from_query(request) pages = get_filtered_pages(project_id, selected_page_ids if selected_page_ids else None) if not pages: return bad_request("No pages found for project") # Get image paths file_service = FileService(current_app.config['UPLOAD_FOLDER']) image_paths = [] for page in pages: if page.generated_image_path: abs_path = file_service.get_absolute_path(page.generated_image_path) image_paths.append(abs_path) if not image_paths: return bad_request("No generated images found for project") # Determine export directory and filename exports_dir = file_service._get_exports_dir(project_id) # Get filename from query params or use default filename = secure_filename(request.args.get('filename', f'presentation_{project_id}.pdf')) if not filename.endswith('.pdf'): filename += '.pdf' output_path = os.path.join(exports_dir, filename) # Generate PDF file on disk ExportService.create_pdf_from_images(image_paths, output_file=output_path, aspect_ratio=project.image_aspect_ratio) # Build download URLs download_path = f"/files/{project_id}/exports/{filename}" base_url = request.url_root.rstrip("/") download_url_absolute = f"{base_url}{download_path}" return success_response( data={ "download_url": download_path, "download_url_absolute": download_url_absolute, }, message="Export PDF task created" ) except Exception as e: return error_response('SERVER_ERROR', str(e), 500) 
@export_bp.route('//export/images', methods=['GET']) def export_images(project_id): """ GET /api/projects/{project_id}/export/images?page_ids=id1,id2,id3 - Export images Single image: copies to exports dir and returns download URL. Multiple images: creates a ZIP archive and returns download URL. """ try: if '..' in project_id or '/' in project_id or '\\' in project_id: return bad_request('Invalid project ID') s_project_id = secure_filename(project_id) if s_project_id != project_id: return bad_request('Invalid project ID') project = Project.query.get(s_project_id) if not project: return not_found('Project') selected_page_ids = parse_page_ids_from_query(request) pages = get_filtered_pages(s_project_id, selected_page_ids if selected_page_ids else None) if not pages: return bad_request("No pages found for project") file_service = FileService(current_app.config['UPLOAD_FOLDER']) image_items = [] for page in pages: if page.generated_image_path: abs_path = file_service.get_absolute_path(page.generated_image_path) if os.path.exists(abs_path): image_items.append((page, abs_path)) if not image_items: return bad_request("No generated images found for project") exports_dir = file_service._get_exports_dir(s_project_id) timestamp = int(time.time()) if len(image_items) == 1: page, path = image_items[0] ext = os.path.splitext(path)[1] or '.png' filename = f'slide_{page.id}_{timestamp}{ext}' output_path = os.path.join(exports_dir, filename) shutil.copy2(path, output_path) else: filename = f'slides_{s_project_id}_{timestamp}.zip' output_path = os.path.join(exports_dir, filename) with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf: for page, path in image_items: ext = os.path.splitext(path)[1] or '.png' zf.write(path, f'slide_{page.order_index + 1:03d}{ext}') download_path = f"/files/{s_project_id}/exports/{filename}" base_url = request.url_root.rstrip("/") return success_response( data={ "download_url": download_path, "download_url_absolute": 
f"{base_url}{download_path}", }, message="Export images completed" ) except Exception as e: return error_response('SERVER_ERROR', str(e), 500) @export_bp.route('//export/editable-pptx', methods=['POST']) def export_editable_pptx(project_id): """ POST /api/projects/{project_id}/export/editable-pptx - 导出可编辑PPTX(异步) 使用递归分析方法(支持任意尺寸、递归子图分析) 这个端点创建一个异步任务来执行以下操作: 1. 递归分析图片(支持任意尺寸和分辨率) 2. 转换为PDF并上传MinerU识别 3. 提取元素bbox和生成clean background(inpainting) 4. 递归处理图片/图表中的子元素 5. 创建可编辑PPTX Request body (JSON): { "filename": "optional_custom_name.pptx", "page_ids": ["id1", "id2"], // 可选,要导出的页面ID列表(不提供则导出所有) "max_depth": 1, // 可选,递归深度(默认1=不递归,2=递归一层) "max_workers": 4 // 可选,并发数(默认4) } Returns: JSON with task_id, e.g. { "success": true, "data": { "task_id": "uuid-here", "method": "recursive_analysis", "max_depth": 2, "max_workers": 4 }, "message": "Export task created" } 轮询 /api/projects/{project_id}/tasks/{task_id} 获取进度和下载链接 """ try: project = Project.query.get(project_id) if not project: return not_found('Project') # Get parameters from request body data = request.get_json() or {} # Get page_ids from request body and fetch filtered pages selected_page_ids = parse_page_ids_from_body(data) pages = get_filtered_pages(project_id, selected_page_ids if selected_page_ids else None) if not pages: return bad_request("No pages found for project") # Check if pages have generated images has_images = any(page.generated_image_path for page in pages) if not has_images: return bad_request("No generated images found for project") # Get parameters from request body data = request.get_json() or {} filename = data.get('filename', f'presentation_editable_{project_id}.pptx') if not filename.endswith('.pptx'): filename += '.pptx' # 递归分析参数 # max_depth 语义:1=只处理表层不递归,2=递归一层(处理图片/图表中的子元素) max_depth = data.get('max_depth', 1) # 默认不递归,与测试脚本一致 max_workers = data.get('max_workers', 4) # Validate parameters # max_depth >= 1: 至少处理表层元素 if not isinstance(max_depth, int) or max_depth < 1 or max_depth > 5: return 
bad_request("max_depth must be an integer between 1 and 5") if not isinstance(max_workers, int) or max_workers < 1 or max_workers > 16: return bad_request("max_workers must be an integer between 1 and 16") # Create task record task = Task( project_id=project_id, task_type='EXPORT_EDITABLE_PPTX', status='PENDING' ) db.session.add(task) db.session.commit() logger.info(f"Created export task {task.id} for project {project_id} (recursive analysis: depth={max_depth}, workers={max_workers})") # Get services from services.file_service import FileService from services.task_manager import task_manager, export_editable_pptx_with_recursive_analysis_task file_service = FileService(current_app.config['UPLOAD_FOLDER']) # Get Flask app instance for background task app = current_app._get_current_object() # 读取项目的导出设置 export_extractor_method = project.export_extractor_method or 'hybrid' export_inpaint_method = project.export_inpaint_method or 'hybrid' logger.info(f"Export settings: extractor={export_extractor_method}, inpaint={export_inpaint_method}") # 使用递归分析任务(不需要 ai_service,使用 ImageEditabilityService) task_manager.submit_task( task.id, export_editable_pptx_with_recursive_analysis_task, project_id=project_id, filename=filename, file_service=file_service, page_ids=selected_page_ids if selected_page_ids else None, max_depth=max_depth, max_workers=max_workers, export_extractor_method=export_extractor_method, export_inpaint_method=export_inpaint_method, app=app ) logger.info(f"Submitted recursive export task {task.id} to task manager") return success_response( data={ "task_id": task.id, "method": "recursive_analysis", "max_depth": max_depth, "max_workers": max_workers }, message="Export task created (using recursive analysis)" ) except Exception as e: logger.exception("Error creating export task") return error_response('SERVER_ERROR', str(e), 500) ================================================ FILE: backend/controllers/file_controller.py 
================================================ """ File Controller - handles static file serving """ from flask import Blueprint, send_from_directory, current_app from utils import error_response, not_found from utils.path_utils import find_file_with_prefix import os from pathlib import Path from werkzeug.utils import secure_filename file_bp = Blueprint('files', __name__, url_prefix='/files') @file_bp.route('///', methods=['GET']) def serve_file(project_id, file_type, filename): """ GET /files/{project_id}/{type}/{filename} - Serve static files Args: project_id: Project UUID file_type: 'template' or 'pages' filename: File name """ try: if file_type not in ['template', 'pages', 'materials', 'exports']: return not_found('File') # Construct file path file_dir = os.path.join( current_app.config['UPLOAD_FOLDER'], project_id, file_type ) # Check if directory exists if not os.path.exists(file_dir): return not_found('File') # Check if file exists file_path = os.path.join(file_dir, filename) if not os.path.exists(file_path): return not_found('File') # Serve file return send_from_directory(file_dir, filename) except Exception as e: return error_response('SERVER_ERROR', str(e), 500) @file_bp.route('/user-templates//', methods=['GET']) def serve_user_template(template_id, filename): """ GET /files/user-templates/{template_id}/{filename} - Serve user template files Args: template_id: Template UUID filename: File name """ try: # Construct file path file_dir = os.path.join( current_app.config['UPLOAD_FOLDER'], 'user-templates', template_id ) # Check if directory exists if not os.path.exists(file_dir): return not_found('File') # Check if file exists file_path = os.path.join(file_dir, filename) if not os.path.exists(file_path): return not_found('File') # Serve file return send_from_directory(file_dir, filename) except Exception as e: return error_response('SERVER_ERROR', str(e), 500) @file_bp.route('/materials/', methods=['GET']) def serve_global_material(filename): """ GET 
/files/materials/{filename} - Serve global material files (not bound to a project) Args: filename: File name """ try: safe_filename = secure_filename(filename) # Construct file path file_dir = os.path.join( current_app.config['UPLOAD_FOLDER'], 'materials' ) # Check if directory exists if not os.path.exists(file_dir): return not_found('File') # Check if file exists file_path = os.path.join(file_dir, safe_filename) if not os.path.exists(file_path): return not_found('File') # Serve file return send_from_directory(file_dir, safe_filename) except Exception as e: return error_response('SERVER_ERROR', str(e), 500) @file_bp.route('/mineru//', methods=['GET']) def serve_mineru_file(extract_id, filepath): """ GET /files/mineru/{extract_id}/{filepath} - Serve MinerU extracted files. Args: extract_id: Extract UUID filepath: Relative file path within the extract """ try: root_dir = os.path.join(current_app.config['UPLOAD_FOLDER'], 'mineru_files', extract_id) full_path = Path(root_dir) / filepath # This prevents path traversal attacks resolved_root_dir = Path(root_dir).resolve() try: # Check if the path is trying to escape the root directory resolved_full_path = full_path.resolve() if not str(resolved_full_path).startswith(str(resolved_root_dir)): return error_response('INVALID_PATH', 'Invalid file path', 403) except Exception: # If we can't resolve the path at all, it's invalid return error_response('INVALID_PATH', 'Invalid file path', 403) # Try to find file with prefix matching matched_path = find_file_with_prefix(full_path) if matched_path is not None: # Additional security check for matched path try: resolved_matched_path = matched_path.resolve(strict=True) # Verify the matched file is still within the root directory if not str(resolved_matched_path).startswith(str(resolved_root_dir)): return error_response('INVALID_PATH', 'Invalid file path', 403) except FileNotFoundError: return not_found('File') except Exception: return error_response('INVALID_PATH', 'Invalid file path', 
403) return send_from_directory(str(matched_path.parent), matched_path.name) return not_found('File') except Exception as e: return error_response('SERVER_ERROR', str(e), 500) ================================================ FILE: backend/controllers/material_controller.py ================================================ """ Material Controller - handles standalone material image generation """ from flask import Blueprint, request, current_app, send_file from models import db, Project, Material, Task from utils import success_response, error_response, not_found, bad_request from services import FileService from services.ai_service_manager import get_ai_service from services.task_manager import task_manager, generate_material_image_task from pathlib import Path from werkzeug.utils import secure_filename from typing import Optional import tempfile import shutil import time import zipfile import io import base64 import logging logger = logging.getLogger(__name__) material_bp = Blueprint('materials', __name__, url_prefix='/api/projects') material_global_bp = Blueprint('materials_global', __name__, url_prefix='/api/materials') ALLOWED_MATERIAL_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg'} ALLOWED_ASPECT_RATIOS = frozenset({'16:9', '21:9', '4:3', '3:2', '5:4', '1:1', '4:5', '2:3', '3:4', '9:16'}) def _generate_image_caption(filepath: str) -> str: """Generate AI caption for an uploaded image. Returns empty string on failure.""" if filepath.lower().endswith('.svg'): return "" try: from PIL import Image image = Image.open(filepath) image.thumbnail((1024, 1024), Image.Resampling.LANCZOS) output_lang = current_app.config.get('OUTPUT_LANGUAGE', 'zh') if output_lang == 'en': prompt = "Please provide a short description of the main content of this image. Return only the description text without any other explanation." 
else: prompt = "请用一句简短的中文描述这张图片的主要内容。只返回描述文字,不要其他解释。" provider_format = (current_app.config.get('AI_PROVIDER_FORMAT') or 'gemini').lower() caption_model = current_app.config.get('IMAGE_CAPTION_MODEL', 'gemini-3-flash-preview') if provider_format == 'openai': from openai import OpenAI api_key = current_app.config.get('OPENAI_API_KEY', '') if not api_key: return "" client = OpenAI( api_key=api_key, base_url=current_app.config.get('OPENAI_API_BASE') or None ) buffered = io.BytesIO() if image.mode in ('RGBA', 'LA', 'P'): background = Image.new('RGB', image.size, (255, 255, 255)) background.paste(image, mask=image.split()[-1] if image.mode in ('RGBA', 'LA') else None) image = background image.save(buffered, format="JPEG", quality=95) base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8') response = client.chat.completions.create( model=caption_model, messages=[{ "role": "user", "content": [ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}, {"type": "text", "text": prompt} ] }], temperature=0.3 ) return response.choices[0].message.content.strip() else: # Gemini (default) from google import genai from google.genai import types api_key = current_app.config.get('GOOGLE_API_KEY', '') if not api_key: return "" api_base = current_app.config.get('GOOGLE_API_BASE', '') client = genai.Client( http_options=types.HttpOptions(base_url=api_base) if api_base else None, api_key=api_key ) result = client.models.generate_content( model=caption_model, contents=[image, prompt], config=types.GenerateContentConfig(temperature=0.3) ) return result.text.strip() except Exception as e: logger.warning(f"Failed to generate caption for {filepath}: {e}") return "" def _build_material_query(filter_project_id: str): """Build common material query with project validation.""" query = Material.query if filter_project_id == 'all': return query, None if filter_project_id == 'none': return query.filter(Material.project_id.is_(None)), None project = 
Project.query.get(filter_project_id) if not project: return None, not_found('Project') return query.filter(Material.project_id == filter_project_id), None def _get_materials_list(filter_project_id: str): """ Common logic to get materials list. Returns (materials_list, error_response) """ query, error = _build_material_query(filter_project_id) if error: return None, error materials = query.order_by(Material.created_at.desc()).all() materials_list = [material.to_dict() for material in materials] return materials_list, None def _handle_material_upload(default_project_id: Optional[str] = None): """ Common logic to handle material upload. Returns Flask response object. """ try: raw_project_id = request.args.get('project_id', default_project_id) target_project_id, error = _resolve_target_project_id(raw_project_id) if error: return error file = request.files.get('file') material, error = _save_material_file(file, target_project_id) if error: return error result = material.to_dict() # Generate AI caption if requested generate_caption = request.args.get('generate_caption', '').lower() in ('true', '1', 'yes') if generate_caption: file_service = FileService(current_app.config['UPLOAD_FOLDER']) filepath = file_service.get_absolute_path(material.relative_path) caption = _generate_image_caption(filepath) result['caption'] = caption return success_response(result, status_code=201) except Exception as e: db.session.rollback() return error_response('SERVER_ERROR', str(e), 500) def _resolve_target_project_id(raw_project_id: Optional[str], allow_none: bool = True): """ Normalize project_id from request. 
Returns (project_id | None, error_response | None) """ if allow_none and (raw_project_id is None or raw_project_id == 'none'): return None, None if raw_project_id == 'all': return None, bad_request("project_id cannot be 'all' when uploading materials") if raw_project_id: project = Project.query.get(raw_project_id) if not project: return None, not_found('Project') return raw_project_id, None def _save_material_file(file, target_project_id: Optional[str]): """Shared logic for saving uploaded material files to disk and DB.""" if not file or not file.filename: return None, bad_request("file is required") filename = secure_filename(file.filename) file_ext = Path(filename).suffix.lower() if file_ext not in ALLOWED_MATERIAL_EXTENSIONS: return None, bad_request(f"Unsupported file type. Allowed: {', '.join(sorted(ALLOWED_MATERIAL_EXTENSIONS))}") file_service = FileService(current_app.config['UPLOAD_FOLDER']) if target_project_id: materials_dir = file_service.upload_folder / file_service._get_materials_dir(target_project_id) else: materials_dir = file_service.upload_folder / "materials" materials_dir.mkdir(exist_ok=True, parents=True) timestamp = int(time.time() * 1000) base_name = Path(filename).stem unique_filename = f"{base_name}_{timestamp}{file_ext}" filepath = materials_dir / unique_filename file.save(str(filepath)) relative_path = str(filepath.relative_to(file_service.upload_folder)) if target_project_id: image_url = file_service.get_file_url(target_project_id, 'materials', unique_filename) else: image_url = f"/files/materials/{unique_filename}" material = Material( project_id=target_project_id, filename=unique_filename, relative_path=relative_path, url=image_url ) try: db.session.add(material) db.session.commit() return material, None except Exception: db.session.rollback() raise @material_bp.route('//materials/generate', methods=['POST']) def generate_material_image(project_id): """ POST /api/projects/{project_id}/materials/generate - Generate a standalone material 
image Supports multipart/form-data: - prompt: Text-to-image prompt (passed directly to the model without modification) - ref_image: Main reference image (optional) - extra_images: Additional reference images (multiple files, optional) Note: project_id can be 'none' to generate global materials (not associated with any project) """ try: # 支持 'none' 作为特殊值,表示生成全局素材 if project_id != 'none': project = Project.query.get(project_id) if not project: return not_found('Project') else: project = None project_id = None # 设置为None表示全局素材 # Parse request data (prioritize multipart for file uploads) if request.is_json: data = request.get_json() or {} prompt = data.get('prompt', '').strip() ref_file = None extra_files = [] else: data = request.form.to_dict() prompt = (data.get('prompt') or '').strip() ref_file = request.files.get('ref_image') extra_files = request.files.getlist('extra_images') or [] aspect_ratio = (data.get('aspect_ratio') or '').strip() or None if aspect_ratio and aspect_ratio not in ALLOWED_ASPECT_RATIOS: return bad_request(f"Invalid aspect ratio. 
Allowed values: {', '.join(sorted(ALLOWED_ASPECT_RATIOS))}") if not prompt: return bad_request("prompt is required") # 处理project_id:对于全局素材,使用'global'作为Task的project_id # Task模型要求project_id不能为null,但Material可以 task_project_id = project_id if project_id is not None else 'global' # 验证project_id(如果不是'global') if task_project_id != 'global': project = Project.query.get(task_project_id) if not project: return not_found('Project') # Initialize services ai_service = get_ai_service() file_service = FileService(current_app.config['UPLOAD_FOLDER']) # 创建临时目录保存参考图片(后台任务会清理) temp_dir = Path(tempfile.mkdtemp(dir=current_app.config['UPLOAD_FOLDER'])) temp_dir_str = str(temp_dir) try: ref_path = None # Save main reference image to temp directory if provided if ref_file and ref_file.filename: ref_filename = secure_filename(ref_file.filename or 'ref.png') ref_path = temp_dir / ref_filename ref_file.save(str(ref_path)) ref_path_str = str(ref_path) else: ref_path_str = None # Save additional reference images to temp directory additional_ref_images = [] for extra in extra_files: if not extra or not extra.filename: continue extra_filename = secure_filename(extra.filename) extra_path = temp_dir / extra_filename extra.save(str(extra_path)) additional_ref_images.append(str(extra_path)) # Create async task for material generation task = Task( project_id=task_project_id, task_type='GENERATE_MATERIAL', status='PENDING' ) task.set_progress({ 'total': 1, 'completed': 0, 'failed': 0 }) db.session.add(task) db.session.commit() # Get app instance for background task app = current_app._get_current_object() # Submit background task task_manager.submit_task( task.id, generate_material_image_task, task_project_id, # 传递给任务函数,它会处理'global'的情况 prompt, ai_service, file_service, ref_path_str, additional_ref_images if additional_ref_images else None, aspect_ratio or (project.image_aspect_ratio if project else None) or current_app.config.get('DEFAULT_ASPECT_RATIO', '16:9'), 
current_app.config['DEFAULT_RESOLUTION'], temp_dir_str, app ) # Return task_id immediately (不再清理temp_dir,由后台任务清理) return success_response({ 'task_id': task.id, 'status': 'PENDING' }, status_code=202) except Exception as e: # Clean up temp directory on error if temp_dir.exists(): shutil.rmtree(temp_dir, ignore_errors=True) raise except Exception as e: db.session.rollback() return error_response('AI_SERVICE_ERROR', str(e), 503) @material_bp.route('//materials', methods=['GET']) def list_materials(project_id): """ GET /api/projects/{project_id}/materials - List materials for a specific project Returns: List of material images with filename, url, and metadata for the specified project """ try: materials_list, error = _get_materials_list(project_id) if error: return error return success_response({ "materials": materials_list, "count": len(materials_list) }) except Exception as e: return error_response('SERVER_ERROR', str(e), 500) @material_bp.route('//materials/upload', methods=['POST']) def upload_material(project_id): """ POST /api/projects/{project_id}/materials/upload - Upload a material image Supports multipart/form-data: - file: Image file (required) - project_id: Optional query parameter, defaults to path parameter if not provided Returns: Material info with filename, url, and metadata """ return _handle_material_upload(default_project_id=project_id) @material_global_bp.route('', methods=['GET']) def list_all_materials(): """ GET /api/materials - Global materials endpoint for complex queries Query params: - project_id: Filter by project_id * 'all' (default): Get all materials regardless of project * 'none': Get only materials without a project (global materials) * : Get materials for specific project Returns: List of material images with filename, url, and metadata """ try: filter_project_id = request.args.get('project_id', 'all') materials_list, error = _get_materials_list(filter_project_id) if error: return error return success_response({ "materials": 
materials_list, "count": len(materials_list) }) except Exception as e: return error_response('SERVER_ERROR', str(e), 500) @material_global_bp.route('/upload', methods=['POST']) def upload_material_global(): """ POST /api/materials/upload - Upload a material image (global, not bound to a project) Supports multipart/form-data: - file: Image file (required) - project_id: Optional query parameter to associate with a project Returns: Material info with filename, url, and metadata """ return _handle_material_upload(default_project_id=None) @material_global_bp.route('/', methods=['DELETE']) def delete_material(material_id): """ DELETE /api/materials/{material_id} - Delete a material and its file """ try: material = Material.query.get(material_id) if not material: return not_found('Material') file_service = FileService(current_app.config['UPLOAD_FOLDER']) material_path = Path(file_service.get_absolute_path(material.relative_path)) # First, delete the database record to ensure data consistency db.session.delete(material) db.session.commit() # Then, attempt to delete the file. If this fails, log the error # but still return a success response. This leaves an orphan file, try: if material_path.exists(): material_path.unlink(missing_ok=True) except OSError as e: current_app.logger.warning(f"Failed to delete file for material {material_id} at {material_path}: {e}") return success_response({"id": material_id}) except Exception as e: db.session.rollback() return error_response('SERVER_ERROR', str(e), 500) @material_global_bp.route('/associate', methods=['POST']) def associate_materials_to_project(): """ POST /api/materials/associate - Associate materials to a project by URLs Request body (JSON): { "project_id": "project_id", "material_urls": ["url1", "url2", ...] 
} Returns: List of associated material IDs and count """ try: data = request.get_json() or {} project_id = data.get('project_id') material_urls = data.get('material_urls', []) if not project_id: return bad_request("project_id is required") if not material_urls or not isinstance(material_urls, list): return bad_request("material_urls must be a non-empty array") # Validate project exists project = Project.query.get(project_id) if not project: return not_found('Project') # Find materials by URLs and update their project_id updated_ids = [] materials_to_update = Material.query.filter( Material.url.in_(material_urls), Material.project_id.is_(None) ).all() for material in materials_to_update: material.project_id = project_id updated_ids.append(material.id) db.session.commit() return success_response({ "updated_ids": updated_ids, "count": len(updated_ids) }) except Exception as e: db.session.rollback() return error_response('SERVER_ERROR', str(e), 500) @material_global_bp.route('/download', methods=['POST']) def download_materials_zip(): """Bundle requested materials into a ZIP and stream it back.""" body = request.get_json(silent=True) or {} ids = body.get('material_ids') if not ids or not isinstance(ids, list): return bad_request("material_ids must be a non-empty list") MAX_BATCH = 200 if len(ids) > MAX_BATCH: return bad_request(f"Too many materials requested (max {MAX_BATCH})") rows = Material.query.filter(Material.id.in_(ids)).all() if not rows: return not_found('Materials') tmp = tempfile.SpooledTemporaryFile(max_size=64 * 1024 * 1024) try: fs = FileService(current_app.config['UPLOAD_FOLDER']) with zipfile.ZipFile(tmp, 'w', zipfile.ZIP_DEFLATED) as zf: for row in rows: abs_path = Path(fs.get_absolute_path(row.relative_path)) if not abs_path.is_file(): current_app.logger.warning("Skipping missing file for material %s", row.id) continue zf.write(str(abs_path), row.filename) tmp.seek(0) fname = f"materials_{int(time.time())}.zip" return send_file(tmp, 
mimetype='application/zip', as_attachment=True, download_name=fname) except Exception: tmp.close() current_app.logger.exception("Failed to build materials zip") return error_response('SERVER_ERROR', 'Failed to create zip archive', 500) ================================================ FILE: backend/controllers/page_controller.py ================================================ """ Page Controller - handles page-related endpoints """ import logging from flask import Blueprint, request, current_app from models import db, Project, Page, PageImageVersion, Task from utils import success_response, error_response, not_found, bad_request from services import FileService, ProjectContext from services.ai_service_manager import get_ai_service from services.task_manager import task_manager, generate_single_page_image_task, edit_page_image_task from datetime import datetime from pathlib import Path from werkzeug.utils import secure_filename import shutil import tempfile import json logger = logging.getLogger(__name__) page_bp = Blueprint('pages', __name__, url_prefix='/api/projects') @page_bp.route('//pages', methods=['POST']) def create_page(project_id): """ POST /api/projects/{project_id}/pages - Add new page Request body: { "order_index": 2, "part": "optional", "outline_content": {"title": "...", "points": [...]} } """ try: project = Project.query.get(project_id) if not project: return not_found('Project') data = request.get_json() if not data or 'order_index' not in data: return bad_request("order_index is required") # Create new page page = Page( project_id=project_id, order_index=data['order_index'], part=data.get('part'), status='DRAFT' ) if 'outline_content' in data: page.set_outline_content(data['outline_content']) if 'description_content' in data: page.set_description_content(data['description_content']) page.status = 'DESCRIPTION_GENERATED' db.session.add(page) # Update other pages' order_index if necessary other_pages = Page.query.filter( Page.project_id == 
@page_bp.route('/<project_id>/pages/<page_id>', methods=['DELETE'])
def delete_page(project_id, page_id):
    """
    DELETE /api/projects/{project_id}/pages/{page_id} - Delete page

    Removes the page image through FileService, deletes the Page row and
    refreshes the project's updated_at. Responds 404 when the page does not
    exist or belongs to a different project.
    """
    try:
        page = Page.query.get(page_id)
        if not page or page.project_id != project_id:
            return not_found('Page')

        # Remove the stored image first, then the database row.
        FileService(current_app.config['UPLOAD_FOLDER']).delete_page_image(project_id, page_id)
        db.session.delete(page)

        project = Project.query.get(project_id)
        if project:
            project.updated_at = datetime.utcnow()

        db.session.commit()
        return success_response(message="Page deleted successfully")

    except Exception as e:
        db.session.rollback()
        return error_response('SERVER_ERROR', str(e), 500)


@page_bp.route('/<project_id>/pages/<page_id>', methods=['PUT'])
def update_page(project_id, page_id):
    """
    PUT /api/projects/{project_id}/pages/{page_id} - Update page fields

    Request body:
    {
        "part": "chapter name"
    }

    Only ``part`` is applied. Responds 404 for an unknown/foreign page and
    400 for an empty body; unexpected failures are logged and reported with
    a generic message.
    """
    try:
        page = Page.query.get(page_id)
        if not page or page.project_id != project_id:
            return not_found('Page')

        data = request.get_json()
        if not data:
            return bad_request("Request body is required")

        if 'part' in data:
            page.part = data['part']

        now = datetime.utcnow()
        page.updated_at = now
        if page.project:
            page.project.updated_at = datetime.utcnow()

        db.session.commit()
        return success_response(page.to_dict())

    except Exception as e:
        db.session.rollback()
        logger.error(f"Failed to update page {page_id}: {e}")
        return error_response('SERVER_ERROR', 'An internal server error occurred', 500)


@page_bp.route('/<project_id>/pages/<page_id>/outline', methods=['PUT'])
def update_page_outline(project_id, page_id):
    """
    PUT /api/projects/{project_id}/pages/{page_id}/outline - Edit page outline

    Request body:
    {
        "outline_content": {"title": "...", "points": [...]}
    }

    Stores the given outline on the page and refreshes the page's and the
    project's updated_at. Responds 404 for an unknown/foreign page and 400
    when ``outline_content`` is absent.
    """
    try:
        page = Page.query.get(page_id)
        if not page or page.project_id != project_id:
            return not_found('Page')

        data = request.get_json()
        if not data or 'outline_content' not in data:
            return bad_request("outline_content is required")

        page.set_outline_content(data['outline_content'])
        page.updated_at = datetime.utcnow()

        project = Project.query.get(project_id)
        if project:
            project.updated_at = datetime.utcnow()

        db.session.commit()
        return success_response(page.to_dict())

    except Exception as e:
        db.session.rollback()
        return error_response('SERVER_ERROR', str(e), 500)


@page_bp.route('/<project_id>/pages/<page_id>/description', methods=['PUT'])
def update_page_description(project_id, page_id):
    """
    PUT /api/projects/{project_id}/pages/{page_id}/description - Edit description

    Request body:
    {
        "description_content": {
            "title": "...",
            "text_content": ["...", "..."],
            "extra_fields": {"排版布局": "..."}
        }
    }

    Stores the given description on the page and refreshes the page's and
    the project's updated_at. Responds 404 for an unknown/foreign page and
    400 when ``description_content`` is absent.
    """
    try:
        page = Page.query.get(page_id)
        if not page or page.project_id != project_id:
            return not_found('Page')

        data = request.get_json()
        if not data or 'description_content' not in data:
            return bad_request("description_content is required")

        page.set_description_content(data['description_content'])
        page.updated_at = datetime.utcnow()

        project = Project.query.get(project_id)
        if project:
            project.updated_at = datetime.utcnow()

        db.session.commit()
        return success_response(page.to_dict())

    except Exception as e:
        db.session.rollback()
        return error_response('SERVER_ERROR', str(e), 500)
not_found('Page') project = Project.query.get(project_id) if not project: return not_found('Project') data = request.get_json() or {} force_regenerate = data.get('force_regenerate', False) language = data.get('language', current_app.config.get('OUTPUT_LANGUAGE', 'zh')) detail_level = data.get('detail_level', 'default') # Check if already generated if page.get_description_content() and not force_regenerate: return bad_request("Description already exists. Set force_regenerate=true to regenerate") # Get outline content outline_content = page.get_outline_content() if not outline_content: return bad_request("Page must have outline content first") # Reconstruct full outline all_pages = Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all() outline = [] for p in all_pages: oc = p.get_outline_content() if oc: page_data = oc.copy() if p.part: page_data['part'] = p.part outline.append(page_data) # Initialize AI service ai_service = get_ai_service() # Get reference files content and create project context from controllers.project_controller import _get_project_reference_files_content reference_files_content = _get_project_reference_files_content(project_id) project_context = ProjectContext(project, reference_files_content) # Generate description page_data = outline_content.copy() if page.part: page_data['part'] = page.part desc_result = ai_service.generate_page_description( project_context, outline, page_data, page.order_index + 1, language=language, detail_level=detail_level ) # Save description (generate_page_description returns dict with text + optional extra_fields) desc_content = { "text": desc_result['text'], "generated_at": datetime.utcnow().isoformat() } if desc_result.get('extra_fields'): desc_content['extra_fields'] = desc_result['extra_fields'] page.set_description_content(desc_content) page.status = 'DESCRIPTION_GENERATED' page.updated_at = datetime.utcnow() db.session.commit() return success_response(page.to_dict()) except Exception as e: 
db.session.rollback() return error_response('AI_SERVICE_ERROR', str(e), 503) @page_bp.route('//pages//generate/image', methods=['POST']) def generate_page_image(project_id, page_id): """ POST /api/projects/{project_id}/pages/{page_id}/generate/image - Generate single page image Request body: { "use_template": true, "force_regenerate": false } """ try: page = Page.query.get(page_id) if not page or page.project_id != project_id: return not_found('Page') project = Project.query.get(project_id) if not project: return not_found('Project') data = request.get_json() or {} use_template = data.get('use_template', True) force_regenerate = data.get('force_regenerate', False) language = data.get('language', current_app.config.get('OUTPUT_LANGUAGE', 'zh')) # Check if already generated if page.generated_image_path and not force_regenerate: return bad_request("Image already exists. Set force_regenerate=true to regenerate") # Get description content desc_content = page.get_description_content() if not desc_content: return bad_request("Page must have description content first") # Reconstruct full outline with part structure all_pages = Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all() outline = [] current_part = None current_part_pages = [] for p in all_pages: oc = p.get_outline_content() if not oc: continue page_data = oc.copy() # 如果当前页面属于一个 part if p.part: # 如果这是新的 part,先保存之前的 part(如果有) if current_part and current_part != p.part: outline.append({ "part": current_part, "pages": current_part_pages }) current_part_pages = [] current_part = p.part # 移除 part 字段,因为它在顶层 if 'part' in page_data: del page_data['part'] current_part_pages.append(page_data) else: # 如果当前页面不属于任何 part,先保存之前的 part(如果有) if current_part: outline.append({ "part": current_part, "pages": current_part_pages }) current_part = None current_part_pages = [] # 直接添加页面 outline.append(page_data) # 保存最后一个 part(如果有) if current_part: outline.append({ "part": current_part, "pages": current_part_pages }) 
# Initialize services ai_service = get_ai_service() file_service = FileService(current_app.config['UPLOAD_FOLDER']) # Get template path ref_image_path = None if use_template: ref_image_path = file_service.get_template_path(project_id) # 检查是否有模板图片或风格描述 # 如果都没有,则返回错误 if not ref_image_path and not project.template_style: return bad_request("No template image or style description found for project") # Generate prompt page_data = page.get_outline_content() or {} if page.part: page_data['part'] = page.part # 获取描述文本(可能是 text 字段或 text_content 数组) desc_text = desc_content.get('text', '') if not desc_text and desc_content.get('text_content'): # 如果 text 字段不存在,尝试从 text_content 数组获取 text_content = desc_content.get('text_content', []) if isinstance(text_content, list): desc_text = '\n'.join(text_content) else: desc_text = str(text_content) # 从当前页面的描述内容中提取图片 URL(在生成 prompt 之前提取,以便告知 AI) additional_ref_images = [] has_material_images = False # 从描述文本中提取图片 if desc_text: image_urls = ai_service.extract_image_urls_from_markdown(desc_text) if image_urls: logger.info(f"Found {len(image_urls)} image(s) in page {page_id} description") additional_ref_images = image_urls has_material_images = True # 合并额外要求和风格描述 combined_requirements = project.extra_requirements or "" if project.template_style: style_requirement = f"\n\nppt页面风格描述:\n\n{project.template_style}" combined_requirements = combined_requirements + style_requirement # Create async task for image generation task = Task( project_id=project_id, task_type='GENERATE_PAGE_IMAGE', status='PENDING' ) task.set_progress({ 'total': 1, 'completed': 0, 'failed': 0 }) db.session.add(task) db.session.commit() # Get app instance for background task app = current_app._get_current_object() # Submit background task task_manager.submit_task( task.id, generate_single_page_image_task, project_id, page_id, ai_service, file_service, outline, use_template, project.image_aspect_ratio, current_app.config['DEFAULT_RESOLUTION'], app, combined_requirements if 
@page_bp.route('/<project_id>/pages/<page_id>/edit/image', methods=['POST'])
def edit_page_image(project_id, page_id):
    """
    POST /api/projects/{project_id}/pages/{page_id}/edit/image - Edit page image

    Request body (JSON or multipart/form-data):
    {
        "edit_instruction": "Change the text box style to dashed",
        "context_images": {
            "use_template": true,              // include the template image
            "desc_image_urls": ["url1", "url2"],   // image URLs from the description
            "uploaded_image_ids": ["file1", "file2"]  // uploaded file ids (multipart)
        }
    }

    For multipart/form-data:
    - edit_instruction: text field
    - use_template: text field (true/false)
    - desc_image_urls: JSON array string
    - context_images: file uploads (multiple files with key "context_images")

    Creates a PENDING ``EDIT_PAGE_IMAGE`` task, submits it to the task
    manager and responds 202 with the task id. Responds 404 for an unknown
    page/project, 400 when the page has no generated image or the
    instruction is missing, and 503 on any other failure.
    """
    temp_dir = None
    task_submitted = False
    try:
        page = Page.query.get(page_id)
        if not page or page.project_id != project_id:
            return not_found('Page')

        if not page.generated_image_path:
            return bad_request("Page must have generated image first")

        project = Project.query.get(project_id)
        if not project:
            return not_found('Project')

        ai_service = get_ai_service()
        file_service = FileService(current_app.config['UPLOAD_FOLDER'])

        # Accept both JSON and multipart/form-data.
        if request.is_json:
            data = request.get_json()
            uploaded_files = []
        else:
            data = request.form.to_dict()
            uploaded_files = request.files.getlist('context_images')

        # Also rejects a missing body or a non-object JSON payload.
        if not isinstance(data, dict) or 'edit_instruction' not in data:
            return bad_request("edit_instruction is required")

        # Prefer the description's "text"; otherwise join "text_content".
        original_description = None
        desc_content = page.get_description_content()
        if desc_content:
            original_description = desc_content.get('text') or ''
            if not original_description and desc_content.get('text_content'):
                text_content = desc_content['text_content']
                if isinstance(text_content, list):
                    original_description = '\n'.join(text_content)
                else:
                    original_description = str(text_content)

        # Options live in the nested "context_images" object for JSON
        # requests. Multipart requests have no such form field (that key
        # carries the files), so fall back to the top-level fields there.
        context_images = data.get('context_images')
        if isinstance(context_images, dict):
            use_template = context_images.get('use_template', False)
            desc_image_urls = context_images.get('desc_image_urls', [])
        else:
            # str() keeps a JSON boolean from crashing on .lower().
            use_template = str(data.get('use_template', 'false')).strip().lower() == 'true'
            desc_image_urls = data.get('desc_image_urls', [])

        additional_ref_images = []

        # 1. Template image, when requested and available.
        if use_template:
            template_path = file_service.get_template_path(project_id)
            if template_path:
                additional_ref_images.append(template_path)

        # 2. Image URLs from the description (JSON-encoded in multipart forms).
        if isinstance(desc_image_urls, str):
            try:
                desc_image_urls = json.loads(desc_image_urls) if desc_image_urls else []
            except ValueError:
                desc_image_urls = []
        if isinstance(desc_image_urls, list):
            additional_ref_images.extend(desc_image_urls)

        # 3. Uploaded files are saved under the upload folder so they are
        #    still on disk when the background task runs.
        if uploaded_files:
            temp_dir = Path(tempfile.mkdtemp(dir=current_app.config['UPLOAD_FOLDER']))
            for index, uploaded_file in enumerate(uploaded_files):
                if not uploaded_file.filename:
                    continue
                # secure_filename() can return ''; the index prefix keeps
                # same-named uploads from overwriting each other.
                safe_name = secure_filename(uploaded_file.filename) or 'upload'
                temp_path = temp_dir / f"{index}_{safe_name}"
                uploaded_file.save(str(temp_path))
                additional_ref_images.append(str(temp_path))

        task = Task(
            project_id=project_id,
            task_type='EDIT_PAGE_IMAGE',
            status='PENDING'
        )
        task.set_progress({
            'total': 1,
            'completed': 0,
            'failed': 0
        })
        db.session.add(task)
        db.session.commit()

        # The background task needs the real app object, not the proxy.
        app = current_app._get_current_object()

        task_manager.submit_task(
            task.id,
            edit_page_image_task,
            project_id,
            page_id,
            data['edit_instruction'],
            ai_service,
            file_service,
            project.image_aspect_ratio,
            current_app.config['DEFAULT_RESOLUTION'],
            original_description,
            additional_ref_images if additional_ref_images else None,
            str(temp_dir) if temp_dir else None,
            app
        )
        # From here on the temp directory path has been handed to the task.
        task_submitted = True

        return success_response({
            'task_id': task.id,
            'page_id': page_id,
            'status': 'PENDING'
        }, status_code=202)

    except Exception as e:
        db.session.rollback()
        # Nothing will consume the uploads if the task was never submitted.
        if temp_dir is not None and not task_submitted:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return error_response('AI_SERVICE_ERROR', str(e), 503)
@page_bp.route('/<project_id>/pages/<page_id>/image-versions/<version_id>/set-current', methods=['POST'])
def set_current_image_version(project_id, page_id, version_id):
    """
    POST /api/projects/{project_id}/pages/{page_id}/image-versions/{version_id}/set-current
    Set a specific version as the current one

    Clears ``is_current`` on every version of the page, marks the requested
    one, and points the page's generated/cached image paths at it.
    Responds 404 when the page or the version does not match.
    """
    try:
        page = Page.query.get(page_id)
        if not page or page.project_id != project_id:
            return not_found('Page')

        version = PageImageVersion.query.get(version_id)
        if not version or version.page_id != page_id:
            return not_found('Image Version')

        # Reset all versions, then flag the chosen one.
        PageImageVersion.query.filter_by(page_id=page_id).update({'is_current': False})
        version.is_current = True
        page.generated_image_path = version.image_path

        # Point cached_image_path at this version's cached image when that
        # file exists; otherwise clear it.
        file_service = FileService(current_app.config['UPLOAD_FOLDER'])
        cached_relative_path = file_service.get_cached_image_path(
            project_id, page_id, version.version_number
        )
        if file_service.file_exists(cached_relative_path):
            page.cached_image_path = cached_relative_path
        else:
            page.cached_image_path = None

        page.updated_at = datetime.utcnow()
        db.session.commit()

        return success_response(page.to_dict(include_versions=True))

    except Exception as e:
        db.session.rollback()
        return error_response('SERVER_ERROR', str(e), 500)


@page_bp.route('/<project_id>/pages/<page_id>/regenerate-renovation', methods=['POST'])
def regenerate_renovation_page(project_id, page_id):
    """
    POST /api/projects/{project_id}/pages/{page_id}/regenerate-renovation

    Re-parse the original PDF page and regenerate outline + description
    for PPT renovation projects. This re-runs the renovation pipeline for a single page.

    Optional JSON body: ``language`` (defaults to the OUTPUT_LANGUAGE config
    value, then 'zh') and ``keep_layout`` (default false; when true a layout
    caption of the page image is appended to the description).

    Responds 404 for an unknown page/project, 400 when the project is not a
    renovation project, the split PDF is missing or no text could be
    extracted, and 500 on any other failure.
    """
    try:
        page = Page.query.get(page_id)
        if not page or page.project_id != project_id:
            return not_found('Page')

        project = Project.query.get(project_id)
        if not project:
            return not_found('Project')

        if project.creation_type != 'ppt_renovation':
            return bad_request("This endpoint is only for PPT renovation projects")

        data = request.get_json() or {}
        language = data.get('language', current_app.config.get('OUTPUT_LANGUAGE', 'zh'))
        keep_layout = data.get('keep_layout', False)

        # Split PDFs are named by 1-based page number.
        page_no = page.order_index + 1
        filename = f"page_{page_no}.pdf"
        split_dir = Path(current_app.config['UPLOAD_FOLDER']) / project_id / "split_pages"
        page_pdf_path = split_dir / filename

        if not page_pdf_path.exists():
            return bad_request(f"Split PDF not found for page {page_no}")

        ai_service = get_ai_service()

        from services.file_parser_service import FileParserService
        config = current_app.config
        file_parser_service = FileParserService(
            mineru_api_base=config.get('MINERU_API_BASE', ''),
            mineru_token=config.get('MINERU_TOKEN', ''),
            google_api_key=config.get('GOOGLE_API_KEY', ''),
            ai_provider_format=config.get('AI_PROVIDER_FORMAT', 'gemini'),
            openai_api_key=config.get('OPENAI_API_KEY', ''),
            openai_api_base=config.get('OPENAI_API_BASE', ''),
            image_caption_model=config.get('IMAGE_CAPTION_MODEL', 'gemini-3-flash-preview'),
            lazyllm_image_caption_source=config.get('IMAGE_CAPTION_MODEL_SOURCE', ''),
            upload_folder=config.get('UPLOAD_FOLDER', 'uploads')
        )
        file_service = FileService(current_app.config['UPLOAD_FOLDER'])

        # Step 1: parse the page PDF into markdown.
        logger.info(f"Regenerating renovation page {page_no}: parsing PDF...")
        _batch_id, md_text, extract_id, error_msg, _failed = file_parser_service.parse_file(
            str(page_pdf_path), filename
        )
        if error_msg:
            logger.warning(f"Page {page_no} parse warning: {error_msg}")
        # Normalise unconditionally so the .strip() below never sees None.
        md_text = md_text or ''

        # Prepend header/footer text recovered from the layout data, if any.
        if extract_id:
            hf_text = file_parser_service.extract_header_footer_from_layout(extract_id)
            if hf_text:
                md_text = hf_text + '\n\n' + md_text

        if not md_text.strip():
            return error_response('PARSE_ERROR', f"Failed to extract content from page {page_no}", 400)

        # Step 2: let the AI service structure the markdown.
        logger.info(f"Regenerating renovation page {page_no}: extracting content...")
        content = ai_service.extract_page_content(md_text, language=language)

        # Step 3: optional layout caption; best effort, failures are only logged.
        if keep_layout:
            try:
                image_path = None
                if page.cached_image_path:
                    image_path = file_service.get_absolute_path(page.cached_image_path)
                elif page.generated_image_path:
                    image_path = file_service.get_absolute_path(page.generated_image_path)

                if image_path and Path(image_path).exists():
                    caption = ai_service.generate_layout_caption(image_path)
                    if caption:
                        content['description'] = content.get('description', '') + f"\n\n{caption}"
            except Exception as e:
                logger.error(f"Layout caption failed for page {page_no}: {e}")

        # Step 4: persist outline and description.
        page.set_outline_content({
            'title': content.get('title', f'Page {page_no}'),
            'points': content.get('points', [])
        })
        page.set_description_content({
            "text": content.get('description', ''),
            "generated_at": datetime.utcnow().isoformat()
        })
        page.status = 'DESCRIPTION_GENERATED'
        page.updated_at = datetime.utcnow()

        db.session.commit()

        logger.info(f"Regenerated renovation page {page_no} successfully")
        return success_response(page.to_dict())

    except Exception as e:
        db.session.rollback()
        logger.error(f"Failed to regenerate renovation page: {e}", exc_info=True)
        return error_response('SERVER_ERROR', str(e), 500)
================================================ """ Project Controller - handles project-related endpoints """ import json import logging import os import subprocess import traceback from datetime import datetime from pathlib import Path from flask import Blueprint, request, jsonify, current_app, Response, stream_with_context from sqlalchemy import desc from utils.validators import normalize_aspect_ratio from sqlalchemy.orm import joinedload from werkzeug.exceptions import BadRequest from werkzeug.utils import secure_filename from models import db, Project, Page, Task, ReferenceFile from services import ProjectContext, FileService from services.ai_service_manager import get_ai_service from services.task_manager import ( task_manager, generate_descriptions_task, generate_images_task, process_ppt_renovation_task ) from utils import ( success_response, error_response, not_found, bad_request, parse_page_ids_from_body, get_filtered_pages ) logger = logging.getLogger(__name__) project_bp = Blueprint('projects', __name__, url_prefix='/api/projects') def _get_project_reference_files_content(project_id: str) -> list: """ Get reference files content for a project Args: project_id: Project ID Returns: List of dicts with 'filename' and 'content' keys """ reference_files = ReferenceFile.query.filter_by( project_id=project_id, parse_status='completed' ).all() files_content = [] for ref_file in reference_files: if ref_file.markdown_content: files_content.append({ 'filename': ref_file.filename, 'content': ref_file.markdown_content }) return files_content def _reconstruct_outline_from_pages(pages: list) -> list: """ Reconstruct outline structure from Page objects Args: pages: List of Page objects ordered by order_index Returns: Outline structure (list) with optional part grouping """ outline = [] current_part = None current_part_pages = [] for page in pages: outline_content = page.get_outline_content() if not outline_content: continue page_data = outline_content.copy() # 
如果当前页面属于一个 part if page.part: # 如果这是新的 part,先保存之前的 part(如果有) if current_part and current_part != page.part: outline.append({ "part": current_part, "pages": current_part_pages }) current_part_pages = [] current_part = page.part # 移除 part 字段,因为它在顶层 if 'part' in page_data: del page_data['part'] current_part_pages.append(page_data) else: # 如果当前页面不属于任何 part,先保存之前的 part(如果有) if current_part: outline.append({ "part": current_part, "pages": current_part_pages }) current_part = None current_part_pages = [] # 直接添加页面 outline.append(page_data) # 保存最后一个 part(如果有) if current_part: outline.append({ "part": current_part, "pages": current_part_pages }) return outline def _smart_merge_pages(project_id, pages_data): """Position-based merge: reuse existing pages by index to preserve descriptions/images. For each new page at index i: - If an old page exists at the same position, update its outline (title/points/part) in place, keeping description_content and image fields untouched. - If no old page at that position, create a new page. Old pages beyond the new page count are deleted. 
""" old_pages = Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all() pages_list = [] for i, page_data in enumerate(pages_data): if i < len(old_pages): page = old_pages[i] else: page = Page(project_id=project_id, status='DRAFT') db.session.add(page) page.order_index = i page.part = page_data.get('part') page.set_outline_content({ 'title': page_data.get('title'), 'points': page_data.get('points', []) }) pages_list.append(page) for p in old_pages[len(pages_data):]: db.session.delete(p) return pages_list @project_bp.route('', methods=['GET']) def list_projects(): """ GET /api/projects - Get all projects (for history) Query params: - limit: number of projects to return (default: 50, max: 100) - offset: offset for pagination (default: 0) """ try: # Parameter validation limit = request.args.get('limit', 50, type=int) offset = request.args.get('offset', 0, type=int) # Enforce limits to prevent performance issues limit = min(max(1, limit), 100) # Between 1-100 offset = max(0, offset) # Non-negative # Get total count for pagination total = Project.query.count() projects = Project.query\ .options(joinedload(Project.pages))\ .order_by(desc(Project.updated_at))\ .limit(limit)\ .offset(offset)\ .all() return success_response({ 'projects': [project.to_dict(include_pages=True) for project in projects], 'total': total, 'limit': limit, 'offset': offset }) except Exception as e: logger.error(f"list_projects failed: {str(e)}", exc_info=True) return error_response('SERVER_ERROR', str(e), 500) @project_bp.route('', methods=['POST']) def create_project(): """ POST /api/projects - Create a new project Request body: { "creation_type": "idea|outline|descriptions", "idea_prompt": "...", # required for idea type "outline_text": "...", # required for outline type "description_text": "...", # required for descriptions type "template_id": "optional" } """ try: data = request.get_json() if not data: return bad_request("Request body is required") # creation_type is required 
@project_bp.route('/<project_id>', methods=['GET'])
def get_project(project_id):
    """
    GET /api/projects/{project_id} - Get project details

    Loads the project together with its pages and returns it with the pages
    included. Responds 404 when no project has this id.
    """
    try:
        # Eager-load the pages along with the project.
        project = (
            Project.query
            .options(joinedload(Project.pages))
            .filter(Project.id == project_id)
            .first()
        )
        if not project:
            return not_found('Project')

        return success_response(project.to_dict(include_pages=True))

    except Exception as e:
        logger.error(f"get_project failed: {str(e)}", exc_info=True)
        return error_response('SERVER_ERROR', str(e), 500)


@project_bp.route('/<project_id>', methods=['PUT'])
def update_project(project_id):
    """
    PUT /api/projects/{project_id} - Update project

    Request body:
    {
        "idea_prompt": "...",
        "pages_order": ["page-uuid-1", "page-uuid-2", ...]
    }

    Any of these keys may be present and is applied when it is:
    idea_prompt, outline_text, description_text, extra_requirements,
    outline_requirements, description_requirements, template_style,
    image_aspect_ratio, export_extractor_method, export_inpaint_method,
    pages_order.

    Responds 404 for an unknown project and 400 when the body is not a JSON
    object, the aspect ratio is rejected by normalize_aspect_ratio, or
    pages_order is not a list.
    """
    plain_fields = (
        'idea_prompt',
        'outline_text',
        'description_text',
        'extra_requirements',
        'outline_requirements',
        'description_requirements',
        'template_style',
        'export_extractor_method',
        'export_inpaint_method',
    )
    try:
        # Eager-load the pages; they are returned and may be reordered.
        project = (
            Project.query
            .options(joinedload(Project.pages))
            .filter(Project.id == project_id)
            .first()
        )
        if not project:
            return not_found('Project')

        # A missing or non-object body used to surface as a 500 from the
        # first "'key' in data" test.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return bad_request("Request body must be a JSON object")

        # Validate everything that can be rejected before touching the
        # project, so a 400 never leaves half-applied changes in the session.
        aspect_ratio = None
        if 'image_aspect_ratio' in data:
            try:
                aspect_ratio = normalize_aspect_ratio(data['image_aspect_ratio'])
            except ValueError as e:
                return bad_request(str(e))

        pages_order = data.get('pages_order')
        if 'pages_order' in data and not isinstance(pages_order, list):
            return bad_request("pages_order must be an array of page ids")

        for field in plain_fields:
            if field in data:
                setattr(project, field, data[field])

        if 'image_aspect_ratio' in data:
            project.image_aspect_ratio = aspect_ratio

        if 'pages_order' in data:
            # One query for all affected pages instead of one per id.
            pages_to_update = Page.query.filter(
                Page.id.in_(pages_order),
                Page.project_id == project_id
            ).all()
            pages_map = {page.id: page for page in pages_to_update}

            # Ids that are unknown or belong to another project are ignored.
            for index, page_id in enumerate(pages_order):
                if page_id in pages_map:
                    pages_map[page_id].order_index = index

        project.updated_at = datetime.utcnow()
        db.session.commit()

        return success_response(project.to_dict(include_pages=True))

    except Exception as e:
        db.session.rollback()
        logger.error(f"update_project failed: {str(e)}", exc_info=True)
        return error_response('SERVER_ERROR', str(e), 500)


@project_bp.route('/<project_id>', methods=['DELETE'])
def delete_project(project_id):
    """
    DELETE /api/projects/{project_id} - Delete project

    Deletes the project's files through FileService and then the Project
    row. Responds 404 when no project has this id.
    """
    try:
        project = Project.query.get(project_id)
        if not project:
            return not_found('Project')

        file_service = FileService(current_app.config['UPLOAD_FOLDER'])
        file_service.delete_project_files(project_id)

        # Pages and tasks are expected to be removed by the model's cascade
        # (configured on the model, not visible here).
        db.session.delete(project)
        db.session.commit()

        return success_response(message="Project deleted successfully")

    except Exception as e:
        db.session.rollback()
        logger.error(f"delete_project failed: {str(e)}", exc_info=True)
        return error_response('SERVER_ERROR', str(e), 500)
_get_project_reference_files_content(project_id) if reference_files_content: logger.info(f"Found {len(reference_files_content)} reference files for project {project_id}") for rf in reference_files_content: logger.info(f" - {rf['filename']}: {len(rf['content'])} characters") else: logger.info(f"No reference files found for project {project_id}") # 根据项目类型选择不同的处理方式 if project.creation_type == 'outline': # 从大纲生成:解析用户输入的大纲文本 if not project.outline_text: return bad_request("outline_text is required for outline type project") # Create project context and parse outline text into structured format project_context = ProjectContext(project, reference_files_content) outline = ai_service.parse_outline_text(project_context, language=language) elif project.creation_type == 'descriptions': # 从描述生成:从 description_text 提取大纲结构(仅大纲,不含页面描述) if not project.description_text: return bad_request("description_text is required for descriptions type project") project_context = ProjectContext(project, reference_files_content) outline = ai_service.parse_description_to_outline(project_context, language=language) else: # 一句话生成:从idea生成大纲 idea_prompt = data.get('idea_prompt') or project.idea_prompt if not idea_prompt: return bad_request("idea_prompt is required") project.idea_prompt = idea_prompt # Create project context and generate outline from idea project_context = ProjectContext(project, reference_files_content) outline = ai_service.generate_outline(project_context, language=language) # Flatten outline to pages and smart merge with existing pages_data = ai_service.flatten_outline(outline) pages_list = _smart_merge_pages(project_id, pages_data) # Update project status (don't downgrade if all pages already have content) if all(p.description_content for p in pages_list) and pages_list: project.status = 'DESCRIPTIONS_GENERATED' else: project.status = 'OUTLINE_GENERATED' project.updated_at = datetime.utcnow() db.session.commit() logger.info(f"大纲生成完成: 项目 {project_id}, 创建了 {len(pages_list)} 个页面") # 
Return pages return success_response({ 'pages': [page.to_dict() for page in pages_list] }) except Exception as e: db.session.rollback() logger.error(f"generate_outline failed: {str(e)}", exc_info=True) return error_response('AI_SERVICE_ERROR', str(e), 503) @project_bp.route('//generate/outline/stream', methods=['POST']) def generate_outline_stream(project_id): """ POST /api/projects/{project_id}/generate/outline/stream - Stream outline generation via SSE Streams pages one-by-one as they are generated. Each page is sent as an SSE event. After all pages are streamed, saves them to the database. SSE events: event: page — a single page object {index, title, points, part?} event: done — generation complete {total, pages: [...with ids...]} event: error — error occurred {message} """ # Validate project exists before entering the generator project = Project.query.get(project_id) if not project: return not_found('Project') data = request.get_json() or {} language = data.get('language', current_app.config.get('OUTPUT_LANGUAGE', 'zh')) # Capture app reference for use inside the generator (which runs outside request context) app = current_app._get_current_object() def sse_generate(): with app.app_context(): try: # Re-fetch project inside app context to attach to this session proj = db.session.get(Project, project_id) ai_service = get_ai_service() reference_files_content = _get_project_reference_files_content(project_id) # Validate input based on creation type if proj.creation_type == 'outline' and not proj.outline_text: yield _sse_event('error', {'message': 'outline_text is required'}) return if proj.creation_type == 'descriptions' and not proj.description_text: yield _sse_event('error', {'message': 'description_text is required'}) return # Update idea_prompt if provided if proj.creation_type not in ('outline', 'descriptions'): idea_prompt = data.get('idea_prompt') or proj.idea_prompt if not idea_prompt: yield _sse_event('error', {'message': 'idea_prompt is required'}) return 
proj.idea_prompt = idea_prompt project_context = ProjectContext(proj, reference_files_content) # Stream pages from AI streamed_pages = [] stream_complete = False for page_data in ai_service.generate_outline_stream(project_context, language=language): # Check for completion sentinel if '__stream_complete__' in page_data: stream_complete = page_data['__stream_complete__'] continue i = len(streamed_pages) streamed_pages.append(page_data) yield _sse_event('page', { 'index': i, 'title': page_data.get('title', ''), 'points': page_data.get('points', []), 'part': page_data.get('part'), }) # Handle lock_page_count: pad with blank pages if needed lock_page_count = data.get('lock_page_count', False) if lock_page_count: old_pages = Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all() old_count = len(old_pages) new_count = len(streamed_pages) if new_count < old_count: for _ in range(old_count - new_count): streamed_pages.append({'title': '', 'points': []}) # Save all pages to database pages_list = _smart_merge_pages(project_id, streamed_pages) if all(p.description_content for p in pages_list) and pages_list: proj.status = 'DESCRIPTIONS_GENERATED' else: proj.status = 'OUTLINE_GENERATED' proj.updated_at = datetime.utcnow() db.session.commit() logger.info(f"流式大纲生成完成: 项目 {project_id}, {len(pages_list)} 个页面") yield _sse_event('done', { 'total': len(pages_list), 'pages': [p.to_dict() for p in pages_list], 'complete': stream_complete, }) except Exception as e: try: db.session.rollback() except Exception as rollback_exc: logger.warning(f"Session rollback failed: {rollback_exc}", exc_info=True) logger.error(f"generate_outline_stream failed: {str(e)}", exc_info=True) yield _sse_event('error', {'message': '生成过程中发生内部错误'}) return Response( stream_with_context(sse_generate()), mimetype='text/event-stream', headers={ 'Cache-Control': 'no-cache, no-transform', 'X-Accel-Buffering': 'no', 'Connection': 'keep-alive', }, ) def _sse_event(event: str, data: dict) -> str: 
"""Format a single SSE event.""" return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n" @project_bp.route('//generate/from-description', methods=['POST']) def generate_from_description(project_id): """ POST /api/projects/{project_id}/generate/from-description - Generate outline and page descriptions from description text This endpoint: 1. Parses the description_text to extract outline structure 2. Splits the description_text into individual page descriptions 3. Creates pages with both outline and description content filled 4. Sets project status to DESCRIPTIONS_GENERATED Request body (optional): { "description_text": "...", # if not provided, uses project.description_text "language": "zh" # output language: zh, en, ja, auto } """ try: project = Project.query.get(project_id) if not project: return not_found('Project') if project.creation_type != 'descriptions': return bad_request("This endpoint is only for descriptions type projects") # Get description text and language data = request.get_json() or {} description_text = data.get('description_text') or project.description_text language = data.get('language', current_app.config.get('OUTPUT_LANGUAGE', 'zh')) if not description_text: return bad_request("description_text is required") project.description_text = description_text # Get singleton AI service instance ai_service = get_ai_service() # Get reference files content and create project context reference_files_content = _get_project_reference_files_content(project_id) project_context = ProjectContext(project, reference_files_content) logger.info(f"开始从描述生成大纲和页面描述: 项目 {project_id}") # Step 1: Parse description to outline logger.info("Step 1: 解析描述文本到大纲结构...") outline = ai_service.parse_description_to_outline(project_context, language=language) logger.info(f"大纲解析完成,共 {len(ai_service.flatten_outline(outline))} 页") # Step 2: Split description into page descriptions logger.info("Step 2: 切分描述文本到每页描述...") page_descriptions = 
# NOTE(review): this route pattern carries no project_id URL variable even though
# the view takes project_id — confirm the pattern against the repository copy.
@project_bp.route('//generate/descriptions', methods=['POST'])
def generate_descriptions(project_id):
    """
    POST /api/projects/{project_id}/generate/descriptions - Generate descriptions

    Records a GENERATE_DESCRIPTIONS task, hands the work to task_manager and
    answers immediately with HTTP 202.

    Request body (every key optional):
    {
        "max_workers": 5,           # default: config MAX_DESCRIPTION_WORKERS, else 5
        "language": "zh",           # default: config OUTPUT_LANGUAGE, else 'zh'
        "detail_level": "default"
    }

    Returns:
        202 with {task_id, status, total_pages}; not_found / bad_request when
        the project or its pages are missing; SERVER_ERROR (500) after a
        session rollback if anything raises.
    """
    try:
        project = Project.query.get(project_id)
        if not project:
            return not_found('Project')
        if not project.pages:
            return bad_request("Project must have outline generated first")

        # Expire cached objects so the page query below sees fresh data.
        db.session.expire_all()

        ordered_pages = (
            Page.query
            .filter_by(project_id=project_id)
            .order_by(Page.order_index)
            .all()
        )
        if not ordered_pages:
            return bad_request("No pages found for project")
        page_total = len(ordered_pages)

        # Rebuild the (part-aware) outline from the stored pages.
        outline = _reconstruct_outline_from_pages(ordered_pages)

        payload = request.get_json() or {}
        # Request values win; configuration supplies the defaults.
        max_workers = payload.get('max_workers', current_app.config.get('MAX_DESCRIPTION_WORKERS', 5))
        language = payload.get('language', current_app.config.get('OUTPUT_LANGUAGE', 'zh'))
        detail_level = payload.get('detail_level', 'default')

        # Persist the task record before any work is queued.
        task = Task(project_id=project_id, task_type='GENERATE_DESCRIPTIONS', status='PENDING')
        task.set_progress({'total': page_total, 'completed': 0, 'failed': 0})
        db.session.add(task)
        db.session.commit()

        ai_service = get_ai_service()
        project_context = ProjectContext(
            project,
            _get_project_reference_files_content(project_id),
        )

        # The background task receives the concrete app object.
        app = current_app._get_current_object()

        task_args = (
            project_id,
            ai_service,
            project_context,
            outline,
            max_workers,
            app,
            language,
            detail_level,
        )
        task_manager.submit_task(task.id, generate_descriptions_task, *task_args)

        project.status = 'GENERATING_DESCRIPTIONS'
        db.session.commit()

        body = {
            'task_id': task.id,
            'status': 'GENERATING_DESCRIPTIONS',
            'total_pages': page_total,
        }
        return success_response(body, status_code=202)

    except Exception as e:
        db.session.rollback()
        logger.error(f"generate_descriptions failed: {str(e)}", exc_info=True)
        return error_response('SERVER_ERROR', str(e), 500)
desc_content['extra_fields'] = result['extra_fields'] page.set_description_content(desc_content) page.status = 'DESCRIPTION_GENERATED' page.updated_at = datetime.utcnow() db.session.commit() yield _sse_event('description', { 'page_index': idx, 'page_id': page.id, 'text': desc_content['text'], 'extra_fields': result.get('extra_fields'), }) # 检查是否所有页面都已生成描述 missing = [p for p in pages if p.status == 'GENERATING_DESCRIPTION'] if missing: for p in missing: # 有旧描述的保留,无描述的恢复 DRAFT p.status = 'DESCRIPTION_GENERATED' if p.description_content else 'DRAFT' p.updated_at = datetime.utcnow() logger.warning(f"流式描述生成不完整: {len(missing)}/{len(pages)} 页未生成") proj.status = 'DESCRIPTIONS_GENERATED' proj.updated_at = datetime.utcnow() db.session.commit() # Re-fetch pages for final response pages = Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all() yield _sse_event('done', { 'total': len(pages), 'pages': [p.to_dict() for p in pages], **(({'warning': f'{len(missing)} 页描述未生成,请重试'}) if missing else {}), }) except Exception as e: try: db.session.rollback() except Exception as rollback_exc: logger.warning(f"Session rollback failed: {rollback_exc}", exc_info=True) logger.error(f"generate_descriptions_stream failed: {str(e)}", exc_info=True) # 恢复未完成页面的状态:已生成描述的保留,未生成的恢复为 DRAFT try: pages = Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all() proj = db.session.get(Project, project_id) has_any_desc = False for page in pages: if page.status == 'GENERATING_DESCRIPTION': # 如果之前就有描述内容,恢复为 DESCRIPTION_GENERATED if page.description_content: page.status = 'DESCRIPTION_GENERATED' has_any_desc = True else: page.status = 'DRAFT' elif page.status == 'DESCRIPTION_GENERATED': has_any_desc = True if proj: proj.status = 'DESCRIPTIONS_GENERATED' if has_any_desc else 'OUTLINE_GENERATED' proj.updated_at = datetime.utcnow() db.session.commit() except Exception as recover_exc: logger.warning(f"Failed to recover page statuses: {recover_exc}", exc_info=True) yield 
# NOTE(review): this route pattern carries no project_id URL variable even though
# the view takes project_id — confirm the pattern against the repository copy.
@project_bp.route('//generate/images', methods=['POST'])
def generate_images(project_id):
    """
    POST /api/projects/{project_id}/generate/images - Generate images

    Records a GENERATE_IMAGES task, marks the target pages QUEUED, hands the
    work to task_manager and answers immediately with HTTP 202.

    Request body:
    {
        "max_workers": 8,           # default: config MAX_IMAGE_WORKERS, else 8
        "use_template": true,
        "language": "zh",  # output language: zh, en, ja, auto
        "page_ids": ["id1", "id2"]  # optional: specific page IDs to generate (if not provided, generates all)
    }

    Returns:
        202 with {task_id, status, total_pages}; not_found when the project
        does not exist; bad_request when no pages match or when neither a
        template image nor a style description is available; SERVER_ERROR
        (500) after a session rollback if anything raises.
    """
    try:
        project = Project.query.get(project_id)

        if not project:
            return not_found('Project')

        # if project.status not in ['DESCRIPTIONS_GENERATED', 'OUTLINE_GENERATED']:
        #     return bad_request("Project must have descriptions generated first")

        # IMPORTANT: Expire cached objects to ensure fresh data
        db.session.expire_all()

        data = request.get_json() or {}

        # Normalize the optional page filter once; an empty selection means "all pages".
        selected_page_ids = parse_page_ids_from_body(data)
        page_id_filter = selected_page_ids if selected_page_ids else None
        pages = get_filtered_pages(project_id, page_id_filter)

        if not pages:
            return bad_request("No pages found for project")

        # A template image or a style description must be available.
        from services import FileService
        file_service = FileService(current_app.config['UPLOAD_FOLDER'])

        # Read once and reuse for both the template check and the task arguments.
        use_template = data.get('use_template', True)
        ref_image_path = None
        if use_template:
            ref_image_path = file_service.get_template_path(project_id)

        if not ref_image_path and not project.template_style:
            return bad_request("请先上传模板图片或添加风格描述。")

        # Reconstruct outline from pages with part structure
        outline = _reconstruct_outline_from_pages(pages)

        # Request values win; configuration supplies the defaults.
        max_workers = data.get('max_workers', current_app.config.get('MAX_IMAGE_WORKERS', 8))
        language = data.get('language', current_app.config.get('OUTPUT_LANGUAGE', 'zh'))

        # Create task
        task = Task(
            project_id=project_id,
            task_type='GENERATE_IMAGES',
            status='PENDING'
        )
        task.set_progress({
            'total': len(pages),
            'completed': 0,
            'failed': 0
        })

        db.session.add(task)
        db.session.commit()

        # Get singleton AI service instance
        ai_service = get_ai_service()

        # Merge the extra requirements with the style description.
        combined_requirements = project.extra_requirements or ""
        if project.template_style:
            style_requirement = f"\n\nppt页面风格描述:\n\n{project.template_style}"
            combined_requirements = combined_requirements + style_requirement

        # Set all target pages to QUEUED before submitting background task
        # This ensures the status is visible to frontend immediately after API returns
        for page in pages:
            page.status = 'QUEUED'
        db.session.commit()

        # Get app instance for background task
        app = current_app._get_current_object()

        # Submit background task
        task_manager.submit_task(
            task.id,
            generate_images_task,
            project_id,
            ai_service,
            file_service,
            outline,
            use_template,
            max_workers,
            project.image_aspect_ratio,
            current_app.config['DEFAULT_RESOLUTION'],
            app,
            # Whitespace-only requirements are passed as None.
            combined_requirements if combined_requirements.strip() else None,
            language,
            page_id_filter
        )

        # Update project status
        project.status = 'GENERATING_IMAGES'
        db.session.commit()

        return success_response({
            'task_id': task.id,
            'status': 'GENERATING_IMAGES',
            'total_pages': len(pages)
        }, status_code=202)

    except Exception as e:
        db.session.rollback()
        logger.error(f"generate_images failed: {str(e)}", exc_info=True)
        return error_response('SERVER_ERROR', str(e), 500)
refine_outline(project_id): """ POST /api/projects/{project_id}/refine/outline - Refine outline based on user requirements Request body: { "user_requirement": "用户要求,例如:增加一页关于XXX的内容", "language": "zh" # output language: zh, en, ja, auto } """ try: project = Project.query.get(project_id) if not project: return not_found('Project') data = request.get_json() if not data or not data.get('user_requirement'): return bad_request("user_requirement is required") user_requirement = data['user_requirement'] # IMPORTANT: Expire all cached objects to ensure we get fresh data from database # This prevents issues when multiple refine operations are called in sequence db.session.expire_all() # Get current outline from pages pages = Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all() # Reconstruct current outline from pages (如果没有页面,使用空列表) if not pages: logger.info(f"项目 {project_id} 当前没有页面,将从空开始生成") current_outline = [] # 空大纲 else: current_outline = _reconstruct_outline_from_pages(pages) # Get singleton AI service instance ai_service = get_ai_service() # Get reference files content and create project context reference_files_content = _get_project_reference_files_content(project_id) if reference_files_content: logger.info(f"Found {len(reference_files_content)} reference files for refine_outline") for rf in reference_files_content: logger.info(f" - {rf['filename']}: {len(rf['content'])} characters") else: logger.info(f"No reference files found for project {project_id}") project_context = ProjectContext(project.to_dict(), reference_files_content) # Get previous requirements and language from request previous_requirements = data.get('previous_requirements', []) language = data.get('language', current_app.config.get('OUTPUT_LANGUAGE', 'zh')) # Refine outline logger.info(f"开始修改大纲: 项目 {project_id}, 用户要求: {user_requirement}, 历史要求数: {len(previous_requirements)}") refined_outline = ai_service.refine_outline( current_outline=current_outline, 
# NOTE(review): this route pattern carries no project_id URL variable even though
# the view takes project_id — confirm the pattern against the repository copy.
@project_bp.route('//refine/descriptions', methods=['POST'])
def refine_descriptions(project_id):
    """
    POST /api/projects/{project_id}/refine/descriptions - Refine page descriptions based on user requirements

    Request body:
    {
        "user_requirement": "用户要求,例如:让描述更详细一些",
        "previous_requirements": [],   # optional, earlier requirements
        "language": "zh"  # output language: zh, en, ja, auto
    }

    Returns:
        success_response with the updated pages; not_found when the project
        does not exist; bad_request when user_requirement is missing, the
        project has no pages, or the AI returns a different number of
        descriptions than there are pages; AI_SERVICE_ERROR (503) after a
        session rollback if anything raises.
    """
    try:
        project = Project.query.get(project_id)

        if not project:
            return not_found('Project')

        data = request.get_json()

        if not data or not data.get('user_requirement'):
            return bad_request("user_requirement is required")

        user_requirement = data['user_requirement']

        # Expire cached objects so the page query below sees fresh data.
        db.session.expire_all()

        # Get current pages
        pages = Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all()

        if not pages:
            logger.info(f"项目 {project_id} 当前没有页面,无法修改描述")
            return bad_request("No pages found for project. Please generate outline first.")

        # Pages without any description are allowed; refinement then starts from empty text.
        has_descriptions = any(page.description_content for page in pages)
        if not has_descriptions:
            logger.info(f"项目 {project_id} 当前没有描述,将基于大纲生成新描述")

        # Reconstruct outline from pages
        outline = _reconstruct_outline_from_pages(pages)

        # Prepare current descriptions
        current_descriptions = []
        for i, page in enumerate(pages):
            outline_content = page.get_outline_content()
            desc_content = page.get_description_content()

            current_descriptions.append({
                'index': i,
                'title': outline_content.get('title', '未命名') if outline_content else '未命名',
                'description_content': desc_content if desc_content else ''
            })

        # Get singleton AI service instance
        ai_service = get_ai_service()

        # Get reference files content and create project context
        reference_files_content = _get_project_reference_files_content(project_id)
        if reference_files_content:
            logger.info(f"Found {len(reference_files_content)} reference files for refine_descriptions")
            for rf in reference_files_content:
                logger.info(f" - {rf['filename']}: {len(rf['content'])} characters")
        else:
            logger.info(f"No reference files found for project {project_id}")

        project_context = ProjectContext(project.to_dict(), reference_files_content)

        # An explicit null for previous_requirements is treated like an absent key,
        # so the len() call in the log line below cannot fail on None.
        previous_requirements = data.get('previous_requirements') or []
        language = data.get('language', current_app.config.get('OUTPUT_LANGUAGE', 'zh'))

        # Refine descriptions
        logger.info(f"开始修改页面描述: 项目 {project_id}, 用户要求: {user_requirement}, 历史要求数: {len(previous_requirements)}")
        refined_descriptions = ai_service.refine_descriptions(
            current_descriptions=current_descriptions,
            user_requirement=user_requirement,
            project_context=project_context,
            outline=outline,
            previous_requirements=previous_requirements,
            language=language
        )

        # Validate the number of descriptions returned by the AI.
        if len(refined_descriptions) != len(pages):
            # The mismatch text is both logged and returned, so the client sees
            # the actual problem and not only the trailing hint.
            error_msg = f"AI 返回的描述数量不匹配: 期望 {len(pages)} 个页面,实际返回 {len(refined_descriptions)} 个描述。"
            logger.error(error_msg)

            # Point the user at the outline page when the AI tried to add or drop pages.
            if len(refined_descriptions) > len(pages):
                error_msg += " 提示:如需增加页面,请在大纲页面进行操作。"
            else:
                error_msg += " 提示:如需删除页面,请在大纲页面进行操作。"

            return bad_request(error_msg)

        # Update pages with refined descriptions
        for page, refined_desc in zip(pages, refined_descriptions):
            desc_content = {
                "text": refined_desc,
                "generated_at": datetime.utcnow().isoformat()
            }
            page.set_description_content(desc_content)
            page.status = 'DESCRIPTION_GENERATED'

        # Update project status
        project.status = 'DESCRIPTIONS_GENERATED'
        project.updated_at = datetime.utcnow()
        db.session.commit()

        logger.info(f"页面描述修改完成: 项目 {project_id}, 更新了 {len(pages)} 个页面")

        # Return pages
        return success_response({
            'pages': [page.to_dict() for page in pages],
            'message': '页面描述修改成功'
        })

    except Exception as e:
        db.session.rollback()
        logger.error(f"refine_descriptions failed: {str(e)}", exc_info=True)
        return error_response('AI_SERVICE_ERROR', str(e), 503)
Content-Type: multipart/form-data Form: file: PDF or PPTX file (required) keep_layout: "true"/"false" - whether to preserve layout via caption model (optional, default false) template_style: style description text (optional) Returns: {project_id, task_id, page_count} """ try: # Validate file if 'file' not in request.files: return bad_request("No file uploaded") file = request.files['file'] if file.filename == '': return bad_request("No file selected") # Check file extension filename = file.filename.lower() if not (filename.endswith('.pdf') or filename.endswith('.pptx') or filename.endswith('.ppt')): return bad_request("Only PDF and PPTX files are supported") keep_layout = request.form.get('keep_layout', 'false').lower() == 'true' template_style = request.form.get('template_style', '').strip() or None # Create project project = Project( creation_type='ppt_renovation', template_style=template_style, status='DRAFT' ) db.session.add(project) db.session.commit() project_id = project.id # Save uploaded file file_service = FileService(current_app.config['UPLOAD_FOLDER']) project_dir = Path(current_app.config['UPLOAD_FOLDER']) / project_id template_dir = project_dir / "template" template_dir.mkdir(parents=True, exist_ok=True) # Save original file safe_name = secure_filename(file.filename) safe_name = secure_filename(file.filename) original_path = template_dir / safe_name file.save(str(original_path)) # Convert PPTX to PDF if needed pdf_path = str(original_path) if safe_name.lower().endswith(('.pptx', '.ppt')): try: subprocess.run( ['libreoffice', '--headless', '--convert-to', 'pdf', '--outdir', str(template_dir), str(original_path)], check=True, timeout=120, capture_output=True ) pdf_name = safe_name.rsplit('.', 1)[0] + '.pdf' pdf_path = str(template_dir / pdf_name) if not os.path.exists(pdf_path): raise ValueError("PDF conversion failed - output file not found") logger.info(f"Converted PPTX to PDF: {pdf_path}") except subprocess.TimeoutExpired: raise ValueError("PPTX to 
PDF conversion timed out") except FileNotFoundError: raise ValueError("PPTX conversion requires LibreOffice, which is not installed. Please convert your PPTX to PDF locally before uploading.") # Convert PDF to page images using PyMuPDF or pdf2image pages_dir = project_dir / "pages" pages_dir.mkdir(parents=True, exist_ok=True) page_image_paths = [] pdf_page_width = None pdf_page_height = None try: import fitz # PyMuPDF doc = fitz.open(pdf_path) # Extract page dimensions from the first page before rendering if len(doc) > 0: rect = doc[0].rect pdf_page_width = rect.width pdf_page_height = rect.height for i, fitz_page in enumerate(doc): try: mat = fitz.Matrix(2, 2) pix = fitz_page.get_pixmap(matrix=mat) img_path = str(pages_dir / f"page_{i + 1}_original.png") pix.save(img_path) page_image_paths.append(img_path) except Exception as e: logger.error(f"Failed to render page {i + 1} with PyMuPDF: {e}") page_image_paths.append(None) doc.close() except ImportError: # Fallback: use pdf2image try: from pdf2image import convert_from_path images = convert_from_path(pdf_path, dpi=200) for i, img in enumerate(images): try: # Extract page dimensions from the first image if pdf_page_width is None: pdf_page_width = img.width pdf_page_height = img.height img_path = str(pages_dir / f"page_{i + 1}_original.png") img.save(img_path, 'PNG') page_image_paths.append(img_path) except Exception as e: logger.error(f"Failed to render page {i + 1} with pdf2image: {e}") page_image_paths.append(None) except ImportError: raise ValueError("Neither PyMuPDF nor pdf2image is available for PDF rendering") # Fail-fast if no pages rendered at all valid_pages = [p for p in page_image_paths if p is not None] if not valid_pages: raise ValueError("All pages failed to render from PDF") logger.info(f"Rendered {len(valid_pages)}/{len(page_image_paths)} page images from PDF") # Set project aspect ratio from PDF page dimensions if pdf_page_width and pdf_page_height and pdf_page_width > 0 and pdf_page_height > 0: 
try: raw_ratio = f"{int(round(pdf_page_width))}:{int(round(pdf_page_height))}" project.image_aspect_ratio = normalize_aspect_ratio(raw_ratio) logger.info(f"Set project aspect ratio from PDF: {pdf_page_width}x{pdf_page_height} -> {project.image_aspect_ratio}") except (ValueError, OverflowError) as e: logger.warning(f"Could not normalize PDF aspect ratio ({pdf_page_width}x{pdf_page_height}): {e}, keeping default 16:9") # Create Page records with initial images from services.task_manager import save_image_with_version from PIL import Image as PILImage pages_list = [] for i, img_path in enumerate(page_image_paths): if img_path is None: logger.warning(f"Skipping page {i + 1}: render failed") continue page = Page( project_id=project_id, order_index=len(pages_list), status='DRAFT' ) page.set_outline_content({ 'title': f'Page {i + 1}', 'points': [] }) db.session.add(page) db.session.flush() # Get page.id # Save the PDF page image as initial version img = PILImage.open(img_path) image_path, _version = save_image_with_version( img, project_id, page.id, file_service, page_obj=page ) img.close() pages_list.append(page) db.session.commit() # Create async task task = Task( project_id=project_id, task_type='PPT_RENOVATION', status='PENDING' ) task.set_progress({ 'total': len(pages_list), 'completed': 0, 'failed': 0, 'current_step': 'queued' }) db.session.add(task) db.session.commit() # Get services ai_service = get_ai_service() from services.file_parser_service import FileParserService file_parser_service = FileParserService( mineru_token=current_app.config['MINERU_TOKEN'], mineru_api_base=current_app.config['MINERU_API_BASE'], google_api_key=current_app.config.get('GOOGLE_API_KEY', ''), google_api_base=current_app.config.get('GOOGLE_API_BASE', ''), openai_api_key=current_app.config.get('OPENAI_API_KEY', ''), openai_api_base=current_app.config.get('OPENAI_API_BASE', ''), image_caption_model=current_app.config['IMAGE_CAPTION_MODEL'], 
@style_bp.route('/extract-style', methods=['POST'])
def extract_style():
    """
    POST /api/extract-style - Extract a style description from an uploaded image.

    Content-Type: multipart/form-data
    Form:
        image: Image file (required)

    Returns:
        {style_description: "..."} on success.
        A bad-request response when no image part or no filename is supplied.
        An 'AI_SERVICE_ERROR' response (HTTP 503) when anything else fails.
    """
    import tempfile

    try:
        if 'image' not in request.files:
            return bad_request("No image file uploaded")

        file = request.files['image']
        # `not file.filename` also covers a missing (None) filename, which would
        # otherwise blow up in the extension handling below.
        if not file.filename:
            return bad_request("No file selected")

        # Take the extension from the raw name first: sanitizing the whole name
        # can drop the dot (e.g. an all non-ASCII stem), so the dot test and the
        # split must look at the same string. Sanitize only the extension.
        raw_ext = file.filename.rsplit('.', 1)[1] if '.' in file.filename else ''
        ext = secure_filename(raw_ext).lower() or 'png'

        # Reserve a temp path, then close the handle before writing by name:
        # reopening a still-open NamedTemporaryFile is not portable.
        tmp = tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False)
        tmp_path = tmp.name
        tmp.close()

        try:
            # Saving inside the try guarantees the temp file is removed even
            # when the upload cannot be written.
            file.save(tmp_path)
            ai_service = get_ai_service()
            style_description = ai_service.extract_style_description(tmp_path)
            return success_response({
                'style_description': style_description
            })
        finally:
            # Cleanup is best-effort: a failed unlink must not replace the
            # real response with an error.
            try:
                os.unlink(tmp_path)
            except OSError as cleanup_error:
                logger.warning(f"Failed to remove temp file {tmp_path}: {cleanup_error}")

    except Exception as e:
        logger.error(f"extract_style failed: {str(e)}", exc_info=True)
        return error_response('AI_SERVICE_ERROR', str(e), 503)
def _parse_file_async(file_id: str, file_path: str, filename: str, app):
    """
    Parse a reference file in the background and store the outcome on its record.

    Intended to run as a thread target; all work happens inside ``app``'s
    application context.

    Args:
        file_id: Reference file ID
        file_path: Path to the uploaded file
        filename: Original filename
        app: Flask app instance (for app context)

    Side effects:
        Sets ``parse_status`` to 'parsing', then to 'completed' or 'failed', and
        writes ``mineru_batch_id``, ``markdown_content`` / ``error_message`` and
        ``updated_at`` on the ReferenceFile row. Never raises: failures are
        logged and, when possible, recorded on the row.
    """
    with app.app_context():
        try:
            reference_file = ReferenceFile.query.get(file_id)
            if not reference_file:
                logger.error(f"Reference file {file_id} not found")
                return

            # Update status to parsing
            reference_file.parse_status = 'parsing'
            db.session.commit()

            # Initialize parser service
            parser = FileParserService(
                mineru_token=current_app.config['MINERU_TOKEN'],
                mineru_api_base=current_app.config['MINERU_API_BASE'],
                google_api_key=current_app.config.get('GOOGLE_API_KEY', ''),
                google_api_base=current_app.config.get('GOOGLE_API_BASE', ''),
                openai_api_key=current_app.config.get('OPENAI_API_KEY', ''),
                openai_api_base=current_app.config.get('OPENAI_API_BASE', ''),
                image_caption_model=current_app.config['IMAGE_CAPTION_MODEL'],
                provider_format=current_app.config.get('AI_PROVIDER_FORMAT', 'gemini'),
                lazyllm_image_caption_source=current_app.config.get('IMAGE_CAPTION_MODEL_SOURCE', 'doubao'),
            )

            # Parse file (the third tuple element is not used here)
            logger.info(f"Starting to parse file: {filename}")
            batch_id, markdown_content, _extract_id, error_message, failed_image_count = parser.parse_file(file_path, filename)

            # Update database
            reference_file.mineru_batch_id = batch_id
            if error_message:
                reference_file.parse_status = 'failed'
                reference_file.error_message = error_message
                logger.error(f"File parsing failed: {error_message}")
            else:
                reference_file.parse_status = 'completed'
                reference_file.markdown_content = markdown_content
                if failed_image_count > 0:
                    logger.warning(f"File parsing completed: {filename}, but {failed_image_count} images failed to generate captions")
                else:
                    logger.info(f"File parsing completed: {filename}")

            reference_file.updated_at = datetime.utcnow()
            db.session.commit()

        except Exception as e:
            logger.error(f"Error in async file parsing: {str(e)}", exc_info=True)
            try:
                # The failure may have come from the session itself (flush or
                # commit error). Discard the broken transaction first, otherwise
                # the query and commit below fail too and the row stays stuck
                # in 'parsing'.
                db.session.rollback()
                reference_file = ReferenceFile.query.get(file_id)
                if reference_file:
                    reference_file.parse_status = 'failed'
                    reference_file.error_message = f"Parsing error: {str(e)}"
                    reference_file.updated_at = datetime.utcnow()
                    db.session.commit()
            except Exception as db_error:
                logger.error(f"Failed to update error status: {str(db_error)}")
Allowed types: {', '.join(allowed_extensions)}") # Get project_id (optional) project_id = request.form.get('project_id') if project_id == 'none' or not project_id: project_id = None else: # Verify project exists project = Project.query.get(project_id) if not project: return not_found('Project') # Secure filename for filesystem (but keep original for database) # secure_filename removes non-ASCII chars, so we need to handle Chinese characters filename = secure_filename(original_filename) # If secure_filename removed everything (e.g., all Chinese chars), use a fallback if not filename or filename == '': # Extract extension from original filename ext = _get_file_type(original_filename) if ext == 'unknown': ext = 'file' filename = f"file_{uuid.uuid4().hex[:8]}.{ext}" logger.warning(f"Original filename '{original_filename}' was sanitized to '{filename}'") # Create upload directory structure upload_folder = current_app.config['UPLOAD_FOLDER'] reference_files_dir = Path(upload_folder) / 'reference_files' reference_files_dir.mkdir(parents=True, exist_ok=True) # Generate unique filename to avoid conflicts unique_id = str(uuid.uuid4())[:8] file_type = _get_file_type(original_filename) # Use original filename for type detection unique_filename = f"{unique_id}_{filename}" file_path = reference_files_dir / unique_filename # Save file file.save(str(file_path)) file_size = os.path.getsize(file_path) # Create database record reference_file = ReferenceFile( project_id=project_id, filename=original_filename, file_path=str(file_path.relative_to(upload_folder)), file_size=file_size, file_type=file_type, parse_status='pending' ) db.session.add(reference_file) db.session.commit() logger.info(f"File uploaded: {original_filename} (ID: {reference_file.id})") # Lazy parsing: 不立即解析,等待用户选择确定后再解析 # 解析将在用户选择文件并确认时触发 return success_response({'file': reference_file.to_dict()}) except Exception as e: logger.error(f"Error uploading reference file: {str(e)}", exc_info=True) return 
@reference_file_bp.route('/<file_id>', methods=['DELETE'])
def delete_reference_file(file_id):
    """
    DELETE /api/reference-files/<file_id> - Delete a reference file

    The database row is removed first; the file on disk is then removed on a
    best-effort basis (a failure there is only logged).

    Returns:
        Success message, a not-found response when the ID is unknown, or a
        'SERVER_ERROR' response (HTTP 500) when the database delete fails.
    """
    try:
        reference_file = ReferenceFile.query.get(file_id)
        if not reference_file:
            return not_found('Reference file')

        # Read the stored path up front: the instance should not be touched
        # after its row has been deleted and committed.
        stored_path = reference_file.file_path

        # Delete from database first. If this fails, the file is still on disk
        # and the surviving record stays valid.
        db.session.delete(reference_file)
        db.session.commit()

        logger.info(f"Deleted reference file: {file_id}")

        # Delete file from disk (best effort)
        try:
            upload_folder = current_app.config['UPLOAD_FOLDER']
            file_path = Path(upload_folder) / stored_path
            if file_path.exists():
                file_path.unlink()
                logger.info(f"Deleted file from disk: {file_path}")
        except Exception as e:
            logger.warning(f"Failed to delete file from disk: {str(e)}")

        return success_response({'message': 'File deleted successfully'})

    except Exception as e:
        # Leave the session usable after a failed delete/commit.
        db.session.rollback()
        logger.error(f"Error deleting reference file: {str(e)}", exc_info=True)
        return error_response('SERVER_ERROR', str(e), 500)
@reference_file_bp.route('/<file_id>/parse', methods=['POST'])
def trigger_file_parse(file_id):
    """
    POST /api/reference-files/<file_id>/parse - Trigger parsing for a reference file

    Starts ``_parse_file_async`` in a daemon thread. A file whose status is
    already 'parsing' is returned unchanged. A file that was 'completed' or
    'failed' has its previous results cleared and is parsed again.

    Returns:
        Updated reference file information, a not-found response for an unknown
        ID, or a 'FILE_NOT_FOUND' response (HTTP 404) when the stored file is
        missing from disk.
    """
    try:
        reference_file = ReferenceFile.query.get(file_id)
        if not reference_file:
            return not_found('Reference file')

        # Already being parsed: return as-is.
        # NOTE(review): the status only becomes 'parsing' inside the worker
        # thread, so two requests in quick succession could both start a
        # parse - confirm whether that matters for callers.
        if reference_file.parse_status == 'parsing':
            return success_response({
                'file': reference_file.to_dict(),
                'message': 'File is already being parsed'
            })

        # Resolve the file on disk BEFORE touching stored results, so a missing
        # file does not wipe the previous markdown_content on the way to a 404.
        upload_folder = current_app.config['UPLOAD_FOLDER']
        file_path = Path(upload_folder) / reference_file.file_path

        if not file_path.exists():
            return error_response('FILE_NOT_FOUND', f'File not found: {file_path}', 404)

        # A finished or failed parse may be re-run: reset the record first.
        if reference_file.parse_status in ['completed', 'failed']:
            reference_file.parse_status = 'pending'
            reference_file.error_message = None
            # Clear the previous parse results so the new run starts clean.
            reference_file.markdown_content = None
            reference_file.mineru_batch_id = None
            db.session.commit()

        # Start asynchronous parsing; the real app object is passed because the
        # current_app proxy is not usable from another thread.
        thread = threading.Thread(
            target=_parse_file_async,
            args=(reference_file.id, str(file_path), reference_file.filename, current_app._get_current_object()),
            daemon=True,
        )
        thread.start()

        logger.info(f"Triggered parsing for file: {reference_file.filename} (ID: {file_id})")

        return success_response({
            'file': reference_file.to_dict(),
            'message': 'Parsing started'
        })

    except Exception as e:
        # Leave the session usable if the reset commit failed.
        db.session.rollback()
        logger.error(f"Error triggering file parse: {str(e)}", exc_info=True)
        return error_response('SERVER_ERROR', str(e), 500)
@contextmanager
def temporary_settings_override(settings_override: dict):
    """
    Context manager that temporarily applies setting overrides to ``current_app.config``.

    Example:
        with temporary_settings_override({"api_key": "test-key"}):
            # code here sees the temporary configuration
            result = some_test_function()

    Override rules:
        * Most fields are applied only when their value is truthy.
        * ``*_model_source`` fields are applied whenever the key is present;
          an empty value removes the config key (fall back to global config).
        * Reasoning fields are applied whenever the key is present, whatever
          the value.

    On exit (normal or exceptional) every touched config key is put back
    exactly as it was: keys that did not exist are removed again, and keys that
    existed, including those holding ``None``, get their previous value back.

    Args:
        settings_override: Dict of settings to apply temporarily. ``None`` is
            treated as an empty dict.

    Yields:
        None
    """
    settings_override = settings_override or {}
    config = current_app.config

    # Sentinel for "key was not in config". A plain None cannot be used because
    # None is also a legitimate stored value; restoring it with pop() would
    # delete the key and break later config['KEY'] lookups.
    absent = object()
    saved = {}

    def _remember(config_key):
        # Keep only the first snapshot so the true original always wins.
        saved.setdefault(config_key, config.get(config_key, absent))

    def _override(config_key, value):
        _remember(config_key)
        config[config_key] = value

    # (override field, config keys it drives); applied only for truthy values.
    truthy_fields = [
        ("api_key", ("GOOGLE_API_KEY", "OPENAI_API_KEY")),
        ("api_base_url", ("GOOGLE_API_BASE", "OPENAI_API_BASE")),
        ("ai_provider_format", ("AI_PROVIDER_FORMAT",)),
        ("text_model", ("TEXT_MODEL",)),
        ("image_model", ("IMAGE_MODEL",)),
        ("image_caption_model", ("IMAGE_CAPTION_MODEL",)),
    ]
    # Per-model API credentials override
    for model_type in ('text', 'image', 'image_caption'):
        prefix = model_type.upper()
        truthy_fields.append((f'{model_type}_api_key', (f'{prefix}_API_KEY',)))
        truthy_fields.append((f'{model_type}_api_base_url', (f'{prefix}_API_BASE',)))
    truthy_fields += [
        ("mineru_api_base", ("MINERU_API_BASE",)),
        ("mineru_token", ("MINERU_TOKEN",)),
        ("baidu_api_key", ("BAIDU_API_KEY",)),
        ("image_resolution", ("DEFAULT_RESOLUTION",)),
    ]

    # Per-model source overrides (empty string = clear, to fall back to global config)
    source_fields = [
        ("text_model_source", "TEXT_MODEL_SOURCE"),
        ("image_model_source", "IMAGE_MODEL_SOURCE"),
        ("image_caption_model_source", "IMAGE_CAPTION_MODEL_SOURCE"),
    ]

    # Applied whenever present, so False / 0 can be set explicitly.
    presence_fields = [
        ("enable_text_reasoning", "ENABLE_TEXT_REASONING"),
        ("text_thinking_budget", "TEXT_THINKING_BUDGET"),
        ("enable_image_reasoning", "ENABLE_IMAGE_REASONING"),
        ("image_thinking_budget", "IMAGE_THINKING_BUDGET"),
    ]

    try:
        for field, config_keys in truthy_fields:
            value = settings_override.get(field)
            if value:
                for config_key in config_keys:
                    _override(config_key, value)

        for field, config_key in source_fields:
            if field in settings_override:
                value = settings_override[field]
                if value:
                    _override(config_key, value)
                else:
                    _remember(config_key)
                    config.pop(config_key, None)

        for field, config_key in presence_fields:
            if field in settings_override:
                _override(config_key, settings_override[field])

        yield
    finally:
        # Restore the original configuration
        for config_key, original in saved.items():
            if original is absent:
                config.pop(config_key, None)
            else:
                config[config_key] = original
settings.api_base_url = value if value != "" else None if "api_key" in data: settings.api_key = data["api_key"] # Update image generation configuration if "image_resolution" in data: resolution = data["image_resolution"] if resolution not in ["1K", "2K", "4K"]: return bad_request("Resolution must be 1K, 2K, or 4K") settings.image_resolution = resolution if "image_aspect_ratio" in data: aspect_ratio = data["image_aspect_ratio"] settings.image_aspect_ratio = aspect_ratio # Update worker configuration if "max_description_workers" in data: workers = int(data["max_description_workers"]) if workers < 1 or workers > 20: return bad_request( "Max description workers must be between 1 and 20" ) settings.max_description_workers = workers if "max_image_workers" in data: workers = int(data["max_image_workers"]) if workers < 1 or workers > 20: return bad_request( "Max image workers must be between 1 and 20" ) settings.max_image_workers = workers # Update model & MinerU configuration (optional, empty values fall back to Config) if "text_model" in data: settings.text_model = (data["text_model"] or "").strip() or None if "image_model" in data: settings.image_model = (data["image_model"] or "").strip() or None if "mineru_api_base" in data: settings.mineru_api_base = (data["mineru_api_base"] or "").strip() or None if "mineru_token" in data: settings.mineru_token = data["mineru_token"] if "image_caption_model" in data: settings.image_caption_model = (data["image_caption_model"] or "").strip() or None if "output_language" in data: language = data["output_language"] if language in ["zh", "en", "ja", "auto"]: settings.output_language = language else: return bad_request("Output language must be 'zh', 'en', 'ja', or 'auto'") # Update description generation mode if "description_generation_mode" in data: mode = data["description_generation_mode"] if mode not in ("streaming", "parallel"): return bad_request("description_generation_mode must be 'streaming' or 'parallel'") 
settings.description_generation_mode = mode # Update description extra fields if "description_extra_fields" in data: fields = data["description_extra_fields"] if not isinstance(fields, list) or not fields: return bad_request("description_extra_fields must be a non-empty array of strings") if len(fields) > 10: return bad_request("description_extra_fields allows at most 10 items") if not all(isinstance(f, str) and f.strip() for f in fields): return bad_request("Each extra field must be a non-empty string") settings.description_extra_fields = json.dumps([f.strip() for f in fields], ensure_ascii=False) if "image_prompt_extra_fields" in data: fields = data["image_prompt_extra_fields"] if not isinstance(fields, list): return bad_request("image_prompt_extra_fields must be an array of strings") # 空数组表示不传任何额外字段给图片生成 settings.image_prompt_extra_fields = json.dumps([f.strip() for f in fields if isinstance(f, str) and f.strip()], ensure_ascii=False) # Update reasoning mode configuration (separate for text and image) if "enable_text_reasoning" in data: settings.enable_text_reasoning = bool(data["enable_text_reasoning"]) if "text_thinking_budget" in data: budget = int(data["text_thinking_budget"]) if budget < 1 or budget > 8192: return bad_request("Text thinking budget must be between 1 and 8192") settings.text_thinking_budget = budget if "enable_image_reasoning" in data: settings.enable_image_reasoning = bool(data["enable_image_reasoning"]) if "image_thinking_budget" in data: budget = int(data["image_thinking_budget"]) if budget < 1 or budget > 8192: return bad_request("Image thinking budget must be between 1 and 8192") settings.image_thinking_budget = budget # Update Baidu OCR configuration if "baidu_api_key" in data: settings.baidu_api_key = data["baidu_api_key"] or None # Update per-model provider source configuration if "text_model_source" in data: settings.text_model_source = (data["text_model_source"] or "").strip() or None if "image_model_source" in data: 
@settings_bp.route("/reset", methods=["POST"], strict_slashes=False)
def reset_settings():
    """
    POST /api/settings/reset - Reset settings to default values

    Clears the stored overrides (set to NULL) so .env defaults take over,
    restores the reasoning options to their fixed defaults, commits, and
    syncs the result into app.config.
    """
    # Columns that are simply cleared.
    cleared_columns = [
        "ai_provider_format",
        "api_base_url",
        "api_key",
        "text_model",
        "image_model",
        "mineru_api_base",
        "mineru_token",
        "image_caption_model",
        "output_language",
        "description_generation_mode",
        "description_extra_fields",
        "image_prompt_extra_fields",
        "baidu_api_key",
        "text_model_source",
        "image_model_source",
        "image_caption_model_source",
        "lazyllm_api_keys",
        "image_resolution",
        "image_aspect_ratio",
        "max_description_workers",
        "max_image_workers",
    ]
    # Per-model credential columns follow a naming pattern.
    for model_type in ('text', 'image', 'image_caption'):
        cleared_columns.append(f'{model_type}_api_key')
        cleared_columns.append(f'{model_type}_api_base_url')

    # Reasoning options get concrete values rather than NULL.
    reasoning_defaults = {
        "enable_text_reasoning": False,
        "text_thinking_budget": 1024,
        "enable_image_reasoning": False,
        "image_thinking_budget": 1024,
    }

    try:
        settings = Settings.get_settings()

        for column in cleared_columns:
            setattr(settings, column, None)
        for column, default in reasoning_defaults.items():
            setattr(settings, column, default)

        settings.updated_at = datetime.now(timezone.utc)
        db.session.commit()

        # Sync to app.config
        _sync_settings_to_config(settings)

        logger.info("Settings reset to defaults")
        payload = settings.to_dict()
        return success_response(payload, "Settings reset to defaults")

    except Exception as e:
        db.session.rollback()
        logger.error(f"Error resetting settings: {str(e)}")
        return error_response(
            "RESET_SETTINGS_ERROR",
            f"Failed to reset settings: {str(e)}",
            500,
        )
@settings_bp.route("/verify", methods=["POST"], strict_slashes=False)
def verify_api_key():
    """
    POST /api/settings/verify - Check whether the configured model is usable.

    Sends one lightweight test request (thinking_budget=0) through the text
    provider, with the stored user settings temporarily applied.

    Returns:
        {
            "data": {
                "available": true/false,
                "message": "hint for the user"
            }
        }
    """

    def _unavailable(text):
        # Every negative outcome uses the same success envelope.
        return success_response({"available": False, "message": text})

    try:
        # Load the current settings
        settings = Settings.get_settings()
        if not settings:
            return _unavailable("用户设置未找到")

        # Only non-empty stored values become overrides
        override_names = ("api_key", "api_base_url", "ai_provider_format", "text_model")
        settings_override = {
            name: getattr(settings, name)
            for name in override_names
            if getattr(settings, name)
        }

        # Apply the user's configuration only for the duration of the check
        with temporary_settings_override(settings_override):
            from services.ai_providers import get_text_provider

            verification_model = settings.text_model
            if not verification_model:
                verification_model = current_app.config.get("TEXT_MODEL")
            if not verification_model:
                verification_model = Config.TEXT_MODEL
            if not verification_model:
                verification_model = "gemini-3-flash-preview"

            # Build a provider and send a minimal test prompt
            try:
                provider = get_text_provider(model=verification_model)
                provider.generate_text("Hello", thinking_budget=0)
            except ValueError as ve:
                # Provider reported a missing API key
                logger.warning(f"API key not configured: {str(ve)}")
                provider_format = (settings.ai_provider_format or "").lower()
                is_vendor = provider_format in LAZYLLM_VENDORS
                if provider_format == "lazyllm" or is_vendor:
                    if is_vendor:
                        source = provider_format
                    else:
                        source = current_app.config.get("TEXT_MODEL_SOURCE") or Config.TEXT_MODEL_SOURCE or "unknown"
                    source = source.upper()
                    message = f"LazyLLM API key 未配置,请设置 {source}_API_KEY"
                else:
                    message = "API key 未配置,请在设置中配置 API key 和 API Base URL"
                return _unavailable(message)
            except Exception as e:
                # The call itself failed (bad key, exhausted quota, ...)
                error_msg = str(e)
                logger.warning(f"API key verification failed: {error_msg}")

                # Map the error text to a cause; the first matching rule wins.
                lowered = error_msg.lower()
                rules = (
                    (("401",), ("unauthorized", "invalid"), "API key 无效或已过期,请在设置中检查 API key 配置"),
                    (("429",), ("quota", "limit"), "API 调用超限或余额不足,请在设置中检查配置"),
                    (("403",), ("forbidden",), "API 访问被拒绝,请在设置中检查 API key 权限"),
                    ((), ("timeout",), "API 调用超时,请在设置中检查网络连接和 API Base URL"),
                )
                for codes, keywords, text in rules:
                    if any(code in error_msg for code in codes) or any(word in lowered for word in keywords):
                        message = text
                        break
                else:
                    message = f"API 调用失败,请在设置中检查配置: {error_msg}"
                return _unavailable(message)
            else:
                logger.info("API key verification successful")
                return success_response({
                    "available": True,
                    "message": "API key 可用"
                })

    except Exception as e:
        logger.error(f"Error verifying API key: {str(e)}")
        return error_response(
            "VERIFY_API_KEY_ERROR",
            f"验证 API key 时出错: {str(e)}",
            500,
        )
current_app.config["GOOGLE_API_BASE"] = settings.api_base_url current_app.config["OPENAI_API_BASE"] = settings.api_base_url else: # Restore .env defaults (pop would permanently lose .env values) env_base_google = Config.GOOGLE_API_BASE env_base_openai = Config.OPENAI_API_BASE if current_app.config.get("GOOGLE_API_BASE") != env_base_google or current_app.config.get("OPENAI_API_BASE") != env_base_openai: ai_config_changed = True logger.info("API base URL cleared, falling back to .env defaults") current_app.config["GOOGLE_API_BASE"] = env_base_google current_app.config["OPENAI_API_BASE"] = env_base_openai if settings.api_key is not None: old_key = current_app.config.get("GOOGLE_API_KEY") # Compare actual values to detect any change (but don't log the keys for security) if old_key != settings.api_key: ai_config_changed = True logger.info("API key updated") current_app.config["GOOGLE_API_KEY"] = settings.api_key current_app.config["OPENAI_API_KEY"] = settings.api_key else: # Restore .env defaults (pop would permanently lose .env values) env_key_google = Config.GOOGLE_API_KEY env_key_openai = Config.OPENAI_API_KEY if current_app.config.get("GOOGLE_API_KEY") != env_key_google or current_app.config.get("OPENAI_API_KEY") != env_key_openai: ai_config_changed = True logger.info("API key cleared, falling back to .env defaults") current_app.config["GOOGLE_API_KEY"] = env_key_google current_app.config["OPENAI_API_KEY"] = env_key_openai # Check model changes new_text_model = settings.text_model or Config.TEXT_MODEL old_model = current_app.config.get("TEXT_MODEL") if old_model != new_text_model: ai_config_changed = True logger.info(f"Text model changed: {old_model} -> {new_text_model}") current_app.config["TEXT_MODEL"] = new_text_model new_image_model = settings.image_model or Config.IMAGE_MODEL old_model = current_app.config.get("IMAGE_MODEL") if old_model != new_image_model: ai_config_changed = True logger.info(f"Image model changed: {old_model} -> {new_image_model}") 
current_app.config["IMAGE_MODEL"] = new_image_model # Sync image generation settings (fall back to Config when NULL) current_app.config["DEFAULT_RESOLUTION"] = settings.image_resolution or Config.DEFAULT_RESOLUTION current_app.config["DEFAULT_ASPECT_RATIO"] = settings.image_aspect_ratio or Config.DEFAULT_ASPECT_RATIO # Sync worker settings (fall back to Config when NULL) current_app.config["MAX_DESCRIPTION_WORKERS"] = settings.max_description_workers or Config.MAX_DESCRIPTION_WORKERS current_app.config["MAX_IMAGE_WORKERS"] = settings.max_image_workers or Config.MAX_IMAGE_WORKERS logger.info(f"Updated worker settings: desc={current_app.config['MAX_DESCRIPTION_WORKERS']}, img={current_app.config['MAX_IMAGE_WORKERS']}") # Sync MinerU settings (fall back to Config defaults when NULL) current_app.config["MINERU_API_BASE"] = settings.mineru_api_base or Config.MINERU_API_BASE current_app.config["MINERU_TOKEN"] = settings.mineru_token if settings.mineru_token is not None else Config.MINERU_TOKEN current_app.config["IMAGE_CAPTION_MODEL"] = settings.image_caption_model or Config.IMAGE_CAPTION_MODEL current_app.config["OUTPUT_LANGUAGE"] = settings.output_language or Config.OUTPUT_LANGUAGE # Sync reasoning mode settings (separate for text and image) # Check if reasoning configuration changed (requires AIService cache clear) old_text_reasoning = current_app.config.get("ENABLE_TEXT_REASONING") old_text_budget = current_app.config.get("TEXT_THINKING_BUDGET") old_image_reasoning = current_app.config.get("ENABLE_IMAGE_REASONING") old_image_budget = current_app.config.get("IMAGE_THINKING_BUDGET") if (old_text_reasoning != settings.enable_text_reasoning or old_text_budget != settings.text_thinking_budget or old_image_reasoning != settings.enable_image_reasoning or old_image_budget != settings.image_thinking_budget): ai_config_changed = True logger.info(f"Reasoning config changed: 
text={old_text_reasoning}({old_text_budget})->{settings.enable_text_reasoning}({settings.text_thinking_budget}), image={old_image_reasoning}({old_image_budget})->{settings.enable_image_reasoning}({settings.image_thinking_budget})") current_app.config["ENABLE_TEXT_REASONING"] = settings.enable_text_reasoning current_app.config["TEXT_THINKING_BUDGET"] = settings.text_thinking_budget current_app.config["ENABLE_IMAGE_REASONING"] = settings.enable_image_reasoning current_app.config["IMAGE_THINKING_BUDGET"] = settings.image_thinking_budget # Sync Baidu OCR settings (fall back to Config default when NULL) current_app.config["BAIDU_API_KEY"] = settings.baidu_api_key or Config.BAIDU_API_KEY # Sync per-model provider source settings for model_type, source_attr in [('TEXT', 'text_model_source'), ('IMAGE', 'image_model_source'), ('IMAGE_CAPTION', 'image_caption_model_source')]: source_val = getattr(settings, source_attr, None) config_key = f'{model_type}_MODEL_SOURCE' if source_val: old_source = current_app.config.get(config_key) if old_source != source_val: ai_config_changed = True current_app.config[config_key] = source_val else: if config_key in current_app.config: ai_config_changed = True current_app.config.pop(config_key, None) # Sync per-model API credentials (for gemini/openai per-model overrides) for model_type in ('text', 'image', 'image_caption'): prefix = model_type.upper() for suffix, setting_suffix in [('_API_KEY', '_api_key'), ('_API_BASE', '_api_base_url')]: config_key = f'{prefix}{suffix}' val = getattr(settings, f'{model_type}{setting_suffix}', None) if val: if current_app.config.get(config_key) != val: ai_config_changed = True current_app.config[config_key] = val else: if config_key in current_app.config: ai_config_changed = True current_app.config.pop(config_key, None) # Sync LazyLLM vendor API keys to environment variables # (lazyllm_env.py reads from os.environ via {SOURCE}_API_KEY) if settings.lazyllm_api_keys: try: keys = 
json.loads(settings.lazyllm_api_keys) for vendor, key in keys.items(): if key: env_key = f"{vendor.upper()}_API_KEY" if os.environ.get(env_key) != key: ai_config_changed = True os.environ[env_key] = key except (json.JSONDecodeError, TypeError): pass # Clear AI service cache if AI-related configuration changed if ai_config_changed: try: from services.ai_service_manager import clear_ai_service_cache clear_ai_service_cache() logger.warning("AI configuration changed - AIService cache cleared. New providers will be created on next request.") except Exception as e: logger.error(f"Failed to clear AI service cache: {e}") def _get_test_image_path() -> Path: test_image = Path(PROJECT_ROOT) / "assets" / "test_img.png" if not test_image.exists(): raise FileNotFoundError("未找到 test_img.png,请确认已放在项目根目录 assets 下") return test_image def _get_baidu_credentials(): """获取百度 API 凭证""" api_key = current_app.config.get("BAIDU_API_KEY") or Config.BAIDU_API_KEY if not api_key: raise ValueError("未配置 BAIDU_API_KEY") return api_key def _create_file_parser(): """创建 FileParserService 实例,根据 per-model caption 配置解析正确的凭证""" from services.ai_providers import LAZYLLM_VENDORS caption_source = current_app.config.get("IMAGE_CAPTION_MODEL_SOURCE") global_format = current_app.config.get("AI_PROVIDER_FORMAT", "gemini") # Determine effective caption provider format if caption_source: source_lower = caption_source.lower() if source_lower == 'gemini': caption_format = 'gemini' elif source_lower == 'openai': caption_format = 'openai' elif source_lower in LAZYLLM_VENDORS: caption_format = 'lazyllm' else: caption_format = global_format else: caption_format = global_format # Resolve API credentials based on caption format if caption_format == 'gemini': google_key = current_app.config.get("IMAGE_CAPTION_API_KEY") or current_app.config.get("GOOGLE_API_KEY", "") google_base = current_app.config.get("IMAGE_CAPTION_API_BASE") or current_app.config.get("GOOGLE_API_BASE", "") openai_key = "" openai_base = "" elif 
def _test_baidu_ocr():
    """Run the Baidu OCR smoke test against the bundled test image.

    Returns:
        A ``(result_data, message)`` tuple: ``result_data`` holds the
        recognized text and the ``words_result_num`` reported by the provider.

    Raises:
        ValueError: If the OCR provider could not be created.
    """
    ocr = create_baidu_accurate_ocr_provider(_get_baidu_credentials())
    if not ocr:
        raise ValueError("百度 OCR Provider 初始化失败")

    image_path = _get_test_image_path()
    ocr_result = ocr.recognize(str(image_path), language_type="CHN_ENG")
    full_text = ocr.get_full_text(ocr_result, separator=" ")

    payload = {
        "recognized_text": full_text,
        "words_result_num": ocr_result.get("words_result_num", 0),
    }
    return payload, "百度 OCR 测试成功"
def _test_image_model():
    """Run the image generation smoke test using the bundled test image as reference.

    Aspect ratio and resolution come from the saved settings, defaulting to
    "16:9" and "2K" when unset.

    Returns:
        A ``(result_data, message)`` tuple; ``result_data`` carries the size of
        the generated image.

    Raises:
        ValueError: If the model returns no image.
    """
    service = AIService()
    reference_image = _get_test_image_path()
    saved = Settings.get_settings()

    generation_args = {
        "prompt": "生成一张简洁、明亮、适合演示文稿的背景图。",
        "ref_image_path": str(reference_image),
        "aspect_ratio": saved.image_aspect_ratio or "16:9",
        "resolution": saved.image_resolution or "2K",
    }
    generated = service.generate_image(**generation_args)

    if generated is None:
        raise ValueError("图像生成模型返回空结果")
    return {"image_size": generated.size}, "图像生成模型测试成功"
# Test function registry.
# Maps the test name used by the settings test runner to the zero-argument
# function that performs that test. The runner looks the name up with
# TEST_FUNCTIONS.get(test_name) and unpacks the call result as
# (result_data, message).
TEST_FUNCTIONS = {
    "baidu-ocr": _test_baidu_ocr,
    "text-model": _test_text_model,
    "caption-model": _test_caption_model,
    "baidu-inpaint": _test_baidu_inpaint,
    "image-model": _test_image_model,
    "mineru-pdf": _test_mineru_pdf,
}
@settings_bp.route("/tests/<test_name>", methods=["POST"], strict_slashes=False)
def run_settings_test(test_name: str):
    """
    POST /api/settings/tests/<test_name> - start an asynchronous service test

    The saved global settings are used as the base configuration; any keys in
    the optional JSON body override them, so unsaved configuration can be
    tested.

    Request Body (optional):
        {
            "api_key": "test-key",
            "api_base_url": "https://test.api.com",
            "text_model": "test-model",
            ...
        }

    Returns:
        {
            "data": {
                "task_id": "uuid",
                "status": "PENDING"
            }
        }
    """
    try:
        # Load the saved global settings as the base configuration
        global_settings = Settings.get_settings()

        # Build the base test settings from saved values (only truthy values are copied)
        test_settings = {}
        for attr in ("api_key", "api_base_url", "ai_provider_format",
                     "text_model", "image_model", "image_caption_model"):
            val = getattr(global_settings, attr)
            if val:
                test_settings[attr] = val

        caption_source = current_app.config.get("IMAGE_CAPTION_MODEL_SOURCE")
        if caption_source:
            test_settings["image_caption_model_source"] = caption_source

        # Per-model provider sources and credentials
        for model_type in ('text', 'image', 'image_caption'):
            for suffix in ('model_source', 'api_key', 'api_base_url'):
                attr = f'{model_type}_{suffix}'
                val = getattr(global_settings, attr, None)
                if val:
                    test_settings[attr] = val

        for attr in ("mineru_api_base", "mineru_token", "baidu_api_key", "image_resolution"):
            val = getattr(global_settings, attr)
            if val:
                test_settings[attr] = val

        # Reasoning mode settings are always copied, even when falsy
        test_settings["enable_text_reasoning"] = global_settings.enable_text_reasoning
        test_settings["text_thinking_budget"] = global_settings.text_thinking_budget
        test_settings["enable_image_reasoning"] = global_settings.enable_image_reasoning
        test_settings["image_thinking_budget"] = global_settings.image_thinking_budget

        # Apply overrides sent by the frontend (used to test unsaved configuration).
        # silent=True: the body is optional, so a missing or non-JSON body must
        # not raise; it is treated as "no overrides".
        override_settings = request.get_json(silent=True) or {}
        if not isinstance(override_settings, dict):
            return error_response(
                "INVALID_REQUEST",
                "请求体必须是 JSON 对象",
                400
            )
        if override_settings:
            logger.info(f"Applying test setting overrides: {list(override_settings.keys())}")
            test_settings.update(override_settings)

        # Create the task record, marked with the special project_id 'settings-test'.
        # NOTE(review): the baseline migration declares tasks.project_id as a
        # foreign key to projects.id; this sentinel presumably relies on FK
        # enforcement being off - confirm.
        task = Task(
            project_id='settings-test',
            task_type=f'TEST_{test_name.upper().replace("-", "_")}',
            status='PENDING'
        )
        db.session.add(task)
        db.session.commit()

        task_id = task.id

        # Submit the background job through the TaskManager. The concrete app
        # object is passed because the worker needs its own app context.
        task_manager.submit_task(
            task_id,
            _run_test_async,
            test_name,
            test_settings,
            current_app._get_current_object()
        )

        logger.info(f"Started test task {task_id} for {test_name}")

        return success_response({
            'task_id': task_id,
            'status': 'PENDING'
        }, '测试任务已启动')

    except Exception as e:
        logger.error(f"Failed to start test: {str(e)}", exc_info=True)
        return error_response(
            "SETTINGS_TEST_ERROR",
            f"启动测试失败: {str(e)}",
            500
        )
@template_bp.route('/<project_id>/template', methods=['POST'])
def upload_template(project_id):
    """
    POST /api/projects/{project_id}/template - Upload template image

    Content-Type: multipart/form-data
    Form: template_image=@file.png

    Responds with ``template_image_url`` on success, a not-found response when
    the project does not exist, a bad-request response when the upload is
    missing or has a disallowed extension, and SERVER_ERROR (500) on any other
    failure.
    """
    try:
        project = Project.query.get(project_id)

        if not project:
            return not_found('Project')

        # Check if file is in request
        if 'template_image' not in request.files:
            return bad_request("No file uploaded")

        file = request.files['template_image']

        if file.filename == '':
            return bad_request("No file selected")

        # Validate file extension
        if not allowed_file(file.filename, current_app.config['ALLOWED_EXTENSIONS']):
            return bad_request("Invalid file type. Allowed types: png, jpg, jpeg, gif, webp")

        # Save template
        file_service = FileService(current_app.config['UPLOAD_FOLDER'])
        file_path = file_service.save_template_image(file, project_id)

        # Update project
        project.template_image_path = file_path
        project.updated_at = datetime.utcnow()

        db.session.commit()

        # Only the final path component is exposed in the public URL.
        return success_response({
            'template_image_url': f'/files/{project_id}/template/{file_path.split("/")[-1]}'
        })

    except Exception as e:
        db.session.rollback()
        return error_response('SERVER_ERROR', str(e), 500)
@user_template_bp.route('/<template_id>', methods=['DELETE'])
def delete_user_template(template_id):
    """
    DELETE /api/user-templates/{template_id} - Delete user template

    Removes the stored template file first, then the database record.
    Responds with a not-found response when no such template exists and
    SERVER_ERROR (500) on any other failure.
    """
    try:
        template = UserTemplate.query.get(template_id)

        if not template:
            return not_found('UserTemplate')

        # Delete template file
        file_service = FileService(current_app.config['UPLOAD_FOLDER'])
        file_service.delete_user_template(template_id)

        # Delete template record
        db.session.delete(template)
        db.session.commit()

        return success_response(message="Template deleted successfully")

    except Exception as e:
        db.session.rollback()
        return error_response('SERVER_ERROR', str(e), 500)
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an engine from the Flask app's SQLALCHEMY_DATABASE_URI, opens a
    connection and runs the migrations inside a transaction.
    """
    flask_app = create_app()
    engine_settings = {"sqlalchemy.url": flask_app.config["SQLALCHEMY_DATABASE_URI"]}
    engine = engine_from_config(
        engine_settings,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as conn:
        context.configure(
            connection=conn,
            target_metadata=target_metadata,
            compare_type=True,
        )
        with context.begin_transaction():
            context.run_migrations()
def upgrade() -> None:
    """
    Baseline migration - creates only the earliest core tables.
    Idempotent: skips if 'projects' table already exists (old project).

    Tables created, in order: projects, user_templates, materials, pages,
    reference_files, tasks, page_image_versions (plus an index on
    page_image_versions.page_id). The settings table is not created here.
    """
    bind = op.get_bind()
    inspector = inspect(bind)

    # The presence of 'projects' is used as the marker for "schema already exists".
    if 'projects' in inspector.get_table_names():
        # Old project: tables already created by db.create_all(), skip
        return

    # New installation: create core tables (NOT including settings - it came later)

    # projects: referenced by the foreign keys of the tables below, so it is created first.
    op.create_table('projects',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('idea_prompt', sa.Text(), nullable=True),
        sa.Column('outline_text', sa.Text(), nullable=True),
        sa.Column('description_text', sa.Text(), nullable=True),
        sa.Column('extra_requirements', sa.Text(), nullable=True),
        sa.Column('creation_type', sa.String(length=20), nullable=False),
        sa.Column('template_image_path', sa.String(length=500), nullable=True),
        sa.Column('status', sa.String(length=50), nullable=False),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('updated_at', sa.DateTime(), nullable=False),
        sa.PrimaryKeyConstraint('id')
    )

    # user_templates: standalone table, no foreign keys.
    op.create_table('user_templates',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('name', sa.String(length=200), nullable=True),
        sa.Column('file_path', sa.String(length=500), nullable=False),
        sa.Column('file_size', sa.Integer(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('updated_at', sa.DateTime(), nullable=False),
        sa.PrimaryKeyConstraint('id')
    )

    # materials: project_id is nullable, so a material may exist without a project.
    op.create_table('materials',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('project_id', sa.String(length=36), nullable=True),
        sa.Column('filename', sa.String(length=500), nullable=False),
        sa.Column('relative_path', sa.String(length=500), nullable=False),
        sa.Column('url', sa.String(length=500), nullable=False),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('updated_at', sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(['project_id'], ['projects.id'], ),
        sa.PrimaryKeyConstraint('id')
    )

    # pages: each row must belong to a project (project_id is NOT NULL).
    op.create_table('pages',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('project_id', sa.String(length=36), nullable=False),
        sa.Column('order_index', sa.Integer(), nullable=False),
        sa.Column('part', sa.String(length=200), nullable=True),
        sa.Column('outline_content', sa.Text(), nullable=True),
        sa.Column('description_content', sa.Text(), nullable=True),
        sa.Column('generated_image_path', sa.String(length=500), nullable=True),
        sa.Column('status', sa.String(length=50), nullable=False),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('updated_at', sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(['project_id'], ['projects.id'], ),
        sa.PrimaryKeyConstraint('id')
    )

    # reference_files: project_id is nullable.
    op.create_table('reference_files',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('project_id', sa.String(length=36), nullable=True),
        sa.Column('filename', sa.String(length=500), nullable=False),
        sa.Column('file_path', sa.String(length=500), nullable=False),
        sa.Column('file_size', sa.Integer(), nullable=False),
        sa.Column('file_type', sa.String(length=50), nullable=False),
        sa.Column('parse_status', sa.String(length=50), nullable=False),
        sa.Column('markdown_content', sa.Text(), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('mineru_batch_id', sa.String(length=100), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('updated_at', sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(['project_id'], ['projects.id'], ),
        sa.PrimaryKeyConstraint('id')
    )

    # tasks: project_id is NOT NULL with a foreign key to projects.id;
    # completed_at stays NULL until the task finishes.
    op.create_table('tasks',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('project_id', sa.String(length=36), nullable=False),
        sa.Column('task_type', sa.String(length=50), nullable=False),
        sa.Column('status', sa.String(length=50), nullable=False),
        sa.Column('progress', sa.Text(), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['project_id'], ['projects.id'], ),
        sa.PrimaryKeyConstraint('id')
    )

    # page_image_versions: references pages, so it is created after that table.
    op.create_table('page_image_versions',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('page_id', sa.String(length=36), nullable=False),
        sa.Column('image_path', sa.String(length=500), nullable=False),
        sa.Column('version_number', sa.Integer(), nullable=False),
        sa.Column('is_current', sa.Boolean(), nullable=False),
        sa.Column('created_at', sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(['page_id'], ['pages.id'], ),
        sa.PrimaryKeyConstraint('id')
    )
    # Non-unique index on page_image_versions.page_id.
    op.create_index(op.f('ix_page_image_versions_page_id'), 'page_image_versions', ['page_id'], unique=False)
""" bind = op.get_bind() inspector = inspect(bind) if 'settings' in inspector.get_table_names(): # Settings table already exists (created by db.create_all() in an intermediate version) return # Create settings table with original fields only op.create_table('settings', sa.Column('id', sa.Integer(), nullable=False), sa.Column('ai_provider_format', sa.String(length=20), nullable=False), sa.Column('api_base_url', sa.String(length=500), nullable=True), sa.Column('api_key', sa.String(length=500), nullable=True), sa.Column('image_resolution', sa.String(length=20), nullable=False), sa.Column('image_aspect_ratio', sa.String(length=10), nullable=False), sa.Column('max_description_workers', sa.Integer(), nullable=False), sa.Column('max_image_workers', sa.Integer(), nullable=False), sa.Column('created_at', sa.DateTime(), nullable=False), sa.Column('updated_at', sa.DateTime(), nullable=False), sa.PrimaryKeyConstraint('id') ) def downgrade() -> None: op.drop_table('settings') ================================================ FILE: backend/migrations/versions/003_add_model_and_mineru_settings.py ================================================ """add model and mineru settings to settings table Revision ID: 003_new_fields Revises: 002_settings Create Date: 2025-12-17 22:02:00.000000 """ from alembic import op import sqlalchemy as sa from sqlalchemy import inspect # revision identifiers, used by Alembic. revision = '003_new_fields' down_revision = '002_settings' branch_labels = None depends_on = None def _column_exists(table_name: str, column_name: str) -> bool: """检查列是否存在""" bind = op.get_bind() inspector = inspect(bind) columns = [col['name'] for col in inspector.get_columns(table_name)] return column_name in columns def upgrade() -> None: """ Add new model and MinerU configuration fields to settings table. Idempotent: checks each column before adding. 
def downgrade() -> None:
    """
    Remove the model and MinerU configuration columns from ``settings``.

    Columns are dropped in the reverse order of their creation in
    ``upgrade``. Like ``upgrade``, each column is checked first: a column
    that is not present is skipped instead of raising, so the downgrade can
    be re-run after a partial failure.
    """
    for column_name in (
        'image_caption_model',
        'mineru_api_base',
        'image_model',
        'text_model',
    ):
        if _column_exists('settings', column_name):
            op.drop_column('settings', column_name)
def upgrade() -> None:
    """
    Intentionally empty placeholder.

    This revision performs no schema change; per the module docstring the
    original migration file was lost and this one only keeps the revision
    chain unbroken.
    """
    return None
def _column_exists(table_name: str, column_name: str) -> bool:
    """Return True when ``table_name`` has a column named ``column_name``."""
    inspector = inspect(op.get_bind())
    for column_info in inspector.get_columns(table_name):
        if column_info['name'] == column_name:
            return True
    return False
def upgrade() -> None:
    """
    Add the nullable ``baidu_ocr_api_key`` column (String(500)) to ``settings``.

    According to the migration notes the column stores the Baidu OCR API key
    used for text recognition in editable PPTX export. Nothing is done when
    the column is already present.
    """
    if _column_exists('settings', 'baidu_ocr_api_key'):
        return
    api_key_column = sa.Column('baidu_ocr_api_key', sa.String(500), nullable=True)
    op.add_column('settings', api_key_column)
""" op.drop_column('settings', 'baidu_ocr_api_key') ================================================ FILE: backend/migrations/versions/009_split_reasoning_config.py ================================================ """split reasoning config into text and image Revision ID: 009_split_reasoning_config Revises: 008_add_baidu_ocr_api_key Create Date: 2026-01-17 00:00:00.000000 """ from alembic import op import sqlalchemy as sa from sqlalchemy import inspect # revision identifiers, used by Alembic. revision = '009_split_reasoning_config' down_revision = '008_add_baidu_ocr_api_key' branch_labels = None depends_on = None def _column_exists(table_name: str, column_name: str) -> bool: """Check if column exists""" bind = op.get_bind() inspector = inspect(bind) columns = [col['name'] for col in inspector.get_columns(table_name)] return column_name in columns def upgrade() -> None: """ Split enable_reasoning into separate text and image reasoning configs. - enable_text_reasoning: whether to enable reasoning for text generation - text_thinking_budget: thinking budget for text (1-8192) - enable_image_reasoning: whether to enable reasoning for image generation - image_thinking_budget: thinking budget for image (1-8192) Migrate existing enable_reasoning value to both new text and image flags. 
def downgrade() -> None:
    """
    Revert to the single ``enable_reasoning`` column.

    Steps:
      1. Re-create ``enable_reasoning`` (Boolean, NOT NULL, server default
         ``'0'``) when it is missing.
      2. Back-fill it as the logical OR of whichever of
         ``enable_text_reasoning`` / ``enable_image_reasoning`` exist.
      3. Drop the four split columns that exist.

    Every step checks for column existence first, so the downgrade also
    works on a table where only some of the split columns are present.
    """
    if not _column_exists('settings', 'enable_reasoning'):
        op.add_column(
            'settings',
            sa.Column('enable_reasoning', sa.Boolean(), nullable=False, server_default='0'),
        )

    # Only reference flags that are really there: the UPDATE would fail on a
    # missing column. The names are fixed literals, never user input.
    source_flags = [
        flag_name
        for flag_name in ('enable_text_reasoning', 'enable_image_reasoning')
        if _column_exists('settings', flag_name)
    ]
    if source_flags:
        op.execute(
            f"UPDATE settings SET enable_reasoning = ({' OR '.join(source_flags)})"
        )

    for column_name in (
        'enable_text_reasoning',
        'text_thinking_budget',
        'enable_image_reasoning',
        'image_thinking_budget',
    ):
        if _column_exists('settings', column_name):
            op.drop_column('settings', column_name)
def _resolve_upload_folder():
    """Return the upload directory as a string.

    Uses the ``UPLOAD_FOLDER`` environment variable when it is set; otherwise
    falls back to ``<project root>/uploads``, with the project root located
    relative to this file.
    """
    import os
    from pathlib import Path

    # this file -> versions -> migrations -> backend -> project
    project_root = Path(__file__).resolve().parent.parent.parent.parent
    return os.environ.get('UPLOAD_FOLDER', str(project_root / 'uploads'))


def _write_thumbnail(Image, source_path, target_path, max_width, image_format, **save_options):
    """Write an RGB thumbnail of ``source_path`` to ``target_path``.

    Args:
        Image: the imported ``PIL.Image`` module (imported lazily by callers).
        source_path: path of the image to read.
        target_path: ``pathlib.Path`` of the thumbnail; its parent directory
            is created when missing.
        max_width: images wider than this are scaled down to this width,
            keeping the aspect ratio.
        image_format: format name passed to ``Image.save``.
        **save_options: extra keyword arguments passed to ``Image.save``.

    The source is opened in a ``with`` block so its file handle is released
    even when resizing, conversion or saving raises.
    """
    # Image.Resampling only exists on Pillow >= 9.1; older releases expose
    # the same filter as Image.LANCZOS.
    lanczos = getattr(Image, 'Resampling', Image).LANCZOS

    with Image.open(str(source_path)) as source:
        image = source
        if image.width > max_width:
            ratio = max_width / image.width
            image = image.resize((max_width, int(image.height * ratio)), lanczos)

        if image.mode in ('RGBA', 'LA', 'P'):
            # Flatten onto a white background.
            background = Image.new('RGB', image.size, (255, 255, 255))
            if image.mode == 'P':
                image = image.convert('RGBA')
            # image is RGBA or LA here, so its last band is the alpha mask.
            background.paste(image, mask=image.split()[-1])
            image = background
        elif image.mode != 'RGB':
            image = image.convert('RGB')

        target_path.parent.mkdir(parents=True, exist_ok=True)
        # Saved inside the ``with`` block: ``image`` may still be the lazily
        # loaded source, which cannot be read after it is closed.
        image.save(str(target_path), image_format, **save_options)


def generate_user_template_thumbnails():
    """Generate thumbnails for existing user templates.

    For every ``user_templates`` row with a ``file_path`` and no
    ``thumb_path``, writes ``user-templates/<id>/template-thumb.webp``
    (max width 600, WEBP quality 80) under the upload folder and stores the
    relative path in ``thumb_path``. Rows whose source file is missing are
    skipped; a failure on one row is printed and does not stop the others.
    Does nothing except print a notice when Pillow is not installed.
    """
    from pathlib import Path

    upload_folder = _resolve_upload_folder()

    try:
        from PIL import Image
    except ImportError:
        print("PIL not available, skipping thumbnail generation")
        return

    connection = op.get_bind()
    result = connection.execute(
        sa.text("""
            SELECT id, file_path FROM user_templates
            WHERE thumb_path IS NULL AND file_path IS NOT NULL
        """)
    )
    templates = result.fetchall()
    print(f"Generating thumbnails for {len(templates)} user templates...")

    thumb_filename = "template-thumb.webp"
    for template_id, file_path in templates:
        try:
            # Stored paths may use Windows separators.
            original_path = Path(upload_folder) / file_path.replace('\\', '/')
            if not original_path.exists():
                print(f" Skipped {template_id}: file not found")
                continue

            thumb_full_path = Path(upload_folder) / "user-templates" / template_id / thumb_filename
            thumb_relative_path = f"user-templates/{template_id}/{thumb_filename}"
            _write_thumbnail(Image, original_path, thumb_full_path, 600, 'WEBP', quality=80)

            connection.execute(
                sa.text("UPDATE user_templates SET thumb_path = :path WHERE id = :id"),
                {"path": thumb_relative_path, "id": template_id}
            )
            print(f" Generated: {thumb_relative_path}")
        except Exception as e:
            # Best effort: one bad template must not abort the migration.
            print(f" Failed for template {template_id}: {e}")
            continue

    print("User template thumbnail generation complete")


def generate_page_thumbnails():
    """Generate thumbnails for existing pages (in case 010 ran without this).

    For every ``pages`` row with a ``generated_image_path`` and no
    ``cached_image_path``, the thumbnail path is
    ``<project_id>/pages/<original stem>_thumb.jpg`` under the upload folder.
    An existing thumbnail file is only recorded in the database; otherwise it
    is created (max width 1920, JPEG quality 85, optimized) and then
    recorded. Rows whose source file is missing are skipped; a failure on one
    row is printed and does not stop the others. Does nothing except print a
    notice when Pillow is not installed.
    """
    from pathlib import Path

    upload_folder = _resolve_upload_folder()

    try:
        from PIL import Image
    except ImportError:
        print("PIL not available, skipping page thumbnail generation")
        return

    connection = op.get_bind()
    result = connection.execute(
        sa.text("""
            SELECT id, project_id, generated_image_path FROM pages
            WHERE generated_image_path IS NOT NULL AND cached_image_path IS NULL
        """)
    )
    pages = result.fetchall()
    if not pages:
        print("No pages need thumbnail generation")
        return

    print(f"Generating thumbnails for {len(pages)} pages...")
    update_cached_path = sa.text("UPDATE pages SET cached_image_path = :path WHERE id = :id")

    for page_id, project_id, image_path in pages:
        try:
            # Thumbnail name is derived from the original file name's stem.
            thumb_filename = f"{Path(image_path).stem}_thumb.jpg"
            thumb_relative_path = f"{project_id}/pages/{thumb_filename}"
            thumb_full_path = Path(upload_folder) / thumb_relative_path

            if thumb_full_path.exists():
                # File already on disk: only the database column is missing.
                connection.execute(update_cached_path, {"path": thumb_relative_path, "id": page_id})
                continue

            original_path = Path(upload_folder) / image_path.replace('\\', '/')
            if not original_path.exists():
                print(f" Skipped {page_id}: file not found")
                continue

            _write_thumbnail(
                Image, original_path, thumb_full_path, 1920, 'JPEG', quality=85, optimize=True
            )
            connection.execute(update_cached_path, {"path": thumb_relative_path, "id": page_id})
            print(f" Generated: {thumb_relative_path}")
        except Exception as e:
            # Best effort: one bad page must not abort the migration.
            print(f" Failed for page {page_id}: {e}")

    print("Page thumbnail generation complete")
revision = '012' down_revision = '011_add_user_template_thumb' branch_labels = None depends_on = None def upgrade(): # Add export_allow_partial column to projects table op.add_column('projects', sa.Column('export_allow_partial', sa.Boolean(), nullable=True, server_default='0')) # 为现有行设置默认值 false,避免 NULL 状态 op.execute("UPDATE projects SET export_allow_partial = false WHERE export_allow_partial IS NULL") def downgrade(): op.drop_column('projects', 'export_allow_partial') ================================================ FILE: backend/migrations/versions/013_add_lazyllm_source_fields.py ================================================ """add lazyllm source fields to settings table Revision ID: 013 Revises: 012 Create Date: 2026-02-13 """ from alembic import op import sqlalchemy as sa # revision identifiers, used by Alembic. revision = '013' down_revision = '012' branch_labels = None depends_on = None def upgrade(): op.add_column('settings', sa.Column('text_model_source', sa.String(50), nullable=True)) op.add_column('settings', sa.Column('image_model_source', sa.String(50), nullable=True)) op.add_column('settings', sa.Column('image_caption_model_source', sa.String(50), nullable=True)) op.add_column('settings', sa.Column('lazyllm_api_keys', sa.Text(), nullable=True)) def downgrade(): op.drop_column('settings', 'lazyllm_api_keys') op.drop_column('settings', 'image_caption_model_source') op.drop_column('settings', 'image_model_source') op.drop_column('settings', 'text_model_source') ================================================ FILE: backend/migrations/versions/014_add_per_model_provider_config.py ================================================ """add per-model provider config fields to settings table Revision ID: 014 Revises: ee22f1512027 Create Date: 2026-02-16 """ from alembic import op import sqlalchemy as sa # revision identifiers, used by Alembic. 
def upgrade():
    """Add the per-model API key / base URL columns to ``settings``.

    Six nullable String(500) columns are created, one key/base-URL pair each
    for the text, image and image-caption models.
    """
    for column_name in (
        'text_api_key',
        'text_api_base_url',
        'image_api_key',
        'image_api_base_url',
        'image_caption_api_key',
        'image_caption_api_base_url',
    ):
        op.add_column('settings', sa.Column(column_name, sa.String(500), nullable=True))
def upgrade():
    """Rename ``settings.baidu_ocr_api_key`` to ``baidu_api_key`` in batch mode."""
    settings_batch = op.batch_alter_table('settings')
    with settings_batch as settings_table:
        settings_table.alter_column(
            'baidu_ocr_api_key',
            new_column_name='baidu_api_key',
        )
def upgrade() -> None:
    """Add the nullable ``description_generation_mode`` column (String(20)) to ``settings``."""
    mode_column = sa.Column(
        'description_generation_mode',
        sa.String(length=20),
        nullable=True,
    )
    op.add_column('settings', mode_column)
def upgrade() -> None:
    """Make six ``settings`` columns nullable.

    The columns are altered inside one ``batch_alter_table`` context, in the
    order listed below, each with its existing type stated explicitly.
    """
    relaxed_columns = (
        ('ai_provider_format', sa.VARCHAR(length=20)),
        ('image_resolution', sa.VARCHAR(length=20)),
        ('image_aspect_ratio', sa.VARCHAR(length=10)),
        ('max_description_workers', sa.INTEGER()),
        ('max_image_workers', sa.INTEGER()),
        ('output_language', sa.VARCHAR(length=10)),
    )
    with op.batch_alter_table('settings') as settings_table:
        for column_name, column_type in relaxed_columns:
            settings_table.alter_column(
                column_name,
                existing_type=column_type,
                nullable=True,
            )
def upgrade() -> None:
    """Add the nullable ``description_extra_fields`` Text column to ``settings``."""
    extra_fields_column = sa.Column(
        'description_extra_fields',
        sa.Text(),
        nullable=True,
    )
    op.add_column('settings', extra_fields_column)
def upgrade() -> None:
    """
    Add the nullable ``mineru_token`` column (String(500)) to ``settings``.

    Returns without changes when the column is already present.
    """
    if _column_exists('settings', 'mineru_token'):
        return
    token_column = sa.Column('mineru_token', sa.String(length=500), nullable=True)
    op.add_column('settings', token_column)
# Create the SQLAlchemy instance and configure the SQLite connection options.
# NOTE(review): pool_size / max_overflow / pool_timeout are connection-pool
# arguments; confirm the pool class used for the configured database accepts them.
db = SQLAlchemy(
    engine_options=dict(
        connect_args=dict(
            check_same_thread=False,  # allow use across threads (SQLite only)
            timeout=30,  # database lock timeout in seconds (SQLite specific)
        ),
        pool_pre_ping=True,  # check a connection before use to make sure it is valid
        pool_recycle=3600,  # recycle connections after 1 hour to release file handles
        pool_size=5,  # a SQLite pool does not need to be large (5-10 suggested)
        max_overflow=10,  # overflow connections (limited by SQLite file locking, keep small)
        pool_timeout=30,  # timeout in seconds for acquiring a connection
    )
)
class Material(db.Model):
    """
    Material model - represents a material image.

    ``project_id`` is nullable: a row without a project is a global material.
    """
    __tablename__ = 'materials'

    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # Can be null, for global materials not belonging to a project
    project_id = db.Column(db.String(36), db.ForeignKey('projects.id'), nullable=True)
    filename = db.Column(db.String(500), nullable=False)
    relative_path = db.Column(db.String(500), nullable=False)  # Path relative to the upload_folder
    url = db.Column(db.String(500), nullable=False)  # URL accessible by the frontend
    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    project = db.relationship('Project', back_populates='materials')

    def to_dict(self):
        """Convert to a JSON-serialisable dictionary (timestamps as ISO-8601 strings or None)."""
        return {
            'id': self.id,
            'project_id': self.project_id,
            'filename': self.filename,
            'url': self.url,
            'relative_path': self.relative_path,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
        }

    def __repr__(self):
        # Previously returned an empty string, which made instances invisible in logs/debuggers.
        return f'<Material {self.id}: {self.filename}>'
class Page(db.Model):
    """
    Page model - represents a single PPT page/slide.

    ``outline_content`` and ``description_content`` are stored as JSON text;
    the ``get_*`` / ``set_*`` helpers convert to and from Python objects.
    """
    __tablename__ = 'pages'

    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    project_id = db.Column(db.String(36), db.ForeignKey('projects.id'), nullable=False)
    order_index = db.Column(db.Integer, nullable=False)
    part = db.Column(db.String(200), nullable=True)  # Optional section name
    outline_content = db.Column(db.Text, nullable=True)  # JSON string
    description_content = db.Column(db.Text, nullable=True)  # JSON string
    generated_image_path = db.Column(db.String(500), nullable=True)  # Original PNG image path
    cached_image_path = db.Column(db.String(500), nullable=True)  # Compressed JPG thumbnail path
    status = db.Column(db.String(50), nullable=False, default='DRAFT')
    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    project = db.relationship('Project', back_populates='pages')
    image_versions = db.relationship('PageImageVersion', back_populates='page',
                                     lazy='dynamic', cascade='all, delete-orphan',
                                     order_by='PageImageVersion.version_number.desc()')

    @staticmethod
    def _loads_or_none(raw):
        """Parse a JSON string; return None for empty input or malformed JSON."""
        if not raw:
            return None
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return None

    @staticmethod
    def _dumps_or_none(data):
        """Serialise ``data`` to JSON (non-ASCII kept as-is); falsy data becomes None."""
        return json.dumps(data, ensure_ascii=False) if data else None

    def get_outline_content(self):
        """Parse outline_content from JSON string (None when empty or invalid)."""
        return self._loads_or_none(self.outline_content)

    def set_outline_content(self, data):
        """Set outline_content as JSON string; falsy ``data`` clears the column."""
        self.outline_content = self._dumps_or_none(data)

    def get_description_content(self):
        """Parse description_content from JSON string (None when empty or invalid)."""
        return self._loads_or_none(self.description_content)

    def set_description_content(self, data):
        """Set description_content as JSON string; falsy ``data`` clears the column."""
        self.description_content = self._dumps_or_none(data)

    def to_dict(self, include_versions=False):
        """
        Convert to dictionary.

        Args:
            include_versions: When True, add ``image_versions`` with every
                related version serialised via its own ``to_dict()``.
        """
        # Use cached image for frontend display, fallback to original if no cache
        display_image_path = self.cached_image_path or self.generated_image_path
        display_image_url = None
        if display_image_path:
            filename = Path(display_image_path).name
            display_image_url = f'/files/{self.project_id}/pages/{filename}'

        data = {
            'page_id': self.id,
            'order_index': self.order_index,
            'part': self.part,
            'outline_content': self.get_outline_content(),
            'description_content': self.get_description_content(),
            'generated_image_url': display_image_url,
            'status': self.status,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
        }

        if include_versions:
            # image_versions is a dynamic relationship, so .all() executes the query here.
            data['image_versions'] = [v.to_dict() for v in self.image_versions.all()]

        return data

    def __repr__(self):
        # Previously returned an empty string, which made instances invisible in logs/debuggers.
        return f'<Page {self.id}: {self.order_index} - {self.status}>'
class PageImageVersion(db.Model):
    """
    Page Image Version model - represents a historical version of a page's generated image
    """
    __tablename__ = 'page_image_versions'

    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    page_id = db.Column(db.String(36), db.ForeignKey('pages.id'), nullable=False, index=True)
    image_path = db.Column(db.String(500), nullable=False)
    version_number = db.Column(db.Integer, nullable=False)  # Version number, incremented starting from 1
    is_current = db.Column(db.Boolean, nullable=False, default=False)  # Whether this is the version currently in use
    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)

    # Relationships
    page = db.relationship('Page', back_populates='image_versions')

    def to_dict(self):
        """
        Convert to dictionary.

        ``image_url`` is None when the image path is empty or the owning
        page (and therefore its project id) is unavailable.
        """
        # Get project_id from page relationship
        project_id = self.page.project_id if self.page else None

        # Format created_at with UTC timezone indicator for proper frontend parsing
        created_at_str = None
        if self.created_at:
            created_at_str = self.created_at.isoformat()
            if not self.created_at.tzinfo:
                # Naive timestamps get a 'Z' suffix so the frontend reads them as UTC
                created_at_str += 'Z'

        image_url = None
        if self.image_path and project_id:
            image_url = f'/files/{project_id}/pages/{self.image_path.split("/")[-1]}'

        return {
            'version_id': self.id,
            'page_id': self.page_id,
            'image_path': self.image_path,
            'image_url': image_url,
            'version_number': self.version_number,
            'is_current': self.is_current,
            'created_at': created_at_str,
        }

    def __repr__(self):
        # Previously returned an empty string, which made instances invisible in logs/debuggers.
        return (f'<PageImageVersion {self.id}: page={self.page_id}, '
                f'version={self.version_number}, current={self.is_current}>')
class Project(db.Model):
    """
    Project model - represents a PPT project
    """
    __tablename__ = 'projects'

    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    idea_prompt = db.Column(db.Text, nullable=True)
    outline_text = db.Column(db.Text, nullable=True)  # User-supplied outline text (for the outline creation type)
    description_text = db.Column(db.Text, nullable=True)  # User-supplied description text (for the description creation type)
    extra_requirements = db.Column(db.Text, nullable=True)  # Extra requirements applied to every page's AI prompt
    outline_requirements = db.Column(db.Text, nullable=True)  # Requirements for outline generation
    description_requirements = db.Column(db.Text, nullable=True)  # Requirements for page description generation
    creation_type = db.Column(db.String(20), nullable=False, default='idea')  # idea|outline|descriptions
    template_image_path = db.Column(db.String(500), nullable=True)
    template_style = db.Column(db.Text, nullable=True)  # Style description text (mode without a template image)
    # Export settings
    export_extractor_method = db.Column(db.String(50), nullable=True, default='hybrid')  # Component extraction method: mineru, hybrid
    export_inpaint_method = db.Column(db.String(50), nullable=True, default='hybrid')  # Background image method: generative, baidu, hybrid
    export_allow_partial = db.Column(db.Boolean, nullable=True, default=False)  # Allow a partial result (continue instead of stopping when export fails)
    image_aspect_ratio = db.Column(db.String(10), nullable=False, server_default='16:9', default='16:9')
    status = db.Column(db.String(50), nullable=False, default='DRAFT')
    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    # The 'select' strategy supports eager loading while staying flexible
    pages = db.relationship('Page', back_populates='project', lazy='select',
                            cascade='all, delete-orphan', order_by='Page.order_index')
    tasks = db.relationship('Task', back_populates='project', lazy='select',
                            cascade='all, delete-orphan')
    materials = db.relationship('Material', back_populates='project', lazy='select',
                                cascade='all, delete-orphan')

    @staticmethod
    def _iso_utc(value):
        """Format a datetime as ISO-8601, appending 'Z' to naive values; None stays None."""
        if not value:
            return None
        text = value.isoformat()
        return text if value.tzinfo else text + 'Z'

    def to_dict(self, include_pages=False):
        """
        Convert to dictionary.

        Args:
            include_pages: When True, add ``pages`` with each page serialised
                via its own ``to_dict()``.
        """
        template_image_url = None
        if self.template_image_path:
            template_image_url = f'/files/{self.id}/template/{self.template_image_path.split("/")[-1]}'

        data = {
            'project_id': self.id,
            'idea_prompt': self.idea_prompt,
            'outline_text': self.outline_text,
            'description_text': self.description_text,
            'extra_requirements': self.extra_requirements,
            'outline_requirements': self.outline_requirements,
            'description_requirements': self.description_requirements,
            'creation_type': self.creation_type,
            'template_image_url': template_image_url,
            'template_style': self.template_style,
            # NULL export settings fall back to the same defaults the columns declare
            'export_extractor_method': self.export_extractor_method or 'hybrid',
            'export_inpaint_method': self.export_inpaint_method or 'hybrid',
            'export_allow_partial': self.export_allow_partial or False,
            'image_aspect_ratio': self.image_aspect_ratio,
            'status': self.status,
            # Timestamps carry a UTC indicator for proper frontend parsing
            'created_at': self._iso_utc(self.created_at),
            'updated_at': self._iso_utc(self.updated_at),
        }

        if include_pages:
            # pages is a list; ordering is already defined on the relationship
            data['pages'] = [page.to_dict() for page in self.pages]

        return data

    def __repr__(self):
        # Previously returned an empty string, which made instances invisible in logs/debuggers.
        return f'<Project {self.id}: {self.status}>'
class ReferenceFile(db.Model):
    """
    Reference File model - represents an uploaded reference file
    """
    __tablename__ = 'reference_files'

    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    project_id = db.Column(db.String(36), db.ForeignKey('projects.id'), nullable=True)  # Can be null for global files
    filename = db.Column(db.String(500), nullable=False)
    file_path = db.Column(db.String(500), nullable=False)  # Path relative to upload folder
    file_size = db.Column(db.Integer, nullable=False)  # File size in bytes
    file_type = db.Column(db.String(50), nullable=False)  # pdf, docx, pptx, etc.
    parse_status = db.Column(db.String(50), nullable=False, default='pending')  # pending|parsing|completed|failed
    markdown_content = db.Column(db.Text, nullable=True)  # Parsed markdown with enhanced image descriptions
    error_message = db.Column(db.Text, nullable=True)  # Error message if parsing failed
    mineru_batch_id = db.Column(db.String(100), nullable=True)  # Mineru service batch ID
    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    project = db.relationship('Project', backref='reference_files', foreign_keys=[project_id])

    def to_dict(self, include_content=True, include_failed_count=False):
        """
        Convert to dictionary

        Args:
            include_content: Whether to include markdown_content (can be large)
            include_failed_count: Whether to calculate failed image count (can be slow)
        """
        result = {
            'id': self.id,
            'project_id': self.project_id,
            'filename': self.filename,
            'file_size': self.file_size,
            'file_type': self.file_type,
            'parse_status': self.parse_status,
            'error_message': self.error_message,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
        }

        if include_content:
            result['markdown_content'] = self.markdown_content

        # Only count failures when explicitly requested and the file has finished parsing
        if include_failed_count and self.parse_status == 'completed':
            result['image_caption_failed_count'] = self.count_failed_image_captions()

        return result

    def count_failed_image_captions(self) -> int:
        """
        Count images in markdown that don't have alt text (failed to generate captions)

        Returns:
            Number of images without captions
        """
        if not self.markdown_content:
            return 0

        import re
        # Match markdown images: ![alt](url)
        pattern = r'!\[(.*?)\]\([^\)]+\)'
        alt_texts = re.findall(pattern, self.markdown_content)
        # An image whose alt text is empty or whitespace-only counts as failed
        return sum(1 for alt_text in alt_texts if not alt_text.strip())

    def __repr__(self):
        # Previously returned an empty string, which made instances invisible in logs/debuggers.
        return f'<ReferenceFile {self.id}: {self.filename} ({self.parse_status})>'
db.Column(db.String(500), nullable=True) # MinerU API Token(覆盖 Config.MINERU_TOKEN) image_caption_model = db.Column(db.String(100), nullable=True) # 图片识别模型(覆盖 Config.IMAGE_CAPTION_MODEL) output_language = db.Column(db.String(10), nullable=True) # 输出语言偏好(zh, en, ja, auto)(NULL=use .env) # 推理模式配置(分别控制文本和图像生成) enable_text_reasoning = db.Column(db.Boolean, nullable=False, default=False) # 文本生成是否开启推理 text_thinking_budget = db.Column(db.Integer, nullable=False, default=1024) # 文本推理思考负载 (1-8192) enable_image_reasoning = db.Column(db.Boolean, nullable=False, default=False) # 图像生成是否开启推理 image_thinking_budget = db.Column(db.Integer, nullable=False, default=1024) # 图像推理思考负载 (1-8192) # 描述生成模式: streaming / parallel (NULL=默认 streaming) description_generation_mode = db.Column(db.String(20), nullable=True) # 描述额外字段配置: JSON 数组如 ["排版布局", "视觉素材"] (NULL=默认 DEFAULT_EXTRA_FIELDS) description_extra_fields = db.Column(db.Text, nullable=True) image_prompt_extra_fields = db.Column(db.Text, nullable=True) # JSON array: 哪些额外字段传入文生图 prompt # 百度 API 配置 baidu_api_key = db.Column(db.String(500), nullable=True) # 百度 API Key # 每种模型类型的提供商配置(source 可选 gemini/openai/lazyllm厂商名,NULL=使用全局配置) text_model_source = db.Column(db.String(50), nullable=True) # 文本模型提供商 (gemini, openai, qwen, doubao, deepseek, ...) 
def get_description_extra_fields(self):
    """Return the configured description extra fields as a list.

    The stored value is expected to be a JSON array. When nothing is stored,
    the text is not valid JSON, or the JSON is not a list, a fresh copy of
    ``DEFAULT_EXTRA_FIELDS`` is returned instead.
    """
    stored = self.description_extra_fields
    if stored:
        try:
            parsed = json.loads(stored)
        except (json.JSONDecodeError, TypeError):
            parsed = None
        if isinstance(parsed, list):
            return parsed
    # Copy so callers cannot mutate the class-level default
    return list(self.DEFAULT_EXTRA_FIELDS)
api_key = self._val('api_key', d) mineru_token = self._val('mineru_token', d) baidu_api_key = self._val('baidu_api_key', d) text_api_key = self._val('text_api_key', d) image_api_key = self._val('image_api_key', d) image_caption_api_key = self._val('image_caption_api_key', d) return { 'id': self.id, 'ai_provider_format': self._val('ai_provider_format', d), 'api_base_url': self._val('api_base_url', d), 'api_key_length': len(api_key) if api_key else 0, 'image_resolution': self._val('image_resolution', d), 'image_aspect_ratio': self._val('image_aspect_ratio', d), 'max_description_workers': self._val('max_description_workers', d), 'max_image_workers': self._val('max_image_workers', d), 'text_model': self._val('text_model', d), 'image_model': self._val('image_model', d), 'mineru_api_base': self._val('mineru_api_base', d), 'mineru_token_length': len(mineru_token) if mineru_token else 0, 'image_caption_model': self._val('image_caption_model', d), 'output_language': self._val('output_language', d), 'description_generation_mode': self._val('description_generation_mode', d) or 'streaming', 'description_extra_fields': self.get_description_extra_fields(), 'image_prompt_extra_fields': self.get_image_prompt_extra_fields(), 'enable_text_reasoning': self.enable_text_reasoning, 'text_thinking_budget': self.text_thinking_budget, 'enable_image_reasoning': self.enable_image_reasoning, 'image_thinking_budget': self.image_thinking_budget, 'baidu_api_key_length': len(baidu_api_key) if baidu_api_key else 0, 'text_model_source': self._val('text_model_source', d), 'image_model_source': self._val('image_model_source', d), 'image_caption_model_source': self._val('image_caption_model_source', d), 'lazyllm_api_keys_info': self._get_lazyllm_api_keys_info(self._val('lazyllm_api_keys', d)), 'text_api_key_length': len(text_api_key) if text_api_key else 0, 'text_api_base_url': self._val('text_api_base_url', d), 'image_api_key_length': len(image_api_key) if image_api_key else 0, 'image_api_base_url': 
self._val('image_api_base_url', d), 'image_caption_api_key_length': len(image_caption_api_key) if image_caption_api_key else 0, 'image_caption_api_base_url': self._val('image_caption_api_base_url', d), 'created_at': self.created_at.isoformat() if self.created_at else None, 'updated_at': self.updated_at.isoformat() if self.updated_at else None, } def _get_lazyllm_api_keys_info(self, raw=None): """Return vendor names and key lengths (no plaintext) for frontend display.""" data = raw if raw is not None else self.lazyllm_api_keys if not data: return {} try: keys = json.loads(data) return {vendor: len(key) for vendor, key in keys.items() if key} except (json.JSONDecodeError, TypeError): return {} def get_lazyllm_api_keys_dict(self): """Parse lazyllm_api_keys JSON into a dict.""" if not self.lazyllm_api_keys: return {} try: return json.loads(self.lazyllm_api_keys) except (json.JSONDecodeError, TypeError): return {} @staticmethod def _get_config_defaults(): """Return a dict of default values from Config/env for settings fields.""" from config import Config from services.ai_providers.lazyllm_env import collect_env_lazyllm_api_keys provider = (Config.AI_PROVIDER_FORMAT or '').lower() if provider == 'openai': api_base = Config.OPENAI_API_BASE or None api_key = Config.OPENAI_API_KEY or None elif provider == 'lazyllm': api_base = None api_key = None else: api_base = Config.GOOGLE_API_BASE or None api_key = Config.GOOGLE_API_KEY or None return { 'ai_provider_format': Config.AI_PROVIDER_FORMAT, 'api_base_url': api_base, 'api_key': api_key, 'image_resolution': Config.DEFAULT_RESOLUTION, 'image_aspect_ratio': Config.DEFAULT_ASPECT_RATIO, 'max_description_workers': Config.MAX_DESCRIPTION_WORKERS, 'max_image_workers': Config.MAX_IMAGE_WORKERS, 'text_model': Config.TEXT_MODEL, 'image_model': Config.IMAGE_MODEL, 'mineru_api_base': Config.MINERU_API_BASE, 'mineru_token': Config.MINERU_TOKEN, 'image_caption_model': Config.IMAGE_CAPTION_MODEL, 'output_language': Config.OUTPUT_LANGUAGE, 
class Task(db.Model):
    """
    Task model - tracks asynchronous generation tasks
    """
    __tablename__ = 'tasks'

    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    project_id = db.Column(db.String(36), db.ForeignKey('projects.id'), nullable=False)
    task_type = db.Column(db.String(50), nullable=False)  # GENERATE_DESCRIPTIONS|GENERATE_IMAGES
    status = db.Column(db.String(50), nullable=False, default='PENDING')
    progress = db.Column(db.Text, nullable=True)  # JSON string: {"total": 10, "completed": 5, "failed": 0}
    error_message = db.Column(db.Text, nullable=True)
    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    completed_at = db.Column(db.DateTime, nullable=True)

    # Relationships
    project = db.relationship('Project', back_populates='tasks')

    def get_progress(self):
        """Parse progress from JSON string; empty or malformed data yields zeroed counters."""
        if self.progress:
            try:
                return json.loads(self.progress)
            except json.JSONDecodeError:
                pass
        # A fresh dict each call so callers may mutate the result freely
        return {"total": 0, "completed": 0, "failed": 0}

    def set_progress(self, data):
        """Set progress as JSON string; falsy ``data`` clears the column."""
        self.progress = json.dumps(data) if data else None

    def update_progress(self, completed=None, failed=None):
        """Update progress incrementally; arguments left as None keep their stored value."""
        prog = self.get_progress()
        if completed is not None:
            prog['completed'] = completed
        if failed is not None:
            prog['failed'] = failed
        self.set_progress(prog)

    def to_dict(self):
        """Convert to dictionary (progress parsed, timestamps as ISO-8601 strings or None)."""
        return {
            'task_id': self.id,
            'task_type': self.task_type,
            'status': self.status,
            'progress': self.get_progress(),
            'error_message': self.error_message,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'completed_at': self.completed_at.isoformat() if self.completed_at else None,
        }

    def __repr__(self):
        # Previously returned an empty string, which made instances invisible in logs/debuggers.
        return f'<Task {self.id}: {self.task_type} - {self.status}>'
class UserTemplate(db.Model):
    """
    User Template model - represents a user-uploaded template.
    """
    __tablename__ = 'user_templates'

    id = db.Column(db.String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    name = db.Column(db.String(200), nullable=True)  # Optional template name
    file_path = db.Column(db.String(500), nullable=False)
    thumb_path = db.Column(db.String(500), nullable=True)  # Thumbnail path for faster loading
    file_size = db.Column(db.Integer, nullable=True)  # File size in bytes
    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    def _file_url(self, stored_path):
        """
        Build the URL for a file belonging to this template.

        Only the last ``/``-separated component of ``stored_path`` is used,
        so just the file name appears in the URL.
        """
        return f'/files/user-templates/{self.id}/{stored_path.split("/")[-1]}'

    def to_dict(self):
        """
        Convert to a plain dictionary.

        ``thumb_url`` is ``None`` when no thumbnail path is stored.
        Timestamps are rendered with ``isoformat()``; unset timestamps are
        ``None``.
        """
        return {
            'template_id': self.id,
            'name': self.name,
            'template_image_url': self._file_url(self.file_path),
            # Use thumbnail for preview if available
            'thumb_url': self._file_url(self.thumb_path) if self.thumb_path else None,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
        }

    def __repr__(self):
        # Identify the template in logs and debuggers instead of returning ''.
        return f'<UserTemplate {self.id}: {self.name}>'
#!/bin/bash
# Banana Slides Backend Startup Script
#
# Prepares a local environment (.env file, virtualenv, dependencies,
# working folders) in the current directory and then starts the server.

# Print the startup banner followed by a blank line.
print_banner() {
    echo "╔══════════════════════════════════════╗"
    echo "║ 🍌 Banana Slides API Server 🍌 ║"
    echo "╚══════════════════════════════════════╝"
    echo
}

# Seed .env from the example file when it is missing.
ensure_env_file() {
    [[ -f .env ]] && return
    echo "⚠️ .env file not found. Creating from .env.example..."
    cp .env.example .env
    echo "✅ .env file created. Please edit it with your API keys."
    echo
}

# Create the virtual environment on first run.
ensure_venv() {
    [[ -d venv ]] && return
    echo "📦 Creating virtual environment..."
    python3 -m venv venv
    echo "✅ Virtual environment created."
    echo
}

print_banner
ensure_env_file
ensure_venv

# Activate the virtual environment in this shell.
echo "🔄 Activating virtual environment..."
. venv/bin/activate

# Install dependencies into the activated environment.
echo "📥 Installing dependencies..."
pip install -r requirements.txt

# Make sure the working folders exist.
mkdir -p instance uploads

echo
echo "✅ Setup complete!"
echo
echo "🚀 Starting server..."
echo

# Run the application
python app.py
exc_value.with_traceback(exc_tb) File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/pool/base.py", line 895, in __connect self.dbapi_connection = connection = pool._invoke_creator(self) ~~~~~~~~~~~~~~~~~~~~^^^^^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/create.py", line 661, in connect return dialect.connect(*cargs, **cparams) ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/default.py", line 629, in connect return self.loaded_dbapi.connect(*cargs, **cparams) # type: ignore[no-any-return] # NOQA: E501 ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ sqlite3.OperationalError: unable to open database file The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/mnt/d/Desktop/banana-slides/backend/app.py", line 80, in app = create_app() File "/mnt/d/Desktop/banana-slides/backend/app.py", line 55, in create_app db.create_all() ~~~~~~~~~~~~~^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/flask_sqlalchemy/extension.py", line 900, in create_all self._call_for_binds(bind_key, "create_all") ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/flask_sqlalchemy/extension.py", line 881, in _call_for_binds getattr(metadata, op_name)(bind=engine) ~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/sql/schema.py", line 5928, in create_all bind._run_ddl_visitor( ~~~~~~~~~~~~~~~~~~~~~^ ddl.SchemaGenerator, self, checkfirst=checkfirst, tables=tables ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ) ^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/base.py", line 3251, in _run_ddl_visitor with self.begin() as conn: ~~~~~~~~~~^^ File "/home/aa/miniconda3/lib/python3.13/contextlib.py", line 141, in __enter__ return next(self.gen) File 
"/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/base.py", line 3241, in begin with self.connect() as conn: ~~~~~~~~~~~~^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/base.py", line 3277, in connect return self._connection_cls(self) ~~~~~~~~~~~~~~~~~~~~^^^^^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/base.py", line 145, in __init__ Connection._handle_dbapi_exception_noconnection( ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ err, dialect, engine ^^^^^^^^^^^^^^^^^^^^ ) ^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/base.py", line 2440, in _handle_dbapi_exception_noconnection raise sqlalchemy_exception.with_traceback(exc_info[2]) from e File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/base.py", line 143, in __init__ self._dbapi_connection = engine.raw_connection() ~~~~~~~~~~~~~~~~~~~~~^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/base.py", line 3301, in raw_connection return self.pool.connect() ~~~~~~~~~~~~~~~~~^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/pool/base.py", line 447, in connect return _ConnectionFairy._checkout(self) ~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/pool/base.py", line 1264, in _checkout fairy = _ConnectionRecord.checkout(pool) File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/pool/base.py", line 711, in checkout rec = pool._do_get() File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/pool/impl.py", line 177, in _do_get with util.safe_reraise(): ~~~~~~~~~~~~~~~~~^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/util/langhelpers.py", line 224, in __exit__ raise exc_value.with_traceback(exc_tb) File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/pool/impl.py", line 175, in _do_get return self._create_connection() ~~~~~~~~~~~~~~~~~~~~~~~^^ File 
"/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/pool/base.py", line 388, in _create_connection return _ConnectionRecord(self) File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/pool/base.py", line 673, in __init__ self.__connect() ~~~~~~~~~~~~~~^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/pool/base.py", line 899, in __connect with util.safe_reraise(): ~~~~~~~~~~~~~~~~~^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/util/langhelpers.py", line 224, in __exit__ raise exc_value.with_traceback(exc_tb) File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/pool/base.py", line 895, in __connect self.dbapi_connection = connection = pool._invoke_creator(self) ~~~~~~~~~~~~~~~~~~~~^^^^^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/create.py", line 661, in connect return dialect.connect(*cargs, **cparams) ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ File "/home/aa/miniconda3/lib/python3.13/site-packages/sqlalchemy/engine/default.py", line 629, in connect return self.loaded_dbapi.connect(*cargs, **cparams) # type: ignore[no-any-return] # NOQA: E501 ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) unable to open database file (Background on this error at: https://sqlalche.me/e/20/e3q8) ================================================ FILE: backend/server_running.log ================================================ ╔══════════════════════════════════════╗ ║ 🍌 Banana Slides API Server 🍌 ║ ╚══════════════════════════════════════╝ Server starting on: http://localhost:5000 Environment: development Debug mode: True API Base URL: http://localhost:5000/api Database: sqlite:////mnt/d/Desktop/banana-slides/backend/instance/database.db Uploads: /mnt/d/Desktop/banana-slides/uploads * Serving Flask app 'app_simple' * Debug mode: on WARNING: This is a development server. Do not use it in a production deployment. 
Use a production WSGI server instead. * Running on all addresses (0.0.0.0) * Running on http://127.0.0.1:5000 * Running on http://172.30.207.46:5000 Press CTRL+C to quit 127.0.0.1 - - [30/Nov/2025 03:18:33] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:18:39] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:18:39] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:18:39] "GET / HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:18:39] "POST /api/projects HTTP/1.1" 201 - 127.0.0.1 - - [30/Nov/2025 03:18:39] "GET /api/projects/81632cd2-b693-49df-ba50-632a6753bf2b HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:18:39] "PUT /api/projects/81632cd2-b693-49df-ba50-632a6753bf2b HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:18:40] "POST /api/projects/81632cd2-b693-49df-ba50-632a6753bf2b/template HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:18:40] "GET /api/projects/templates HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:18:40] "GET /api/projects/81632cd2-b693-49df-ba50-632a6753bf2b HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:18:40] "POST /api/projects/81632cd2-b693-49df-ba50-632a6753bf2b/pages HTTP/1.1" 201 - 127.0.0.1 - - [30/Nov/2025 03:18:40] "GET /api/projects/81632cd2-b693-49df-ba50-632a6753bf2b/export/pptx?filename=test_presentation.pptx HTTP/1.1" 400 - 127.0.0.1 - - [30/Nov/2025 03:18:40] "GET /api/projects/81632cd2-b693-49df-ba50-632a6753bf2b/export/pdf?filename=test_presentation.pdf HTTP/1.1" 400 - 127.0.0.1 - - [30/Nov/2025 03:18:40] "DELETE /api/projects/81632cd2-b693-49df-ba50-632a6753bf2b/template HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "GET / HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "POST /api/projects HTTP/1.1" 201 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "GET /api/projects/8543aa4a-4d07-437e-b0f2-3a9dfbc275f5 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "PUT 
/api/projects/8543aa4a-4d07-437e-b0f2-3a9dfbc275f5 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "POST /api/projects/8543aa4a-4d07-437e-b0f2-3a9dfbc275f5/template HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "GET /api/projects/templates HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "GET /api/projects/8543aa4a-4d07-437e-b0f2-3a9dfbc275f5 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "POST /api/projects/8543aa4a-4d07-437e-b0f2-3a9dfbc275f5/pages HTTP/1.1" 201 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "GET /api/projects/8543aa4a-4d07-437e-b0f2-3a9dfbc275f5/export/pptx?filename=test_presentation.pptx HTTP/1.1" 400 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "GET /api/projects/8543aa4a-4d07-437e-b0f2-3a9dfbc275f5/export/pdf?filename=test_presentation.pdf HTTP/1.1" 400 - 127.0.0.1 - - [30/Nov/2025 03:19:16] "DELETE /api/projects/8543aa4a-4d07-437e-b0f2-3a9dfbc275f5/template HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:19:23] "GET /api/projects/8543aa4a-4d07-437e-b0f2-3a9dfbc275f5 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:22:38] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:22:43] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:22:43] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:22:43] "POST /api/projects HTTP/1.1" 201 - 127.0.0.1 - - [30/Nov/2025 03:22:43] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:22:44] "POST /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/template HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:22:51] "POST /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/generate/outline HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:22:51] "PUT /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/pages/fbcfcd9c-7b83-4d58-9710-dba90de88df9/outline HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:22:51] "POST /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/generate/descriptions HTTP/1.1" 202 - 127.0.0.1 - - [30/Nov/2025 03:22:51] "GET 
/api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:22:56] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:01] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:06] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:11] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:15] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:20] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:25] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:30] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:35] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:40] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:45] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:48] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:53] "GET 
/api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:23:58] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:03] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:08] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:13] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:17] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:22] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:27] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:32] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:37] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:39] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:39] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:39] "POST /api/projects HTTP/1.1" 201 - 127.0.0.1 - - [30/Nov/2025 03:24:39] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:39] "POST /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/template HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:42] "GET 
/api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:45] "POST /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/generate/outline HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:45] "PUT /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/pages/86cc3e3e-7e87-4414-a4e0-24b566fa400d/outline HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:45] "POST /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/generate/descriptions HTTP/1.1" 202 - 127.0.0.1 - - [30/Nov/2025 03:24:45] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:45] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:50] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:50] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:55] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:24:55] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:00] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:00] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:00] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:00] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:00] "POST /api/projects HTTP/1.1" 201 - 127.0.0.1 - - [30/Nov/2025 03:25:00] "GET 
/api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:01] "POST /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/template HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:05] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:05] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:06] "POST /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/generate/outline HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:06] "PUT /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/pages/b04d9ed5-8180-45d2-be33-7dd7996725e8/outline HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:06] "POST /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/generate/descriptions HTTP/1.1" 202 - 127.0.0.1 - - [30/Nov/2025 03:25:06] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:10] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:10] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:12] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:15] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:15] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:17] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 
03:25:19] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:19] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:20] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:24] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:24] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:25] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:29] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:29] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:30] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:34] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:34] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:35] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:39] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:39] "GET 
/api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:40] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:44] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:44] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:45] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:47] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:48] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:49] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:52] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:53] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:54] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:57] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:58] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:25:59] "GET 
/api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:02] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:03] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:04] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:07] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:08] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:09] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:11] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:11] "GET /health HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:11] "POST /api/projects HTTP/1.1" 201 - 127.0.0.1 - - [30/Nov/2025 03:26:11] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:11] "POST /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/template HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:12] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:13] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:14] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:18] "GET 
/api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:18] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:17] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:18] "POST /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/generate/outline HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:19] "PUT /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/pages/22d1ee12-fe87-4664-a306-173e0ba024c2/outline HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:19] "POST /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/generate/descriptions HTTP/1.1" 202 - 127.0.0.1 - - [30/Nov/2025 03:26:19] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:21] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:21] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:22] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:24] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:26] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:26] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:27] "GET 
/api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:29] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:31] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:31] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:32] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:34] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:36] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:37] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:37] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:39] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:41] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:42] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:42] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:44] "GET 
/api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:46] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:47] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:47] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:49] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:50] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:50] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:51] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:52] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:55] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:55] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:56] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:26:57] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:00] "GET 
/api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:00] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:01] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:02] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:05] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:05] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:06] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:07] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:10] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:10] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:11] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:12] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:15] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:15] "GET 
/api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:16] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:17] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:20] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:19] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:20] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:21] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:23] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:24] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:25] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:26] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:28] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:29] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:30] "GET 
/api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:31] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:33] "GET /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48/tasks/2c565b27-3e6b-4a28-bcba-91a97c2c68ad HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:34] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:35] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:36] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:38] "GET /files/084c9edb-7c21-4330-aa67-840371f4cf48/template/template.png HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:38] "PUT /api/projects/084c9edb-7c21-4330-aa67-840371f4cf48 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:39] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:40] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:41] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:44] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:45] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:46] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 
HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:49] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:50] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:49] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:53] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:53] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:54] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:58] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:58] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:27:59] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:03] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:03] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:04] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:08] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 
127.0.0.1 - - [30/Nov/2025 03:28:08] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:09] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:13] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:13] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:15] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:18] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:19] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:20] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:22] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:22] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:23] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:27] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:27] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - 
[30/Nov/2025 03:28:28] "GET /api/projects/b01900e7-3c3a-4fd9-a801-b4c7698cb272/tasks/07e6aa60-a3e9-495d-b4c5-2b4e58e41b28 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:32] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:32] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:37] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:37] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:42] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:42] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:47] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:47] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:52] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:51] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:55] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:28:56] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:00] 
"GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:01] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:05] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:06] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:10] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:11] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:15] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:16] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:20] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:21] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:24] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:25] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:29] "GET /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/tasks/484d73a0-115b-471c-8593-83a57f9ebbe9 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:30] "GET 
/api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:34] "GET /files/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d/template/template.png HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:34] "PUT /api/projects/ca4be5b5-f9cf-42ab-a345-5d1c98e2a86d HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:35] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:40] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:45] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:50] "GET /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966/tasks/c85e5411-e6ff-4571-9e07-89070d676af7 HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:53] "GET /files/1952981f-27fe-4c86-870b-a3d69e6c8966/template/template.png HTTP/1.1" 200 - 127.0.0.1 - - [30/Nov/2025 03:29:53] "PUT /api/projects/1952981f-27fe-4c86-870b-a3d69e6c8966 HTTP/1.1" 200 - ================================================ FILE: backend/services/__init__.py ================================================ """Services package""" from .ai_service import AIService, ProjectContext from .file_service import FileService from .export_service import ExportService __all__ = ['AIService', 'ProjectContext', 'FileService', 'ExportService'] ================================================ FILE: backend/services/ai_providers/__init__.py ================================================ """ AI Providers factory module Provides factory functions to get the appropriate text/image generation providers based on environment configuration. Configuration priority (highest → lowest): 1. Database settings (Flask app.config, persisted via Settings page) 2. Environment variables (.env file) 3. 
logger = logging.getLogger(__name__)

# LazyLLM vendor names (used to distinguish from gemini/openai formats)
LAZYLLM_VENDORS = {'qwen', 'doubao', 'deepseek', 'glm', 'siliconflow', 'sensenova', 'minimax', 'kimi'}


def _is_blank(value: Any) -> bool:
    """Return True when *value* is ``None`` or renders as an empty/whitespace-only string."""
    return value is None or not str(value).strip()


def get_provider_format() -> str:
    """
    Get the configured AI provider format.

    Priority:
        1. Flask app.config['AI_PROVIDER_FORMAT'] (from database settings)
        2. Environment variable AI_PROVIDER_FORMAT
        3. Default: 'gemini'

    Blank values (empty or whitespace-only) at any level are treated as
    "not configured" so that the next level is consulted.

    Returns:
        The lower-cased, whitespace-trimmed format string: "gemini", "openai",
        "vertex", "lazyllm", or a lazyllm vendor name (e.g., "doubao", "qwen",
        "deepseek"). Unrecognised values are returned as-is; callers decide
        how to handle them.
    """
    # Try to get from Flask app config first (database settings)
    try:
        from flask import current_app
        if current_app and hasattr(current_app, 'config'):
            config_value = current_app.config.get('AI_PROVIDER_FORMAT')
            if config_value and not _is_blank(config_value):
                return str(config_value).strip().lower()
    except RuntimeError:
        # Not in Flask application context
        pass

    # Fallback to environment variable; an empty variable means "use the default"
    env_value = os.getenv('AI_PROVIDER_FORMAT')
    if _is_blank(env_value):
        return 'gemini'
    return env_value.strip().lower()


def _resolve_setting(key: str, fallback: Optional[str] = None) -> Optional[str]:
    """Look up a configuration value using the standard priority chain.

    Resolution order:
        1. Flask ``app.config`` (populated from the database Settings page)
        2. OS environment variable
        3. *fallback* argument (may be ``None``)

    A value that is ``None`` or blank (empty / whitespace-only) does not count
    as configured, so a cleared settings field or an empty ``KEY=`` line in the
    environment no longer hides the lower-priority sources. This matches how
    ``get_provider_format`` already treats falsy config values.

    Args:
        key: Name of the setting, used for both ``app.config`` and the environment.
        fallback: Value returned when neither source provides a non-blank value.

    Returns:
        The resolved value as a string (non-string ``app.config`` values are
        converted with ``str``), or *fallback*.
    """
    # 1) Try Flask app.config
    try:
        from flask import current_app
        if current_app and hasattr(current_app, 'config') and key in current_app.config:
            val = current_app.config[key]
            if not _is_blank(val):
                logger.debug("Setting %s resolved from app.config", key)
                return str(val)
    except RuntimeError:
        pass  # outside Flask request context

    # 2) Try environment
    env_val = os.getenv(key)
    if not _is_blank(env_val):
        logger.debug("Setting %s resolved from environment", key)
        return env_val

    # 3) Fallback
    if fallback is not None:
        logger.debug("Setting %s using fallback: %s", key, fallback)
    return fallback


def _resolve_vertex_settings(trigger: str) -> Dict[str, Any]:
    """Resolve the Vertex AI project/location pair shared by global and per-model config.

    Args:
        trigger: Human-readable ``SETTING=value`` text naming what selected
            Vertex AI; it is embedded in the error message.

    Raises:
        ValueError: if ``VERTEX_PROJECT_ID`` cannot be resolved.
    """
    project_id = _resolve_setting('VERTEX_PROJECT_ID')
    location = _resolve_setting('VERTEX_LOCATION', 'us-central1')
    if not project_id:
        raise ValueError(
            f"VERTEX_PROJECT_ID must be set when {trigger}. "
            "Make sure GOOGLE_APPLICATION_CREDENTIALS points to a valid service-account JSON."
        )
    return {'project_id': project_id, 'location': location}


def _build_provider_config() -> Dict[str, Any]:
    """Assemble the global provider-specific configuration dict.

    Returns a dict always containing ``'format'`` plus format-specific keys:
        - gemini / openai → ``api_key``, ``api_base``
        - vertex          → ``project_id``, ``location``
        - lazyllm         → ``text_source``, ``image_source``

    An unrecognised format is logged and handled as ``gemini``.

    Raises:
        ValueError: when a setting required by the selected format is missing.
    """
    fmt = get_provider_format()
    cfg: Dict[str, Any] = {'format': fmt}

    if fmt == 'openai':
        # GOOGLE_API_KEY is accepted as a stand-in when no OpenAI key is configured.
        cfg['api_key'] = _resolve_setting('OPENAI_API_KEY') or _resolve_setting('GOOGLE_API_KEY')
        cfg['api_base'] = _resolve_setting('OPENAI_API_BASE', 'https://aihubmix.com/v1')
        if not cfg['api_key']:
            raise ValueError(
                "OPENAI_API_KEY or GOOGLE_API_KEY (from database settings or environment) "
                "is required when AI_PROVIDER_FORMAT=openai."
            )
        logger.info("Provider config — format: openai, api_base: %s", cfg['api_base'])

    elif fmt == 'vertex':
        cfg.update(_resolve_vertex_settings("AI_PROVIDER_FORMAT=vertex"))
        logger.info("Provider config — format: vertex, project: %s, location: %s",
                    cfg['project_id'], cfg['location'])

    elif fmt in LAZYLLM_VENDORS or fmt == 'lazyllm':
        # fmt is a specific vendor (e.g., 'doubao') or generic 'lazyllm' (legacy)
        vendor = fmt if fmt in LAZYLLM_VENDORS else None
        cfg['format'] = 'lazyllm'
        cfg['text_source'] = _resolve_setting('TEXT_MODEL_SOURCE') or vendor or 'deepseek'
        cfg['image_source'] = _resolve_setting('IMAGE_MODEL_SOURCE') or vendor or 'doubao'
        logger.info("Provider config — format: lazyllm, vendor: %s, text_source: %s, image_source: %s",
                    vendor, cfg['text_source'], cfg['image_source'])

    else:
        # gemini (default) or unknown format
        if fmt != 'gemini':
            logger.warning("Unknown provider format '%s', falling back to gemini", fmt)
        cfg['format'] = 'gemini'
        cfg['api_key'] = _resolve_setting('GOOGLE_API_KEY')
        cfg['api_base'] = _resolve_setting('GOOGLE_API_BASE')
        if not cfg['api_key']:
            raise ValueError("GOOGLE_API_KEY (from database settings or environment) is required")
        # The key itself is never logged, only a mask.
        logger.info("Provider config — format: gemini, api_base: %s, api_key: %s",
                    cfg['api_base'], '***' if cfg['api_key'] else 'None')

    return cfg


def _get_model_type_provider_config(model_type: str) -> Dict[str, Any]:
    """
    Get provider config for a specific model type, with fallback to global config.

    Each model type (text, image, image_caption) can independently choose its
    provider via {MODEL_TYPE}_MODEL_SOURCE. The source (case-insensitive) can be:
        - 'gemini': uses {MODEL_TYPE}_API_KEY + {MODEL_TYPE}_API_BASE, fallback to global
        - 'openai': uses {MODEL_TYPE}_API_KEY + {MODEL_TYPE}_API_BASE, fallback to global
        - 'vertex': uses VERTEX_PROJECT_ID + VERTEX_LOCATION
        - Any other value: treated as a LazyLLM vendor name (qwen, doubao, etc.)
        - None/empty: falls back to global _build_provider_config()

    Args:
        model_type: "text", "image", or "image_caption"

    Returns:
        Dict with provider config. gemini/openai/vertex results have the same
        keys as _build_provider_config; a per-model LazyLLM result carries the
        vendor under ``'source'``.

    Raises:
        ValueError: when the selected provider's required settings are missing.
    """
    prefix = model_type.upper()  # TEXT, IMAGE, IMAGE_CAPTION
    source_key = f'{prefix}_MODEL_SOURCE'
    source = _resolve_setting(source_key)

    if not source:
        # No per-model override, use global config
        return _build_provider_config()

    source_lower = source.strip().lower()

    if source_lower == 'gemini':
        api_key = _resolve_setting(f'{prefix}_API_KEY') or _resolve_setting('GOOGLE_API_KEY')
        api_base = _resolve_setting(f'{prefix}_API_BASE') or _resolve_setting('GOOGLE_API_BASE')
        if not api_key:
            raise ValueError(
                f"API key is required for {model_type} model with Gemini provider. "
                f"Set {prefix}_API_KEY or GOOGLE_API_KEY."
            )
        logger.info("Per-model config — %s: gemini, api_base: %s", model_type, api_base)
        return {'format': 'gemini', 'api_key': api_key, 'api_base': api_base}

    if source_lower == 'openai':
        api_key = (_resolve_setting(f'{prefix}_API_KEY')
                   or _resolve_setting('OPENAI_API_KEY')
                   or _resolve_setting('GOOGLE_API_KEY'))
        api_base = (_resolve_setting(f'{prefix}_API_BASE')
                    or _resolve_setting('OPENAI_API_BASE', 'https://aihubmix.com/v1'))
        if not api_key:
            raise ValueError(
                f"API key is required for {model_type} model with OpenAI provider. "
                f"Set {prefix}_API_KEY or OPENAI_API_KEY."
            )
        logger.info("Per-model config — %s: openai, api_base: %s", model_type, api_base)
        return {'format': 'openai', 'api_key': api_key, 'api_base': api_base}

    if source_lower == 'vertex':
        # The provider factories all have a 'vertex' branch; without this case the
        # value would be handed to LazyLLM as if it were a vendor name.
        vertex = _resolve_vertex_settings(f"{source_key}=vertex")
        logger.info("Per-model config — %s: vertex, project: %s, location: %s",
                    model_type, vertex['project_id'], vertex['location'])
        return {'format': 'vertex', **vertex}

    # Anything else is assumed to be a LazyLLM vendor name
    if source_lower not in LAZYLLM_VENDORS:
        logger.warning("Per-model config — %s: '%s' is not a known LazyLLM vendor; passing it through",
                       model_type, source_lower)
    logger.info("Per-model config — %s: lazyllm, source: %s", model_type, source_lower)
    return {'format': 'lazyllm', 'source': source_lower}


def get_image_caption_provider_config() -> Dict[str, Any]:
    """Get provider config specifically for image caption model."""
    return _get_model_type_provider_config('image_caption')


def get_caption_provider(model: str = "gemini-3-flash-preview") -> "TextProvider":
    """Factory: return a TextProvider for image caption (multimodal) tasks.

    The provider is chosen from the ``image_caption`` per-model configuration,
    falling back to the global provider configuration.

    Args:
        model: Model name forwarded to the provider.

    Raises:
        ValueError: propagated from config resolution when required settings are missing.
    """
    config = _get_model_type_provider_config('image_caption')
    fmt = config['format']

    if fmt == 'openai':
        logger.info("Caption provider: OpenAI, model=%s", model)
        return OpenAITextProvider(api_key=config['api_key'], api_base=config['api_base'], model=model)
    elif fmt == 'vertex':
        logger.info("Caption provider: Vertex AI, model=%s", model)
        return GenAITextProvider(
            model=model,
            vertexai=True,
            project_id=config['project_id'],
            location=config['location'],
        )
    elif fmt == 'lazyllm':
        # 'source' comes from a per-model override, 'text_source' from the global config.
        source = config.get('source') or config.get('text_source', 'doubao')
        logger.info("Caption provider: LazyLLM, model=%s, source=%s", model, source)
        return LazyLLMTextProvider(source=source, model=model)
    else:
        logger.info("Caption provider: Gemini, model=%s", model)
        return GenAITextProvider(api_key=config['api_key'], api_base=config['api_base'], model=model)


def get_text_provider(model: str = "gemini-3-flash-preview") -> "TextProvider":
    """Factory: return the appropriate text-generation provider.

    The provider is chosen from the ``text`` per-model configuration, falling
    back to the global provider configuration.

    Args:
        model: Model name forwarded to the provider.

    Raises:
        ValueError: propagated from config resolution when required settings are missing.
    """
    config = _get_model_type_provider_config('text')
    fmt = config['format']

    if fmt == 'openai':
        logger.info("Text provider: OpenAI, model=%s", model)
        return OpenAITextProvider(api_key=config['api_key'], api_base=config['api_base'], model=model)
    elif fmt == 'vertex':
        logger.info("Text provider: Vertex AI, model=%s, project=%s", model, config['project_id'])
        return GenAITextProvider(
            model=model,
            vertexai=True,
            project_id=config['project_id'],
            location=config['location'],
        )
    elif fmt == 'lazyllm':
        # 'source' comes from a per-model override, 'text_source' from the global config.
        source = config.get('source') or config.get('text_source', 'deepseek')
        logger.info("Text provider: LazyLLM, model=%s, source=%s", model, source)
        return LazyLLMTextProvider(source=source, model=model)
    else:
        # gemini (default)
        logger.info("Text provider: Gemini, model=%s", model)
        return GenAITextProvider(api_key=config['api_key'], api_base=config['api_base'], model=model)


def get_image_provider(model: str = "gemini-3-pro-image-preview") -> "ImageProvider":
    """Factory: return the appropriate image-generation provider.

    The provider is chosen from the ``image`` per-model configuration, falling
    back to the global provider configuration.

    Note:
        OpenAI format does NOT support 4K resolution — only 1K is available.
        Use Gemini or Vertex AI for higher resolution output.

    Args:
        model: Model name forwarded to the provider.

    Raises:
        ValueError: propagated from config resolution when required settings are missing.
    """
    config = _get_model_type_provider_config('image')
    fmt = config['format']

    if fmt == 'openai':
        logger.info("Image provider: OpenAI, model=%s", model)
        logger.warning("OpenAI format only supports 1K resolution, 4K is not available")
        return OpenAIImageProvider(api_key=config['api_key'], api_base=config['api_base'], model=model)
    elif fmt == 'vertex':
        logger.info("Image provider: Vertex AI, model=%s, project=%s", model, config['project_id'])
        return GenAIImageProvider(
            model=model,
            vertexai=True,
            project_id=config['project_id'],
            location=config['location'],
        )
    elif fmt == 'lazyllm':
        # 'source' comes from a per-model override, 'image_source' from the global config.
        source = config.get('source') or config.get('image_source', 'doubao')
        logger.info("Image provider: LazyLLM, model=%s, source=%s", model, source)
        return LazyLLMImageProvider(source=source, model=model)
    else:
        # gemini (default)
        logger.info("Image provider: Gemini, model=%s", model)
        return GenAIImageProvider(api_key=config['api_key'], api_base=config['api_base'], model=model)
def make_genai_client(
    *,
    vertexai: bool,
    api_key: str = None,
    api_base: str = None,
    project_id: str = None,
    location: str = None,
) -> genai.Client:
    """Create a ``genai.Client`` for Google AI Studio (API key) or Vertex AI.

    Args:
        vertexai: True selects Vertex AI mode; False selects API-key mode.
        api_key: API key, used only in API-key mode.
        api_base: Optional base URL override, used only in API-key mode.
        project_id: GCP project, used only in Vertex AI mode.
        location: GCP region for Vertex AI; "us-central1" when empty.

    Returns:
        A client whose HTTP timeout is ``GENAI_TIMEOUT`` (seconds) converted to ms.
    """
    request_timeout_ms = int(get_config().GENAI_TIMEOUT * 1000)

    if not vertexai:
        # API-key mode: the base URL override travels inside the HTTP options.
        http_options = types.HttpOptions(timeout=request_timeout_ms, base_url=api_base)
        return genai.Client(http_options=http_options, api_key=api_key)

    logger.info("Creating GenAI client (Vertex AI) — project=%s, location=%s", project_id, location)
    region = location or "us-central1"
    return genai.Client(
        vertexai=True,
        project=project_id,
        location=region,
        http_options=types.HttpOptions(timeout=request_timeout_ms),
    )
在图片中指定位置框定一个或多个规则矩形,去掉不需要的遮挡物,并用背景内容填充。 特点: - 支持多个矩形区域同时修复 - 使用背景内容智能填充 - 快速响应,适合批量处理 """ def __init__(self, api_key: str): """ 初始化百度图像修复 Provider Args: api_key: 百度API Key(BCEv3格式:bce-v3/ALTAK-...)或Access Token """ self.api_key = api_key self.api_url = "https://aip.baidubce.com/rest/2.0/image-process/v1/inpainting" if api_key.startswith('bce-v3/'): logger.info("✅ 初始化百度图像修复 Provider (使用BCEv3 API Key)") else: logger.info("✅ 初始化百度图像修复 Provider (使用Access Token)") @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=0.5, min=1, max=5), retry=retry_if_exception_type((requests.exceptions.RequestException, Exception)), reraise=True ) def inpaint( self, image: Image.Image, rectangles: List[Dict[str, int]] ) -> Optional[Image.Image]: """ 修复图片中指定的矩形区域 Args: image: PIL Image对象 rectangles: 矩形区域列表,每个矩形包含: - left: 左上角x坐标 - top: 左上角y坐标 - width: 宽度 - height: 高度 Returns: 修复后的PIL Image对象,失败返回None """ if not rectangles: logger.warning("没有提供矩形区域,返回原图") return image.copy() logger.info(f"🔧 开始百度图像修复,共 {len(rectangles)} 个区域") try: # 转换图片为RGB模式 if image.mode != 'RGB': image = image.convert('RGB') original_width, original_height = image.size logger.info(f"📏 图片尺寸: {original_width}x{original_height}") # 检查并调整图片大小(最长边不超过5000px) max_size = 5000 scale = 1.0 if original_width > max_size or original_height > max_size: scale = min(max_size / original_width, max_size / original_height) new_size = (int(original_width * scale), int(original_height * scale)) image = image.resize(new_size, Image.Resampling.LANCZOS) logger.info(f"✂️ 压缩图片: {image.size}") # 同时缩放矩形区域 rectangles = [ { 'left': int(r['left'] * scale), 'top': int(r['top'] * scale), 'width': int(r['width'] * scale), 'height': int(r['height'] * scale) } for r in rectangles ] # 过滤掉无效的矩形(宽或高为0) valid_rectangles = [ r for r in rectangles if r['width'] > 0 and r['height'] > 0 ] if not valid_rectangles: logger.warning("过滤后没有有效的矩形区域,返回原图") return image.copy() # 转为base64 buffer = io.BytesIO() image.save(buffer, format='JPEG', quality=95) 
image_bytes = buffer.getvalue() image_base64 = base64.b64encode(image_bytes).decode('utf-8') logger.info(f"📦 图片编码完成: {len(image_base64)} bytes, {len(valid_rectangles)} 个矩形区域") # 构建请求头 headers = { 'Content-Type': 'application/json', 'Accept': 'application/json', } # 选择认证方式 if self.api_key.startswith('bce-v3/'): headers['Authorization'] = f'Bearer {self.api_key}' url = self.api_url logger.info("🔐 使用BCEv3签名认证") else: url = f"{self.api_url}?access_token={self.api_key}" logger.info("🔐 使用Access Token认证") # 构建请求体 request_body = { 'image': image_base64, 'rectangle': valid_rectangles } logger.info("🌐 发送请求到百度图像修复API...") response = requests.post( url, headers=headers, json=request_body, timeout=60 ) response.raise_for_status() result = response.json() # 检查错误 - 抛出异常以触发 @retry 装饰器 if 'error_code' in result: error_msg = result.get('error_msg', 'Unknown error') error_code = result.get('error_code') logger.error(f"❌ 百度API错误: [{error_code}] {error_msg}") raise Exception(f"Baidu API error [{error_code}]: {error_msg}") # 解析结果 result_image_base64 = result.get('image') if not result_image_base64: logger.error("❌ 百度API返回结果中没有图片") return None # 解码返回的图片 result_image_bytes = base64.b64decode(result_image_base64) result_image = Image.open(io.BytesIO(result_image_bytes)) # 如果之前缩放过,恢复到原始尺寸 if scale < 1.0: result_image = result_image.resize( (original_width, original_height), Image.Resampling.LANCZOS ) logger.info(f"📐 恢复图片尺寸: {result_image.size}") logger.info(f"✅ 百度图像修复完成!") return result_image except Exception as e: logger.error(f"❌ 百度图像修复失败: {str(e)}") raise def inpaint_bboxes( self, image: Image.Image, bboxes: List[Tuple[float, float, float, float]], expand_pixels: int = 2 ) -> Optional[Image.Image]: """ 使用bbox格式修复图片 Args: image: PIL Image对象 bboxes: bbox列表,每个bbox格式为 (x0, y0, x1, y1) expand_pixels: 扩展像素数,默认2 Returns: 修复后的PIL Image对象 """ # 将bbox转换为rectangle格式 rectangles = [] for bbox in bboxes: x0, y0, x1, y1 = bbox # 扩展区域 x0 = max(1, x0 - expand_pixels) y0 = max(1, y0 - expand_pixels) x1 = 
def create_baidu_inpainting_provider(
    api_key: Optional[str] = None
) -> Optional[BaiduInpaintingProvider]:
    """
    Create a BaiduInpaintingProvider, resolving the API key if none is given.

    Lookup order when ``api_key`` is empty: Flask ``current_app.config``,
    then ``Config.BAIDU_API_KEY``, then the ``BAIDU_API_KEY`` environment
    variable.

    Args:
        api_key: Baidu API key; resolved from configuration when omitted.

    Returns:
        A provider instance, or None when no key could be found.
    """
    import os
    from config import Config

    resolved_key = api_key

    if not resolved_key:
        try:
            from flask import current_app
            resolved_key = current_app.config.get('BAIDU_API_KEY')
        except RuntimeError:
            # Raised when there is no active Flask application context.
            pass

    # Fall through the remaining sources; each is consulted only if needed.
    resolved_key = resolved_key or Config.BAIDU_API_KEY or os.getenv('BAIDU_API_KEY')

    if resolved_key:
        return BaiduInpaintingProvider(resolved_key)

    logger.warning("⚠️ 未配置百度API Key (BAIDU_API_KEY), 跳过百度图像修复")
    return None
None if failed """ pass ================================================ FILE: backend/services/ai_providers/image/gemini_inpainting_provider.py ================================================ """ Gemini Inpainting 消除服务提供者 使用 Gemini 2.5 Flash Image Preview 模型进行基于 mask 的图像编辑 """ import logging from typing import Optional from PIL import Image, ImageDraw import numpy as np from tenacity import retry, stop_after_attempt, wait_exponential from .genai_provider import GenAIImageProvider from config import get_config logger = logging.getLogger(__name__) class GeminiInpaintingProvider: """Gemini Inpainting 消除服务(使用 Gemini 2.5 Flash)""" # DEFAULT_MODEL = "gemini-2.5-flash-image" DEFAULT_MODEL = "gemini-3-pro-image-preview" DEFAULT_PROMPT = """\ 你是一个专业的图片前景元素去除专家,以极高的精度进行前景元素的去除工作。 现在用户向你提供了两张不同的图片: 1. 原始图片 2. 使用黑色矩形遮罩标注后的图片,黑色矩形区域表示要移除的前景元素,你只需要处理这些区域。 你需要根据原始图片和黑色遮罩信息,重新绘制黑色遮罩标注的区域,去除前景元素,使得这些区域无缝融入周围的画面,就好像前景元素从来没有出现过。如果一个区域被整体标注,请你将其作为一个整体进行移除,而不是只移除其内部的内容。 禁止遗漏任何一个黑色矩形标注的区域。 """ def __init__( self, api_key: str, api_base: str = None, model: str = None, timeout: int = 60 ): """ 初始化 Gemini Inpainting 提供者 Args: api_key: Google API key api_base: API base URL (for proxies like aihubmix) model: Model name to use (default: gemini-2.5-flash-image) timeout: API 请求超时时间(秒) """ self.model = model or self.DEFAULT_MODEL self.timeout = timeout # 复用 GenAIImageProvider 的底层实现 self.genai_provider = GenAIImageProvider( api_key=api_key, api_base=api_base, model=self.model ) logger.info(f"✅ Gemini Inpainting Provider 初始化 (model={self.model})") @staticmethod def create_marked_image(original_image: Image.Image, mask_image: Image.Image) -> Image.Image: """ 在原图上用纯黑色框标注需要修复的区域 Args: original_image: 原始图像 mask_image: 掩码图像(白色=需要移除的区域) Returns: 标注后的图像(原图 + 纯黑色矩形覆盖) """ # 确保 mask 和原图尺寸一致 if mask_image.size != original_image.size: mask_image = mask_image.resize(original_image.size, Image.LANCZOS) # 转换为 RGB 模式 if original_image.mode != 'RGB': original_image = original_image.convert('RGB') if 
mask_image.mode != 'RGB': mask_image = mask_image.convert('RGB') # 创建一个副本用于标注 marked_image = original_image.copy() # 将 mask 转换为 numpy array 以便处理 mask_array = np.array(mask_image) marked_array = np.array(marked_image) # 找到白色区域(需要标注的区域) # 白色像素的 RGB 值都接近 255 white_threshold = 200 mask_regions = np.all(mask_array > white_threshold, axis=2) # 用纯黑色 (0, 0, 0) 完全覆盖标注区域 black_overlay = np.array([0, 0, 0], dtype=np.uint8) marked_array[mask_regions] = black_overlay # 转回 PIL Image marked_image = Image.fromarray(marked_array) logger.debug(f"✅ 已创建标注图像,用纯黑色覆盖了 {np.sum(mask_regions)} 个像素") return marked_image @retry( stop=stop_after_attempt(3), # 最多重试3次 wait=wait_exponential(multiplier=1, min=2, max=10), # 指数避让: 2s, 4s, 8s reraise=True ) def inpaint_image( self, original_image: Image.Image, mask_image: Image.Image, inpaint_mode: str = "remove", custom_prompt: Optional[str] = None, full_page_image: Optional[Image.Image] = None, crop_box: Optional[tuple] = None ) -> Optional[Image.Image]: """ 使用 Gemini 和掩码进行图像编辑 Args: original_image: 原始图像 mask_image: 掩码图像(白色=消除,黑色=保留) inpaint_mode: 修复模式(未使用,保留兼容性) custom_prompt: 自定义 prompt(如果为 None 则使用默认) full_page_image: 完整的 PPT 页面图像(16:9),如果提供则直接使用 crop_box: 裁剪框 (x0, y0, x1, y1),指定从完整页面结果中裁剪的区域 Returns: 处理后的图像,失败返回 None """ try: logger.info("🚀 开始调用 Gemini inpainting(标注模式)") working_image = full_page_image # 1. 扩展 mask 到完整页面大小 result_crop_box = crop_box # 保存传入的 crop_box # 直接使用完整页面图像 final_image = working_image # 扩展 mask 到完整页面大小 # 创建与完整页面同样大小的黑色 mask full_mask = Image.new('RGB', final_image.size, (0, 0, 0)) # 将原 mask 粘贴到正确的位置 x0, y0, x1, y1 = crop_box # 确保 mask 尺寸匹配 mask_resized = mask_image.resize((x1 - x0, y1 - y0), Image.LANCZOS) full_mask.paste(mask_resized, (x0, y0)) final_mask = full_mask logger.info(f"📷 完整页面模式: 页面={final_image.size}, mask扩展到={final_mask.size}, 粘贴位置={crop_box}") # 2. 
创建标注图像(在原图上用纯黑色框标注需要修复的区域) logger.info("🎨 创建标注图像(纯黑色框标注需要移除的区域)...") marked_image = self.create_marked_image(final_image, final_mask) logger.info(f"✅ 标注图像创建完成: {marked_image.size}") # 3. 构建 prompt prompt = custom_prompt or self.DEFAULT_PROMPT logger.info(f"📝 Prompt: {prompt[:100]}...") # 4. 调用 GenAI Provider 生成图像(只传标注后的图像,不传 mask) logger.info("🌐 调用 GenAI Provider 进行 inpainting(仅传标注图)...") result_image = self.genai_provider.generate_image( prompt=prompt, ref_images=[full_page_image, marked_image], aspect_ratio="16:9", resolution="1K" ) if result_image is None: logger.error("❌ Gemini Inpainting 失败:未返回图像") return None # 5. 转换为 PIL Image(如果需要) # GenAI SDK 返回的是 google.genai.types.Image 对象,需要转换为 PIL Image if hasattr(result_image, '_pil_image'): logger.debug("🔄 转换 GenAI Image 为 PIL Image") result_image = result_image._pil_image logger.info(f"✅ Gemini Inpainting 成功!API返回尺寸: {result_image.size}, {result_image.mode}") # 6. Resize 到原图尺寸 if result_image.size != final_image.size: logger.info(f"🔄 Resize 从 {result_image.size} 到 {final_image.size}") result_image = result_image.resize(final_image.size, Image.LANCZOS) # 7. 合成图像:只在mask区域使用inpaint结果,其他区域保留原图 logger.info("🎨 合成图像:将inpaint结果与原图按mask合并...") # 确保所有图像都是RGB模式 if result_image.mode != 'RGB': result_image = result_image.convert('RGB') if final_image.mode != 'RGB': final_image = final_image.convert('RGB') # 将mask转换为灰度图(L模式) mask_for_composite = final_mask.convert('L') # 使用PIL的composite方法合成 # mask中白色(255)区域使用inpainting结果,黑色(0)区域使用原图 composited_image = Image.composite(result_image, final_image, mask_for_composite) logger.info(f"✅ 图像合成完成!尺寸: {composited_image.size}") # 8. 
class GenAIImageProvider(ImageProvider):
    """Image generation via Google GenAI SDK (AI Studio / Vertex AI)"""

    def __init__(
        self,
        model: str = "gemini-3-pro-image-preview",
        api_key: str = None,
        api_base: str = None,
        vertexai: bool = False,
        project_id: str = None,
        location: str = None,
    ):
        """
        Create the underlying GenAI client.

        Args:
            model: Model name used for every generation call.
            api_key: API key (API-key mode).
            api_base: Optional base URL override (API-key mode).
            vertexai: True to authenticate through Vertex AI.
            project_id: GCP project (Vertex AI mode).
            location: GCP region (Vertex AI mode).
        """
        self.client = make_genai_client(
            vertexai=vertexai,
            api_key=api_key,
            api_base=api_base,
            project_id=project_id,
            location=location,
        )
        self.model = model

    @staticmethod
    def _to_pil_image(image) -> Optional[Image.Image]:
        """Convert whatever ``part.as_image()`` returned into a PIL image, or None."""
        # The official SDK is expected to return a PIL image; proxies may return wrappers.
        if isinstance(image, Image.Image):
            return image
        if hasattr(image, 'image_bytes') and image.image_bytes:
            return Image.open(BytesIO(image.image_bytes))
        if hasattr(image, '_pil_image') and image._pil_image:
            return image._pil_image
        return None

    @retry(
        stop=stop_after_attempt(get_config().GENAI_MAX_RETRIES + 1),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        reraise=True
    )
    def generate_image(
        self,
        prompt: str,
        ref_images: Optional[List[Image.Image]] = None,
        aspect_ratio: str = "16:9",
        resolution: str = "2K",
        enable_thinking: bool = True,
        thinking_budget: int = 1024
    ) -> Optional[Image.Image]:
        """
        Generate image using Google GenAI SDK

        Args:
            prompt: The image generation prompt
            ref_images: Optional list of reference images
            aspect_ratio: Image aspect ratio
            resolution: Image resolution (supports "1K", "2K", "4K")
            enable_thinking: If True, enable thinking chain mode (may generate multiple images)
            thinking_budget: Thinking budget for the model

        Returns:
            The last image found in the response (earlier ones are usually drafts).

        Raises:
            Exception: wraps any failure, including a response without images;
                the call is retried according to ``GENAI_MAX_RETRIES``.
        """
        try:
            # Reference images go first, the text prompt last.
            contents = list(ref_images) if ref_images else []
            contents.append(prompt)

            logger.debug(f"Calling GenAI API for image generation with {len(ref_images) if ref_images else 0} reference images...")
            logger.debug(f"Config - aspect_ratio: {aspect_ratio}, resolution: {resolution}, enable_thinking: {enable_thinking}")

            config_params = {
                'response_modalities': ['TEXT', 'IMAGE'],
                'image_config': types.ImageConfig(
                    aspect_ratio=aspect_ratio,
                    image_size=resolution
                )
            }

            if enable_thinking:
                # include_thoughts=True requires an explicit thinking_budget.
                config_params['thinking_config'] = types.ThinkingConfig(
                    thinking_budget=thinking_budget,
                    include_thoughts=True
                )

            response = self.client.models.generate_content(
                model=self.model,
                contents=contents,
                config=types.GenerateContentConfig(**config_params)
            )

            logger.debug("GenAI API call completed")

            # `parts` is None when the response carries no content; normalise it so
            # we report "no image" instead of failing on iteration.
            parts = response.parts or []

            last_image = None
            for i, part in enumerate(parts):
                if part.text is not None:
                    logger.debug(f"Part {i}: TEXT - {part.text[:100]}")
                    continue
                try:
                    logger.debug(f"Part {i}: Attempting to extract image...")
                    image = part.as_image()
                    if not image:
                        continue
                    converted = self._to_pil_image(image)
                    if converted is None:
                        logger.warning(f"Part {i}: Image object type {type(image)} has no usable conversion method")
                        continue
                    last_image = converted
                    logger.debug(f"Successfully extracted image from part {i}")
                except Exception as e:
                    logger.warning(f"Part {i}: Failed to extract image - {type(e).__name__}: {str(e)}")

            if last_image is not None:
                return last_image

            error_msg = "No image found in API response. "
            if parts:
                error_msg += f"Response had {len(parts)} parts but none contained valid images."
            else:
                error_msg += "Response had no parts."

            raise ValueError(error_msg)

        except Exception as e:
            error_detail = f"Error generating image with GenAI: {type(e).__name__}: {str(e)}"
            logger.error(error_detail, exc_info=True)
            raise Exception(error_detail) from e
""" import re import tempfile import os import logging import requests from io import BytesIO from typing import Optional, List, Tuple from PIL import Image from .base import ImageProvider from ..lazyllm_env import ensure_lazyllm_namespace_key logger = logging.getLogger(__name__) # Vendor-specific image dimension constraints # Format: vendor -> (min_dimension, max_dimension, min_total_pixels, separator) VENDOR_IMAGE_CONSTRAINTS = { 'qwen': { 'min_dim': 512, 'max_dim': 2048, 'min_pixels': None, # No minimum total pixels requirement 'separator': '*', }, 'doubao': { 'min_dim': None, 'max_dim': None, 'min_pixels': 3686400, # ~1920x1920, required by seedream models 'separator': 'x', }, } DEFAULT_CONSTRAINTS = { 'min_dim': None, 'max_dim': None, 'min_pixels': None, 'separator': 'x', } def _calculate_image_dimensions( resolution: str, aspect_ratio: str, source: str ) -> Tuple[int, int, str]: """ Calculate image dimensions based on resolution, aspect ratio, and vendor constraints. Args: resolution: Resolution preset (1K, 2K, 4K) aspect_ratio: Aspect ratio (16:9, 4:3, 1:1) source: Vendor name (qwen, doubao, etc.) 
Returns: Tuple of (width, height, size_string) """ aspect_ratios = { "16:9": (16, 9), "9:16": (9, 16), "4:3": (4, 3), "3:4": (3, 4), "3:2": (3, 2), "2:3": (2, 3), "1:1": (1, 1), } resolution_base = { "1K": 1024, "2K": 2048, "4K": 4096, } constraints = VENDOR_IMAGE_CONSTRAINTS.get(source, DEFAULT_CONSTRAINTS) min_dim = constraints['min_dim'] max_dim = constraints['max_dim'] min_pixels = constraints['min_pixels'] sep = constraints['separator'] # Start with base resolution base = resolution_base.get(resolution, 2048) if max_dim and base > max_dim: base = max_dim # Calculate dimensions from aspect ratio ratio = aspect_ratios.get(aspect_ratio) if not ratio: # Parse arbitrary "W:H" format parts = aspect_ratio.split(':') if len(parts) == 2: try: ratio = (int(parts[0]), int(parts[1])) except ValueError: pass if not ratio: logger.warning(f"Unknown aspect_ratio '{aspect_ratio}', falling back to 16:9") ratio = (16, 9) if ratio[0] >= ratio[1]: w = base h = int(base * ratio[1] / ratio[0]) else: h = base w = int(base * ratio[0] / ratio[1]) # Scale up if total pixels below minimum (e.g., doubao requires >= 3686400) if min_pixels: total = w * h if total < min_pixels: scale = (min_pixels / total) ** 0.5 w = int(w * scale) h = int(h * scale) # Round up to nearest multiple of 64 (common GPU alignment requirement) w = max(64, ((w + 63) // 64) * 64) h = max(64, ((h + 63) // 64) * 64) # Enforce minimum dimension if specified if min_dim: w = max(min_dim, w) h = max(min_dim, h) return w, h, f"{w}{sep}{h}" class LazyLLMImageProvider(ImageProvider): """Image generation using Lazyllm framework""" def __init__(self, source: str = 'doubao', model: str = 'doubao-seedream-4-0-250828'): """ Initialize GenAI image provider Args: source: image_editing model provider, support qwen,doubao,siliconflow now. model: Model name to use type: Category of the online service. Defaults to ``llm``. 
""" try: import lazyllm except ModuleNotFoundError as exc: raise RuntimeError( "lazyllm is required when AI_PROVIDER_FORMAT=lazyllm. " "Please install backend dependencies including lazyllm." ) from exc ensure_lazyllm_namespace_key(source, namespace='BANANA') self._source = source self.client = lazyllm.namespace('BANANA').OnlineModule( source=source, model=model, type='image_editing', ) def generate_image(self, prompt: str = None, ref_images: Optional[List[Image.Image]] = None, aspect_ratio = "16:9", resolution = "1920*1080", enable_thinking: bool = False, thinking_budget: int = 0 ) -> Optional[Image.Image]: # Calculate vendor-specific image dimensions w, h, size_str = _calculate_image_dimensions(resolution, aspect_ratio, self._source) logger.info(f"[LazyLLM] aspect_ratio={aspect_ratio}, resolution={resolution}, size={size_str}") # Convert a PIL Image object to a file path: When passing a reference image to lazyllm, you need to input a path in string format. file_paths = None temp_paths = [] decode_query_with_filepaths = None try: from lazyllm.components.formatter import decode_query_with_filepaths as _decoder decode_query_with_filepaths = _decoder except ModuleNotFoundError as exc: raise RuntimeError( "lazyllm is required when AI_PROVIDER_FORMAT=lazyllm. " "Please install backend dependencies including lazyllm." ) from exc if ref_images: file_paths = [] for img in ref_images: with tempfile.NamedTemporaryFile(prefix='lazyllm_ref_', suffix='.png', delete=False) as tmp: temp_path = tmp.name img.save(temp_path) file_paths.append(temp_path) temp_paths.append(temp_path) try: try: response_path = self.client(prompt, lazyllm_files=file_paths, size=size_str) except Exception as client_err: # LazyLLM may fail internally when the image URL returns application/octet-stream # instead of image/*. In that case, extract the URL and download manually. 
err_str = str(client_err) if 'content type' in err_str.lower() or 'Failed to load image from' in err_str: url_match = re.search(r'(https://[^\s"\'<>]+)', err_str) if url_match: url = url_match.group(1).rstrip('.') # Only fetch from known image-hosting domains to prevent SSRF from urllib.parse import urlparse host = urlparse(url).hostname or '' allowed = host == 's3.siliconflow.cn' or host.endswith('.s3.amazonaws.com') if not allowed: logger.warning(f"[LazyLLM] Untrusted host '{host}', skipping manual download") raise logger.warning( f"[LazyLLM] Content-type mismatch, downloading image manually: {url[:80]}..." ) max_size = 20 * 1024 * 1024 # 20 MB resp = requests.get(url, timeout=60, stream=True) resp.raise_for_status() content = b"" for chunk in resp.iter_content(chunk_size=8192): content += chunk if len(content) > max_size: raise ValueError(f"Image too large (>{max_size // 1024 // 1024}MB)") result = Image.open(BytesIO(content)).copy() logger.info(f"[LazyLLM] Manual download succeeded, size: {result.size}") return result raise image_path = decode_query_with_filepaths(response_path) # dict if not image_path: logger.warning('No images found in response') raise ValueError() if isinstance(image_path, dict): files = image_path.get('files') if files and isinstance(files, list) and len(files) > 0: image_path = files[0] else: logger.warning('No valid image path in response') return None try: with Image.open(image_path) as image: result = image.copy() logger.info(f'Successfully loaded image from: {image_path}, actual size: {result.size[0]}x{result.size[1]} (requested: {size_str})') return result except Exception as e: logger.error(f'Failed to load image: {e}') logger.warning('No valid images could be loaded') return None finally: for temp_path in temp_paths: try: os.remove(temp_path) except OSError: pass ================================================ FILE: backend/services/ai_providers/image/openai_provider.py ================================================ """ OpenAI 
SDK implementation for image generation Supports multiple resolution parameter formats for different OpenAI-compatible providers: - Flat style: extra_body.aspect_ratio + extra_body.resolution - Nested style: extra_body.generationConfig.imageConfig.aspectRatio + imageSize Note: Not all providers support 2K/4K resolution in OpenAI format. Some may only return 1K regardless of settings. Resolution validation is handled at the task_manager level for all providers. """ import logging import base64 import re import requests from io import BytesIO from typing import Optional, List from openai import OpenAI from PIL import Image from .base import ImageProvider from config import get_config logger = logging.getLogger(__name__) class OpenAIImageProvider(ImageProvider): """ Image generation using OpenAI SDK (compatible with Gemini via proxy) Supports multiple resolution parameter formats for different providers. Resolution support varies by provider: - Some providers support 2K/4K via extra_body parameters - Some providers only support 1K regardless of settings The provider will try multiple parameter formats to maximize compatibility. 
""" def __init__(self, api_key: str, api_base: str = None, model: str = "gemini-3-pro-image-preview"): """ Initialize OpenAI image provider Args: api_key: API key api_base: API base URL (e.g., https://aihubmix.com/v1) model: Model name to use """ self.client = OpenAI( api_key=api_key, base_url=api_base, timeout=get_config().OPENAI_TIMEOUT, # set timeout from config max_retries=get_config().OPENAI_MAX_RETRIES # set max retries from config ) self.api_base = api_base or "" self.model = model def _encode_image_to_base64(self, image: Image.Image) -> str: """ Encode PIL Image to base64 string Args: image: PIL Image object Returns: Base64 encoded string """ buffered = BytesIO() # Convert to RGB if necessary (e.g., RGBA images) if image.mode in ('RGBA', 'LA', 'P'): image = image.convert('RGB') image.save(buffered, format="JPEG", quality=95) return base64.b64encode(buffered.getvalue()).decode('utf-8') def _build_extra_body(self, aspect_ratio: str, resolution: str) -> dict: """ Build extra_body parameters for resolution control. Uses multiple format strategies to support different providers: 1. Flat style: aspect_ratio + resolution at top level 2. 
Nested style: generationConfig.imageConfig structure Args: aspect_ratio: Image aspect ratio (e.g., "16:9", "9:16") resolution: Image resolution ("1K", "2K", "4K") Returns: Dict with extra_body parameters """ # Ensure resolution is uppercase (some providers require "4K" not "4k") resolution_upper = resolution.upper() # Build comprehensive extra_body that works with multiple providers extra_body = { # Flat style parameters "aspect_ratio": aspect_ratio, "resolution": resolution_upper, # Nested style structure (compatible with some providers) "generationConfig": { "imageConfig": { "aspectRatio": aspect_ratio, "imageSize": resolution_upper, } } } return extra_body def generate_image( self, prompt: str, ref_images: Optional[List[Image.Image]] = None, aspect_ratio: str = "16:9", resolution: str = "2K", enable_thinking: bool = False, thinking_budget: int = 0 ) -> Optional[Image.Image]: """ Generate image using OpenAI SDK Supports resolution control via extra_body parameters for compatible providers. Note: Not all providers support 2K/4K resolution - some may return 1K regardless. Note: enable_thinking and thinking_budget are ignored (OpenAI format doesn't support thinking mode) The provider will: 1. Try to use extra_body parameters (API易/AvalAI style) for resolution control 2. 
Use system message for aspect_ratio as fallback Args: prompt: The image generation prompt ref_images: Optional list of reference images aspect_ratio: Image aspect ratio resolution: Image resolution ("1K", "2K", "4K") - support depends on provider enable_thinking: Ignored, kept for interface compatibility thinking_budget: Ignored, kept for interface compatibility Returns: Generated PIL Image object, or None if failed """ try: # Build message content content = [] # Add reference images first (if any) if ref_images: for ref_img in ref_images: base64_image = self._encode_image_to_base64(ref_img) content.append({ "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{base64_image}" } }) # Add text prompt content.append({"type": "text", "text": prompt}) logger.debug(f"Calling OpenAI API for image generation with {len(ref_images) if ref_images else 0} reference images...") logger.debug(f"Config - aspect_ratio: {aspect_ratio}, resolution: {resolution}") # Build extra_body with resolution parameters for compatible providers extra_body = self._build_extra_body(aspect_ratio, resolution) logger.debug(f"Using extra_body for resolution control: {extra_body}") # Use both system message (for basic providers) and extra_body (for advanced providers) response = self.client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": f"aspect_ratio={aspect_ratio}, resolution={resolution}"}, {"role": "user", "content": content}, ], modalities=["text", "image"], extra_body=extra_body ) logger.debug("OpenAI API call completed") # Extract image from response - handle different response formats message = response.choices[0].message # Debug: log available attributes logger.debug(f"Response message attributes: {dir(message)}") # Try multi_mod_content first (custom format from some proxies) if hasattr(message, 'multi_mod_content') and message.multi_mod_content: parts = message.multi_mod_content for part in parts: if "text" in part: logger.debug(f"Response 
text: {part['text'][:100] if len(part['text']) > 100 else part['text']}") if "inline_data" in part: image_data = base64.b64decode(part["inline_data"]["data"]) image = Image.open(BytesIO(image_data)) logger.debug(f"Successfully extracted image: {image.size}, {image.mode}") return image # Try standard OpenAI content format (list of content parts) if hasattr(message, 'content') and message.content: # If content is a list (multimodal response) if isinstance(message.content, list): for part in message.content: if isinstance(part, dict): # Handle image_url type if part.get('type') == 'image_url': image_url = part.get('image_url', {}).get('url', '') if image_url.startswith('data:image'): # Extract base64 data from data URL base64_data = image_url.split(',', 1)[1] image_data = base64.b64decode(base64_data) image = Image.open(BytesIO(image_data)) logger.debug(f"Successfully extracted image from content: {image.size}, {image.mode}") return image # Handle text type elif part.get('type') == 'text': text = part.get('text', '') if text: logger.debug(f"Response text: {text[:100] if len(text) > 100 else text}") elif hasattr(part, 'type'): # Handle as object with attributes if part.type == 'image_url': image_url = getattr(part, 'image_url', {}) if isinstance(image_url, dict): url = image_url.get('url', '') else: url = getattr(image_url, 'url', '') if url.startswith('data:image'): base64_data = url.split(',', 1)[1] image_data = base64.b64decode(base64_data) image = Image.open(BytesIO(image_data)) logger.debug(f"Successfully extracted image from content object: {image.size}, {image.mode}") return image # If content is a string, try to extract image from it elif isinstance(message.content, str): content_str = message.content logger.debug(f"Response content (string): {content_str[:200] if len(content_str) > 200 else content_str}") # Try to extract Markdown image URL: ![...](url) markdown_pattern = r'!\[.*?\]\((https?://[^\s\)]+)\)' markdown_matches = re.findall(markdown_pattern, 
content_str) if markdown_matches: image_url = markdown_matches[0] # Use the first image URL found logger.debug(f"Found Markdown image URL: {image_url}") try: response = requests.get(image_url, timeout=30, stream=True) response.raise_for_status() image = Image.open(BytesIO(response.content)) image.load() # Ensure image is fully loaded logger.debug(f"Successfully downloaded image from Markdown URL: {image.size}, {image.mode}") return image except Exception as download_error: logger.warning(f"Failed to download image from Markdown URL: {download_error}") # Try to extract plain URL (not in Markdown format) url_pattern = r'(https?://[^\s\)\]]+\.(?:png|jpg|jpeg|gif|webp|bmp)(?:\?[^\s\)\]]*)?)' url_matches = re.findall(url_pattern, content_str, re.IGNORECASE) if url_matches: image_url = url_matches[0] logger.debug(f"Found plain image URL: {image_url}") try: response = requests.get(image_url, timeout=30, stream=True) response.raise_for_status() image = Image.open(BytesIO(response.content)) image.load() logger.debug(f"Successfully downloaded image from plain URL: {image.size}, {image.mode}") return image except Exception as download_error: logger.warning(f"Failed to download image from plain URL: {download_error}") # Try to extract base64 data URL from string base64_pattern = r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)' base64_matches = re.findall(base64_pattern, content_str) if base64_matches: base64_data = base64_matches[0] logger.debug(f"Found base64 image data in string") try: image_data = base64.b64decode(base64_data) image = Image.open(BytesIO(image_data)) logger.debug(f"Successfully extracted base64 image from string: {image.size}, {image.mode}") return image except Exception as decode_error: logger.warning(f"Failed to decode base64 image from string: {decode_error}") # Log raw response for debugging logger.warning(f"Unable to extract image. 
class VolcengineInpaintingProvider:
    """Volcengine inpainting (object removal) provider.

    Prepares the image/mask payload, sends it through the official
    ``volcengine`` SDK (which signs the request) and composites the returned
    image back onto the original so that only the masked area changes.
    """

    API_URL = "https://visual.volcengineapi.com"
    SERVICE = "cv"
    REGION = "cn-north-1"

    def __init__(self, access_key: str, secret_key: str, timeout: int = 60):
        """
        Initialize the Volcengine inpainting provider.

        Args:
            access_key: Volcengine Access Key
            secret_key: Volcengine Secret Key
            timeout: API request timeout in seconds (stored on the instance;
                the SDK call in ``inpaint_image`` does not read it)
        """
        self.access_key = access_key
        self.secret_key = secret_key
        self.timeout = timeout
        logger.info("火山引擎 Inpainting Provider 初始化(直接HTTP模式)")

    def _encode_image_to_base64(self, image: Image.Image, is_mask: bool = False) -> str:
        """
        Encode a PIL image as a base64 string.

        Masks are converted to single-channel grayscale and saved as PNG.
        Source images are saved as JPEG (quality 85): any alpha channel is
        flattened onto a white background first, and modes JPEG cannot store
        are converted to RGB.

        Args:
            image: PIL Image object
            is_mask: Whether the image is a mask (needs grayscale PNG encoding)

        Returns:
            Base64-encoded image bytes as a UTF-8 string
        """
        buffered = BytesIO()

        if is_mask:
            # The mask must be single-channel grayscale, saved as 8-bit PNG.
            if image.mode != 'L':
                image = image.convert('L')
            image.save(buffered, format="PNG", optimize=True)
        else:
            # Treat every alpha-carrying mode the same way; a plain
            # convert('RGB') on LA / transparent-palette images would simply
            # discard the alpha instead of blending it onto white.
            has_alpha = image.mode in ('RGBA', 'LA') or (
                image.mode == 'P' and 'transparency' in image.info
            )
            if has_alpha:
                rgba = image.convert('RGBA')
                background = Image.new('RGB', rgba.size, (255, 255, 255))
                background.paste(rgba, mask=rgba.split()[3])
                image = background
            elif image.mode not in ('RGB', 'L'):
                # e.g. P, CMYK, I, F: normalise so the JPEG encoder accepts it.
                image = image.convert('RGB')

            # JPEG keeps the payload small.
            image.save(buffered, format="JPEG", quality=85)

        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    # NOTE: every failure path inside inpaint_image is caught and turned into
    # ``return None``, so tenacity never observes an exception and this
    # decorator currently never triggers a retry.
    @retry(
        stop=stop_after_attempt(3),  # at most 3 attempts
        wait=wait_exponential(multiplier=1, min=2, max=10),  # exponential backoff
        retry=retry_if_exception_type((requests.exceptions.RequestException, Exception)),
        reraise=True
    )
    def inpaint_image(
        self,
        original_image: Image.Image,
        mask_image: Image.Image,
        inpaint_mode: str = "remove",
        full_page_image: Optional[Image.Image] = None,
        crop_box: Optional[tuple] = None
    ) -> Optional[Image.Image]:
        """
        Remove the masked region of an image via Volcengine inpainting.

        Args:
            original_image: Source image
            mask_image: Mask image (white = remove, black = keep)
            inpaint_mode: Inpainting mode (accepted for interface
                compatibility; not read by this implementation)
            full_page_image: Accepted for interface compatibility; not read
            crop_box: Accepted for interface compatibility; not read

        Returns:
            The composited result image (RGB), or None on any failure
        """
        try:
            logger.info("🚀 开始调用火山引擎 inpainting(直接HTTP)")

            # 1. Downscale so the longest edge is at most 2048px (payload size limit).
            max_dimension = 2048
            if max(original_image.size) > max_dimension:
                ratio = max_dimension / max(original_image.size)
                # max(1, ...) guards against a zero-sized edge on extreme aspect ratios.
                new_size = tuple(max(1, int(dim * ratio)) for dim in original_image.size)
                original_image = original_image.resize(new_size, Image.LANCZOS)
                mask_image = mask_image.resize(new_size, Image.LANCZOS)
                logger.info(f"✂️ 压缩图片: {original_image.size}")

            # 2. Encode both images (the mask is forced to grayscale PNG).
            logger.info("📦 编码图片为base64...")
            original_base64 = self._encode_image_to_base64(original_image, is_mask=False)
            mask_base64 = self._encode_image_to_base64(mask_image, is_mask=True)
            logger.info(f"✅ 编码完成: 原图={len(original_base64)} bytes, mask={len(mask_base64)} bytes")

            # 3. Request parameters.
            # Reference: https://www.volcengine.com/docs/86081/1804489
            # Mask convention: black (0) = keep, white (255) = remove.
            request_body = {
                "req_key": "i2i_inpainting",
                "binary_data_base64": [original_base64, mask_base64],
                "dilate_size": 10,   # mask dilation radius
                "quality": "H",      # high-quality mode
                "steps": 50,         # sampling steps
                "strength": 0.85     # control strength
            }
            # Serialise once; the body carries two base64 images and is large.
            payload = json.dumps(request_body)

            # 4. Endpoint, used for logging only; the SDK builds and signs the real request.
            url = f"{self.API_URL}/?Action=CVProcess&Version=2022-08-31"
            logger.info(f"🌐 发送请求到: {url}")
            logger.debug(f"请求体大小: {len(payload)} bytes")

            # 5. Send through the SDK, which handles request signing.
            from volcengine.visual.VisualService import VisualService
            service = VisualService()
            service.set_ak(self.access_key)
            service.set_sk(self.secret_key)

            logger.info("使用SDK发送请求(带正确签名)")
            try:
                response = service.json(
                    "CVProcess",
                    {},       # query params
                    payload   # body
                )
                if isinstance(response, str):
                    response = json.loads(response)
            except Exception as e:
                error_str = str(e)
                logger.error(f"SDK调用错误: {error_str}")
                # When the exception text looks like a bytes repr (b'...'),
                # try to parse its content as the JSON error response.
                if not (error_str.startswith("b'") and error_str.endswith("'")):
                    return None
                try:
                    response = json.loads(error_str[2:-1])  # strip b' and '
                except Exception:
                    logger.error("无法解析错误响应")
                    return None

            # 6. Parse the response.
            logger.debug(f"API响应: {json.dumps(response, ensure_ascii=False)[:300]}")

            if response.get("code") == 10000 or response.get("status") == 10000:
                # "data" may be present but null; treat that as empty.
                data = response.get("data") or {}

                # Accept several possible result field names.
                result_base64 = None
                if "binary_data_base64" in data and data["binary_data_base64"]:
                    result_base64 = data["binary_data_base64"][0]
                elif "image_base64" in data:
                    result_base64 = data["image_base64"]
                elif "result_image" in data:
                    result_base64 = data["result_image"]

                if not result_base64:
                    logger.error(f"❌ 响应中无图像数据,keys: {list(data.keys())}")
                    return None

                image_data = base64.b64decode(result_base64)
                inpainted_image = Image.open(BytesIO(image_data))
                logger.info(f"✅ Inpainting成功!结果: {inpainted_image.size}, {inpainted_image.mode}")

                # 7. Composite: keep the inpainted pixels only inside the mask.
                if inpainted_image.size != original_image.size:
                    logger.warning(f"尺寸不一致,调整inpainting结果: {inpainted_image.size} -> {original_image.size}")
                    inpainted_image = inpainted_image.resize(original_image.size, Image.LANCZOS)

                if mask_image.size != original_image.size:
                    mask_image = mask_image.resize(original_image.size, Image.LANCZOS)

                # Image.composite needs both images in the same mode.
                if inpainted_image.mode != 'RGB':
                    inpainted_image = inpainted_image.convert('RGB')
                if original_image.mode != 'RGB':
                    original_image = original_image.convert('RGB')

                mask_for_composite = mask_image.convert('L')

                # Image.composite(image1, image2, mask): white mask pixels take
                # image1 (inpainted), black mask pixels take image2 (original).
                result_image = Image.composite(inpainted_image, original_image, mask_for_composite)
                logger.info(f"✅ 图像合成完成!最终尺寸: {result_image.size}, {result_image.mode}")
                return result_image

            code = response.get("code") or response.get("status")
            message = response.get("message", "未知错误")
            logger.error(f"❌ API错误: code={code}, message={message}")
            return None

        except Exception as e:
            logger.error(f"❌ Inpainting失败: {str(e)}", exc_info=True)
            return None
def ensure_lazyllm_namespace_key(source: str, namespace: str = "BANANA") -> bool:
    """
    Mirror a vendor-prefixed API key into the LazyLLM namespace env var.

    Looks up the key for ``source`` via ``get_lazyllm_api_key`` and, when one
    is found, writes it to ``{namespace}_{SOURCE}_API_KEY`` in ``os.environ``.

    Args:
        source: Vendor/source name (case-insensitive); empty or None is rejected
        namespace: Prefix of the namespaced env var

    Returns:
        True if the namespaced variable was set, False otherwise
    """
    vendor = (source or "").upper()
    if vendor:
        api_key = get_lazyllm_api_key(source, namespace=namespace)
        if api_key:
            os.environ[f"{namespace}_{vendor}_API_KEY"] = api_key
            return True
    return False
class BaiduAccurateOCRProvider:
    """
    Baidu high-accuracy OCR provider (general text recognition with positions).

    Wraps the ``/rest/2.0/ocr/v1/accurate`` endpoint and returns both the raw
    API result and a parsed per-line view that includes bounding boxes.
    """

    def __init__(self, api_key: str):
        """
        Initialize the Baidu high-accuracy OCR provider.

        Args:
            api_key: Baidu API key (BCEv3 format: bce-v3/ALTAK-...) or an Access Token
        """
        self.api_key = api_key
        self.api_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate"

        credential_kind = "使用BCEv3 API Key" if api_key.startswith('bce-v3/') else "使用Access Token"
        logger.info(f"✅ 初始化百度高精度OCR Provider ({credential_kind})")

    @retry(
        stop=stop_after_attempt(3),  # at most 3 attempts
        wait=wait_exponential(multiplier=0.5, min=1, max=5),  # exponential backoff
        retry=retry_if_exception_type((requests.exceptions.RequestException, Exception)),
        reraise=True
    )
    def recognize(
        self,
        image_path: str,
        language_type: LanguageType = 'CHN_ENG',
        recognize_granularity: Literal['big', 'small'] = 'big',
        detect_direction: bool = False,
        vertexes_location: bool = False,
        paragraph: bool = False,
        probability: bool = False,
        char_probability: bool = False,
        multidirectional_recognize: bool = False,
        eng_granularity: Optional[Literal['word', 'letter']] = None,
    ) -> Dict[str, Any]:
        """
        Recognize text in an image (high-accuracy, with positions).

        Args:
            image_path: Path of the image file
            language_type: Recognition language, defaults to mixed Chinese/English
            recognize_granularity: 'big' = line level only, 'small' = also locate single characters
            detect_direction: Whether to detect image orientation
            vertexes_location: Whether to return polygon vertices of each text line
            paragraph: Whether to return paragraph information
            probability: Whether to return per-line confidence
            char_probability: Whether to return per-character confidence
                (only sent when recognize_granularity='small')
            multidirectional_recognize: Whether to enable line-level multi-direction recognition
            eng_granularity: English character granularity (word/letter)
                (only sent when recognize_granularity='small')

        Returns:
            Dict with keys:
                - log_id, words_result_num, words_result (raw lines)
                - text_lines: parsed lines (text, location, bbox and any
                  optional fields present in the response)
                - direction
                - paragraphs_result_num, paragraphs_result (raw), paragraphs (parsed)
                - image_size: (width, height) of the image as opened, before any downscaling

        Raises:
            Exception: When the API reports an error_code; other failures are
                logged and re-raised unchanged.
        """
        logger.info(f"🔍 开始高精度OCR识别: {image_path}")

        def as_flag(enabled: bool) -> str:
            # The API expects lowercase string booleans.
            return 'true' if enabled else 'false'

        try:
            # Load the image and serialise it as base64 JPEG.
            with Image.open(image_path) as opened:
                original_width, original_height = opened.size
                logger.info(f"📏 图片尺寸: {original_width}x{original_height}")

                img = opened if opened.mode == 'RGB' else opened.convert('RGB')

                # Size limits: longest edge <= 8192px, shortest edge >= 15px.
                max_size = 8192
                min_size = 15
                width, height = img.size

                if min(width, height) < min_size:
                    logger.warning(f"⚠️ 图片太小: {width}x{height}, 最短边需要至少{min_size}px")

                if max(width, height) > max_size:
                    ratio = min(max_size / width, max_size / height)
                    img = img.resize((int(width * ratio), int(height * ratio)), Image.Resampling.LANCZOS)
                    logger.info(f"✂️ 压缩图片: {img.size}")

                jpeg_buffer = io.BytesIO()
                img.save(jpeg_buffer, format='JPEG', quality=95)

            image_base64 = base64.b64encode(jpeg_buffer.getvalue()).decode('utf-8')
            # The body is assembled by hand below, so the image is URL-quoted here.
            image_encoded = urllib.parse.quote(image_base64)
            logger.info(f"📦 图片编码完成: base64={len(image_base64)} bytes")

            headers = {
                'Content-Type': 'application/x-www-form-urlencoded',
                'Accept': 'application/json',
            }

            # Authentication: BCEv3 keys go in the Authorization header,
            # anything else is passed as an access_token query parameter.
            if self.api_key.startswith('bce-v3/'):
                headers['Authorization'] = f'Bearer {self.api_key}'
                url = self.api_url
                logger.info("🔐 使用BCEv3签名认证")
            else:
                url = f"{self.api_url}?access_token={self.api_key}"
                logger.info("🔐 使用Access Token认证")

            form_data = {
                'image': image_encoded,
                'language_type': language_type,
                'recognize_granularity': recognize_granularity,
                'detect_direction': as_flag(detect_direction),
                'vertexes_location': as_flag(vertexes_location),
                'paragraph': as_flag(paragraph),
                'probability': as_flag(probability),
                'multidirectional_recognize': as_flag(multidirectional_recognize),
            }
            if recognize_granularity == 'small':
                if char_probability:
                    form_data['char_probability'] = 'true'
                if eng_granularity:
                    form_data['eng_granularity'] = eng_granularity

            data = '&'.join(f"{k}={v}" for k, v in form_data.items())

            logger.info("🌐 发送请求到百度高精度OCR API...")
            response = requests.post(url, headers=headers, data=data, timeout=60)
            response.raise_for_status()
            result = response.json()

            # The API signals failures inside a 200 response via error_code.
            if 'error_code' in result:
                error_msg = result.get('error_msg', 'Unknown error')
                error_code = result.get('error_code')
                logger.error(f"❌ 百度API错误: [{error_code}] {error_msg}")
                raise Exception(f"Baidu API error [{error_code}]: {error_msg}")

            log_id = result.get('log_id', '')
            words_result_num = result.get('words_result_num', 0)
            words_result = result.get('words_result', [])
            direction = result.get('direction', None)
            paragraphs_result_num = result.get('paragraphs_result_num', 0)
            paragraphs_result = result.get('paragraphs_result', [])

            logger.info(f"✅ 高精度OCR识别成功! log_id={log_id}, 识别到 {words_result_num} 行文字")

            # Optional fields copied through verbatim when present.
            line_passthrough = (
                'probability',
                'vertexes_location',
                'finegrained_vertexes_location',
                'min_finegrained_vertexes_location',
            )
            paragraph_passthrough = (
                'finegrained_vertexes_location',
                'min_finegrained_vertexes_location',
            )

            text_lines = []
            for raw_line in words_result:
                entry = {
                    'text': raw_line.get('words', ''),
                    'location': raw_line.get('location', {}),
                    'bbox': self._location_to_bbox(raw_line.get('location', {})),
                }

                # Per-character results (returned for recognize_granularity='small').
                if 'chars' in raw_line:
                    parsed_chars = []
                    for raw_char in raw_line['chars']:
                        char_entry = {
                            'char': raw_char.get('char', ''),
                            'location': raw_char.get('location', {}),
                            'bbox': self._location_to_bbox(raw_char.get('location', {})),
                        }
                        if 'char_prob' in raw_char:
                            char_entry['probability'] = raw_char['char_prob']
                        parsed_chars.append(char_entry)
                    entry['chars'] = parsed_chars

                for key in line_passthrough:
                    if key in raw_line:
                        entry[key] = raw_line[key]

                text_lines.append(entry)

            paragraphs = []
            for raw_para in paragraphs_result or []:
                para_entry = {'words_result_idx': raw_para.get('words_result_idx', [])}
                for key in paragraph_passthrough:
                    if key in raw_para:
                        para_entry[key] = raw_para[key]
                paragraphs.append(para_entry)

            return {
                'log_id': log_id,
                'words_result_num': words_result_num,
                'words_result': words_result,            # raw lines
                'text_lines': text_lines,                # parsed lines
                'direction': direction,
                'paragraphs_result_num': paragraphs_result_num,
                'paragraphs_result': paragraphs_result,  # raw paragraphs
                'paragraphs': paragraphs,                # parsed paragraphs
                'image_size': (original_width, original_height),
            }

        except Exception as e:
            logger.error(f"❌ 高精度OCR识别失败: {str(e)}")
            raise

    def _location_to_bbox(self, location: Dict[str, int]) -> List[int]:
        """
        Convert a location dict into a bbox list.

        Args:
            location: {left, top, width, height}; missing keys count as 0

        Returns:
            bbox [x0, y0, x1, y1]; all zeros for an empty/None location
        """
        if not location:
            return [0, 0, 0, 0]

        x0 = location.get('left', 0)
        y0 = location.get('top', 0)
        x1 = x0 + location.get('width', 0)
        y1 = y0 + location.get('height', 0)
        return [x0, y0, x1, y1]

    def get_full_text(self, result: Dict[str, Any], separator: str = '\n') -> str:
        """
        Join the text of every recognized line.

        Args:
            result: Value returned by recognize()
            separator: Line separator, newline by default

        Returns:
            The full text as a single string
        """
        return separator.join(
            line.get('text', '') for line in result.get('text_lines', [])
        )

    def get_text_with_positions(self, result: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        List each recognized line together with its bounding box.

        Args:
            result: Value returned by recognize()

        Returns:
            List of dicts, each with 'text' and 'bbox'
        """
        positioned = []
        for line in result.get('text_lines', []):
            positioned.append({
                'text': line.get('text', ''),
                'bbox': line.get('bbox', [0, 0, 0, 0]),
            })
        return positioned
@retry(
    stop=stop_after_attempt(3),  # at most 3 attempts
    wait=wait_exponential(multiplier=0.5, min=1, max=5),  # exponential backoff
    retry=retry_if_exception_type((requests.exceptions.RequestException, Exception)),
    reraise=True
)
def recognize_table(
    self,
    image_path: str,
    cell_contents: bool = True,  # on by default to get text positions inside cells
    return_excel: bool = False
) -> Dict[str, Any]:
    """
    Recognize tables in an image (retried with exponential backoff).

    Args:
        image_path: Path of the image file
        cell_contents: Whether to request per-cell content position info, default True
        return_excel: Whether to request an Excel file in the response, default False

    Returns:
        Dict with keys:
            - log_id: API log id
            - table_num: number of tables
            - tables_result: raw table results
            - cells: flattened list of header, body and footer cells
            - image_size: (width, height) of the image as opened, before any downscaling
            - excel_file: value of 'excel_file' in the response, or None

    Raises:
        Exception: When the API reports an error_code; other failures are
            logged and re-raised unchanged.
    """
    logger.info(f"🔍 开始识别表格图片: {image_path}")

    def as_flag(enabled: bool) -> str:
        # The API expects lowercase string booleans.
        return 'true' if enabled else 'false'

    try:
        # Load the image and serialise it as base64 JPEG.
        with Image.open(image_path) as opened:
            original_width, original_height = opened.size
            logger.info(f"📏 图片尺寸: {original_width}x{original_height}")

            img = opened if opened.mode == 'RGB' else opened.convert('RGB')

            # Size limits: longest edge <= 8192px, shortest edge >= 15px.
            max_size = 8192
            min_size = 15
            width, height = img.size

            if min(width, height) < min_size:
                logger.warning(f"⚠️ 图片太小: {width}x{height}, 最短边需要至少{min_size}px")

            if max(width, height) > max_size:
                ratio = min(max_size / width, max_size / height)
                img = img.resize((int(width * ratio), int(height * ratio)), Image.Resampling.LANCZOS)
                logger.info(f"✂️ 压缩图片: {img.size}")

            jpeg_buffer = io.BytesIO()
            img.save(jpeg_buffer, format='JPEG', quality=95)

        image_base64 = base64.b64encode(jpeg_buffer.getvalue()).decode('utf-8')
        # The body is assembled by hand below, so the image is URL-quoted here.
        image_encoded = urllib.parse.quote(image_base64)
        logger.info(f"📦 图片编码完成: base64={len(image_base64)} bytes, urlencode={len(image_encoded)} bytes")

        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json',
        }

        # Authentication: BCEv3 keys go in the Authorization header,
        # anything else is passed as an access_token query parameter.
        if self.api_key.startswith('bce-v3/'):
            headers['Authorization'] = f'Bearer {self.api_key}'
            url = self.api_url
            logger.info("🔐 使用BCEv3签名认证")
        else:
            url = f"{self.api_url}?access_token={self.api_key}"
            logger.info("🔐 使用Access Token认证")

        data = (
            f"image={image_encoded}"
            f"&cell_contents={as_flag(cell_contents)}"
            f"&return_excel={as_flag(return_excel)}"
        )

        logger.info("🌐 发送请求到百度表格OCR API...")
        response = requests.post(url, headers=headers, data=data, timeout=60)
        response.raise_for_status()
        result = response.json()

        # The API signals failures inside a 200 response via error_code.
        if 'error_code' in result:
            error_msg = result.get('error_msg', 'Unknown error')
            error_code = result.get('error_code')
            logger.error(f"❌ 百度API错误: [{error_code}] {error_msg}")
            raise Exception(f"Baidu API error [{error_code}]: {error_msg}")

        log_id = result.get('log_id', '')
        table_num = result.get('table_num', 0)
        tables_result = result.get('tables_result', [])
        excel_file = result.get('excel_file', None)

        logger.info(f"✅ 表格识别成功! log_id={log_id}, 识别到 {table_num} 个表格")

        def edge_cell(table_idx: int, section: str, idx: int, raw: Dict[str, Any]) -> Dict[str, Any]:
            # Header and footer entries share one shape.
            return {
                'table_idx': table_idx,
                'section': section,
                'section_idx': idx,
                'text': raw.get('words', ''),
                'bbox': self._location_to_bbox(raw.get('location', [])),
            }

        # Flatten every table into one list: header cells, body cells, footer cells.
        cells = []
        for table_idx, table in enumerate(tables_result):
            header = table.get('header', [])
            body = table.get('body', [])
            footer = table.get('footer', [])

            logger.info(f" 表格 {table_idx + 1}: header={len(header)}, body={len(body)}, footer={len(footer)}")

            for idx, header_cell in enumerate(header):
                cells.append(edge_cell(table_idx, 'header', idx, header_cell))

            for body_cell in body:
                cells.append({
                    'table_idx': table_idx,
                    'section': 'body',
                    'row_start': body_cell.get('row_start', 0),
                    'row_end': body_cell.get('row_end', 0),
                    'col_start': body_cell.get('col_start', 0),
                    'col_end': body_cell.get('col_end', 0),
                    'text': body_cell.get('words', ''),
                    'bbox': self._location_to_bbox(body_cell.get('cell_location', [])),
                    'contents': body_cell.get('contents', []),  # per-line text info inside the cell
                })

            for idx, footer_cell in enumerate(footer):
                cells.append(edge_cell(table_idx, 'footer', idx, footer_cell))

        return {
            'log_id': log_id,
            'table_num': table_num,
            'tables_result': tables_result,
            'cells': cells,
            'image_size': (original_width, original_height),
            'excel_file': excel_file,
        }

    except Exception as e:
        logger.error(f"❌ 表格识别失败: {str(e)}")
        raise
def get_table_structure(self, cells: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Derive the table grid structure from a flat cell list.

    Only cells carrying grid coordinates (row_start/row_end/col_start/col_end)
    take part. The 'cells' list produced by recognize_table() also contains
    header and footer entries, which have no grid coordinates; those are
    skipped instead of raising KeyError.

    Args:
        cells: Flat cell list, e.g. the 'cells' value returned by recognize_table()

    Returns:
        Dict with:
            - rows: largest row_end among the grid cells (0 if there are none)
            - cols: largest col_end among the grid cells (0 if there are none)
            - cells_by_position: {(row_start, col_start): cell}; when several
              cells share a start position the last one wins
    """
    grid_keys = ('row_start', 'row_end', 'col_start', 'col_end')
    grid_cells = [
        cell for cell in (cells or [])
        if all(key in cell for key in grid_keys)
    ]

    # Covers empty input and input made only of header/footer cells.
    if not grid_cells:
        return {'rows': 0, 'cols': 0, 'cells_by_position': {}}

    return {
        'rows': max(cell['row_end'] for cell in grid_cells),
        'cols': max(cell['col_end'] for cell in grid_cells),
        # Keyed by the cell's starting position.
        'cells_by_position': {
            (cell['row_start'], cell['col_start']): cell for cell in grid_cells
        },
    }
def strip_think_tags(text: str) -> str:
    """
    Remove ``<think>...</think>`` reasoning blocks from an AI response.

    Blocks may span multiple lines. Whitespace directly following a removed
    block is dropped with it, and the final result is stripped of leading and
    trailing whitespace. Text outside the blocks is left untouched.

    Args:
        text: Raw model output; empty or None is returned unchanged

    Returns:
        The text without think blocks
    """
    if not text:
        return text
    # Non-greedy so that several separate blocks are removed individually;
    # DOTALL lets '.' cross line breaks inside a block.
    return re.sub(r'<think>.*?</think>\s*', '', text, flags=re.DOTALL).strip()
class GenAITextProvider(TextProvider):
    """Text generation via Google GenAI SDK (AI Studio / Vertex AI)"""

    def __init__(
        self,
        model: str = "gemini-3-flash-preview",
        api_key: str = None,
        api_base: str = None,
        vertexai: bool = False,
        project_id: str = None,
        location: str = None,
    ):
        """
        Create the underlying GenAI client and remember the model name.

        Args:
            model: Model name used for every request
            api_key: API key, forwarded to make_genai_client
            api_base: API base URL, forwarded to make_genai_client
            vertexai: Vertex AI mode flag, forwarded to make_genai_client
            project_id: Project id, forwarded to make_genai_client
            location: Location, forwarded to make_genai_client
        """
        self.client = make_genai_client(
            vertexai=vertexai,
            api_key=api_key,
            api_base=api_base,
            project_id=project_id,
            location=location,
        )
        self.model = model

    @staticmethod
    def _build_config(thinking_budget: int):
        """Return a GenerateContentConfig enabling thinking, or None when thinking_budget <= 0."""
        if thinking_budget > 0:
            return types.GenerateContentConfig(
                thinking_config=types.ThinkingConfig(thinking_budget=thinking_budget)
            )
        return None

    @retry(
        stop=stop_after_attempt(get_config().GENAI_MAX_RETRIES + 1),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        reraise=True,
        before_sleep=_log_retry
    )
    def generate_text(self, prompt: str, thinking_budget: int = 0) -> str:
        """
        Generate text using Google GenAI SDK

        Args:
            prompt: The input prompt
            thinking_budget: Thinking budget for the model (0 = disable thinking)

        Returns:
            Generated text, as returned by _validate_response

        Raises:
            Whatever the SDK call or _validate_response raises once the retry
            attempts are exhausted (reraise=True).
        """
        response = self.client.models.generate_content(
            model=self.model,
            contents=prompt,
            config=self._build_config(thinking_budget),
        )
        return _validate_response(response)

    @retry(
        stop=stop_after_attempt(get_config().GENAI_MAX_RETRIES + 1),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        reraise=True,
        before_sleep=_log_retry
    )
    def generate_with_image(self, prompt: str, image_path: str, thinking_budget: int = 0) -> str:
        """
        Generate text with image input using Google GenAI SDK (multimodal)

        Args:
            prompt: The input prompt
            image_path: Path to the image file
            thinking_budget: Thinking budget for the model (0 = disable thinking)

        Returns:
            Generated text, as returned by _validate_response

        Raises:
            Whatever opening the image, the SDK call or _validate_response
            raises once the retry attempts are exhausted (reraise=True).
        """
        from PIL import Image

        # Keep the file open only for the duration of the request so the
        # handle is released even when the call raises and is retried.
        with Image.open(image_path) as img:
            response = self.client.models.generate_content(
                model=self.model,
                contents=[img, prompt],  # multimodal content: image first, then prompt
                config=self._build_config(thinking_budget),
            )
        return _validate_response(response)

    def generate_text_stream(self, prompt: str, thinking_budget: int = 0) -> Generator[str, None, None]:
        """
        Stream text using Google GenAI SDK's generate_content_stream.

        Yields each chunk's text as it arrives; chunks without text are
        skipped. No retry decorator is applied to this method.

        Args:
            prompt: The input prompt
            thinking_budget: Thinking budget for the model (0 = disable thinking)
        """
        response = self.client.models.generate_content_stream(
            model=self.model,
            contents=prompt,
            config=self._build_config(thinking_budget),
        )
        for chunk in response:
            # Skip chunks with no text content.
            if chunk.text:
                yield chunk.text
) from exc self._source = source self._model = model self._vlm_client = None self._vlm_lock = threading.Lock() ensure_lazyllm_namespace_key(source, namespace='BANANA') self.client = lazyllm.namespace('BANANA').OnlineModule( source = source, model = model, type = 'llm', ) def generate_text(self, prompt, thinking_budget = 1000): message = self.client(prompt) return strip_think_tags(message) def generate_with_image(self, prompt: str, image_path: str, thinking_budget: int = 0) -> str: if self._vlm_client is None: with self._vlm_lock: if self._vlm_client is None: import lazyllm ensure_lazyllm_namespace_key(self._source, namespace='BANANA') self._vlm_client = lazyllm.namespace('BANANA').OnlineModule( source=self._source, model=self._model, type='vlm', ) message = self._vlm_client(prompt, lazyllm_files=[image_path]) return strip_think_tags(message) ================================================ FILE: backend/services/ai_providers/text/openai_provider.py ================================================ """ OpenAI SDK implementation for text generation """ import base64 import logging from typing import Generator from openai import OpenAI from .base import TextProvider, strip_think_tags from config import get_config logger = logging.getLogger(__name__) class OpenAITextProvider(TextProvider): """Text generation using OpenAI SDK (compatible with Gemini via proxy)""" def __init__(self, api_key: str, api_base: str = None, model: str = "gemini-3-flash-preview"): """ Initialize OpenAI text provider Args: api_key: API key api_base: API base URL (e.g., https://aihubmix.com/v1) model: Model name to use """ self.client = OpenAI( api_key=api_key, base_url=api_base, timeout=get_config().OPENAI_TIMEOUT, # set timeout from config max_retries=get_config().OPENAI_MAX_RETRIES # set max retries from config ) self.model = model def generate_text(self, prompt: str, thinking_budget: int = 0) -> str: """ Generate text using OpenAI SDK Args: prompt: The input prompt thinking_budget: Not used in 
OpenAI format, kept for interface compatibility (0 = default) Returns: Generated text """ response = self.client.chat.completions.create( model=self.model, messages=[ {"role": "user", "content": prompt} ] ) return strip_think_tags(response.choices[0].message.content) def generate_text_stream(self, prompt: str, thinking_budget: int = 0) -> Generator[str, None, None]: """Stream text using OpenAI SDK with stream=True.""" response = self.client.chat.completions.create( model=self.model, messages=[{"role": "user", "content": prompt}], stream=True, ) for chunk in response: delta = chunk.choices[0].delta if chunk.choices else None if delta and delta.content: yield delta.content def generate_with_image(self, prompt: str, image_path: str, thinking_budget: int = 0) -> str: """Generate text with image input using OpenAI-compatible chat completions.""" with open(image_path, "rb") as image_file: encoded = base64.b64encode(image_file.read()).decode("ascii") response = self.client.chat.completions.create( model=self.model, messages=[ { "role": "user", "content": [ {"type": "text", "text": prompt}, { "type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded}"}, }, ], } ], ) message_content = response.choices[0].message.content if isinstance(message_content, str): return strip_think_tags(message_content) parts = [] for item in message_content or []: text = item.get("text") if isinstance(item, dict) else getattr(item, "text", None) if text: parts.append(text) return strip_think_tags("\n".join(parts)) ================================================ FILE: backend/services/ai_service.py ================================================ """ AI Service - handles all AI model interactions Based on demo.py and gemini_genai.py TODO: use structured output API """ import os import json import re import logging import requests from typing import List, Dict, Optional, Union from textwrap import dedent from PIL import Image from tenacity import retry, stop_after_attempt, 
retry_if_exception_type from .prompts import ( get_outline_generation_prompt, get_outline_parsing_prompt, get_page_description_prompt, get_all_descriptions_stream_prompt, get_image_generation_prompt, get_image_edit_prompt, get_description_to_outline_prompt, get_description_split_prompt, get_outline_refinement_prompt, get_descriptions_refinement_prompt, get_ppt_page_content_extraction_prompt, get_layout_caption_prompt, get_style_extraction_prompt, get_outline_generation_prompt_markdown, get_outline_parsing_prompt_markdown, get_description_to_outline_prompt_markdown, ) from .ai_providers import get_text_provider, get_image_provider, get_caption_provider, TextProvider, ImageProvider from config import get_config logger = logging.getLogger(__name__) class ProjectContext: """项目上下文数据类,统一管理 AI 需要的所有项目信息""" def __init__(self, project_or_dict, reference_files_content: Optional[List[Dict[str, str]]] = None): """ Args: project_or_dict: 项目对象(Project model)或项目字典(project.to_dict()) reference_files_content: 参考文件内容列表 """ # 支持直接传入 Project 对象,避免 to_dict() 调用,提升性能 if hasattr(project_or_dict, 'idea_prompt'): # 是 Project 对象 self.idea_prompt = project_or_dict.idea_prompt self.outline_text = project_or_dict.outline_text self.description_text = project_or_dict.description_text self.creation_type = project_or_dict.creation_type or 'idea' self.outline_requirements = project_or_dict.outline_requirements self.description_requirements = project_or_dict.description_requirements else: # 是字典 self.idea_prompt = project_or_dict.get('idea_prompt') self.outline_text = project_or_dict.get('outline_text') self.description_text = project_or_dict.get('description_text') self.creation_type = project_or_dict.get('creation_type', 'idea') self.outline_requirements = project_or_dict.get('outline_requirements') self.description_requirements = project_or_dict.get('description_requirements') self.reference_files_content = reference_files_content or [] def to_dict(self) -> Dict: """转换为字典,方便传递""" return { 
'idea_prompt': self.idea_prompt, 'outline_text': self.outline_text, 'description_text': self.description_text, 'creation_type': self.creation_type, 'outline_requirements': self.outline_requirements, 'description_requirements': self.description_requirements, 'reference_files_content': self.reference_files_content } class AIService: """Service for AI model interactions using pluggable providers""" def __init__(self, text_provider: TextProvider = None, image_provider: ImageProvider = None, caption_provider: TextProvider = None): """ Initialize AI service with providers Args: text_provider: Optional pre-configured TextProvider. If None, created from factory. image_provider: Optional pre-configured ImageProvider. If None, created from factory. """ config = get_config() # 优先使用 Flask app.config(可由 Settings 覆盖),否则回退到 Config 默认值 try: from flask import current_app, has_app_context except ImportError: current_app = None # type: ignore has_app_context = lambda: False # type: ignore if has_app_context() and current_app and hasattr(current_app, "config"): self.text_model = current_app.config.get("TEXT_MODEL", config.TEXT_MODEL) self.image_model = current_app.config.get("IMAGE_MODEL", config.IMAGE_MODEL) # 分离的文本和图像推理配置 self.enable_text_reasoning = current_app.config.get("ENABLE_TEXT_REASONING", False) self.text_thinking_budget = current_app.config.get("TEXT_THINKING_BUDGET", 1024) self.enable_image_reasoning = current_app.config.get("ENABLE_IMAGE_REASONING", False) self.image_thinking_budget = current_app.config.get("IMAGE_THINKING_BUDGET", 1024) else: self.text_model = config.TEXT_MODEL self.image_model = config.IMAGE_MODEL self.enable_text_reasoning = False self.text_thinking_budget = 1024 self.enable_image_reasoning = False self.image_thinking_budget = 1024 # Caption model for multimodal (image→text) tasks if has_app_context() and current_app and hasattr(current_app, "config"): self.caption_model = current_app.config.get("IMAGE_CAPTION_MODEL", config.IMAGE_CAPTION_MODEL) else: 
self.caption_model = config.IMAGE_CAPTION_MODEL # Use provided providers or create from factory based on AI_PROVIDER_FORMAT (from Flask config or env var) self.text_provider = text_provider or get_text_provider(model=self.text_model) self.image_provider = image_provider or get_image_provider(model=self.image_model) self.caption_provider = caption_provider or get_caption_provider(model=self.caption_model) def _get_text_thinking_budget(self) -> int: """ 获取文本生成的思考负载 Returns: 如果启用文本推理则返回配置的 budget,否则返回 0 """ return self.text_thinking_budget if self.enable_text_reasoning else 0 def _get_image_thinking_budget(self) -> int: """ 获取图像生成的思考负载 Returns: 如果启用图像推理则返回配置的 budget,否则返回 0 """ return self.image_thinking_budget if self.enable_image_reasoning else 0 @staticmethod def extract_image_urls_from_markdown(text: str) -> List[str]: """ 从 markdown 文本中提取图片 URL Args: text: Markdown 文本,可能包含 ![](url) 格式的图片 Returns: 图片 URL 列表(包括 http/https URL 和 /files/ 开头的本地路径) """ if not text: return [] # 匹配 markdown 图片语法: ![](url) 或 ![alt](url) pattern = r'!\[.*?\]\((.*?)\)' matches = re.findall(pattern, text) # 过滤掉空字符串,支持 http/https URL 和 /files/ 开头的本地路径(包括 mineru、materials 等) urls = [] for url in matches: url = url.strip() if url and (url.startswith('http://') or url.startswith('https://') or url.startswith('/files/')): urls.append(url) return urls @staticmethod def remove_markdown_images(text: str) -> str: """ 从文本中移除 Markdown 图片链接,只保留 alt text(描述文字) Args: text: 包含 Markdown 图片语法的文本 Returns: 移除图片链接后的文本,保留描述文字 """ if not text: return text # 将 ![描述文字](url) 替换为 描述文字 # 如果没有描述文字(空的 alt text),则完全删除该图片链接 def replace_image(match): alt_text = match.group(1).strip() # 如果有描述文字,保留它;否则删除整个链接 return alt_text if alt_text else '' pattern = r'!\[(.*?)\]\([^\)]+\)' cleaned_text = re.sub(pattern, replace_image, text) # 清理可能产生的多余空行 cleaned_text = re.sub(r'\n\s*\n\s*\n', '\n\n', cleaned_text) return cleaned_text @retry( stop=stop_after_attempt(3), retry=retry_if_exception_type((json.JSONDecodeError, ValueError)), 
reraise=True ) def generate_json(self, prompt: str, thinking_budget: int = 1000) -> Union[Dict, List]: """ 生成并解析JSON,如果解析失败则重新生成 Args: prompt: 生成提示词 thinking_budget: 思考预算(会根据 enable_text_reasoning 配置自动调整) Returns: 解析后的JSON对象(字典或列表) Raises: json.JSONDecodeError: JSON解析失败(重试3次后仍失败) """ # 调用AI生成文本(根据 enable_text_reasoning 配置调整 thinking_budget) actual_budget = self._get_text_thinking_budget() response_text = self.text_provider.generate_text(prompt, thinking_budget=actual_budget) # 清理响应文本:移除markdown代码块标记和多余空白 cleaned_text = response_text.strip().strip("```json").strip("```").strip() try: return json.loads(cleaned_text) except json.JSONDecodeError as e: logger.warning(f"JSON解析失败,将重新生成。原始文本: {cleaned_text[:200]}... 错误: {str(e)}") raise @retry( stop=stop_after_attempt(3), retry=retry_if_exception_type((json.JSONDecodeError, ValueError)), reraise=True ) def generate_json_with_image(self, prompt: str, image_path: str, thinking_budget: int = 1000) -> Union[Dict, List]: """ 带图片输入的JSON生成,如果解析失败则重新生成(最多重试3次) Args: prompt: 生成提示词 image_path: 图片文件路径 thinking_budget: 思考预算(会根据 enable_text_reasoning 配置自动调整) Returns: 解析后的JSON对象(字典或列表) Raises: json.JSONDecodeError: JSON解析失败(重试3次后仍失败) ValueError: caption_provider 不支持图片输入 """ # 使用 caption_provider(支持图片输入的多模态模型) actual_budget = self._get_text_thinking_budget() provider = self.caption_provider if hasattr(provider, 'generate_with_image'): response_text = provider.generate_with_image( prompt=prompt, image_path=image_path, thinking_budget=actual_budget ) elif hasattr(provider, 'generate_text_with_images'): response_text = provider.generate_text_with_images( prompt=prompt, images=[image_path], thinking_budget=actual_budget ) else: raise ValueError("caption_provider 不支持图片输入") # 清理响应文本:移除markdown代码块标记和多余空白 cleaned_text = response_text.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip() try: return json.loads(cleaned_text) except json.JSONDecodeError as e: logger.warning(f"JSON解析失败(带图片),将重新生成。原始文本: 
{cleaned_text[:200]}... 错误: {str(e)}") raise @staticmethod def _convert_mineru_path_to_local(mineru_path: str) -> Optional[str]: """ 将 /files/mineru/{extract_id}/{rel_path} 格式的路径转换为本地文件系统路径(支持前缀匹配) Args: mineru_path: MinerU URL 路径,格式为 /files/mineru/{extract_id}/{rel_path} Returns: 本地文件系统路径,如果转换失败则返回 None """ from utils.path_utils import find_mineru_file_with_prefix matched_path = find_mineru_file_with_prefix(mineru_path) return str(matched_path) if matched_path else None @staticmethod def download_image_from_url(url: str) -> Optional[Image.Image]: """ 从 URL 下载图片并返回 PIL Image 对象 Args: url: 图片 URL Returns: PIL Image 对象,如果下载失败则返回 None """ try: logger.debug(f"Downloading image from URL: {url}") response = requests.get(url, timeout=30, stream=True) response.raise_for_status() # 从响应内容创建 PIL Image image = Image.open(response.raw) # 确保图片被加载 image.load() logger.debug(f"Successfully downloaded image: {image.size}, {image.mode}") return image except Exception as e: logger.error(f"Failed to download image from {url}: {str(e)}") return None def generate_outline(self, project_context: ProjectContext, language: str = None) -> List[Dict]: """ Generate PPT outline from idea prompt Based on demo.py gen_outline() Args: project_context: 项目上下文对象,包含所有原始信息 Returns: List of outline items (may contain parts with pages or direct pages) """ outline_prompt = get_outline_generation_prompt(project_context, language) outline = self.generate_json(outline_prompt, thinking_budget=1000) return outline @staticmethod def parse_markdown_outline(markdown: str) -> List[Dict]: """ Parse markdown outline into structured page data. Format: # Part Name → sets current part ## Page Title → starts a new page - Point text → adds a bullet point to current page Returns list of dicts: [{"title": ..., "points": [...], "part": ...}, ...] 
""" pages = [] current_part = None current_page = None for line in markdown.split('\n'): stripped = line.strip() if not stripped: continue if stripped.startswith('# ') and not stripped.startswith('## '): # Part header current_part = stripped[2:].strip() elif stripped.startswith('## '): # New page — flush previous if current_page: pages.append(current_page) current_page = { 'title': stripped[3:].strip(), 'points': [], } if current_part: current_page['part'] = current_part elif stripped.startswith('- ') and current_page is not None: current_page['points'].append(stripped[2:].strip()) # Flush last page if current_page: pages.append(current_page) return pages def generate_outline_stream(self, project_context: ProjectContext, language: str = None): """ Stream outline generation, yielding each completed page as it's detected. Yields dicts: {"title": ..., "points": [...], "part": ...} """ creation_type = project_context.creation_type or 'idea' if creation_type == 'outline': prompt = get_outline_parsing_prompt_markdown(project_context, language) elif creation_type == 'descriptions': prompt = get_description_to_outline_prompt_markdown(project_context, language) else: prompt = get_outline_generation_prompt_markdown(project_context, language) actual_budget = self._get_text_thinking_budget() buffer = "" current_part = None current_page = None stream_complete = False for chunk in self.text_provider.generate_text_stream(prompt, thinking_budget=actual_budget): buffer += chunk # Process complete lines from buffer while '\n' in buffer: line, buffer = buffer.split('\n', 1) stripped = line.strip() if not stripped: continue if stripped == '': stream_complete = True continue if stripped.startswith('# ') and not stripped.startswith('## '): current_part = stripped[2:].strip() elif stripped.startswith('## '): # New page detected — yield previous page if current_page: yield current_page current_page = { 'title': stripped[3:].strip(), 'points': [], } if current_part: current_page['part'] = 
current_part elif stripped.startswith('- ') and current_page is not None: current_page['points'].append(stripped[2:].strip()) # Process remaining buffer (same logic as main loop) if buffer.strip(): buffer += '\n' while '\n' in buffer: line, buffer = buffer.split('\n', 1) stripped = line.strip() if not stripped: continue if stripped == '': stream_complete = True continue if stripped.startswith('# ') and not stripped.startswith('## '): current_part = stripped[2:].strip() elif stripped.startswith('## '): if current_page: yield current_page current_page = { 'title': stripped[3:].strip(), 'points': [], } if current_part: current_page['part'] = current_part elif stripped.startswith('- ') and current_page is not None: current_page['points'].append(stripped[2:].strip()) # Yield last page if current_page: yield current_page # Yield completion sentinel yield {'__stream_complete__': stream_complete} def parse_outline_text(self, project_context: ProjectContext, language: str = None) -> List[Dict]: """ Parse user-provided outline text into structured outline format This method analyzes the text and splits it into pages without modifying the original text Args: project_context: 项目上下文对象,包含所有原始信息 Returns: List of outline items (may contain parts with pages or direct pages) """ parse_prompt = get_outline_parsing_prompt(project_context, language) outline = self.generate_json(parse_prompt, thinking_budget=1000) return outline def flatten_outline(self, outline: List[Dict]) -> List[Dict]: """ Flatten outline structure to page list Based on demo.py flatten_outline() """ pages = [] for item in outline: if "part" in item and "pages" in item: # This is a part, expand its pages for page in item["pages"]: page_with_part = page.copy() page_with_part["part"] = item["part"] pages.append(page_with_part) else: # This is a direct page pages.append(item) return pages @staticmethod def _parse_extra_fields(text: str, field_names: list) -> tuple: """ 从描述文本中解析额外字段,返回 (cleaned_text, extra_fields_dict)。 
遍历 field_names,按出现顺序依次提取每个字段的内容。 两个相邻字段之间的文本属于前一个字段。 """ if not field_names: return text, {} extra_fields = {} # 找到所有字段在文本中的起始位置 positions = [] for name in field_names: match = re.search(rf'\n{re.escape(name)}[::]\s*', text) if match: positions.append((match.start(), match.end(), name)) if not positions: return text, {} # 按位置排序 positions.sort(key=lambda x: x[0]) # 提取每个字段的值 for i, (start, end, name) in enumerate(positions): if i + 1 < len(positions): value = text[end:positions[i + 1][0]].strip() else: value = text[end:].strip() # 清理 HTML 注释标记 value = re.sub(r'', '', value).strip() if value: extra_fields[name] = value # 清理后的描述文本(截取到第一个字段之前) cleaned_text = text[:positions[0][0]].strip() return cleaned_text, extra_fields @staticmethod def _get_extra_field_names() -> list: """从 Settings 读取配置的额外字段名列表。""" try: from models import Settings settings = Settings.get_settings() return settings.get_description_extra_fields() except Exception: logger.warning("Failed to get extra field names from settings", exc_info=True) return ['视觉元素', '视觉焦点', '排版布局', '演讲者备注'] def generate_page_description(self, project_context: ProjectContext, outline: List[Dict], page_outline: Dict, page_index: int, language='zh', detail_level: str = 'default') -> Dict: """ Generate description for a single page Based on demo.py gen_desc() logic Args: project_context: 项目上下文对象,包含所有原始信息 outline: Complete outline page_outline: Outline for this specific page page_index: Page number (1-indexed) detail_level: Description detail level (concise/default/detailed) Returns: Dict with 'text' and optional 'extra_fields' """ extra_field_names = self._get_extra_field_names() part_info = f"\nThis page belongs to: {page_outline['part']}" if 'part' in page_outline else "" desc_prompt = get_page_description_prompt( project_context=project_context, outline=outline, page_outline=page_outline, page_index=page_index, part_info=part_info, language=language, detail_level=detail_level, extra_fields=extra_field_names, ) # 根据 
enable_text_reasoning 配置调整 thinking_budget actual_budget = self._get_text_thinking_budget() response_text = self.text_provider.generate_text(desc_prompt, thinking_budget=actual_budget) text = dedent(response_text) description_text, extra_fields = self._parse_extra_fields(text, extra_field_names) result = {'text': description_text} if extra_fields: result['extra_fields'] = extra_fields return result def generate_descriptions_stream(self, project_context: ProjectContext, outline: List[Dict], flat_pages: List[Dict], language: str = 'zh', detail_level: str = 'default'): """ Stream description generation for all pages, yielding each page as it's completed. Yields dicts: {page_index, description_text, extra_fields} Final yield: {__stream_complete__: bool} """ extra_field_names = self._get_extra_field_names() prompt = get_all_descriptions_stream_prompt( project_context=project_context, outline=outline, flat_pages=flat_pages, language=language, detail_level=detail_level, extra_fields=extra_field_names, ) # Build regex pattern to detect any configured extra field header field_pattern = self._build_extra_field_pattern(extra_field_names) actual_budget = self._get_text_thinking_budget() buffer = "" page_index = -1 current_lines: list = [] current_field: Optional[str] = None # None = description, str = field name extra_fields: Dict[str, str] = {} stream_complete = False def _build_page_result(): """Build result dict from accumulated state.""" desc_text = "\n".join(current_lines).strip() result: Dict = { 'page_index': page_index, 'description_text': desc_text, } if extra_fields: result['extra_fields'] = dict(extra_fields) return result def _reset_page_state(): nonlocal current_lines, current_field, extra_fields current_lines = [] current_field = None extra_fields = {} def _process_line(line: str, stripped: str): nonlocal page_index, current_field, stream_complete if stripped == '': if page_index < 0: page_index = 0 return 'continue' if stripped == '': stream_complete = True 
return 'continue' if stripped == '': if page_index >= 0 and (current_lines or extra_fields): return 'yield_page' return 'continue' if page_index < 0: return 'continue' # Check for extra field header if field_pattern: field_match = field_pattern.match(stripped) if field_match: field_name = field_match.group(1) current_field = field_name value = field_match.group(2).strip() if value: extra_fields[field_name] = value return 'continue' if not stripped: return 'continue' if current_field: # Append to current extra field (multi-line) if current_field in extra_fields: extra_fields[current_field] += "\n" + stripped else: extra_fields[current_field] = stripped else: current_lines.append(line.rstrip()) return 'continue' for chunk in self.text_provider.generate_text_stream(prompt, thinking_budget=actual_budget): buffer += chunk while '\n' in buffer: line, buffer = buffer.split('\n', 1) stripped = line.strip() action = _process_line(line, stripped) if action == 'yield_page': yield _build_page_result() _reset_page_state() page_index += 1 # Process remaining buffer if buffer.strip(): for line in buffer.split('\n'): stripped = line.strip() action = _process_line(line, stripped) if action == 'yield_page': yield _build_page_result() _reset_page_state() page_index += 1 # Yield last page if not yet yielded if page_index >= 0 and current_lines: yield _build_page_result() yield {'__stream_complete__': stream_complete} @staticmethod def _build_extra_field_pattern(field_names: list): """Build a compiled regex pattern that matches any extra field header.""" if not field_names: return None escaped = '|'.join(re.escape(name) for name in field_names) return re.compile(rf'^({escaped})[::]\s*(.*)') def generate_outline_text(self, outline: List[Dict]) -> str: """ Convert outline to text format for prompts Based on demo.py gen_outline_text() """ text_parts = [] for i, item in enumerate(outline, 1): if "part" in item and "pages" in item: text_parts.append(f"{i}. 
{item['part']}") else: text_parts.append(f"{i}. {item.get('title', 'Untitled')}") result = "\n".join(text_parts) return dedent(result) def generate_image_prompt(self, outline: List[Dict], page: Dict, page_desc: str, page_index: int, has_material_images: bool = False, extra_requirements: Optional[str] = None, language='zh', has_template: bool = True, aspect_ratio: str = "16:9") -> str: """ Generate image generation prompt for a page Based on demo.py gen_prompts() Args: outline: Complete outline page: Page outline data page_desc: Page description text page_index: Page number (1-indexed) has_material_images: 是否有素材图片(从项目描述中提取的图片) extra_requirements: Optional extra requirements to apply to all pages language: Output language has_template: 是否有模板图片(False表示无模板图模式) Returns: Image generation prompt """ outline_text = self.generate_outline_text(outline) # Determine current section if 'part' in page: current_section = page['part'] else: current_section = f"{page.get('title', 'Untitled')}" # 在传给文生图模型之前,移除 Markdown 图片链接 # 图片本身已经通过 additional_ref_images 传递,只保留文字描述 cleaned_page_desc = self.remove_markdown_images(page_desc) prompt = get_image_generation_prompt( page_desc=cleaned_page_desc, outline_text=outline_text, current_section=current_section, has_material_images=has_material_images, extra_requirements=extra_requirements, language=language, has_template=has_template, page_index=page_index, aspect_ratio=aspect_ratio ) return prompt def generate_image(self, prompt: str, ref_image_path: Optional[str] = None, aspect_ratio: str = "16:9", resolution: str = "2K", additional_ref_images: Optional[List[Union[str, Image.Image]]] = None) -> Optional[Image.Image]: """ Generate image using configured image provider Based on gemini_genai.py gen_image() Args: prompt: Image generation prompt ref_image_path: Path to reference image (optional). If None, will generate based on prompt only. 
aspect_ratio: Image aspect ratio resolution: Image resolution (note: OpenAI format only supports 1K) additional_ref_images: 额外的参考图片列表,可以是本地路径、URL 或 PIL Image 对象 Returns: PIL Image object or None if failed Raises: Exception with detailed error message if generation fails """ try: logger.debug(f"Reference image: {ref_image_path}") if additional_ref_images: logger.debug(f"Additional reference images: {len(additional_ref_images)}") logger.debug(f"Config - aspect_ratio: {aspect_ratio}, resolution: {resolution}") # 构建参考图片列表 ref_images = [] # 添加主参考图片(如果提供了路径) if ref_image_path: if not os.path.exists(ref_image_path): raise FileNotFoundError(f"Reference image not found: {ref_image_path}") main_ref_image = Image.open(ref_image_path) ref_images.append(main_ref_image) # 添加额外的参考图片 if additional_ref_images: for ref_img in additional_ref_images: if isinstance(ref_img, Image.Image): # 已经是 PIL Image 对象 ref_images.append(ref_img) elif isinstance(ref_img, str): # 可能是本地路径或 URL if os.path.exists(ref_img): # 本地路径 ref_images.append(Image.open(ref_img)) elif ref_img.startswith('http://') or ref_img.startswith('https://'): # URL,需要下载 downloaded_img = self.download_image_from_url(ref_img) if downloaded_img: ref_images.append(downloaded_img) else: logger.warning(f"Failed to download image from URL: {ref_img}, skipping...") elif ref_img.startswith('/files/mineru/'): # MinerU 本地文件路径,需要转换为文件系统路径(支持前缀匹配) local_path = self._convert_mineru_path_to_local(ref_img) if local_path and os.path.exists(local_path): ref_images.append(Image.open(local_path)) logger.debug(f"Loaded MinerU image from local path: {local_path}") else: logger.warning(f"MinerU image file not found (with prefix matching): {ref_img}, skipping...") elif ref_img.startswith('/files/'): # 通用 /files/ 路径(materials、项目文件等),转换为文件系统路径 upload_folder = get_config().UPLOAD_FOLDER relative_path = ref_img[len('/files/'):].lstrip('/') local_path = os.path.abspath(os.path.join(upload_folder, relative_path)) if not 
local_path.startswith(os.path.abspath(upload_folder)): logger.warning(f"Path traversal attempt blocked: {ref_img}, skipping...") elif os.path.exists(local_path): ref_images.append(Image.open(local_path)) logger.debug(f"Loaded image from local path: {local_path}") else: logger.warning(f"Local file not found: {local_path} (from {ref_img}), skipping...") else: logger.warning(f"Invalid image reference: {ref_img}, skipping...") logger.debug(f"Calling image provider for generation with {len(ref_images)} reference images...") logger.debug(f"Enable image reasoning/thinking: {self.enable_image_reasoning}, budget: {self._get_image_thinking_budget()}") # 使用 image_provider 生成图片 # 根据 enable_image_reasoning 配置控制图像生成的思考模式 return self.image_provider.generate_image( prompt=prompt, ref_images=ref_images if ref_images else None, aspect_ratio=aspect_ratio, resolution=resolution, enable_thinking=self.enable_image_reasoning, thinking_budget=self._get_image_thinking_budget() ) except Exception as e: error_detail = f"Error generating image: {type(e).__name__}: {str(e)}" logger.error(error_detail, exc_info=True) raise Exception(error_detail) from e def edit_image(self, prompt: str, current_image_path: str, aspect_ratio: str = "16:9", resolution: str = "2K", original_description: str = None, additional_ref_images: Optional[List[Union[str, Image.Image]]] = None) -> Optional[Image.Image]: """ Edit existing image with natural language instruction Uses current image as reference Args: prompt: Edit instruction current_image_path: Path to current page image aspect_ratio: Image aspect ratio resolution: Image resolution original_description: Original page description to include in prompt additional_ref_images: 额外的参考图片列表,可以是本地路径、URL 或 PIL Image 对象 Returns: PIL Image object or None if failed """ # Build edit instruction with original description if available edit_instruction = get_image_edit_prompt( edit_instruction=prompt, original_description=original_description ) return 
def parse_description_to_page_descriptions(self, project_context: ProjectContext, outline: List[Dict], language='zh') -> List[str]:
    """
    Split the description text into one description per page.

    Args:
        project_context: Project context object holding the original inputs.
        outline: Outline structure that was already parsed.
        language: Language code forwarded to the prompt builder.

    Returns:
        List of page descriptions (strings), one for each page in the outline.

    Raises:
        ValueError: If the model response is not a list.
    """
    raw_pages = self.generate_json(
        get_description_split_prompt(project_context, outline, language),
        thinking_budget=1000,
    )

    # Anything other than a list cannot be mapped onto pages.
    if not isinstance(raw_pages, list):
        raise ValueError("Expected a list of page descriptions, but got: " + str(type(raw_pages)))

    # Coerce every entry to str so callers always receive a list of strings.
    return list(map(str, raw_pages))
language='zh') -> List[str]: """ 根据用户要求修改已有页面描述 Args: current_descriptions: 当前的页面描述列表,每个元素包含 {index, title, description_content} user_requirement: 用户的新要求 project_context: 项目上下文对象,包含所有原始信息 outline: 完整的大纲结构(可选) previous_requirements: 之前的修改要求列表(可选) Returns: 修改后的页面描述列表(字符串列表) """ refinement_prompt = get_descriptions_refinement_prompt( current_descriptions=current_descriptions, user_requirement=user_requirement, project_context=project_context, outline=outline, previous_requirements=previous_requirements, language=language ) descriptions = self.generate_json(refinement_prompt, thinking_budget=1000) # 确保返回的是字符串列表 if isinstance(descriptions, list): return [str(desc) for desc in descriptions] else: raise ValueError("Expected a list of page descriptions, but got: " + str(type(descriptions))) def extract_page_content(self, markdown_text: str, language: str = 'zh') -> Dict: """ 从 fileparser 解析出的 markdown 文本中提取页面结构化内容 Args: markdown_text: 单页 PDF 解析出的 markdown 文本 language: 输出语言 Returns: Dict with keys: title, points, description """ prompt = get_ppt_page_content_extraction_prompt(markdown_text, language=language) result = self.generate_json(prompt, thinking_budget=1000) # Ensure required fields exist if not isinstance(result, dict): raise ValueError(f"Expected dict, got {type(result)}") result.setdefault('title', '') result.setdefault('points', []) result.setdefault('description', '') return result def _generate_text_from_image(self, prompt: str, image_path: str) -> str: """Helper to generate text from a prompt and an image, using caption_provider.""" actual_budget = self._get_text_thinking_budget() provider = self.caption_provider if hasattr(provider, 'generate_with_image'): response_text = provider.generate_with_image( prompt=prompt, image_path=image_path, thinking_budget=actual_budget ) elif hasattr(provider, 'generate_text_with_images'): response_text = provider.generate_text_with_images( prompt=prompt, images=[image_path], thinking_budget=actual_budget ) else: raise 
ValueError("caption_provider 不支持图片输入") return response_text.strip() def generate_layout_caption(self, image_path: str) -> str: """使用 caption model 描述 PPT 页面的排版布局""" return self._generate_text_from_image(get_layout_caption_prompt(), image_path) def extract_style_description(self, image_path: str) -> str: """从图片中提取风格描述""" return self._generate_text_from_image(get_style_extraction_prompt(), image_path) ================================================ FILE: backend/services/ai_service_manager.py ================================================ """ AIService singleton manager for optimizing provider initialization This module provides a singleton pattern implementation for AIService to avoid repeated initialization of AI providers (TextProvider and ImageProvider) on every request. Benefits: - Reuses AI provider instances across requests - Reduces initialization overhead - Better resource management - Thread-safe for Flask multi-threaded environment Usage: from services.ai_service_manager import get_ai_service # In your controller ai_service = get_ai_service() outline = ai_service.generate_outline(project_context) """ import logging from threading import Lock from typing import Optional from flask import current_app, has_app_context from .ai_service import AIService from .ai_providers import get_text_provider, get_image_provider, get_caption_provider, TextProvider, ImageProvider logger = logging.getLogger(__name__) # Global singleton instance _ai_service_instance: Optional[AIService] = None _lock = Lock() # Provider cache to avoid re-initialization when models don't change _text_provider_cache: dict = {} _image_provider_cache: dict = {} _caption_provider_cache: dict = {} _cache_lock = Lock() def _get_cached_text_provider(model: str) -> TextProvider: """ Get or create a cached text provider instance Args: model: Model name to use Returns: Cached or new TextProvider instance """ with _cache_lock: if model not in _text_provider_cache: logger.info(f"Creating new TextProvider 
def get_ai_service(force_new: bool = False) -> AIService:
    """
    Return the process-wide AIService singleton, building it on first use.

    The instance is assembled from a text, an image and a caption provider,
    each obtained through the per-model provider caches of this module.
    Model names are read from the Flask app config when an application
    context is active (falling back to the values from ``get_config()`` for
    missing keys), and straight from ``get_config()`` otherwise.

    Args:
        force_new: If True, the current singleton is discarded first, so a
            new AIService is built during this call (useful for testing).
            The provider caches themselves are left untouched.

    Returns:
        The shared AIService instance.

    Note:
        Model names are only read while the singleton is being built. Once it
        exists, a later change to TEXT_MODEL / IMAGE_MODEL /
        IMAGE_CAPTION_MODEL is not picked up until the singleton is reset,
        either with ``force_new=True`` or via ``clear_ai_service_cache()``.
    """
    global _ai_service_instance

    if force_new:
        # Drop the existing instance under the lock; it is rebuilt below.
        with _lock:
            logger.info("Force creating new AIService instance")
            _ai_service_instance = None

    # Unlocked check first so the common "already built" case skips the lock.
    if _ai_service_instance is None:
        with _lock:
            # Double-check locking pattern
            if _ai_service_instance is None:
                logger.info("Initializing AIService singleton with provider caching")

                # Get model names from Flask config or use defaults
                from config import get_config
                config = get_config()

                if has_app_context() and current_app and hasattr(current_app, "config"):
                    text_model = current_app.config.get("TEXT_MODEL", config.TEXT_MODEL)
                    image_model = current_app.config.get("IMAGE_MODEL", config.IMAGE_MODEL)
                    caption_model = current_app.config.get("IMAGE_CAPTION_MODEL", config.IMAGE_CAPTION_MODEL)
                else:
                    # No application context: use the values from get_config() only.
                    text_model = config.TEXT_MODEL
                    image_model = config.IMAGE_MODEL
                    caption_model = config.IMAGE_CAPTION_MODEL

                # Get cached providers
                text_provider = _get_cached_text_provider(text_model)
                image_provider = _get_cached_image_provider(image_model)
                caption_provider = _get_cached_caption_provider(caption_model)

                # Create AIService with cached providers
                _ai_service_instance = AIService(
                    text_provider=text_provider,
                    image_provider=image_provider,
                    caption_provider=caption_provider
                )

                logger.info(f"AIService singleton created with models: text={text_model}, image={image_model}, caption={caption_model}")

    return _ai_service_instance
class ExportError(Exception):
    """
    Error raised while exporting a presentation.

    When fail_fast=True, any export failure is raised as this exception,
    carrying detailed error information and a help hint.
    """

    # Default help hints, keyed by error type.
    _DEFAULT_HELP_TEXTS = {
        'style_extraction': '样式提取失败可能是由于百度OCR API配置问题。请检查「项目设置 -> 导出设置」中的配置,或尝试切换到「MinerU提取」方法。',
        'text_render': '文本渲染失败可能是由于字体或编码问题。请检查页面内容是否包含特殊字符。',
        'image_add': '图片添加失败可能是由于图片文件损坏或路径错误。请尝试重新生成该页面的图片。',
        'inpaint': '背景修复失败可能是由于API配置问题。请检查「项目设置 -> 导出设置」中的背景图获取方法配置。',
        'config': '配置错误。请检查「项目设置 -> 导出设置」中的相关配置。',
        'service': '服务不可用。请稍后重试或联系管理员。',
    }

    # Hint used when the error type has no dedicated entry.
    _FALLBACK_HELP_TEXT = '如果问题持续出现,可以在「项目设置 -> 导出设置」中开启「返回半成品」选项以跳过错误继续导出。'

    def __init__(self, message: str, error_type: str = 'unknown', details: Dict[str, Any] = None, help_text: str = None):
        """
        Args:
            message: Error message.
            error_type: Error category (style_extraction, text_render,
                image_add, inpaint, config, service).
            details: Detailed error information; an empty dict when omitted.
            help_text: Help hint; a default based on error_type when omitted.
        """
        super().__init__(message)
        self.message = message
        self.error_type = error_type
        self.details = details if details else {}
        if help_text:
            self.help_text = help_text
        else:
            self.help_text = self._get_default_help_text(error_type)

    def _get_default_help_text(self, error_type: str) -> str:
        """Return the default help hint for the given error type."""
        return self._DEFAULT_HELP_TEXTS.get(error_type, self._FALLBACK_HELP_TEXT)

    def to_dict(self) -> Dict[str, Any]:
        """Return the error as a plain dictionary."""
        payload = {}
        payload['message'] = self.message
        payload['error_type'] = self.error_type
        payload['details'] = self.details
        payload['help_text'] = self.help_text
        return payload
if len(text) > 50 else text, 'reason': reason }) def add_image_failed(self, path: str, reason: str): """记录图片添加失败""" self.image_add_failed.append({ 'path': path, 'reason': reason }) def add_json_parse_failed(self, context: str, reason: str): """记录 JSON 解析失败""" self.json_parse_failed.append({ 'context': context, 'reason': reason }) def add_warning(self, message: str): """添加其他警告""" self.other_warnings.append(message) def has_warnings(self) -> bool: """是否有警告""" return bool( self.style_extraction_failed or self.text_render_failed or self.image_add_failed or self.json_parse_failed or self.other_warnings ) def to_summary(self) -> List[str]: """生成警告摘要(适合前端展示)""" summary = [] if self.style_extraction_failed: summary.append(f"⚠️ {len(self.style_extraction_failed)} 个文本元素样式提取失败") if self.text_render_failed: summary.append(f"⚠️ {len(self.text_render_failed)} 个文本元素渲染失败") if self.image_add_failed: summary.append(f"⚠️ {len(self.image_add_failed)} 张图片添加失败") if self.json_parse_failed: summary.append(f"⚠️ {len(self.json_parse_failed)} 次 AI 响应解析失败") for warning in self.other_warnings[:5]: # 最多显示5条其他警告 summary.append(f"⚠️ {warning}") if len(self.other_warnings) > 5: summary.append(f" ...还有 {len(self.other_warnings) - 5} 条其他警告") return summary def to_dict(self) -> Dict[str, Any]: """转换为字典(详细信息)""" return { 'style_extraction_failed': self.style_extraction_failed, 'text_render_failed': self.text_render_failed, 'image_add_failed': self.image_add_failed, 'json_parse_failed': self.json_parse_failed, 'other_warnings': self.other_warnings, 'total_warnings': ( len(self.style_extraction_failed) + len(self.text_render_failed) + len(self.image_add_failed) + len(self.json_parse_failed) + len(self.other_warnings) ) } def _get_page_size_inches(aspect_ratio: str = '16:9', base: float = 10.0) -> Tuple[float, float]: """Return (width, height) in inches for a given aspect ratio string.""" try: w, h = (float(x) for x in aspect_ratio.split(':')) if not (math.isfinite(w) and math.isfinite(h) and w > 0 and h 
> 0): raise ValueError(f"invalid dimensions: {w}:{h}") except (ValueError, AttributeError) as e: logger.warning(f"Invalid aspect ratio '{aspect_ratio}', falling back to 16:9: {e}") w, h = 16.0, 9.0 if w >= h: return base, base * h / w else: return base * w / h, base class ExportService: """Service for exporting presentations""" # NOTE: clean background生成功能已迁移到解耦的InpaintProvider实现 # - DefaultInpaintProvider: 基于mask的精确区域重绘(Volcengine) # - GenerativeEditInpaintProvider: 基于生成式大模型的整图编辑重绘(Gemini等) # 使用方式: from services.image_editability import InpaintProviderFactory @staticmethod def _build_style_extraction_error( message: str, *, element_id: Optional[str] = None, text_content: Optional[str] = None, page_idx: Optional[int] = None ) -> ExportError: details: Dict[str, Any] = {} if element_id: details['element_id'] = element_id if text_content: details['text_content'] = text_content[:50] if page_idx is not None: details['page'] = page_idx + 1 lowered = message.lower() if '不支持图片输入' in message or 'support image input' in lowered: help_text = ( '当前用于图片样式提取的 caption/image_caption 模型不支持图片输入。' '请在设置中改成支持视觉输入的模型,或检查 OpenAI 格式下的 image caption provider / model 配置。' ) else: help_text = ( '文本样式提取依赖视觉模型分析文本截图。请检查 image caption provider、模型名与 API 权限;' '如果只想先拿到可编辑结果,也可以在「项目设置 -> 导出设置」中开启「返回半成品」。' ) return ExportError( message=f"文本样式提取失败: {message}", error_type='style_extraction', details=details, help_text=help_text, ) @staticmethod def create_pptx_from_images(image_paths: List[str], output_file: str = None, aspect_ratio: str = '16:9') -> bytes: """ Create PPTX file from image paths Based on demo.py create_pptx_from_images() Args: image_paths: List of absolute paths to images output_file: Optional output file path (if None, returns bytes) Returns: PPTX file as bytes if output_file is None """ # Create presentation prs = Presentation() # Set author/date metadata for exported PPTX try: core = prs.core_properties now = datetime.now(timezone.utc) core.author = "banana-slides" 
core.last_modified_by = "banana-slides" core.created = now core.modified = now core.last_printed = None except Exception as e: logger.warning(f"Failed to set core properties: {e}") # Set slide dimensions based on aspect ratio page_w, page_h = _get_page_size_inches(aspect_ratio) prs.slide_width = Inches(page_w) prs.slide_height = Inches(page_h) # Add each image as a slide for image_path in image_paths: if not os.path.exists(image_path): logger.warning(f"Image not found: {image_path}") continue # Add blank slide layout (layout 6 is typically blank) blank_slide_layout = prs.slide_layouts[6] slide = prs.slides.add_slide(blank_slide_layout) # Add image to fill entire slide slide.shapes.add_picture( image_path, left=0, top=0, width=prs.slide_width, height=prs.slide_height ) # Save or return bytes if output_file: prs.save(output_file) return None else: # Save to bytes pptx_bytes = io.BytesIO() prs.save(pptx_bytes) pptx_bytes.seek(0) return pptx_bytes.getvalue() @staticmethod def create_pdf_from_images(image_paths: List[str], output_file: str = None, aspect_ratio: str = '16:9') -> Optional[bytes]: """ Create PDF file from image paths using img2pdf (low memory usage) Args: image_paths: List of absolute paths to images output_file: Optional output file path (if None, returns bytes) Returns: PDF file as bytes if output_file is None, otherwise None """ # Validate images exist and log warnings for missing files valid_paths = [] for p in image_paths: if os.path.exists(p): valid_paths.append(p) else: logger.warning(f"Image not found and will be skipped for PDF export: {p}") if not valid_paths: raise ValueError("No valid images found for PDF export") try: logger.info(f"Using img2pdf for PDF export ({len(valid_paths)} pages, low memory mode)") page_w, page_h = _get_page_size_inches(aspect_ratio) layout_fun = img2pdf.get_layout_fun( pagesize=(img2pdf.in_to_pt(page_w), img2pdf.in_to_pt(page_h)), fit=img2pdf.FitMode.fill, ) # Convert images to PDF pdf_bytes = 
@staticmethod
def create_pdf_from_images_pillow(image_paths: List[str], output_file: str = None, aspect_ratio: str = '16:9') -> Optional[bytes]:
    """
    Create a PDF from image files using Pillow (original, high-memory method).

    Every image is held in memory until the PDF has been written. For large
    projects prefer create_pdf_from_images, which falls back to this method
    only when img2pdf fails.

    The PDF is rendered into memory, passed through _add_pdf_metadata, and
    then either written to ``output_file`` or returned. Metadata is therefore
    added in both output modes, the same as create_pdf_from_images does.

    Args:
        image_paths: List of absolute paths to images. Missing files are
            logged and skipped.
        output_file: Optional output file path (if None, returns bytes).
        aspect_ratio: Page aspect ratio string, e.g. ``'16:9'``.

    Returns:
        PDF file as bytes if output_file is None, otherwise None.

    Raises:
        ValueError: If none of the given paths exists.
    """
    page_w, page_h = _get_page_size_inches(aspect_ratio)
    pages = []

    try:
        # Load all images
        for image_path in image_paths:
            if not os.path.exists(image_path):
                logger.warning(f"Image not found: {image_path}")
                continue

            img = Image.open(image_path)

            # Convert to RGB if necessary (PDF requires RGB)
            if img.mode != 'RGB':
                try:
                    rgb = img.convert('RGB')
                finally:
                    # The source image is no longer needed once converted;
                    # close it so its file handle is released right away.
                    img.close()
                img = rgb

            # Track the page before touching it so the cleanup below always sees it.
            pages.append(img)

            # Set DPI so PDF page matches target dimensions
            # NOTE(review): confirm that Pillow's PDF writer honours info['dpi'];
            # it may only read the resolution/dpi arguments passed to save().
            img.info['dpi'] = (img.width / page_w, img.height / page_h)

        if not pages:
            raise ValueError("No valid images found for PDF export")

        # Render into memory first so metadata can be added in both output modes.
        buffer = io.BytesIO()
        pages[0].save(
            buffer,
            save_all=True,
            append_images=pages[1:],
            format='PDF'
        )
    finally:
        # Release image data and any file handles still held by lazily loaded images.
        for page in pages:
            page.close()

    pdf_bytes = ExportService._add_pdf_metadata(buffer.getvalue())

    if output_file:
        with open(output_file, "wb") as f:
            f.write(pdf_bytes)
        return None
    return pdf_bytes
@staticmethod
def _add_table_cell_elements_to_slide(
    builder,
    slide,
    cell_elements: List[Dict[str, Any]],
    scale_x: float = 1.0,
    scale_y: float = 1.0
):
    """
    Add table cells to a slide as individual text boxes.

    Each cell already carries its bounding box in global coordinates, so the
    box is only scaled before being handed to the builder. Cells with empty
    or whitespace-only text are skipped. A failure to add one cell is logged
    and does not stop the remaining cells.

    Args:
        builder: PPTXBuilder instance
        slide: Target slide
        cell_elements: Cell dictionaries; ``content`` holds the text and
            ``bbox_global`` holds a mapping with ``x0``/``y0``/``x1``/``y1``.
        scale_x: X-axis scale factor
        scale_y: Y-axis scale factor
    """
    logger.info(f"开始添加表格单元格元素,共 {len(cell_elements)} 个")

    for cell_elem in cell_elements:
        # `or` also covers keys that are present but explicitly set to None.
        text = cell_elem.get('content') or ''
        bbox_global = cell_elem.get('bbox_global') or {}

        if not text.strip():
            continue

        # bbox_global is already in global coordinates: build [x0, y0, x1, y1]
        # and apply the scale factors.
        bbox = [
            int(bbox_global.get('x0', 0) * scale_x),
            int(bbox_global.get('y0', 0) * scale_y),
            int(bbox_global.get('x1', 0) * scale_x),
            int(bbox_global.get('y1', 0) * scale_y)
        ]

        try:
            # Reuse add_text_element to add a text box (no border is added).
            builder.add_text_element(
                slide=slide,
                text=text,
                bbox=bbox,
                text_level=None,
                align='center'
            )
            logger.debug(f" 添加单元格: '{text[:10]}...' at bbox {bbox}")
        except Exception as e:
            logger.warning(f"添加单元格失败: {e}")

    logger.info(f"✓ 表格单元格添加完成,共 {len(cell_elements)} 个")
builder.add_table_element( slide=slide, html_table=html_table, bbox=bbox ) logger.info(f"Added editable table at bbox {bbox}") return # Table added successfully except Exception as e: logger.error(f"Failed to add table: {str(e)}, falling back to image") # Fall through to add as image instead # Add as image (either image type or table fallback) img_path_str = image_item.get('img_path', '') if not img_path_str: logger.warning(f"No img_path in item: {image_item}") return # Try to find the image file # MinerU may store images in 'images/' subdirectory possible_paths = [ mineru_dir / img_path_str, mineru_dir / 'images' / Path(img_path_str).name, mineru_dir / Path(img_path_str).name, ] image_path = None for path in possible_paths: if path.exists(): image_path = str(path) break if not image_path: logger.warning(f"Image file not found: {img_path_str}") # Add placeholder builder.add_image_placeholder(slide, bbox) return # Add image element try: builder.add_image_element( slide=slide, image_path=image_path, bbox=bbox ) except Exception as e: logger.error(f"Failed to add image element: {str(e)}") @staticmethod def _collect_text_elements_for_extraction( elements: List, # List[EditableElement] depth: int = 0 ) -> List[tuple]: """ 递归收集所有需要提取样式的文本元素 Args: elements: EditableElement列表 depth: 当前递归深度 Returns: 元组列表,每个元组为 (element_id, image_path, text_content) """ text_items = [] for elem in elements: elem_type = elem.element_type # 文本类型元素需要提取样式 if elem_type in ['text', 'title', 'table_cell', 'list', 'paragraph', 'header', 'footer', 'heading', 'table_caption', 'image_caption']: if elem.content and elem.image_path and os.path.exists(elem.image_path): text = elem.content.strip() if text: text_items.append((elem.element_id, elem.image_path, text)) # 递归处理子元素 if hasattr(elem, 'children') and elem.children: child_items = ExportService._collect_text_elements_for_extraction( elements=elem.children, depth=depth + 1 ) text_items.extend(child_items) return text_items @staticmethod def 
_batch_extract_text_styles( text_items: List[tuple], text_attribute_extractor, max_workers: int = 8 ) -> Dict[str, Any]: """ 批量并行提取文本样式(逐个裁剪区域分析) 此方法对每一段文字的裁剪区域单独进行分析。 经测试,此方法效果较好,目前仍在使用。 备选方案:_batch_extract_text_styles_with_full_image 可一次性分析全图所有文本。 Args: text_items: 元组列表,每个元组为 (element_id, image_path, text_content) text_attribute_extractor: 文本属性提取器 max_workers: 并发数 Returns: 字典,key为element_id,value为TextStyleResult """ from concurrent.futures import ThreadPoolExecutor, as_completed if not text_items or not text_attribute_extractor: return {} logger.info(f"并行提取 {len(text_items)} 个文本元素的样式(并发数: {max_workers})...") results = {} def extract_single(item): element_id, image_path, text_content = item try: style = text_attribute_extractor.extract( image=image_path, text_content=text_content ) return element_id, style except Exception as e: logger.warning(f"提取文字样式失败 [{element_id}]: {e}") return element_id, None with ThreadPoolExecutor(max_workers=max_workers) as executor: futures = {executor.submit(extract_single, item): item[0] for item in text_items} for future in as_completed(futures): element_id, style = future.result() if style is not None: results[element_id] = style logger.info(f"✓ 文本样式提取完成,成功 {len(results)}/{len(text_items)} 个") return results @staticmethod def _collect_text_elements_for_batch_extraction( elements: List, # List[EditableElement] depth: int = 0 ) -> List[Dict[str, Any]]: """ 递归收集所有需要批量提取样式的文本元素(新格式,包含bbox) Args: elements: EditableElement列表 depth: 当前递归深度 Returns: 字典列表,每个字典包含 element_id, bbox, content """ text_items = [] for elem in elements: elem_type = elem.element_type # 文本类型元素需要提取样式 if elem_type in ['text', 'title', 'table_cell', 'list', 'paragraph', 'header', 'footer', 'heading', 'table_caption', 'image_caption']: if elem.content: text = elem.content.strip() if text: # 使用全局坐标 bbox_global bbox = elem.bbox_global if hasattr(elem, 'bbox_global') and elem.bbox_global else elem.bbox text_items.append({ 'element_id': elem.element_id, 'bbox': [bbox.x0, 
bbox.y0, bbox.x1, bbox.y1], 'content': text }) # 递归处理子元素 if hasattr(elem, 'children') and elem.children: child_items = ExportService._collect_text_elements_for_batch_extraction( elements=elem.children, depth=depth + 1 ) text_items.extend(child_items) return text_items @staticmethod def _batch_extract_text_styles_with_full_image( editable_images: List, # List[EditableImage] text_attribute_extractor, max_workers: int = 4 ) -> Dict[str, Any]: """ 【新逻辑】使用全图批量提取所有文本样式 新方法:给 caption model 提供全图,以及提取后的所有文本 bbox 和内容, 让模型一次性分析所有文本的样式属性(颜色、粗体、对齐等)。 优势:模型可以看到全局信息,分析更准确。 Args: editable_images: EditableImage列表,每个对应一张PPT页面 text_attribute_extractor: 文本属性提取器(需要有 extract_batch_with_full_image 方法) max_workers: 并发处理页面数 Returns: 字典,key为element_id,value为TextStyleResult """ from concurrent.futures import ThreadPoolExecutor, as_completed if not editable_images or not text_attribute_extractor: return {} # 检查提取器是否支持批量提取 if not hasattr(text_attribute_extractor, 'extract_batch_with_full_image'): logger.warning("提取器不支持 extract_batch_with_full_image 方法,回退到旧逻辑") # 回退到旧逻辑 all_text_items = [] for editable_img in editable_images: text_items = ExportService._collect_text_elements_for_extraction(editable_img.elements) all_text_items.extend(text_items) return ExportService._batch_extract_text_styles( text_items=all_text_items, text_attribute_extractor=text_attribute_extractor, max_workers=max_workers * 2 ) logger.info(f"【新逻辑】使用全图批量分析 {len(editable_images)} 页的文本样式...") all_results = {} def process_single_page(editable_img, page_idx): """处理单个页面的文本样式提取""" try: # 收集该页面的所有文本元素 text_elements = ExportService._collect_text_elements_for_batch_extraction( editable_img.elements ) if not text_elements: logger.info(f" 页面 {page_idx + 1}: 无文本元素") return {} logger.info(f" 页面 {page_idx + 1}: 分析 {len(text_elements)} 个文本元素...") # 使用原始图片路径作为全图 full_image_path = editable_img.image_path # 调用批量提取方法 page_results = text_attribute_extractor.extract_batch_with_full_image( full_image=full_image_path, text_elements=text_elements 
) logger.info(f" 页面 {page_idx + 1}: 成功提取 {len(page_results)} 个元素的样式") return page_results except Exception as e: logger.error(f"页面 {page_idx + 1} 文本样式提取失败: {e}", exc_info=True) return {} # 并发处理所有页面 with ThreadPoolExecutor(max_workers=max_workers) as executor: futures = { executor.submit(process_single_page, img, idx): idx for idx, img in enumerate(editable_images) } for future in as_completed(futures): page_idx = futures[future] try: page_results = future.result() all_results.update(page_results) except Exception as e: logger.error(f"页面 {page_idx + 1} 处理失败: {e}") total_elements = sum( len(ExportService._collect_text_elements_for_batch_extraction(img.elements)) for img in editable_images ) logger.info(f"✓ 全图批量文本样式提取完成,成功 {len(all_results)}/{total_elements} 个") return all_results @staticmethod def _batch_extract_text_styles_hybrid( editable_images: List, # List[EditableImage] text_attribute_extractor, max_workers: int = 8, fail_fast: bool = False ) -> Tuple[Dict[str, Any], List[Tuple[str, str]]]: """ 【混合策略】结合全局识别和单个裁剪识别的优势 策略: - 全局识别(全图分析):获取 is_bold、is_italic、is_underline、text_alignment 因为这些属性需要看整体布局和上下文才能判断准确 - 单个裁剪识别:获取 font_color 因为颜色需要精确看局部像素才能识别准确 Args: editable_images: EditableImage列表,每个对应一张PPT页面 text_attribute_extractor: 文本属性提取器 max_workers: 并发数 Returns: (results, failed_extractions): - results: 字典,key为element_id,value为TextStyleResult(合并后的结果) - failed_extractions: 失败列表,每项为 (element_id, error_reason) """ from concurrent.futures import ThreadPoolExecutor, as_completed from services.image_editability.text_attribute_extractors import TextStyleResult if not editable_images or not text_attribute_extractor: return {}, [] # 检查提取器是否支持批量提取 if not hasattr(text_attribute_extractor, 'extract_batch_with_full_image'): logger.warning("提取器不支持混合策略,回退到单个裁剪识别") all_text_items = [] for editable_img in editable_images: text_items = ExportService._collect_text_elements_for_extraction(editable_img.elements) all_text_items.extend(text_items) results = 
ExportService._batch_extract_text_styles( text_items=all_text_items, text_attribute_extractor=text_attribute_extractor, max_workers=max_workers ) return results, [] # 回退方法暂不收集失败信息 logger.info(f"【混合策略】开始分析 {len(editable_images)} 页的文本样式...") logger.info(f" - 全局识别: is_bold, is_italic, is_underline, text_alignment") logger.info(f" - 单个识别: font_color") # Step 1: 收集所有文本元素 all_text_items = [] # 用于单个裁剪识别 (element_id, image_path, content) page_text_elements = {} # 用于全局识别 {page_idx: [text_elements]} for page_idx, editable_img in enumerate(editable_images): # 收集用于单个裁剪识别的数据 text_items = ExportService._collect_text_elements_for_extraction(editable_img.elements) all_text_items.extend(text_items) # 收集用于全局识别的数据 batch_elements = ExportService._collect_text_elements_for_batch_extraction(editable_img.elements) if batch_elements: page_text_elements[page_idx] = { 'image_path': editable_img.image_path, 'elements': batch_elements } if not all_text_items: return {} # Step 2: 并行执行两种识别 global_results = {} # 全局识别结果 local_results = {} # 单个裁剪识别结果 def extract_global_for_page(page_idx, page_data): """全局识别单页""" try: results = text_attribute_extractor.extract_batch_with_full_image( full_image=page_data['image_path'], text_elements=page_data['elements'] ) return page_idx, results, None except Exception as e: logger.warning(f"全局识别页面 {page_idx + 1} 失败: {e}") return page_idx, {}, str(e) # 收集失败信息 failed_extractions = [] # [(element_id, reason), ...] 
def extract_local_single(item): """单个裁剪识别""" element_id, image_path, text_content = item try: style = text_attribute_extractor.extract( image=image_path, text_content=text_content ) # Check for real success: style must exist and not be an error result # (CaptionModelTextAttributeExtractor returns TextStyleResult(confidence=0.0, metadata={'error':...}) on failure) is_error = style and style.confidence == 0.0 and style.metadata.get('error') if style and not is_error: return element_id, style, None else: error_msg = style.metadata.get('error', '样式提取返回空') if style else "样式提取返回空" if fail_fast: raise ExportService._build_style_extraction_error( error_msg, element_id=element_id, text_content=text_content ) return element_id, None, error_msg except ExportError: raise # 重新抛出 ExportError except Exception as e: logger.warning(f"单个识别失败 [{element_id}]: {e}") if fail_fast: raise ExportService._build_style_extraction_error( str(e), element_id=element_id, text_content=text_content ) return element_id, None, str(e) # 并发执行全局识别和单个裁剪识别 logger.info(f" 并发执行: 全局识别 {len(page_text_elements)} 页 + 单个识别 {len(all_text_items)} 个元素...") with ThreadPoolExecutor(max_workers=max_workers) as executor: # 提交全局识别任务 global_futures = { executor.submit(extract_global_for_page, idx, data): ('global', idx) for idx, data in page_text_elements.items() } # 提交单个裁剪识别任务 local_futures = { executor.submit(extract_local_single, item): ('local', item[0]) for item in all_text_items } # 收集全局识别结果 for future in as_completed(global_futures): task_type, page_idx = global_futures[future] try: _, page_results, page_error = future.result() global_results.update(page_results) expected_element_ids = { element['element_id'] for element in page_text_elements[page_idx]['elements'] } missing_element_ids = expected_element_ids - set(page_results.keys()) if page_error: if fail_fast: raise ExportService._build_style_extraction_error(page_error, page_idx=page_idx) failed_extractions.extend( (element_id, f"全局识别失败: {page_error}") for 
element_id in expected_element_ids ) elif missing_element_ids: reason = "全局识别未返回完整结果" if fail_fast: raise ExportService._build_style_extraction_error(reason, page_idx=page_idx) failed_extractions.extend((element_id, reason) for element_id in missing_element_ids) except Exception as e: logger.error(f"全局识别任务失败: {e}") if fail_fast: if isinstance(e, ExportError): raise raise ExportService._build_style_extraction_error(str(e), page_idx=page_idx) from e expected_element_ids = [ element['element_id'] for element in page_text_elements[page_idx]['elements'] ] failed_extractions.extend( (element_id, f"全局识别失败: {e}") for element_id in expected_element_ids ) # 收集单个裁剪识别结果 for future in as_completed(local_futures): task_type, element_id = local_futures[future] try: elem_id, style, error = future.result() if style is not None: local_results[elem_id] = style if error: failed_extractions.append((elem_id, error)) except Exception as e: logger.error(f"单个识别任务失败: {e}") if fail_fast: raise failed_extractions.append((element_id, str(e))) # Step 3: 合并结果 # 优先使用全局识别的布局属性,使用单个识别的颜色属性 merged_results = {} all_element_ids = set(global_results.keys()) | set(local_results.keys()) for element_id in all_element_ids: global_style = global_results.get(element_id) local_style = local_results.get(element_id) if global_style and local_style: # 混合:颜色用单个识别(包括 colored_segments),布局用全局识别 merged_results[element_id] = TextStyleResult( font_color_rgb=local_style.font_color_rgb, # 单个识别的颜色 colored_segments=local_style.colored_segments, # 单个识别的多颜色片段 is_bold=global_style.is_bold, # 全局识别的粗体 is_italic=global_style.is_italic, # 全局识别的斜体 is_underline=global_style.is_underline, # 全局识别的下划线 text_alignment=global_style.text_alignment, # 全局识别的对齐 confidence=0.9, metadata={ 'source': 'hybrid', 'color_source': 'local', 'layout_source': 'global' } ) elif local_style: # 只有单个识别结果 merged_results[element_id] = local_style elif global_style: # 只有全局识别结果 merged_results[element_id] = global_style logger.info(f"✓ 混合策略完成: 全局识别 
{len(global_results)} 个, 单个识别 {len(local_results)} 个, 合并 {len(merged_results)} 个, 失败 {len(failed_extractions)} 个") return merged_results, failed_extractions @staticmethod def create_editable_pptx_with_recursive_analysis( image_paths: List[str] = None, output_file: str = None, slide_width_pixels: int = 1920, slide_height_pixels: int = 1080, max_depth: int = 2, max_workers: int = 8, editable_images: List = None, # 可选:直接传入已分析的EditableImage列表 text_attribute_extractor = None, # 可选:文字属性提取器,用于提取颜色、粗体、斜体等样式 progress_callback = None, # 可选:进度回调函数 (step, message, percent) -> None export_extractor_method: str = 'hybrid', # 组件提取方法: mineru, hybrid export_inpaint_method: str = 'hybrid', # 背景修复方法: generative, baidu, hybrid fail_fast: bool = True # 是否在遇到错误时立即停止(False则收集警告继续) ) -> Tuple[Optional[bytes], ExportWarnings]: """ 使用递归图片可编辑化服务创建可编辑PPTX 这是新的架构方法,使用ImageEditabilityService进行递归版面分析。 两种使用方式: 1. 传入 image_paths:自动分析图片并生成PPTX 2. 传入 editable_images:直接使用已分析的结果(避免重复分析) 配置(如 MinerU token)自动从 Flask app.config 获取。 Args: image_paths: 图片路径列表(可选,与editable_images二选一) output_file: 输出文件路径(可选) slide_width_pixels: 目标幻灯片宽度 slide_height_pixels: 目标幻灯片高度 max_depth: 最大递归深度 max_workers: 并发处理数 editable_images: 已分析的EditableImage列表(可选,与image_paths二选一) text_attribute_extractor: 文字属性提取器(可选),用于提取文字颜色、粗体、斜体等样式 可通过 TextAttributeExtractorFactory.create_caption_model_extractor() 创建 export_extractor_method: 组件提取方法 ('mineru' 或 'hybrid',默认 'hybrid') export_inpaint_method: 背景修复方法 ('generative', 'baidu', 'hybrid',默认 'hybrid') fail_fast: 是否在遇到错误时立即停止(默认 True)。设为 False 则收集警告继续导出。 Returns: (pptx_bytes, warnings): 元组,包含 PPTX 字节流和警告信息 - pptx_bytes: PPTX 文件字节流(如果 output_file 为 None),否则为 None - warnings: ExportWarnings 对象,包含所有警告信息 """ from services.image_editability import ServiceConfig, ImageEditabilityService from utils.pptx_builder import PPTXBuilder # 初始化警告收集器 warnings = ExportWarnings() # 辅助函数:报告进度 def report_progress(step: str, message: str, percent: int): logger.info(f"[进度 {percent}%] {step}: {message}") if 
progress_callback: try: progress_callback(step, message, percent) except Exception as e: logger.warning(f"进度回调失败: {e}") # 如果已提供分析结果,直接使用;否则需要分析 if editable_images is not None: logger.info(f"使用已提供的 {len(editable_images)} 个分析结果创建PPTX") report_progress("准备", f"使用已有分析结果({len(editable_images)} 页)", 10) else: if not image_paths: raise ValueError("必须提供 image_paths 或 editable_images 之一") total_pages = len(image_paths) logger.info(f"开始使用递归分析方法创建可编辑PPTX,共 {total_pages} 页") report_progress("开始", f"准备分析 {total_pages} 页幻灯片...", 0) # 1. 创建ImageEditabilityService(配置自动从 Flask config 获取,使用项目导出设置) logger.info(f"使用导出设置: extractor={export_extractor_method}, inpaint={export_inpaint_method}") config = ServiceConfig.from_defaults( max_depth=max_depth, extractor_method=export_extractor_method, inpaint_method=export_inpaint_method ) editability_service = ImageEditabilityService(config) # 2. 并发处理所有页面,生成EditableImage结构 report_progress("版面分析", f"开始分析 {total_pages} 张图片(并发数: {max_workers})...", 5) from concurrent.futures import ThreadPoolExecutor, as_completed editable_images = [] completed_count = 0 with ThreadPoolExecutor(max_workers=max_workers) as executor: futures = { executor.submit(editability_service.make_image_editable, img_path): idx for idx, img_path in enumerate(image_paths) } results = [None] * len(image_paths) for future in as_completed(futures): idx = futures[future] try: results[idx] = future.result() completed_count += 1 # 版面分析占 5% - 40% 的进度 percent = 5 + int(35 * completed_count / total_pages) report_progress("版面分析", f"已完成第 {completed_count}/{total_pages} 页的版面分析", percent) except Exception as e: logger.error(f"处理图片 {image_paths[idx]} 失败: {e}") raise editable_images = results # 2.5. 
使用混合策略提取所有文本元素的样式(如果提供了提取器) # 混合策略:全局识别(粗体/斜体/下划线/对齐)+ 单个裁剪识别(颜色) text_styles_cache = {} if text_attribute_extractor: report_progress("样式提取", "开始提取文本样式(混合策略)...", 45) # 统计文本元素数量 total_text_count = sum( len(ExportService._collect_text_elements_for_extraction(img.elements)) for img in editable_images ) if total_text_count > 0: report_progress("样式提取", f"混合策略分析 {total_text_count} 个文本元素...", 50) text_styles_cache, failed_extractions = ExportService._batch_extract_text_styles_hybrid( editable_images=editable_images, text_attribute_extractor=text_attribute_extractor, max_workers=max_workers * 2, fail_fast=fail_fast ) # 记录样式提取失败的元素(详细) for element_id, reason in failed_extractions: warnings.add_style_extraction_failed(element_id, reason) # 记录汇总信息 extracted_count = len(text_styles_cache) failed_count = len(failed_extractions) if failed_count > 0: logger.warning(f"样式提取: {failed_count}/{total_text_count} 个元素失败") report_progress("样式提取", f"✓ 完成 {extracted_count}/{total_text_count} 个文本样式提取({failed_count} 个失败)", 70) report_progress("构建PPTX", "开始构建可编辑PPTX文件...", 75) # 4. 创建PPTX构建器 builder = PPTXBuilder() builder.create_presentation() builder.setup_presentation_size(slide_width_pixels, slide_height_pixels) # 5. 
为每个页面构建幻灯片 total_pages = len(editable_images) for page_idx, editable_img in enumerate(editable_images): # 构建PPTX占 75% - 95% 的进度 percent = 75 + int(20 * page_idx / total_pages) report_progress("构建PPTX", f"构建第 {page_idx + 1}/{total_pages} 页...", percent) logger.info(f" 构建第 {page_idx + 1}/{total_pages} 页...") # 创建空白幻灯片 slide = builder.add_blank_slide() # 添加背景图(参考原实现,使用slide.shapes.add_picture) if editable_img.clean_background and os.path.exists(editable_img.clean_background): logger.info(f" 添加clean background: {editable_img.clean_background}") try: slide.shapes.add_picture( editable_img.clean_background, left=0, top=0, width=builder.prs.slide_width, height=builder.prs.slide_height ) except Exception as e: logger.error(f"Failed to add background: {e}") else: # 回退到原图 logger.info(f" 使用原图作为背景: {editable_img.image_path}") try: slide.shapes.add_picture( editable_img.image_path, left=0, top=0, width=builder.prs.slide_width, height=builder.prs.slide_height ) except Exception as e: logger.error(f"Failed to add background: {e}") # 添加所有元素(递归地) # 计算缩放比例:将原始图片坐标映射到统一的幻灯片坐标 # 背景图已经缩放到幻灯片尺寸,所以元素坐标也需要相应缩放 scale_x = slide_width_pixels / editable_img.width scale_y = slide_height_pixels / editable_img.height logger.info(f" 元素数量: {len(editable_img.elements)}, 图片尺寸: {editable_img.width}x{editable_img.height}, " f"幻灯片尺寸: {slide_width_pixels}x{slide_height_pixels}, 缩放比例: {scale_x:.3f}x{scale_y:.3f}") ExportService._add_editable_elements_to_slide( builder=builder, slide=slide, elements=editable_img.elements, scale_x=scale_x, scale_y=scale_y, depth=0, text_styles_cache=text_styles_cache, # 使用预提取的样式缓存 warnings=warnings, # 收集警告 fail_fast=fail_fast # 传递 fail_fast 参数 ) logger.info(f" ✓ 第 {page_idx + 1} 页完成,添加了 {len(editable_img.elements)} 个元素") # 5. 
保存或返回字节流 report_progress("保存文件", "正在保存PPTX文件...", 95) if output_file: builder.save(output_file) report_progress("完成", f"✓ 可编辑PPTX已保存", 100) logger.info(f"✓ 可编辑PPTX已保存: {output_file}") # 输出警告摘要 if warnings.has_warnings(): logger.warning(f"导出完成,但有 {len(warnings.to_summary())} 条警告") return None, warnings else: pptx_bytes = builder.to_bytes() report_progress("完成", f"✓ 可编辑PPTX已生成", 100) logger.info(f"✓ 可编辑PPTX已生成({len(pptx_bytes)} 字节)") # 输出警告摘要 if warnings.has_warnings(): logger.warning(f"导出完成,但有 {len(warnings.to_summary())} 条警告") return pptx_bytes, warnings @staticmethod def _add_editable_elements_to_slide( builder, slide, elements: List, # List[EditableElement] scale_x: float = 1.0, scale_y: float = 1.0, depth: int = 0, text_styles_cache: Dict[str, Any] = None, # 预提取的文本样式缓存,key为element_id warnings: 'ExportWarnings' = None, # 警告收集器 fail_fast: bool = False # 是否在遇到错误时立即停止 ): """ 递归地将EditableElement添加到幻灯片 Args: builder: PPTXBuilder实例 slide: 幻灯片对象 elements: EditableElement列表 scale_x: X轴缩放因子 scale_y: Y轴缩放因子 depth: 当前递归深度 text_styles_cache: 预提取的文本样式缓存(可选),由 _batch_extract_text_styles 生成 Note: elem.image_path 现在是绝对路径,无需额外的目录参数 """ if text_styles_cache is None: text_styles_cache = {} for elem in elements: elem_type = elem.element_type # 根据深度决定使用局部坐标还是全局坐标 # depth=0: 顶层元素,使用局部坐标(bbox) # depth>0: 子元素,需要使用全局坐标(bbox_global) if depth == 0: bbox = elem.bbox # 顶层元素使用局部坐标 else: bbox = elem.bbox_global if hasattr(elem, 'bbox_global') and elem.bbox_global else elem.bbox # 转换BBox对象为列表并应用缩放 bbox_list = [ int(bbox.x0 * scale_x), int(bbox.y0 * scale_y), int(bbox.x1 * scale_x), int(bbox.y1 * scale_y) ] logger.info(f"{' ' * depth} 添加元素: type={elem_type}, bbox={bbox_list}, content={elem.content[:30] if elem.content else None}, image_path={elem.image_path}, 使用{'全局' if depth > 0 else '局部'}坐标") # 根据类型添加元素(参考原实现的_add_mineru_text_to_slide和_add_mineru_image_to_slide) if elem_type in ['text', 'title', 'list', 'paragraph', 'header', 'footer', 'heading', 'table_caption', 'image_caption']: # 
添加文本(参考_add_mineru_text_to_slide) if elem.content: text = elem.content.strip() if text: try: # 确定文本级别 level = 'title' if elem_type in ['title', 'heading'] else 'default' # 从缓存获取预提取的文字样式 text_style = text_styles_cache.get(elem.element_id) if text_style: logger.debug(f"{' ' * depth} 使用缓存的文字样式: color={text_style.font_color_rgb}, bold={text_style.is_bold}") builder.add_text_element( slide=slide, text=text, bbox=bbox_list, text_level=level, text_style=text_style ) except Exception as e: logger.warning(f"添加文本元素失败: {e}") if fail_fast: raise ExportError( message=f"添加文本元素失败: {str(e)}", error_type='text_render', details={'text': text[:50], 'bbox': bbox_list} ) if warnings: warnings.add_text_render_failed(text, str(e)) elif elem_type == 'table_cell': # 添加表格单元格(带边框的文本框) if elem.content: text = elem.content.strip() if text: try: # 从缓存获取预提取的文字样式 text_style = text_styles_cache.get(elem.element_id) # 表格单元格已经在上面统一处理了bbox_global和缩放 # 直接使用bbox_list即可 builder.add_text_element( slide=slide, text=text, bbox=bbox_list, text_level=None, align='center', text_style=text_style ) except Exception as e: logger.warning(f"添加单元格失败: {e}") if fail_fast: raise ExportError( message=f"添加表格单元格失败: {str(e)}", error_type='text_render', details={'text': text[:50], 'bbox': bbox_list} ) if warnings: warnings.add_text_render_failed(text, str(e)) elif elem_type == 'table': # 如果表格有子元素(单元格),使用inpainted背景 + 单元格 if elem.children and elem.inpainted_background_path: logger.info(f"{' ' * depth} 表格有 {len(elem.children)} 个单元格,使用可编辑格式") # 先添加inpainted背景(干净的表格框架) if os.path.exists(elem.inpainted_background_path): try: builder.add_image_element( slide=slide, image_path=elem.inpainted_background_path, bbox=bbox_list ) except Exception as e: logger.error(f"Failed to add table background: {e}") # 递归添加单元格 ExportService._add_editable_elements_to_slide( builder=builder, slide=slide, elements=elem.children, scale_x=scale_x, scale_y=scale_y, depth=depth + 1, text_styles_cache=text_styles_cache, warnings=warnings, 
fail_fast=fail_fast ) else: # 没有子元素,添加整体表格图片 # elem.image_path 现在是绝对路径 if elem.image_path and os.path.exists(elem.image_path): try: builder.add_image_element( slide=slide, image_path=elem.image_path, bbox=bbox_list ) except Exception as e: logger.error(f"Failed to add table image: {e}") else: logger.warning(f"Table image not found: {elem.image_path}") builder.add_image_placeholder(slide, bbox_list) elif elem_type in ['image', 'figure', 'chart']: # 检查是否应该使用递归渲染 should_use_recursive_render = False if elem.children and elem.inpainted_background_path: # 检查是否有任意子元素占据父元素绝大部分面积 parent_area = (bbox.x1 - bbox.x0) * (bbox.y1 - bbox.y0) max_child_coverage_ratio = 0.85 # 阈值 has_dominant_child = False for child in elem.children: if hasattr(child, 'bbox_global') and child.bbox_global: child_bbox = child.bbox_global else: child_bbox = child.bbox child_area = child_bbox.area coverage_ratio = child_area / parent_area if parent_area > 0 else 0 if coverage_ratio > max_child_coverage_ratio: logger.info(f"{' ' * depth} 子元素 {child.element_id} 占父元素面积 {coverage_ratio*100:.1f}% (>{max_child_coverage_ratio*100:.0f}%),跳过递归渲染,直接使用原图") has_dominant_child = True break should_use_recursive_render = not has_dominant_child # 如果有子元素且应该递归渲染 if should_use_recursive_render: logger.debug(f"{' ' * depth} 元素有 {len(elem.children)} 个子元素,递归添加") # 先添加inpainted背景 if os.path.exists(elem.inpainted_background_path): try: builder.add_image_element(slide, elem.inpainted_background_path, bbox_list) except Exception as e: logger.error(f"Failed to add inpainted background: {e}") # 递归添加子元素 ExportService._add_editable_elements_to_slide( builder=builder, slide=slide, elements=elem.children, scale_x=scale_x, scale_y=scale_y, depth=depth + 1, text_styles_cache=text_styles_cache, warnings=warnings, fail_fast=fail_fast ) else: # 没有子元素或子元素占比过大,直接添加原图 # elem.image_path 现在是绝对路径 if elem.image_path and os.path.exists(elem.image_path): try: builder.add_image_element( slide=slide, image_path=elem.image_path, bbox=bbox_list ) 
except Exception as e: logger.error(f"Failed to add image: {e}") else: logger.warning(f"Image file not found: {elem.image_path}") builder.add_image_placeholder(slide, bbox_list) else: # 其他类型 logger.debug(f"{' ' * depth} 跳过未知类型: {elem_type}") ================================================ FILE: backend/services/file_parser_service.py ================================================ """ File Parser Service - handles file parsing using MinerU service and image captioning """ import os import re import time import logging import zipfile import io import base64 import requests import tempfile from typing import Optional, List from concurrent.futures import ThreadPoolExecutor, as_completed from PIL import Image from markitdown import MarkItDown from services.ai_providers.lazyllm_env import ensure_lazyllm_namespace_key, get_lazyllm_api_key from services.ai_providers.text import strip_think_tags logger = logging.getLogger(__name__) def _get_ai_provider_format(provider_format: str = None) -> str: """Get the configured AI provider format Priority: 1. Provided provider_format parameter 2. Flask app.config['AI_PROVIDER_FORMAT'] (from database settings) 3. Environment variable AI_PROVIDER_FORMAT 4. Default: 'gemini' Args: provider_format: Optional provider format string. If not provided, reads from Flask config or environment variable. 
""" if provider_format: return provider_format.lower() # Try to get from Flask app config first (database settings) try: from flask import current_app if current_app and hasattr(current_app, 'config'): config_value = current_app.config.get('AI_PROVIDER_FORMAT') if config_value: return str(config_value).lower() except RuntimeError: # Not in Flask application context pass # Fallback to environment variable return os.getenv('AI_PROVIDER_FORMAT', 'gemini').lower() class FileParserService: """Service for parsing files using MinerU and enhancing with image captions""" def __init__(self, mineru_token: str, mineru_api_base: str = "https://mineru.net", google_api_key: str = "", google_api_base: str = "", openai_api_key: str = "", openai_api_base: str = "", image_caption_model: str = "gemini-3-flash-preview", lazyllm_image_caption_source: str = "", provider_format: str = None, mineru_model_version: str = "vlm", ): """ Initialize the file parser service Args: mineru_token: MinerU API token mineru_api_base: MinerU API base URL google_api_key: Google Gemini API key for image captioning (used when AI_PROVIDER_FORMAT=gemini) google_api_base: Google Gemini API base URL openai_api_key: OpenAI API key for image captioning (used when AI_PROVIDER_FORMAT=openai) openai_api_base: OpenAI API base URL image_caption_model: Model to use for image captioning lazyllm_image_caption_source: image caption model provider for lazyllm provider_format: AI provider format ('gemini' or 'openai'). If not provided, reads from environment variable. mineru_model_version: MinerU model version ('vlm' or 'pipeline'). Default is 'vlm'. 
""" self.mineru_token = mineru_token self.mineru_api_base = mineru_api_base self.mineru_model_version = mineru_model_version self.get_upload_url_api = f"{mineru_api_base}/api/v4/file-urls/batch" self.get_result_api_template = f"{mineru_api_base}/api/v4/extract-results/batch/{{}}" # Store config for lazy initialization self._google_api_key = google_api_key self._google_api_base = google_api_base self._openai_api_key = openai_api_key self._openai_api_base = openai_api_base self._image_caption_model = image_caption_model self._lazyllm_image_caption_source = lazyllm_image_caption_source # Clients will be initialized lazily based on AI_PROVIDER_FORMAT self._gemini_client = None self._openai_client = None self._lazyllm_client = None self._provider_format = _get_ai_provider_format(provider_format) def _get_gemini_client(self): """Lazily initialize Gemini client""" if self._gemini_client is None and self._google_api_key: from google import genai from google.genai import types self._gemini_client = genai.Client( http_options=types.HttpOptions(base_url=self._google_api_base) if self._google_api_base else None, api_key=self._google_api_key ) return self._gemini_client def _get_openai_client(self): """Lazily initialize OpenAI client""" if self._openai_client is None and self._openai_api_key: from openai import OpenAI self._openai_client = OpenAI( api_key=self._openai_api_key, base_url=self._openai_api_base ) return self._openai_client def _get_lazyllm_client(self): """Lazily initialize LazyLLM client""" if self._lazyllm_client is None: import lazyllm source = self._lazyllm_image_caption_source or "qwen" model = self._image_caption_model or "qwen-vl-plus" ensure_lazyllm_namespace_key(source, namespace='BANANA') self._lazyllm_client = lazyllm.namespace('BANANA').OnlineModule( source=source, model=model, type="vlm", ) return self._lazyllm_client def _can_generate_captions(self) -> bool: """Check if image caption generation is available""" if self._provider_format == 'openai': 
return bool(self._openai_api_key) elif self._provider_format == 'lazyllm': source = self._lazyllm_image_caption_source or "qwen" return bool(get_lazyllm_api_key(source, namespace='BANANA')) else: return bool(self._google_api_key) def parse_file(self, file_path: str, filename: str) -> tuple[Optional[str], Optional[str], Optional[str], Optional[str], int]: """ Parse a file using MinerU service and enhance with image captions Args: file_path: Path to the file to parse filename: Original filename Returns: Tuple of (batch_id, markdown_content, extract_id, error_message, failed_image_count) - batch_id: MinerU batch ID (for tracking, None for text files) - markdown_content: Parsed markdown with enhanced image descriptions - extract_id: Unique ID for the extracted files directory (None for text files) - error_message: Error message if parsing failed - failed_image_count: Number of images that failed to generate captions """ try: # Check if it's a plain text file that doesn't need MinerU parsing file_ext = filename.rsplit('.', 1)[-1].lower() if '.' in filename else '' if file_ext in ['txt', 'md', 'markdown']: logger.info(f"File {filename} is a plain text file, reading directly...") return self._parse_text_file(file_path, filename) # Check if it's a spreadsheet file (xlsx, csv) - use markitdown if file_ext in ['xlsx', 'xls', 'csv']: logger.info(f"File {filename} is a spreadsheet file, using markitdown...") return self._parse_spreadsheet_file(file_path, filename) # For other file types, use MinerU service logger.info(f"File {filename} requires MinerU parsing...") # Step 1: Get upload URL logger.info(f"Step 1/4: Requesting upload URL for {filename}...") batch_id, upload_url, error = self._get_upload_url(filename) if error: return None, None, None, error, 0 logger.info(f"Got upload URL. 
Batch ID: {batch_id}") # Step 2: Upload file logger.info(f"Step 2/4: Uploading file {filename}...") error = self._upload_file(file_path, upload_url) if error: return batch_id, None, None, error, 0 logger.info("File uploaded successfully.") # Step 3: Poll for parsing result logger.info("Step 3/4: Waiting for parsing to complete...") markdown_content, extract_id, error = self._poll_result(batch_id) if error: return batch_id, None, None, error, 0 logger.info("File parsed successfully.") # Step 4: Enhance markdown with image captions if markdown_content and self._can_generate_captions(): logger.info("Step 4/4: Enhancing markdown with image captions...") enhanced_content, failed_count = self._enhance_markdown_with_captions(markdown_content) if failed_count > 0: logger.warning(f"Markdown enhanced with image captions, but {failed_count} images failed to generate captions.") else: logger.info("Markdown enhanced with image captions (all images succeeded).") return batch_id, enhanced_content, extract_id, None, failed_count else: logger.info("Skipping image caption enhancement (caption model unavailable).") return batch_id, markdown_content, extract_id, None, 0 except Exception as e: error_msg = f"Unexpected error during file parsing: {str(e)}" logger.error(error_msg, exc_info=True) return None, None, None, error_msg, 0 def _parse_text_file(self, file_path: str, filename: str) -> tuple[Optional[str], Optional[str], Optional[str], Optional[str], int]: """ Parse plain text file directly without MinerU Args: file_path: Path to the file filename: Original filename Returns: Tuple of (batch_id, markdown_content, extract_id, error_message, failed_image_count) """ try: # Read file content with open(file_path, 'r', encoding='utf-8') as f: content = f.read() logger.info(f"Text file read successfully: {len(content)} characters") # Enhance markdown with image captions if it contains images if content and self._can_generate_captions(): # Check if content has markdown images if '![' in 
content and '](' in content: logger.info("Text file contains images, enhancing with captions...") enhanced_content, failed_count = self._enhance_markdown_with_captions(content) if failed_count > 0: logger.warning(f"Text file enhanced with image captions, but {failed_count} images failed to generate captions.") else: logger.info("Text file enhanced with image captions (all images succeeded).") return None, enhanced_content, None, None, failed_count return None, content, None, None, 0 except UnicodeDecodeError: # Try with different encoding try: with open(file_path, 'r', encoding='gbk') as f: content = f.read() logger.info(f"Text file read successfully with GBK encoding: {len(content)} characters") if content and self._can_generate_captions() and '![' in content and '](' in content: logger.info("Text file contains images, enhancing with captions...") enhanced_content, failed_count = self._enhance_markdown_with_captions(content) if failed_count > 0: logger.warning(f"Text file enhanced with image captions, but {failed_count} images failed to generate captions.") else: logger.info("Text file enhanced with image captions (all images succeeded).") return None, enhanced_content, None, None, failed_count return None, content, None, None, 0 except Exception as e: error_msg = f"Failed to read text file with multiple encodings: {str(e)}" logger.error(error_msg) return None, None, None, error_msg, 0 except Exception as e: error_msg = f"Failed to read text file: {str(e)}" logger.error(error_msg) return None, None, None, error_msg, 0 def _parse_spreadsheet_file(self, file_path: str, filename: str) -> tuple[Optional[str], Optional[str], Optional[str], Optional[str], int]: """ Parse spreadsheet files (xlsx, xls, csv) using markitdown Args: file_path: Path to the file filename: Original filename Returns: Tuple of (batch_id, markdown_content, extract_id, error_message, failed_image_count) """ try: # Use markitdown to convert spreadsheet to markdown md = MarkItDown() result = 
md.convert(file_path) markdown_content = result.text_content logger.info(f"Spreadsheet file converted successfully: {len(markdown_content)} characters") # Spreadsheet files typically don't have images, so no need for caption enhancement return None, markdown_content, None, None, 0 except Exception as e: error_msg = f"Failed to parse spreadsheet file: {str(e)}" logger.error(error_msg, exc_info=True) return None, None, None, error_msg, 0 def _get_upload_url(self, filename: str) -> tuple[Optional[str], Optional[str], Optional[str]]: """Get upload URL from MinerU""" headers = { "Content-Type": "application/json", "Authorization": f"Bearer {self.mineru_token}" } upload_data = { "files": [{"name": filename}], "model_version": self.mineru_model_version # "vlm" or "pipeline" } try: response = requests.post( self.get_upload_url_api, headers=headers, json=upload_data, timeout=30 ) response.raise_for_status() result = response.json() if result.get("code") != 0: error_msg = f"Failed to get upload URL: {result.get('msg')}" logger.error(error_msg) return None, None, error_msg batch_id = result["data"]["batch_id"] upload_url = result["data"]["file_urls"][0] return batch_id, upload_url, None except requests.exceptions.RequestException as e: error_msg = f"Network error while requesting upload URL: {str(e)}" logger.error(error_msg) return None, None, error_msg def _upload_file(self, file_path: str, upload_url: str) -> Optional[str]: """Upload file to MinerU""" try: with open(file_path, 'rb') as f: response = requests.put( upload_url, data=f, headers={"Authorization": None}, # Remove auth for upload timeout=300 # 5 minutes timeout for large files ) response.raise_for_status() return None except requests.exceptions.RequestException as e: error_msg = f"File upload failed: {str(e)}" logger.error(error_msg) return error_msg except IOError as e: error_msg = f"Failed to read file: {str(e)}" logger.error(error_msg) return error_msg def _poll_result(self, batch_id: str, max_wait_time: int = 
600) -> tuple[Optional[str], Optional[str], Optional[str]]: """Poll for parsing result Returns: Tuple of (markdown_content, extract_id, error_message) """ headers = { "Content-Type": "application/json", "Authorization": f"Bearer {self.mineru_token}" } result_url = self.get_result_api_template.format(batch_id) start_time = time.time() while True: if time.time() - start_time > max_wait_time: error_msg = f"Parsing timeout after {max_wait_time} seconds" logger.error(error_msg) return None, None, error_msg try: response = requests.get(result_url, headers=headers, timeout=30) response.raise_for_status() task_info = response.json() if task_info.get("code") != 0: error_msg = f"Failed to query task status: {task_info.get('msg')}" logger.error(error_msg) return None, None, error_msg task_status = task_info["data"]["extract_result"][0]["state"] if task_status == "done": logger.info("File parsing completed!") full_zip_url = task_info["data"]["extract_result"][0]["full_zip_url"] # Download and extract markdown return self._download_markdown(full_zip_url) elif task_status == "failed": err_msg = task_info["data"]["extract_result"][0].get("err_msg", "Unknown error") error_msg = f"File parsing failed: {err_msg}" logger.error(error_msg) return None, None, error_msg else: logger.debug(f"Current task status: {task_status}, waiting...") time.sleep(2) # Wait 2 seconds before next poll except requests.exceptions.RequestException as e: logger.warning(f"Network error while polling result: {str(e)}, retrying...") time.sleep(2) def _download_markdown(self, zip_url: str) -> tuple[Optional[str], Optional[str], Optional[str]]: """Download and extract markdown from result zip, save images to local server Returns: Tuple of (markdown_content, extract_id, error_message) """ try: response = requests.get(zip_url, timeout=60) response.raise_for_status() # Generate unique directory name for this extraction import uuid extract_id = str(uuid.uuid4())[:8] # Get upload folder from Flask config (we'll need 
to pass this) # For now, use a hardcoded path relative to project root import os from pathlib import Path # Navigate to project root (assuming this file is in backend/services/) current_file = Path(__file__).resolve() backend_dir = current_file.parent.parent project_root = backend_dir.parent # Create directory for mineru extracts mineru_storage = project_root / 'uploads' / 'mineru_files' / extract_id mineru_storage.mkdir(parents=True, exist_ok=True) logger.info(f"Extracting ZIP to: {mineru_storage}") markdown_content = None markdown_file_path = None with zipfile.ZipFile(io.BytesIO(response.content)) as z: # Extract all files z.extractall(mineru_storage) logger.info(f"Extracted {len(z.namelist())} files from ZIP") # Find markdown file (usually full.md or similar) for name in z.namelist(): if name.endswith('.md') or name.endswith('.MD'): markdown_file_path = name md_full_path = mineru_storage / name with open(md_full_path, 'r', encoding='utf-8') as f: markdown_content = f.read() logger.info(f"Found markdown file: {name}") break if not markdown_content: error_msg = "No markdown file found in result zip" logger.error(error_msg) return None, None, error_msg # Replace relative image paths with local server URLs markdown_content = self._replace_image_paths( markdown_content, markdown_file_path, extract_id ) return markdown_content, extract_id, None except requests.exceptions.RequestException as e: error_msg = f"Failed to download result: {str(e)}" logger.error(error_msg) return None, None, error_msg except zipfile.BadZipFile: error_msg = "Downloaded file is not a valid ZIP archive" logger.error(error_msg) return None, None, error_msg except Exception as e: error_msg = f"Failed to process ZIP file: {str(e)}" logger.error(error_msg) return None, None, error_msg @staticmethod def extract_header_footer_from_layout(extract_id: str) -> str: """ 从 MinerU layout.json 的 discarded_blocks 中提取页眉页脚文本。 Args: extract_id: MinerU 解析结果的 extract_id Returns: 提取到的页眉页脚文本,如无则返回空字符串 """ import 
json from pathlib import Path current_file = Path(__file__).resolve() project_root = current_file.parent.parent.parent mineru_dir = project_root / 'uploads' / 'mineru_files' / extract_id layout_file = mineru_dir / 'layout.json' if not layout_file.exists(): return '' try: with open(layout_file, 'r', encoding='utf-8') as f: layout_data = json.load(f) if 'pdf_info' not in layout_data or not layout_data['pdf_info']: return '' texts = [] for page_info in layout_data['pdf_info']: for block in page_info.get('discarded_blocks', []): block_type = block.get('type', '') if block_type not in ('header', 'footer'): continue for line in block.get('lines', []): for span in line.get('spans', []): if span.get('type') == 'text' and span.get('content', '').strip(): content = span['content'].strip() if content != '#': texts.append(content) return '\n'.join(texts) except Exception as e: logger.warning(f"Failed to extract header/footer from layout.json: {e}") return '' def _replace_image_paths(self, markdown_content: str, markdown_file_path: str, extract_id: str) -> str: """Replace relative image paths in markdown with local server URLs""" import os # Get the directory where the markdown file is located (within the extracted ZIP) md_dir = os.path.dirname(markdown_file_path) def replace_link(match): alt_text = match.group(1) img_path = match.group(2) # Skip if already an absolute URL if img_path.startswith(('http://', 'https://')): return match.group(0) # Handle /file/ or /files/ paths (MinerU may generate these) # These are relative to the extracted directory if img_path.startswith('/file/') or img_path.startswith('/files/'): # Remove leading slash and use as relative path rel_path = img_path.lstrip('/') # Remove 'file/' or 'files/' prefix if present if rel_path.startswith('file/'): rel_path = rel_path[5:] # Remove 'file/' prefix elif rel_path.startswith('files/'): rel_path = rel_path[6:] # Remove 'files/' prefix else: # Calculate the relative path from the markdown file if md_dir: # 
Normalize path separators rel_path = os.path.normpath(os.path.join(md_dir, img_path)).replace('\\', '/') else: rel_path = img_path.replace('\\', '/') # Construct the local server URL # The files are served at /files/mineru/{extract_id}/{rel_path} new_url = f"/files/mineru/{extract_id}/{rel_path[:15]}.{rel_path.split('.')[-1]}" # "images/...(8)" logger.debug(f"Replacing image path: {img_path} -> {new_url}") return f"![{alt_text}]({new_url})" # Match markdown image syntax pattern = r"!\[(.*?)\]\((.*?)\)" replaced_content = re.sub(pattern, replace_link, markdown_content) return replaced_content def _enhance_markdown_with_captions(self, markdown_content: str) -> tuple[str, int]: """ Enhance markdown by adding captions to images that don't have alt text Args: markdown_content: Original markdown content Returns: Tuple of (enhanced_markdown, failed_image_count) """ if not self._can_generate_captions(): return markdown_content, 0 # Extract all image URLs from markdown (both with and without alt text) # Support both http/https URLs and relative paths image_pattern = r'!\[(.*?)\]\(([^\)]+)\)' matches = list(re.finditer(image_pattern, markdown_content)) logger.info(f"Found {len(matches)} markdown image references") if not matches: logger.info("No markdown image syntax found") return markdown_content, 0 # Filter to only images without alt text (empty brackets) images_to_caption = [] for match in matches: alt_text = match.group(1).strip() image_url = match.group(2).strip() logger.debug(f"Image found: alt='{alt_text}', url='{image_url}'") if not alt_text: # Only process images with empty alt text images_to_caption.append(match) if not images_to_caption: logger.info(f"Found {len(matches)} images in markdown, but all have descriptions. 
Skipping caption generation.") return markdown_content, 0 logger.info(f"Found {len(images_to_caption)} images without descriptions out of {len(matches)} total, generating captions...") # Generate captions in parallel (only for images without alt text) image_urls = [match.group(2) for match in images_to_caption] captions, failed_count = self._generate_captions_parallel(image_urls) # Log results success_count = len(images_to_caption) - failed_count logger.info(f"Image caption generation completed: {success_count} succeeded, {failed_count} failed out of {len(images_to_caption)} total") # Replace image syntax with captioned version (in reverse order to maintain positions) enhanced_content = markdown_content for match, caption in zip(reversed(images_to_caption), reversed(captions)): old_text = match.group(0) url = match.group(2) # Use caption as alt text (empty if generation failed) new_text = f"![{caption}]({url})" enhanced_content = enhanced_content[:match.start()] + new_text + enhanced_content[match.end():] return enhanced_content, failed_count def _generate_captions_parallel(self, image_urls: List[str], max_workers: int = 12, max_retries: int = 3) -> tuple[List[str], int]: """ Generate captions for multiple images in parallel with retry mechanism Args: image_urls: List of image URLs max_workers: Maximum number of parallel workers max_retries: Maximum number of retries for each image Returns: Tuple of (list of captions, number of failed images) """ captions = [""] * len(image_urls) failed_count = 0 def generate_with_retry(url: str, idx: int) -> tuple[int, str, bool]: """Generate caption with retry logic""" for attempt in range(max_retries): try: caption = self._generate_single_caption(url) if caption: logger.debug(f"Generated caption for image {idx + 1}/{len(image_urls)} (attempt {attempt + 1})") return (idx, caption, True) else: logger.warning(f"Empty caption for image {idx + 1} (attempt {attempt + 1}/{max_retries})") except Exception as e: logger.warning(f"Failed 
to generate caption for image {idx + 1} (attempt {attempt + 1}/{max_retries}): {str(e)}") if attempt < max_retries - 1: import time time.sleep(1 * (attempt + 1)) # Exponential backoff: 1s, 2s, 3s # All retries failed logger.error(f"Failed to generate caption for image {idx + 1} after {max_retries} attempts") return (idx, "", False) with ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_idx = { executor.submit(generate_with_retry, url, idx): idx for idx, url in enumerate(image_urls) } for future in as_completed(future_to_idx): try: idx, caption, success = future.result() captions[idx] = caption if not success: failed_count += 1 except Exception as e: idx = future_to_idx[future] logger.error(f"Unexpected error generating caption for image {idx + 1}: {str(e)}") failed_count += 1 return captions, failed_count def _generate_single_caption(self, image_url: str) -> str: """ Generate caption for a single image (supports both HTTP URLs and local paths) Args: image_url: URL or local path of the image Returns: Generated caption """ try: # Load image based on URL type if image_url.startswith('http://') or image_url.startswith('https://'): # Download from HTTP(S) URL response = requests.get(image_url, timeout=30) response.raise_for_status() image = Image.open(io.BytesIO(response.content)) elif image_url.startswith('/files/mineru/'): # Local MinerU extracted file with prefix matching support from utils.path_utils import find_mineru_file_with_prefix # Find file with prefix matching img_path = find_mineru_file_with_prefix(image_url) if img_path is None or not img_path.exists(): logger.warning(f"Local image file not found (with prefix matching): {image_url}") return "" image = Image.open(img_path) else: # Unsupported path type logger.warning(f"Unsupported image path type: {image_url}") return "" # Generate caption based on provider format prompt = "请用一句简短的中文描述这张图片的主要内容。只返回描述文字,不要其他解释。" if self._provider_format == 'openai': # Use OpenAI SDK format client = 
self._get_openai_client() if not client: logger.warning("OpenAI client not initialized, skipping caption generation") return "" # Encode image to base64 buffered = io.BytesIO() if image.mode in ('RGBA', 'LA', 'P'): image = image.convert('RGB') image.save(buffered, format="JPEG", quality=95) base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8') response = client.chat.completions.create( model=self._image_caption_model, messages=[ { "role": "user", "content": [ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}, {"type": "text", "text": prompt} ] } ], temperature=0.3 ) caption = response.choices[0].message.content.strip() elif self._provider_format == 'lazyllm': # Use LazyLLM format client = self._get_lazyllm_client() with tempfile.NamedTemporaryFile(prefix='lazyllm_ref_', suffix='.png', delete=False) as tmp: temp_path = tmp.name try: image.save(temp_path) caption = client(prompt, lazyllm_files=[temp_path]) finally: try: os.remove(temp_path) except OSError: pass else: # Use Gemini SDK format (default) from google.genai import types client = self._get_gemini_client() if not client: logger.warning("Gemini client not initialized, skipping caption generation") return "" result = client.models.generate_content( model=self._image_caption_model, contents=[image, prompt], config=types.GenerateContentConfig( temperature=0.3, # Lower temperature for more consistent captions ) ) caption = result.text.strip() # Strip ... 
def resize_image_for_thumbnail(image: Image.Image, max_width: int = 1920) -> Image.Image:
    """
    Downscale an image to at most ``max_width`` pixels wide, keeping aspect ratio.

    Args:
        image: PIL Image object
        max_width: Maximum width in pixels (default 1920)

    Returns:
        A resized PIL Image, or the original object unchanged when it is
        already no wider than ``max_width``
    """
    if image.width <= max_width:
        return image

    ratio = max_width / image.width
    # int() truncates toward zero, so a very wide and short image would get a
    # height of 0, which PIL refuses to resize to. Keep at least one pixel.
    new_height = max(1, int(image.height * ratio))
    return image.resize((max_width, new_height), Image.Resampling.LANCZOS)
def save_generated_image(self, image: Image.Image, project_id: str,
                         page_id: str, image_format: str = 'PNG',
                         version_number: int = None) -> str:
    """
    Write a generated page image into the project's pages directory.

    Args:
        image: PIL Image object
        project_id: Project ID
        page_id: Page ID
        image_format: Image format (PNG, JPEG, etc.); only its lowercase form
            is used, as the file extension
        version_number: Optional version number. When None, the current time
            in milliseconds is used in the file name instead

    Returns:
        Relative file path from upload folder (POSIX separators)
    """
    import time

    target_dir = self._get_pages_dir(project_id)
    extension = image_format.lower()

    # "<page_id>_v<N>.<ext>" for versioned saves, "<page_id>_<millis>.<ext>" otherwise.
    if version_number is None:
        tag = str(int(time.time() * 1000))
    else:
        tag = f"v{version_number}"
    target = target_dir / f"{page_id}_{tag}.{extension}"

    # No explicit format is passed: PIL picks the encoder from the extension.
    image.save(str(target))

    return target.relative_to(self.upload_folder).as_posix()
def save_cached_image(self, image: Image.Image, project_id: str,
                      page_id: str, version_number: int,
                      quality: int = 85, max_width: int = 1920) -> str:
    """
    Store a compressed JPEG thumbnail of a page image.

    Args:
        image: PIL Image object
        project_id: Project ID
        page_id: Page ID
        version_number: Version number
        quality: JPEG quality (1-100), default 85
        max_width: Maximum thumbnail width in pixels (default 1920)

    Returns:
        Relative file path from upload folder, as produced by
        ``get_cached_image_path``
    """
    target_dir = self._get_pages_dir(project_id)

    # The relative path comes from the shared helper; only its file name is
    # reused to build the on-disk location.
    relative_path = self.get_cached_image_path(project_id, page_id, version_number)
    target = target_dir / Path(relative_path).name

    # Shrink first, then convert to RGB for JPEG.
    thumbnail = convert_image_to_rgb(resize_image_for_thumbnail(image, max_width))
    thumbnail.save(str(target), 'JPEG', quality=quality, optimize=True)

    return relative_path
def get_absolute_path(self, relative_path: str) -> str:
    """
    Resolve a path relative to the upload folder into an absolute path.

    Backslashes in ``relative_path`` are treated as path separators.

    Args:
        relative_path: Relative path from upload folder

    Returns:
        Absolute file path

    Raises:
        ValueError: If the resolved path lies outside the upload folder
    """
    base = self.upload_folder.resolve()
    candidate = (base / relative_path.replace('\\', '/')).resolve()

    # Compare path components, not string prefixes: a plain startswith() check
    # would accept a sibling such as "<upload_folder>_evil/...".
    try:
        candidate.relative_to(base)
    except ValueError:
        raise ValueError(f"Path traversal detected: {relative_path}") from None

    return str(candidate)
def save_user_template_thumbnail(self, template_id: str, original_path: str,
                                 quality: int = 80, max_width: int = 600) -> Optional[str]:
    """
    Generate and save a WebP thumbnail for a user template.

    The thumbnail is written as ``template-thumb.webp`` inside the template's
    own directory under the user-templates folder.

    Args:
        template_id: Template ID
        original_path: Relative path (from the upload folder) to the original
            template image
        quality: WebP quality (1-100), default 80
        max_width: Maximum thumbnail width in pixels (default 600)

    Returns:
        Relative file path to the thumbnail, or None if the original is
        missing or any step fails
    """
    try:
        original_full_path = self.upload_folder / original_path.replace('\\', '/')
        if not original_full_path.exists():
            return None

        # The context manager releases the source file handle even when
        # resizing, converting or saving raises.
        with Image.open(str(original_full_path)) as source:
            thumbnail = convert_image_to_rgb(resize_image_for_thumbnail(source, max_width))
            try:
                template_dir = self._get_user_templates_dir() / template_id
                template_dir.mkdir(exist_ok=True, parents=True)

                thumb_filepath = template_dir / "template-thumb.webp"
                thumbnail.save(str(thumb_filepath), 'WEBP', quality=quality)
            finally:
                # The helpers may hand back the source object itself; it is
                # closed by the with-block, so only close a derived image here.
                if thumbnail is not source:
                    thumbnail.close()

        return thumb_filepath.relative_to(self.upload_folder).as_posix()
    except Exception:
        # Best effort by design: callers get None when no thumbnail was produced.
        return None
for i, future in enumerate(as_completed(futures))} """ # 数据模型 from .data_models import BBox, EditableElement, EditableImage # 坐标映射 from .coordinate_mapper import CoordinateMapper # 元素提取器 from .extractors import ( ElementExtractor, MinerUElementExtractor, BaiduOCRElementExtractor, BaiduAccurateOCRElementExtractor, ExtractorRegistry ) # 混合提取器 from .hybrid_extractor import ( HybridElementExtractor, BBoxUtils, create_hybrid_extractor ) # Inpaint提供者 from .inpaint_providers import ( InpaintProvider, DefaultInpaintProvider, GenerativeEditInpaintProvider, BaiduInpaintProvider, HybridInpaintProvider, InpaintProviderRegistry ) # 文字属性提取器 from .text_attribute_extractors import ( TextStyleResult, TextAttributeExtractor, CaptionModelTextAttributeExtractor, TextAttributeExtractorRegistry ) # 工厂和配置 from .factories import ( ExtractorFactory, InpaintProviderFactory, TextAttributeExtractorFactory, ServiceConfig ) # 主服务 from .service import ImageEditabilityService __all__ = [ # 数据模型 'BBox', 'EditableElement', 'EditableImage', # 坐标映射 'CoordinateMapper', # 元素提取器 'ElementExtractor', 'MinerUElementExtractor', 'BaiduOCRElementExtractor', 'BaiduAccurateOCRElementExtractor', 'ExtractorRegistry', # 混合提取器 'HybridElementExtractor', 'BBoxUtils', 'create_hybrid_extractor', # Inpaint提供者 'InpaintProvider', 'DefaultInpaintProvider', 'GenerativeEditInpaintProvider', 'BaiduInpaintProvider', 'HybridInpaintProvider', 'InpaintProviderRegistry', # 文字属性提取器 'TextStyleResult', 'TextAttributeExtractor', 'CaptionModelTextAttributeExtractor', 'TextAttributeExtractorRegistry', # 工厂和配置 'ExtractorFactory', 'InpaintProviderFactory', 'TextAttributeExtractorFactory', 'ServiceConfig', # 主服务 'ImageEditabilityService', ] ================================================ FILE: backend/services/image_editability/coordinate_mapper.py ================================================ """ 坐标映射工具 - 处理父子图片间的坐标转换 """ from typing import Tuple from .data_models import BBox class CoordinateMapper: """坐标映射工具 - 处理父子图片间的坐标转换""" 
@dataclass
class BBox:
    """Axis-aligned bounding box described by two corners (x0, y0) and (x1, y1)."""
    x0: float
    y0: float
    x1: float
    y1: float

    @property
    def width(self) -> float:
        """Horizontal extent: x1 minus x0."""
        span = self.x1 - self.x0
        return span

    @property
    def height(self) -> float:
        """Vertical extent: y1 minus y0."""
        span = self.y1 - self.y0
        return span

    @property
    def area(self) -> float:
        """Product of width and height."""
        w, h = self.width, self.height
        return w * h

    def to_tuple(self) -> Tuple[float, float, float, float]:
        """Return the coordinates as an (x0, y0, x1, y1) tuple."""
        left, top, right, bottom = self.x0, self.y0, self.x1, self.y1
        return (left, top, right, bottom)

    def to_dict(self) -> Dict[str, float]:
        """Return the coordinates as a dict keyed 'x0', 'y0', 'x1', 'y1' (in that order)."""
        coords: Dict[str, float] = {}
        for key in ('x0', 'y0', 'x1', 'y1'):
            coords[key] = getattr(self, key)
        return coords

    def scale(self, scale_x: float, scale_y: float) -> 'BBox':
        """Return a new BBox with x coordinates multiplied by scale_x and y coordinates by scale_y."""
        return BBox(
            self.x0 * scale_x,
            self.y0 * scale_y,
            self.x1 * scale_x,
            self.y1 * scale_y,
        )

    def translate(self, offset_x: float, offset_y: float) -> 'BBox':
        """Return a new BBox shifted by (offset_x, offset_y)."""
        return BBox(
            self.x0 + offset_x,
            self.y0 + offset_y,
            self.x1 + offset_x,
            self.y1 + offset_y,
        )
class ExtractionContext:
    """Extraction context - extra information an extractor may need to pass along."""

    def __init__(
        self,
        result_dir: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Args:
            result_dir: Result directory (e.g. the MinerU output directory).
            metadata: Any other metadata; a falsy value is replaced by a fresh empty dict.
        """
        self.result_dir = result_dir
        # A falsy argument (None or an empty mapping) is swapped for a new dict.
        self.metadata = metadata if metadata else {}
content: Optional[str] - 文本内容 - image_path: Optional[str] - 图片相对路径 - metadata: Dict[str, Any] - 其他元数据 - context: 提取上下文(用于后续递归处理) """ pass @abstractmethod def supports_type(self, element_type: Optional[str]) -> bool: """ 检查提取器是否支持指定的元素类型 Args: element_type: 元素类型(如 'table', 'image'等),None表示通用 Returns: 是否支持该类型 """ pass class MinerUElementExtractor(ElementExtractor): """ 基于MinerU的元素提取器(默认实现) 从MinerU的解析结果中提取文本、图片、表格等元素 自包含:自己处理PDF转换、MinerU解析、结果提取 """ def __init__(self, parser_service, upload_folder: Path): """ 初始化MinerU提取器 Args: parser_service: FileParserService实例 upload_folder: 上传文件夹路径 """ self._parser_service = parser_service self._upload_folder = upload_folder def supports_type(self, element_type: Optional[str]) -> bool: """MinerU支持所有通用类型(除了特殊的表格单元格)""" return element_type != 'table_cell' def extract( self, image_path: str, element_type: Optional[str] = None, **kwargs ) -> ExtractionResult: """ 从图像中提取元素(自动处理PDF转换和MinerU解析) 支持的kwargs: - depth: int, 递归深度(用于日志) """ depth = kwargs.get('depth', 0) # 获取图片尺寸 img = Image.open(image_path) image_size = img.size # (width, height) # 1. 检查缓存 cached_dir = self._find_cache(image_path) if cached_dir: logger.info(f"{' ' * depth}使用MinerU缓存") mineru_result_dir = cached_dir parse_error = None else: # 2. 解析图片 mineru_result_dir, parse_error = self._parse_image(image_path, depth) if not mineru_result_dir: return ExtractionResult(elements=[], error=parse_error) # 3. 提取元素 elements = self._extract_from_result( mineru_result_dir=mineru_result_dir, target_image_size=image_size, depth=depth ) # 4. 
返回结果(带上下文) context = ExtractionContext( result_dir=mineru_result_dir, metadata={'source': 'mineru', 'image_size': image_size} ) return ExtractionResult(elements=elements, context=context) def _find_cache(self, image_path: str) -> Optional[str]: """查找缓存的MinerU结果""" try: import hashlib import time img_path = Path(image_path) if not img_path.exists(): return None mineru_files_dir = self._upload_folder / 'mineru_files' if not mineru_files_dir.exists(): return None # 简单策略:不使用缓存(更安全) return None except Exception as e: logger.debug(f"查找缓存失败: {e}") return None def _parse_image(self, image_path: str, depth: int) -> Tuple[Optional[str], Optional[str]]: """解析图片,返回MinerU结果目录和错误信息 Returns: Tuple of (result_dir, error_message) """ from services.export_service import ExportService # 转换为PDF with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_pdf: pdf_path = tmp_pdf.name try: ExportService.create_pdf_from_images([image_path], output_file=pdf_path) # 调用MinerU解析 image_id = str(uuid.uuid4())[:8] batch_id, markdown_content, extract_id, error_message, failed_image_count = \ self._parser_service.parse_file(pdf_path, f"image_{image_id}.pdf") if error_message or not extract_id: logger.error(f"{' ' * depth}MinerU解析失败: {error_message}") return None, error_message or "MinerU解析失败,未返回extract_id" mineru_result_dir = (self._upload_folder / 'mineru_files' / extract_id).resolve() if not mineru_result_dir.exists(): err = f"MinerU结果目录不存在: {mineru_result_dir}" logger.error(f"{' ' * depth}{err}") return None, err return str(mineru_result_dir), None finally: if os.path.exists(pdf_path): os.remove(pdf_path) def _extract_from_result( self, mineru_result_dir: str, target_image_size: Tuple[int, int], depth: int ) -> List[Dict[str, Any]]: """从MinerU结果目录中提取元素""" elements = [] try: mineru_dir = Path(mineru_result_dir) # 加载layout.json和content_list.json layout_file = mineru_dir / 'layout.json' content_list_files = list(mineru_dir.glob("*_content_list.json")) if not layout_file.exists() or not 
content_list_files: logger.warning(f"layout.json或content_list.json不存在") return [] with open(layout_file, 'r', encoding='utf-8') as f: layout_data = json.load(f) with open(content_list_files[0], 'r', encoding='utf-8') as f: content_list = json.load(f) # 从layout.json提取元素 if 'pdf_info' not in layout_data or not layout_data['pdf_info']: return [] page_info = layout_data['pdf_info'][0] source_page_size = page_info.get('page_size', target_image_size) # 计算缩放比例 scale_x = target_image_size[0] / source_page_size[0] scale_y = target_image_size[1] / source_page_size[1] # 处理块的通用函数 def process_block(block): bbox = block.get('bbox') block_type = block.get('type', 'text') if not bbox or len(bbox) != 4: return None # 过滤掉 type 为 header/footer 且内容仅为 "#" 的特殊标记 if block_type in ['header', 'footer']: if block.get('lines'): # 提取所有文本内容 all_text = [] for line in block['lines']: for span in line.get('spans', []): if span.get('type') == 'text' and span.get('content'): all_text.append(span['content']) # 如果所有文本合并后仅为"#",则跳过此块 combined_text = ''.join(all_text).strip() if combined_text == '#': return None # 缩放bbox到目标尺寸 scaled_bbox = [ bbox[0] * scale_x, bbox[1] * scale_y, bbox[2] * scale_x, bbox[3] * scale_y ] # 对于 header/footer,需要根据实际内容判断类型 actual_content_type = block_type if block_type in ['header', 'footer']: # 检查是否包含图片 has_image = False if block.get('blocks'): for sub_block in block['blocks']: if sub_block.get('type') == 'image_body': has_image = True break # 检查是否包含文本 has_text = False if block.get('lines'): for line in block['lines']: for span in line.get('spans', []): if span.get('type') in ['text', 'inline_equation'] and span.get('content', '').strip(): has_text = True break if has_text: break # 根据内容判断实际类型 if has_image and not has_text: actual_content_type = 'image' elif has_text: actual_content_type = 'text' # 将 header/footer 转换为 text else: # 默认当作文本处理 actual_content_type = 'text' # 辅助函数:从 lines 提取文本 def extract_text_from_lines(lines): """从 lines 数组提取所有文本内容""" line_texts = [] for line in 
lines: span_texts = [] for span in line.get('spans', []): span_type = span.get('type', '') span_content = span.get('content', '') if span_type == 'text' and span_content: span_texts.append(span_content) elif span_type == 'inline_equation' and span_content: from utils.latex_utils import latex_to_text converted = latex_to_text(span_content) span_texts.append(converted) if span_texts: line_text = ''.join(span_texts) line_texts.append(line_text) return line_texts # 提取content(文本)- 包括 caption 类型 content = None if actual_content_type in ['text', 'title', 'table_caption', 'image_caption']: if block.get('lines'): line_texts = extract_text_from_lines(block['lines']) if line_texts: content = '\n'.join(line_texts).strip() elif actual_content_type == 'list': # list 类型包含 blocks 子数组,每个 block 有 lines if block.get('blocks'): all_line_texts = [] for sub_block in block['blocks']: if sub_block.get('lines'): sub_texts = extract_text_from_lines(sub_block['lines']) all_line_texts.extend(sub_texts) if all_line_texts: content = '\n'.join(all_line_texts).strip() # 提取img_path(图片/表格)- 转换为绝对路径 img_path = None if actual_content_type in ['image', 'table']: if block.get('blocks'): for sub_block in block['blocks']: for line in sub_block.get('lines', []): for span in line.get('spans', []): if span.get('image_path'): relative_path = span['image_path'] if not relative_path.startswith('images/'): relative_path = 'images/' + relative_path # 转换为绝对路径 abs_path = mineru_dir / relative_path if abs_path.exists(): img_path = str(abs_path) break if img_path: break if img_path: break return { 'bbox': scaled_bbox, 'type': actual_content_type, # 使用实际内容类型而不是原始类型 'content': content, 'image_path': img_path, # 现在是绝对路径 'metadata': { **block, 'original_type': block_type # 保留原始类型(header/footer)在metadata中 } } # 处理主要内容块(para_blocks) for block in page_info.get('para_blocks', []): element = process_block(block) if element: elements.append(element) # 递归处理子块(table_caption, image_caption 等) # 注意:list 类型的子块已在 process_block 
中处理,不需要再递归 block_type = block.get('type', '') if block_type != 'list': for sub_block in block.get('blocks', []): sub_elem = process_block(sub_block) if sub_elem: elements.append(sub_elem) # 处理页眉页脚(discarded_blocks) for block in page_info.get('discarded_blocks', []): element = process_block(block) if element: elements.append(element) # 递归处理子块 # 注意:list 类型的子块已在 process_block 中处理,不需要再递归 block_type = block.get('type', '') if block_type != 'list': for sub_block in block.get('blocks', []): sub_elem = process_block(sub_block) if sub_elem: elements.append(sub_elem) logger.info(f"MinerU提取了 {len(elements)} 个元素") except Exception as e: logger.error(f"MinerU提取元素失败: {e}", exc_info=True) return elements class BaiduOCRElementExtractor(ElementExtractor): """ 基于百度OCR的元素提取器 专门用于表格识别,提取表格单元格 自包含:自己处理OCR调用和单元格提取 """ def __init__(self, baidu_table_ocr_provider): """ 初始化百度OCR提取器 Args: baidu_table_ocr_provider: 百度表格OCR Provider实例 """ self._ocr_provider = baidu_table_ocr_provider def supports_type(self, element_type: Optional[str]) -> bool: """百度OCR主要支持表格类型""" return element_type in ['table', 'table_cell', None] def extract( self, image_path: str, element_type: Optional[str] = None, **kwargs ) -> ExtractionResult: """ 从表格图片中提取单元格 支持的kwargs: - depth: int, 递归深度(用于日志) - shrink_cells: bool, 是否收缩单元格以避免重叠,默认True """ depth = kwargs.get('depth', 0) shrink_cells = kwargs.get('shrink_cells', True) elements = [] try: # 调用百度OCR识别表格 ocr_result = self._ocr_provider.recognize_table( image_path, cell_contents=True ) table_cells = ocr_result.get('cells', []) # OCR结果通常会包含image_size,如果没有则自己获取 table_img_size = ocr_result.get('image_size') if not table_img_size: img = Image.open(image_path) table_img_size = img.size logger.info(f"{' ' * depth}百度OCR识别到 {len(table_cells)} 个单元格") # 只处理body单元格 body_cells = [cell for cell in table_cells if cell.get('section') == 'body'] valid_cells = [cell for cell in body_cells if cell.get('text', '').strip()] if not valid_cells: logger.warning(f"{' ' * depth}没有有效的单元格") return 
ExtractionResult(elements=elements) # 处理单元格(可选择性收缩) cell_bboxes = [] if shrink_cells: cell_bboxes = self._shrink_cells_to_avoid_overlap(valid_cells, depth) else: cell_bboxes = [cell.get('bbox', [0, 0, 0, 0]) for cell in valid_cells] # 构建元素列表 for idx, (cell, bbox) in enumerate(zip(valid_cells, cell_bboxes)): elements.append({ 'bbox': bbox, 'type': 'table_cell', 'content': cell.get('text', ''), 'image_path': None, 'metadata': { 'row_start': cell.get('row_start'), 'row_end': cell.get('row_end'), 'col_start': cell.get('col_start'), 'col_end': cell.get('col_end'), 'table_idx': cell.get('table_idx', 0) } }) logger.info(f"{' ' * depth}百度OCR提取了 {len(elements)} 个单元格元素") except Exception as e: error_msg = f"百度OCR识别失败: {e}" logger.error(f"{' ' * depth}{error_msg}", exc_info=True) return ExtractionResult(elements=elements, error=error_msg) # 百度OCR不需要result_dir(表格单元格不会有子元素) return ExtractionResult(elements=elements) def _shrink_cells_to_avoid_overlap( self, valid_cells: List[Dict], depth: int ) -> List[List[float]]: """收缩单元格以避免重叠(算法同原实现)""" TARGET_MIN_GAP = 6 SHRINK_STEP = 0.02 MIN_SIZE_RATIO = 0.4 MAX_ITERATIONS = 20 cell_data = [] for cell in valid_cells: bbox = cell.get('bbox', [0, 0, 0, 0]) x0, y0, x1, y1 = bbox cell_data.append({ 'cell': cell, 'original_bbox': bbox, 'current_bbox': [float(x0), float(y0), float(x1), float(y1)], 'original_width': x1 - x0, 'original_height': y1 - y0 }) def calculate_min_gap(cell_data): if len(cell_data) <= 1: return float('inf') min_gap = float('inf') for i, data1 in enumerate(cell_data): x0_1, y0_1, x1_1, y1_1 = data1['current_bbox'] for j, data2 in enumerate(cell_data): if i >= j: continue x0_2, y0_2, x1_2, y1_2 = data2['current_bbox'] x_overlap = not (x1_1 <= x0_2 or x1_2 <= x0_1) y_overlap = not (y1_1 <= y0_2 or y1_2 <= y0_1) if x_overlap and y_overlap: overlap_x = min(x1_1, x1_2) - max(x0_1, x0_2) overlap_y = min(y1_1, y1_2) - max(y0_1, y0_2) min_gap = min(min_gap, -min(overlap_x, overlap_y)) elif x_overlap: gap = y0_2 - y1_1 if y1_1 <= 
y0_2 else y0_1 - y1_2 min_gap = min(min_gap, gap) elif y_overlap: gap = x0_2 - x1_1 if x1_1 <= x0_2 else x0_1 - x1_2 min_gap = min(min_gap, gap) return min_gap iteration = 0 total_shrink_ratio = 0 while iteration < MAX_ITERATIONS: current_min_gap = calculate_min_gap(cell_data) if current_min_gap >= TARGET_MIN_GAP: if iteration == 0: logger.info(f"{' ' * depth}单元格间距已满足要求(最小={current_min_gap:.1f}px),无需收缩") else: logger.info(f"{' ' * depth}收缩完成:{iteration}次迭代,最小间距={current_min_gap:.1f}px") break all_cells_can_shrink = True for data in cell_data: x0, y0, x1, y1 = data['current_bbox'] current_width = x1 - x0 current_height = y1 - y0 min_width = data['original_width'] * MIN_SIZE_RATIO min_height = data['original_height'] * MIN_SIZE_RATIO if current_width <= min_width or current_height <= min_height: all_cells_can_shrink = False break shrink_x = max(0.5, current_width * SHRINK_STEP) shrink_y = max(0.5, current_height * SHRINK_STEP) new_x0 = x0 + shrink_x new_y0 = y0 + shrink_y new_x1 = x1 - shrink_x new_y1 = y1 - shrink_y if (new_x1 - new_x0) < min_width: new_x0 = x0 + (current_width - min_width) / 2 new_x1 = x1 - (current_width - min_width) / 2 if (new_y1 - new_y0) < min_height: new_y0 = y0 + (current_height - min_height) / 2 new_y1 = y1 - (current_height - min_height) / 2 data['current_bbox'] = [new_x0, new_y0, new_x1, new_y1] if not all_cells_can_shrink: logger.warning(f"{' ' * depth}达到最小尺寸限制,当前最小间距={current_min_gap:.1f}px") break total_shrink_ratio += SHRINK_STEP iteration += 1 if iteration >= MAX_ITERATIONS: current_min_gap = calculate_min_gap(cell_data) logger.warning(f"{' ' * depth}达到最大迭代次数,当前最小间距={current_min_gap:.1f}px") return [data['current_bbox'] for data in cell_data] class BaiduAccurateOCRElementExtractor(ElementExtractor): """ 基于百度高精度OCR的元素提取器 专门用于文字识别,提取文本行元素 支持多语种、高精度识别,返回文字位置信息 """ def __init__(self, baidu_accurate_ocr_provider): """ 初始化百度高精度OCR提取器 Args: baidu_accurate_ocr_provider: 百度高精度OCR Provider实例 """ self._ocr_provider = baidu_accurate_ocr_provider 
def supports_type(self, element_type: Optional[str]) -> bool: """百度高精度OCR主要支持文字类型""" return element_type in ['text', 'title', 'paragraph', None] def extract( self, image_path: str, element_type: Optional[str] = None, **kwargs ) -> ExtractionResult: """ 从图片中提取文字元素 支持的kwargs: - depth: int, 递归深度(用于日志) - language_type: str, 识别语言类型,默认'CHN_ENG' - recognize_granularity: str, 是否定位单字符位置,'big'或'small' - detect_direction: bool, 是否检测图像朝向 - paragraph: bool, 是否输出段落信息 """ depth = kwargs.get('depth', 0) language_type = kwargs.get('language_type', 'CHN_ENG') recognize_granularity = kwargs.get('recognize_granularity', 'big') detect_direction = kwargs.get('detect_direction', False) paragraph = kwargs.get('paragraph', False) elements = [] try: # 调用百度高精度OCR识别 ocr_result = self._ocr_provider.recognize( image_path, language_type=language_type, recognize_granularity=recognize_granularity, detect_direction=detect_direction, paragraph=paragraph, probability=True, # 获取置信度 ) text_lines = ocr_result.get('text_lines', []) image_size = ocr_result.get('image_size', (0, 0)) direction = ocr_result.get('direction', None) logger.info(f"{' ' * depth}百度高精度OCR识别到 {len(text_lines)} 行文字") # 只处理有内容的文字行 valid_lines = [line for line in text_lines if line.get('text', '').strip()] if not valid_lines: logger.warning(f"{' ' * depth}没有识别到有效的文字") return ExtractionResult(elements=elements) # 构建元素列表 for idx, line in enumerate(valid_lines): bbox = line.get('bbox', [0, 0, 0, 0]) text = line.get('text', '') element = { 'bbox': bbox, 'type': 'text', 'content': text, 'image_path': None, 'metadata': { 'line_idx': idx, 'source': 'baidu_accurate_ocr', } } # 添加置信度信息 if 'probability' in line: element['metadata']['probability'] = line['probability'] # 添加单字符信息 if 'chars' in line: element['metadata']['chars'] = line['chars'] # 添加外接多边形顶点 if 'vertexes_location' in line: element['metadata']['vertexes_location'] = line['vertexes_location'] elements.append(element) logger.info(f"{' ' * depth}百度高精度OCR提取了 {len(elements)} 个文字元素") # 
class ExtractorRegistry:
    """
    Registry mapping element types to extractors.

    Decides which extractor handles a given element type, with a default
    extractor used for types that have no explicit mapping.

    Usage:
        >>> registry = ExtractorRegistry()
        >>> registry.register('table', baidu_ocr_extractor)
        >>> registry.register('image', mineru_extractor)
        >>> registry.register_default(mineru_extractor)
        >>>
        >>> extractor = registry.get_extractor('table')  # baidu_ocr_extractor
        >>> extractor = registry.get_extractor('chart')  # mineru_extractor (default)
    """

    # Predefined element-type groups
    TABLE_TYPES = {'table', 'table_cell'}
    IMAGE_TYPES = {'image', 'figure', 'chart', 'diagram'}
    TEXT_TYPES = {'text', 'title', 'paragraph', 'header', 'footer', 'list'}

    def __init__(self):
        """Create an empty registry with no mappings and no default extractor."""
        self._type_mapping: Dict[str, ElementExtractor] = {}
        self._default_extractor: Optional[ElementExtractor] = None

    def register(self, element_type: str, extractor: ElementExtractor) -> 'ExtractorRegistry':
        """
        Map one element type to an extractor (replacing any previous mapping).

        Args:
            element_type: Element type (e.g. 'table', 'image').
            extractor: Extractor instance to use for that type.

        Returns:
            self, to allow chained calls.
        """
        self._type_mapping[element_type] = extractor
        logger.debug(f"注册提取器: {element_type} -> {extractor.__class__.__name__}")
        return self

    def register_types(self, element_types: List[str], extractor: ElementExtractor) -> 'ExtractorRegistry':
        """
        Map several element types to the same extractor.

        Args:
            element_types: Element types to register.
            extractor: Extractor instance to use for all of them.

        Returns:
            self, to allow chained calls.
        """
        for t in element_types:
            self.register(t, extractor)
        return self

    def register_default(self, extractor: ElementExtractor) -> 'ExtractorRegistry':
        """
        Set the default extractor, used when a type has no explicit mapping.

        Args:
            extractor: Default extractor instance.

        Returns:
            self, to allow chained calls.
        """
        self._default_extractor = extractor
        logger.debug(f"注册默认提取器: {extractor.__class__.__name__}")
        return self

    def get_extractor(self, element_type: Optional[str]) -> Optional[ElementExtractor]:
        """
        Look up the extractor for an element type.

        Args:
            element_type: Element type; None selects the default extractor.

        Returns:
            The extractor registered for the exact type, otherwise the default
            extractor (which may be None if none was registered).
        """
        if element_type is None:
            return self._default_extractor

        # Exact match first, default as fallback
        if element_type in self._type_mapping:
            return self._type_mapping[element_type]

        return self._default_extractor

    def get_all_extractors(self) -> List[ElementExtractor]:
        """
        Return every registered extractor exactly once.

        The list follows first-registration order of the type mappings, with
        the default extractor appended last if it is not already present.
        De-duplicating through dict keys (rather than a set) keeps the order
        stable from run to run.

        Returns:
            List of distinct extractors.
        """
        extractors = list(dict.fromkeys(self._type_mapping.values()))
        if self._default_extractor and self._default_extractor not in extractors:
            extractors.append(self._default_extractor)
        return extractors

    @classmethod
    def create_default(
        cls,
        mineru_extractor: ElementExtractor,
        baidu_ocr_extractor: Optional[ElementExtractor] = None,
        baidu_accurate_ocr_extractor: Optional[ElementExtractor] = None
    ) -> 'ExtractorRegistry':
        """
        Build a registry with the default configuration.

        Default configuration:
            - table types -> Baidu table OCR if given, otherwise MinerU
            - text types  -> Baidu accurate OCR if given, otherwise MinerU
            - image types -> MinerU
            - anything else -> MinerU (default extractor)

        Args:
            mineru_extractor: MinerU extractor instance.
            baidu_ocr_extractor: Baidu table OCR extractor instance (optional).
            baidu_accurate_ocr_extractor: Baidu accurate OCR extractor instance (optional).

        Returns:
            The configured registry.
        """
        registry = cls()

        # Default extractor
        registry.register_default(mineru_extractor)

        # Image types use MinerU
        registry.register_types(list(cls.IMAGE_TYPES), mineru_extractor)

        # Table types use Baidu table OCR when available, otherwise MinerU
        table_extractor = baidu_ocr_extractor if baidu_ocr_extractor else mineru_extractor
        registry.register_types(list(cls.TABLE_TYPES), table_extractor)

        # Text types use Baidu accurate OCR when available, otherwise MinerU
        text_extractor = baidu_accurate_ocr_extractor if baidu_accurate_ocr_extractor else mineru_extractor
        registry.register_types(list(cls.TEXT_TYPES), text_extractor)

        logger.info(f"创建默认ExtractorRegistry: "
                    f"表格->{table_extractor.__class__.__name__}, "
                    f"文字->{text_extractor.__class__.__name__}, "
                    f"图片->{mineru_extractor.__class__.__name__}")

        return registry
================================================ """ 工厂类 - 负责创建和配置具体的提取器和Inpaint提供者 """ import logging from typing import List, Optional, Any from pathlib import Path from .extractors import ElementExtractor, MinerUElementExtractor, BaiduOCRElementExtractor, BaiduAccurateOCRElementExtractor, ExtractorRegistry from .hybrid_extractor import HybridElementExtractor, create_hybrid_extractor from .inpaint_providers import ( InpaintProvider, DefaultInpaintProvider, GenerativeEditInpaintProvider, BaiduInpaintProvider, HybridInpaintProvider, InpaintProviderRegistry ) from .text_attribute_extractors import ( TextAttributeExtractor, CaptionModelTextAttributeExtractor, TextAttributeExtractorRegistry, TextStyleResult ) logger = logging.getLogger(__name__) class ExtractorFactory: """元素提取器工厂""" @staticmethod def create_default_extractors( parser_service: Any, upload_folder: Path, baidu_table_ocr_provider: Optional[Any] = None ) -> List[ElementExtractor]: """ 创建默认的元素提取器列表 Args: parser_service: MinerU解析服务实例 upload_folder: 上传文件夹路径 baidu_table_ocr_provider: 百度表格OCR Provider实例(可选) Returns: 提取器列表(按优先级排序) Note: 推荐使用 create_extractor_registry() 方法,它提供更清晰的类型到提取器映射 """ extractors: List[ElementExtractor] = [] # 1. 百度OCR提取器(用于表格) if baidu_table_ocr_provider is None: try: from services.ai_providers.ocr import create_baidu_table_ocr_provider baidu_provider = create_baidu_table_ocr_provider() if baidu_provider: extractors.append(BaiduOCRElementExtractor(baidu_provider)) logger.info("✅ 百度表格OCR提取器已启用") except Exception as e: logger.warning(f"无法初始化百度表格OCR: {e}") else: extractors.append(BaiduOCRElementExtractor(baidu_table_ocr_provider)) logger.info("✅ 百度表格OCR提取器已启用") # 2. 
MinerU提取器(默认通用提取器) mineru_extractor = MinerUElementExtractor(parser_service, upload_folder) extractors.append(mineru_extractor) logger.info("✅ MinerU提取器已启用") return extractors @staticmethod def create_extractor_registry( parser_service: Any, upload_folder: Path, baidu_table_ocr_provider: Optional[Any] = None ) -> ExtractorRegistry: """ 创建元素类型到提取器的注册表 默认配置: - 表格类型(table, table_cell)→ 百度OCR(如果可用),否则MinerU - 图片类型(image, figure, chart)→ MinerU - 其他类型 → MinerU(默认) Args: parser_service: MinerU解析服务实例 upload_folder: 上传文件夹路径 baidu_table_ocr_provider: 百度表格OCR Provider实例(可选) Returns: 配置好的ExtractorRegistry实例 """ # 创建MinerU提取器 mineru_extractor = MinerUElementExtractor(parser_service, upload_folder) logger.info("✅ MinerU提取器已创建") # 尝试创建百度OCR提取器 baidu_ocr_extractor = None if baidu_table_ocr_provider is None: try: from services.ai_providers.ocr import create_baidu_table_ocr_provider baidu_provider = create_baidu_table_ocr_provider() if baidu_provider: baidu_ocr_extractor = BaiduOCRElementExtractor(baidu_provider) logger.info("✅ 百度表格OCR提取器已创建") except Exception as e: logger.warning(f"无法初始化百度表格OCR: {e}") else: baidu_ocr_extractor = BaiduOCRElementExtractor(baidu_table_ocr_provider) logger.info("✅ 百度表格OCR提取器已创建") # 尝试创建百度高精度OCR提取器 baidu_accurate_ocr_extractor = None try: from services.ai_providers.ocr import create_baidu_accurate_ocr_provider baidu_accurate_provider = create_baidu_accurate_ocr_provider() if baidu_accurate_provider: baidu_accurate_ocr_extractor = BaiduAccurateOCRElementExtractor(baidu_accurate_provider) logger.info("✅ 百度高精度OCR提取器已创建") except Exception as e: logger.warning(f"无法初始化百度高精度OCR: {e}") # 使用注册表的工厂方法创建默认配置 return ExtractorRegistry.create_default( mineru_extractor=mineru_extractor, baidu_ocr_extractor=baidu_ocr_extractor, baidu_accurate_ocr_extractor=baidu_accurate_ocr_extractor ) @staticmethod def create_baidu_accurate_ocr_extractor( baidu_accurate_ocr_provider: Optional[Any] = None ) -> Optional[BaiduAccurateOCRElementExtractor]: """ 创建百度高精度OCR提取器 Args: 
baidu_accurate_ocr_provider: 百度高精度OCR Provider实例(可选,自动创建) Returns: BaiduAccurateOCRElementExtractor实例,如果不可用则返回None """ if baidu_accurate_ocr_provider is None: try: from services.ai_providers.ocr import create_baidu_accurate_ocr_provider baidu_accurate_ocr_provider = create_baidu_accurate_ocr_provider() except Exception as e: logger.warning(f"无法初始化百度高精度OCR Provider: {e}") return None if baidu_accurate_ocr_provider is None: return None return BaiduAccurateOCRElementExtractor(baidu_accurate_ocr_provider) @staticmethod def create_hybrid_extractor( parser_service: Any, upload_folder: Path, baidu_accurate_ocr_provider: Optional[Any] = None, contain_threshold: float = 0.8, intersection_threshold: float = 0.3 ) -> Optional[HybridElementExtractor]: """ 创建混合元素提取器 混合提取器结合MinerU版面分析和百度高精度OCR: - MinerU负责识别元素类型和整体布局 - 百度OCR负责精确的文字识别和定位 合并策略: 1. 图片类型bbox里包含的百度OCR bbox → 删除(图片内的文字不需要单独提取) 2. 表格类型bbox里包含的百度OCR bbox → 保留百度OCR结果,删除MinerU表格bbox 3. 其他类型(文字等)与百度OCR bbox有交集 → 使用百度OCR结果,删除MinerU bbox Args: parser_service: MinerU解析服务实例 upload_folder: 上传文件夹路径 baidu_accurate_ocr_provider: 百度高精度OCR Provider实例(可选,自动创建) contain_threshold: 包含判断阈值,默认0.8(80%面积在内部算包含) intersection_threshold: 交集判断阈值,默认0.3(30%重叠算有交集) Returns: HybridElementExtractor实例,如果无法创建则返回None """ # 创建MinerU提取器 mineru_extractor = MinerUElementExtractor(parser_service, upload_folder) logger.info("✅ MinerU提取器已创建(用于混合提取)") # 创建百度高精度OCR提取器 baidu_ocr_extractor = ExtractorFactory.create_baidu_accurate_ocr_extractor( baidu_accurate_ocr_provider ) if baidu_ocr_extractor is None: logger.warning("无法创建百度高精度OCR提取器,混合提取器创建失败") return None logger.info("✅ 百度高精度OCR提取器已创建(用于混合提取)") return HybridElementExtractor( mineru_extractor=mineru_extractor, baidu_ocr_extractor=baidu_ocr_extractor, contain_threshold=contain_threshold, intersection_threshold=intersection_threshold ) @staticmethod def create_hybrid_extractor_registry( parser_service: Any, upload_folder: Path, baidu_table_ocr_provider: Optional[Any] = None, baidu_accurate_ocr_provider: 
Optional[Any] = None, contain_threshold: float = 0.8, intersection_threshold: float = 0.3 ) -> ExtractorRegistry: """ 创建使用混合提取器的注册表 默认配置: - 所有类型 → 混合提取器(如果可用) - 回退到MinerU(如果混合提取器不可用) Args: parser_service: MinerU解析服务实例 upload_folder: 上传文件夹路径 baidu_table_ocr_provider: 百度表格OCR Provider实例(可选) baidu_accurate_ocr_provider: 百度高精度OCR Provider实例(可选) contain_threshold: 包含判断阈值 intersection_threshold: 交集判断阈值 Returns: 配置好的ExtractorRegistry实例 """ # 创建MinerU提取器作为回退 mineru_extractor = MinerUElementExtractor(parser_service, upload_folder) logger.info("✅ MinerU提取器已创建") # 尝试创建混合提取器 hybrid_extractor = ExtractorFactory.create_hybrid_extractor( parser_service=parser_service, upload_folder=upload_folder, baidu_accurate_ocr_provider=baidu_accurate_ocr_provider, contain_threshold=contain_threshold, intersection_threshold=intersection_threshold ) # 尝试创建百度表格OCR提取器 baidu_table_ocr_extractor = None if baidu_table_ocr_provider is None: try: from services.ai_providers.ocr import create_baidu_table_ocr_provider baidu_provider = create_baidu_table_ocr_provider() if baidu_provider: from .extractors import BaiduOCRElementExtractor baidu_table_ocr_extractor = BaiduOCRElementExtractor(baidu_provider) logger.info("✅ 百度表格OCR提取器已创建") except Exception as e: logger.warning(f"无法初始化百度表格OCR: {e}") else: from .extractors import BaiduOCRElementExtractor baidu_table_ocr_extractor = BaiduOCRElementExtractor(baidu_table_ocr_provider) logger.info("✅ 百度表格OCR提取器已创建") # 创建注册表 registry = ExtractorRegistry() # 设置默认提取器 if hybrid_extractor: registry.register_default(hybrid_extractor) logger.info("✅ 使用混合提取器作为默认提取器") else: registry.register_default(mineru_extractor) logger.info("⚠️ 混合提取器不可用,回退到MinerU提取器") # 表格类型使用百度表格OCR(如果可用) if baidu_table_ocr_extractor: registry.register_types(list(ExtractorRegistry.TABLE_TYPES), baidu_table_ocr_extractor) return registry class InpaintProviderFactory: """Inpaint提供者工厂""" @staticmethod def create_default_provider(inpainting_service: Optional[Any] = None) -> Optional[InpaintProvider]: """ 
@staticmethod
def create_generative_edit_provider(
    ai_service: Optional[Any] = None,
    aspect_ratio: str = "16:9",
    resolution: str = "2K"
) -> InpaintProvider:
    """
    Create an inpaint provider backed by a generative image-editing model.

    Args:
        ai_service: AIService instance (optional; obtained through
            ``get_ai_service()`` when omitted).
        aspect_ratio: Target aspect ratio.
        resolution: Target resolution.

    Returns:
        A GenerativeEditInpaintProvider instance.

    Raises:
        Whatever ``get_ai_service()`` raises when the AI service cannot be
        initialised; nothing is caught here.
    """
    service = ai_service
    if service is None:
        # Imported lazily, only when no service was supplied.
        from services.ai_service_manager import get_ai_service
        service = get_ai_service()

    logger.info("创建GenerativeEditInpaintProvider")
    return GenerativeEditInpaintProvider(service, aspect_ratio, resolution)
@staticmethod
def create_baidu_inpaint_provider() -> Optional[BaiduInpaintProvider]:
    """
    Create the Baidu image-repair inpaint provider.

    Returns:
        A BaiduInpaintProvider wrapping the low-level Baidu provider, or
        None when that provider cannot be created or any error occurs
        (errors are logged as warnings, never raised).
    """
    try:
        from services.ai_providers.image.baidu_inpainting_provider import create_baidu_inpainting_provider

        raw_provider = create_baidu_inpainting_provider()
        if not raw_provider:
            logger.warning("⚠️ 无法创建百度图像修复Provider(API Key未配置)")
            return None

        logger.info("✅ 创建BaiduInpaintProvider")
        return BaiduInpaintProvider(raw_provider)
    except Exception as e:
        logger.warning(f"⚠️ 创建BaiduInpaintProvider失败: {e}")
        return None
class ServiceConfig:
    """Service configuration: plain settings, holds no concrete service references."""

    def __init__(
        self,
        upload_folder: Path,
        extractor_registry: ExtractorRegistry,
        inpaint_registry: InpaintProviderRegistry,
        max_depth: int = 1,
        min_image_size: int = 200,
        min_image_area: int = 40000
    ):
        """
        Store the configuration values.

        Args:
            upload_folder: Upload folder path.
            extractor_registry: Registry mapping element types to extractors.
            inpaint_registry: Registry mapping element types to inpaint providers.
            max_depth: Maximum recursion depth (default 1).
            min_image_size: Minimum image size.
            min_image_area: Minimum image area.
        """
        self.upload_folder = upload_folder
        self.extractor_registry = extractor_registry
        self.inpaint_registry = inpaint_registry
        self.max_depth = max_depth
        self.min_image_size = min_image_size
        self.min_image_area = min_image_area

    @staticmethod
    def _resolve_upload_path(upload_folder) -> Path:
        """
        Turn ``upload_folder`` (str or Path) into an absolute-anchored Path.

        Absolute paths are returned unchanged. Relative paths are joined onto
        the directory three ``.parent`` steps above this module file.

        The previous implementation used ``upload_folder.lstrip('./')``, which
        strips any leading run of '.' and '/' characters rather than a './'
        prefix: '../uploads' and '.data/uploads' were silently rewritten to
        'uploads' and 'data/uploads'. Joining the Path keeps those components,
        while './uploads' still resolves to '<anchor>/uploads'.
        """
        upload_path = Path(upload_folder)
        if upload_path.is_absolute():
            return upload_path
        anchor = Path(__file__).resolve().parent.parent.parent
        return anchor / upload_path

    @classmethod
    def from_defaults(
        cls,
        mineru_token: Optional[str] = None,
        mineru_api_base: Optional[str] = None,
        upload_folder: Optional[str] = None,
        ai_service: Optional[Any] = None,
        use_hybrid_extractor: bool = True,
        use_hybrid_inpaint: bool = True,
        extractor_method: Optional[str] = None,  # 'mineru' or 'hybrid'; overrides use_hybrid_extractor
        inpaint_method: Optional[str] = None,  # 'generative', 'baidu', 'hybrid'; overrides use_hybrid_inpaint
        **kwargs
    ) -> 'ServiceConfig':
        """
        Build a configuration from defaults, filling gaps from Flask config.

        Extraction: the hybrid extractor (MinerU layout analysis + Baidu
        accurate OCR) is the default, with a MinerU-only fallback when it
        cannot be created. Inpainting: 'hybrid' (Baidu repair + generative
        enhancement) or 'baidu' fall back to the generative provider when
        they cannot be created; any other value uses the generative provider.

        Args:
            mineru_token: MinerU API token (optional; read from Flask config
                ``MINERU_TOKEN`` inside an app context).
            mineru_api_base: MinerU API base URL (optional; Flask config
                ``MINERU_API_BASE`` or 'https://mineru.net').
            upload_folder: Upload folder (optional; Flask config
                ``UPLOAD_FOLDER`` or './uploads').
            ai_service: AI service instance used by generative inpainting (optional).
            use_hybrid_extractor: Use the hybrid extractor (default True;
                overridden by ``extractor_method``).
            use_hybrid_inpaint: Use hybrid inpainting (default True;
                overridden by ``inpaint_method``).
            extractor_method: 'mineru' or 'hybrid'.
            inpaint_method: 'generative', 'baidu' or 'hybrid'.
            **kwargs: Optional tuning values:
                - max_depth (default 1)
                - min_image_size (default 200)
                - min_image_area (default 40000)
                - contain_threshold (default 0.8)
                - intersection_threshold (default 0.3)
                - enhance_quality (default True)

        Returns:
            A ServiceConfig instance.

        Raises:
            ValueError: If no MinerU token is configured.
        """
        # extractor_method, when given, wins over the boolean flag.
        if extractor_method is not None:
            use_hybrid_extractor = (extractor_method == 'hybrid')
            logger.info(f"extractor_method={extractor_method} -> use_hybrid_extractor={use_hybrid_extractor}")

        # Fill missing values from Flask config when an app context exists.
        from flask import current_app, has_app_context

        if has_app_context() and current_app:
            if mineru_token is None:
                mineru_token = current_app.config.get('MINERU_TOKEN')
            if mineru_api_base is None:
                mineru_api_base = current_app.config.get('MINERU_API_BASE', 'https://mineru.net')
            if upload_folder is None:
                upload_folder = current_app.config.get('UPLOAD_FOLDER', './uploads')
        else:
            # No app context: fall back to hard-coded defaults.
            if mineru_api_base is None:
                mineru_api_base = 'https://mineru.net'
            if upload_folder is None:
                upload_folder = './uploads'

        if not mineru_token:
            raise ValueError("MinerU token is required. Please configure MINERU_TOKEN.")

        from services.file_parser_service import FileParserService

        upload_path = cls._resolve_upload_path(upload_folder)
        logger.info(f"Upload folder resolved to: {upload_path}")

        parser_service = FileParserService(
            mineru_token=mineru_token,
            mineru_api_base=mineru_api_base
        )

        # ---- extractor registry -------------------------------------------
        extractor_registry = ExtractorRegistry()

        if use_hybrid_extractor:
            hybrid_extractor = ExtractorFactory.create_hybrid_extractor(
                parser_service=parser_service,
                upload_folder=upload_path,
                contain_threshold=kwargs.get('contain_threshold', 0.8),
                intersection_threshold=kwargs.get('intersection_threshold', 0.3)
            )
            if hybrid_extractor:
                extractor_registry.register_default(hybrid_extractor)
                logger.info("✅ 混合提取器已创建(MinerU + 百度高精度OCR)")
            else:
                # Hybrid unavailable: fall back to MinerU only.
                extractor_registry.register_default(
                    MinerUElementExtractor(parser_service, upload_path)
                )
                logger.warning("⚠️ 混合提取器创建失败,回退到MinerU提取器")
        else:
            extractor_registry.register_default(
                MinerUElementExtractor(parser_service, upload_path)
            )
            logger.info("✅ MinerU提取器已创建(通用分割)")

        # ---- inpaint registry ---------------------------------------------
        inpaint_registry = InpaintProviderRegistry()

        # inpaint_method wins; otherwise derive it from the legacy boolean.
        effective_inpaint_method = inpaint_method
        if effective_inpaint_method is None:
            effective_inpaint_method = 'hybrid' if use_hybrid_inpaint else 'generative'
        logger.info(f"inpaint_method={effective_inpaint_method}")

        def register_generative():
            """Register the generative provider as the default (shared fallback)."""
            inpaint_registry.register_default(
                InpaintProviderFactory.create_generative_edit_provider(ai_service=ai_service)
            )

        if effective_inpaint_method == 'hybrid':
            hybrid_inpaint = InpaintProviderFactory.create_hybrid_inpaint_provider(
                ai_service=ai_service,
                enhance_quality=kwargs.get('enhance_quality', True)
            )
            if hybrid_inpaint:
                inpaint_registry.register_default(hybrid_inpaint)
                logger.info("✅ 混合Inpaint提供者已创建(百度修复 + 生成式画质提升)")
            else:
                register_generative()
                logger.warning("⚠️ 混合Inpaint创建失败,回退到GenerativeEdit")
        elif effective_inpaint_method == 'baidu':
            baidu_inpaint = InpaintProviderFactory.create_baidu_inpaint_provider()
            if baidu_inpaint:
                inpaint_registry.register_default(baidu_inpaint)
                logger.info("✅ 百度Inpaint提供者已创建(纯百度修复)")
            else:
                register_generative()
                logger.warning("⚠️ 百度Inpaint创建失败,回退到GenerativeEdit")
        else:
            # 'generative' or any unrecognised value.
            register_generative()
            logger.info("✅ 重绘注册表已创建(GenerativeEdit通用)")

        return cls(
            upload_folder=upload_path,
            extractor_registry=extractor_registry,
            inpaint_registry=inpaint_registry,
            max_depth=kwargs.get('max_depth', 1),
            min_image_size=kwargs.get('min_image_size', 200),
            min_image_area=kwargs.get('min_image_area', 40000)
        )
@staticmethod
def create_text_attribute_registry(
    caption_extractor: Optional[TextAttributeExtractor] = None,
    ai_service: Optional[Any] = None
) -> TextAttributeExtractorRegistry:
    """
    Build the text-attribute extractor registry.

    The same extractor is registered both as the registry default and for
    the text-like element types listed below.

    Args:
        caption_extractor: Caption-model extractor (optional; created via
            ``create_caption_model_extractor`` when omitted).
        ai_service: AIService instance forwarded to that factory (optional).

    Returns:
        The configured TextAttributeExtractorRegistry.

    Raises:
        Whatever the extractor factory raises; nothing is caught here.
    """
    extractor = caption_extractor
    if extractor is None:
        extractor = TextAttributeExtractorFactory.create_caption_model_extractor(
            ai_service=ai_service
        )

    text_like_types = ['text', 'title', 'paragraph', 'heading', 'table_cell']

    registry = TextAttributeExtractorRegistry()
    registry.register_default(extractor)
    registry.register_types(text_like_types, extractor)

    logger.info("创建TextAttributeExtractorRegistry")
    return registry
def crop_element_from_image(
    source_image_path: str,
    bbox: BBox
) -> str:
    """
    Crop an element region out of the source image and save it as a PNG.

    Args:
        source_image_path: Path of the source image.
        bbox: Region to crop; its ``x0``, ``y0``, ``x1``, ``y1`` attributes
            are truncated to integers with ``int()``.

    Returns:
        Path of the temporary PNG file holding the cropped region. The file
        is created with ``delete=False``, so the caller owns its cleanup.
    """
    crop_box = (int(bbox.x0), int(bbox.y0), int(bbox.x1), int(bbox.y1))

    # Use the image as a context manager so the underlying file handle is
    # released even if cropping or saving fails (it was previously leaked).
    with Image.open(source_image_path) as img:
        cropped = img.crop(crop_box)

        # Reserve a temp file name, then close the handle *before* writing:
        # saving by name while the handle is still open fails on platforms
        # that do not allow a second open of the same file.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            tmp_path = tmp.name
        cropped.save(tmp_path)

    return tmp_path
class BBoxUtils:
    """Helpers for axis-aligned bounding boxes given as ``[x0, y0, x1, y1]``."""

    @staticmethod
    def _overlap_area(bbox1: List[float], bbox2: List[float]) -> Optional[float]:
        """
        Area of the intersection rectangle of two boxes.

        Returns None when the boxes do not overlap (touching edges count as
        no overlap). Shared by all public helpers so the intersection
        arithmetic lives in exactly one place.
        """
        x0_1, y0_1, x1_1, y1_1 = bbox1
        x0_2, y0_2, x1_2, y1_2 = bbox2

        inter_x0 = max(x0_1, x0_2)
        inter_y0 = max(y0_1, y0_2)
        inter_x1 = min(x1_1, x1_2)
        inter_y1 = min(y1_1, y1_2)

        if inter_x1 <= inter_x0 or inter_y1 <= inter_y0:
            return None
        return (inter_x1 - inter_x0) * (inter_y1 - inter_y0)

    @staticmethod
    def _area(bbox: List[float]) -> float:
        """Width times height of a box (may be <= 0 for degenerate boxes)."""
        x0, y0, x1, y1 = bbox
        return (x1 - x0) * (y1 - y0)

    @staticmethod
    def is_contained(inner_bbox: List[float], outer_bbox: List[float], threshold: float = 0.8) -> bool:
        """
        Tell whether ``inner_bbox`` lies (mostly) inside ``outer_bbox``.

        Args:
            inner_bbox: Inner box [x0, y0, x1, y1].
            outer_bbox: Outer box [x0, y0, x1, y1].
            threshold: Minimum fraction of the inner box's area that must
                fall inside the outer box (default 0.8).

        Returns:
            True when the covered fraction is >= threshold. False for
            empty/None boxes, non-overlapping boxes, or a zero-area inner box.
        """
        if not inner_bbox or not outer_bbox:
            return False

        inter_area = BBoxUtils._overlap_area(inner_bbox, outer_bbox)
        if inter_area is None:
            return False

        inner_area = BBoxUtils._area(inner_bbox)
        if inner_area <= 0:
            return False

        return (inter_area / inner_area) >= threshold

    @staticmethod
    def has_intersection(bbox1: List[float], bbox2: List[float], min_overlap_ratio: float = 0.1) -> bool:
        """
        Tell whether two boxes overlap by a meaningful amount.

        Args:
            bbox1: First box [x0, y0, x1, y1].
            bbox2: Second box [x0, y0, x1, y1].
            min_overlap_ratio: Minimum overlap, measured against the area of
                the smaller box (default 0.1).

        Returns:
            True when overlap / smaller-area >= min_overlap_ratio. False for
            empty/None boxes, non-overlapping boxes, or a zero-area box.
        """
        if not bbox1 or not bbox2:
            return False

        inter_area = BBoxUtils._overlap_area(bbox1, bbox2)
        if inter_area is None:
            return False

        # The smaller box is the reference for the overlap ratio.
        min_area = min(BBoxUtils._area(bbox1), BBoxUtils._area(bbox2))
        if min_area <= 0:
            return False

        return (inter_area / min_area) >= min_overlap_ratio

    @staticmethod
    def get_intersection_ratio(bbox1: List[float], bbox2: List[float]) -> Tuple[float, float]:
        """
        Compute how much of each box is covered by their intersection.

        Args:
            bbox1: First box.
            bbox2: Second box.

        Returns:
            ``(intersection / area(bbox1), intersection / area(bbox2))``;
            ``(0.0, 0.0)`` for empty/None or non-overlapping boxes, and 0.0
            for any box whose area is not positive.
        """
        if not bbox1 or not bbox2:
            return (0.0, 0.0)

        inter_area = BBoxUtils._overlap_area(bbox1, bbox2)
        if inter_area is None:
            return (0.0, 0.0)

        area1 = BBoxUtils._area(bbox1)
        area2 = BBoxUtils._area(bbox2)

        ratio1 = inter_area / area1 if area1 > 0 else 0.0
        ratio2 = inter_area / area2 if area2 > 0 else 0.0

        return (ratio1, ratio2)
其他类型(文字等)与百度OCR bbox有交集 → 使用百度OCR结果,删除MinerU bbox """ # 元素类型分类 IMAGE_TYPES = {'image', 'figure', 'chart', 'diagram'} TABLE_TYPES = {'table', 'table_cell'} TEXT_TYPES = {'text', 'title', 'paragraph', 'header', 'footer', 'list'} def __init__( self, mineru_extractor: MinerUElementExtractor, baidu_ocr_extractor: BaiduAccurateOCRElementExtractor, contain_threshold: float = 0.8, intersection_threshold: float = 0.3 ): """ 初始化混合提取器 Args: mineru_extractor: MinerU元素提取器 baidu_ocr_extractor: 百度高精度OCR提取器 contain_threshold: 包含判断阈值,默认0.8(80%面积在内部算包含) intersection_threshold: 交集判断阈值,默认0.3(30%重叠算有交集) """ self._mineru_extractor = mineru_extractor self._baidu_ocr_extractor = baidu_ocr_extractor self._contain_threshold = contain_threshold self._intersection_threshold = intersection_threshold def supports_type(self, element_type: Optional[str]) -> bool: """混合提取器支持所有类型""" return True def extract( self, image_path: str, element_type: Optional[str] = None, **kwargs ) -> ExtractionResult: """ 从图像中提取元素(混合策略) 工作流程: 1. 调用MinerU提取器获取版面分析结果 2. 调用百度OCR提取器获取文字识别结果 3. 合并结果 Args: image_path: 图像文件路径 element_type: 元素类型提示(可选) **kwargs: 其他参数 - depth: 递归深度 - language_type: 百度OCR语言类型 Returns: 合并后的ExtractionResult """ depth = kwargs.get('depth', 0) indent = ' ' * depth logger.info(f"{indent}🔀 开始混合提取: {image_path}") # 1. 
MinerU版面分析 和 百度高精度OCR 并行执行 logger.info(f"{indent}📄🔤 Step 1: MinerU + 百度OCR 并行识别...") mineru_result = None baidu_result = None mineru_error = None baidu_error = None def run_mineru(): return self._mineru_extractor.extract(image_path, element_type, **kwargs) def run_baidu_ocr(): return self._baidu_ocr_extractor.extract(image_path, element_type, **kwargs) with ThreadPoolExecutor(max_workers=2) as executor: future_mineru = executor.submit(run_mineru) future_baidu = executor.submit(run_baidu_ocr) # 等待两个任务完成 for future in as_completed([future_mineru, future_baidu]): try: if future == future_mineru: mineru_result = future.result() # 检查结果是否带有错误 if mineru_result.has_error: mineru_error = mineru_result.error logger.error(f"{indent} ❌ MinerU提取错误: {mineru_error}") else: logger.info(f"{indent} ✅ MinerU识别到 {len(mineru_result.elements)} 个元素") else: baidu_result = future.result() if baidu_result.has_error: baidu_error = baidu_result.error logger.error(f"{indent} ❌ 百度OCR提取错误: {baidu_error}") else: logger.info(f"{indent} ✅ 百度OCR识别到 {len(baidu_result.elements)} 个元素") except Exception as e: if future == future_mineru: mineru_error = str(e) logger.error(f"{indent} ❌ MinerU提取失败: {e}") else: baidu_error = str(e) logger.error(f"{indent} ❌ 百度OCR提取失败: {e}") # 确保两个结果都存在(即使有错误也创建空结果以便继续合并) if mineru_result is None: mineru_result = ExtractionResult(elements=[], error=mineru_error) if baidu_result is None: baidu_result = ExtractionResult(elements=[], error=baidu_error) mineru_elements = mineru_result.elements baidu_elements = baidu_result.elements # 2. 
def _merge_results(
    self,
    mineru_elements: List[Dict[str, Any]],
    baidu_elements: List[Dict[str, Any]],
    depth: int = 0
) -> List[Dict[str, Any]]:
    """
    Merge MinerU layout elements with Baidu OCR elements.

    Rules:
    1. A Baidu OCR box contained in a MinerU image-type box is dropped.
    2. A Baidu OCR box contained in a MinerU table-type box is flagged
       ``in_table``; any table containing such a box is itself dropped.
    3. Any other MinerU element that intersects a still-kept Baidu OCR box
       is dropped in favour of the OCR result.

    Every returned element is a shallow copy whose ``metadata`` dict is also
    copied and tagged with ``source`` ('mineru' or 'baidu_ocr').

    Args:
        mineru_elements: Elements recognised by MinerU.
        baidu_elements: Elements recognised by Baidu OCR.
        depth: Recursion depth (only used to indent log output).

    Returns:
        Merged list: images, surviving tables, surviving other MinerU
        elements, then kept Baidu OCR elements in their original index order.
    """
    indent = ' ' * depth

    def tagged_copy(elem: Dict[str, Any], source: str) -> Dict[str, Any]:
        """Copy an element (and its metadata dict) and record where it came from."""
        clone = elem.copy()
        clone['metadata'] = clone.get('metadata', {}).copy()
        clone['metadata']['source'] = source
        return clone

    # Split MinerU elements by category.
    image_elements = []
    table_elements = []
    other_elements = []

    for elem in mineru_elements:
        elem_type = elem.get('type', '')
        if elem_type in self.IMAGE_TYPES:
            image_elements.append(elem)
        elif elem_type in self.TABLE_TYPES:
            table_elements.append(elem)
        else:
            other_elements.append(elem)

    logger.info(f"{indent} MinerU分类: 图片={len(image_elements)}, 表格={len(table_elements)}, 其他={len(other_elements)}")

    baidu_to_keep = set(range(len(baidu_elements)))  # everything is kept initially
    baidu_in_table = set()  # indices of OCR elements located inside a table

    # Rule 1: OCR text inside an image is not extracted separately.
    for img_elem in image_elements:
        img_bbox = img_elem.get('bbox', [])
        for idx, baidu_elem in enumerate(baidu_elements):
            baidu_bbox = baidu_elem.get('bbox', [])
            if BBoxUtils.is_contained(baidu_bbox, img_bbox, self._contain_threshold):
                baidu_to_keep.discard(idx)
                logger.debug(f"{indent} 百度OCR[{idx}]被图片包含,删除")

    # Rule 2: OCR text inside a table replaces the table box itself.
    tables_to_remove = set()
    for table_idx, table_elem in enumerate(table_elements):
        table_bbox = table_elem.get('bbox', [])
        has_contained_text = False
        for idx, baidu_elem in enumerate(baidu_elements):
            baidu_bbox = baidu_elem.get('bbox', [])
            if BBoxUtils.is_contained(baidu_bbox, table_bbox, self._contain_threshold):
                baidu_in_table.add(idx)
                has_contained_text = True
                logger.debug(f"{indent} 百度OCR[{idx}]在表格内,保留")

        if has_contained_text:
            tables_to_remove.add(table_idx)
            logger.debug(f"{indent} 表格[{table_idx}]有文字,删除表格bbox")

    # Rule 3: other MinerU elements yield to any overlapping kept OCR box.
    other_to_remove = set()
    for other_idx, other_elem in enumerate(other_elements):
        other_bbox = other_elem.get('bbox', [])
        for idx, baidu_elem in enumerate(baidu_elements):
            if idx not in baidu_to_keep:
                continue
            baidu_bbox = baidu_elem.get('bbox', [])
            if BBoxUtils.has_intersection(other_bbox, baidu_bbox, self._intersection_threshold):
                other_to_remove.add(other_idx)
                logger.debug(f"{indent} MinerU其他[{other_idx}]与百度OCR[{idx}]有交集,使用百度OCR")
                break  # one overlap is enough to drop this element

    # Assemble the final list.
    merged = []

    # Images are always kept.
    merged.extend(tagged_copy(elem, 'mineru') for elem in image_elements)

    # Tables survive only when no OCR text was found inside them.
    merged.extend(
        tagged_copy(elem, 'mineru')
        for idx, elem in enumerate(table_elements)
        if idx not in tables_to_remove
    )

    # Other MinerU elements survive only when no OCR box overlaps them.
    merged.extend(
        tagged_copy(elem, 'mineru')
        for idx, elem in enumerate(other_elements)
        if idx not in other_to_remove
    )

    # sorted(): set iteration order is unspecified, so make the output order
    # of OCR elements explicitly follow their original index order.
    for idx in sorted(baidu_to_keep):
        clone = tagged_copy(baidu_elements[idx], 'baidu_ocr')
        if idx in baidu_in_table:
            clone['metadata']['in_table'] = True
        merged.append(clone)

    logger.info(f"{indent} 合并结果: 保留图片={len(image_elements)}, "
                f"保留表格={len(table_elements) - len(tables_to_remove)}, "
                f"保留MinerU其他={len(other_elements) - len(other_to_remove)}, "
                f"保留百度OCR={len(baidu_to_keep)}")

    return merged
mineru_extractor is None: if parser_service is None or upload_folder is None: logger.error("创建混合提取器需要提供 parser_service 和 upload_folder,或者直接提供 mineru_extractor") return None if isinstance(upload_folder, str): upload_folder = Path(upload_folder) mineru_extractor = MinerUElementExtractor(parser_service, upload_folder) logger.info("✅ MinerU提取器已创建") # 创建百度OCR提取器 if baidu_ocr_extractor is None: try: from services.ai_providers.ocr import create_baidu_accurate_ocr_provider baidu_provider = create_baidu_accurate_ocr_provider() if baidu_provider is None: logger.warning("无法创建百度高精度OCR Provider") return None baidu_ocr_extractor = BaiduAccurateOCRElementExtractor(baidu_provider) logger.info("✅ 百度高精度OCR提取器已创建") except Exception as e: logger.error(f"创建百度高精度OCR提取器失败: {e}") return None return HybridElementExtractor( mineru_extractor=mineru_extractor, baidu_ocr_extractor=baidu_ocr_extractor, contain_threshold=contain_threshold, intersection_threshold=intersection_threshold ) ================================================ FILE: backend/services/image_editability/inpaint_providers.py ================================================ """ Inpaint提供者 - 抽象不同的inpaint实现 提供多种重绘方法: 1. DefaultInpaintProvider - 基于mask的精确区域重绘(使用Volcengine Inpainting服务) 2. GenerativeEditInpaintProvider - 基于生成式大模型的整图编辑重绘(如Gemini图片编辑) 3. BaiduInpaintProvider - 基于百度图像修复API的区域重绘 4. 
class InpaintProvider(ABC):
    """
    Abstract interface for inpaint providers.

    Lets different inpaint back-ends be plugged in behind one call, e.g.:
    - an InpaintingService-based implementation (the current default)
    - a Gemini API implementation
    - SD/SDXL or other model implementations
    - third-party API implementations
    """

    @abstractmethod
    def inpaint_regions(
        self,
        image: Image.Image,
        bboxes: List[tuple],
        types: Optional[List[str]] = None,
        **kwargs
    ) -> Optional[Image.Image]:
        """
        Inpaint the given regions of an image.

        Args:
            image: Original PIL image.
            bboxes: Bounding boxes, each as ``(x0, y0, x1, y1)``.
            types: Optional element types, one per bbox (e.g. 'text',
                'image', 'table').
            **kwargs: Implementation-specific options.

        Returns:
            The processed PIL image, or None on failure.
        """
        ...
class GenerativeEditInpaintProvider(InpaintProvider):
    """
    Inpaint provider based on generative whole-image editing.

    Sends the whole image plus a "clean background" prompt to the AI
    service's ``edit_image`` method. Unlike DefaultInpaintProvider, which
    repaints mask regions derived from bboxes, this provider does not use
    the bboxes it is given.

    Trade-offs noted by the original authors: no precise bbox is required,
    but background detail may change and generation is slower and costlier.
    """

    def __init__(self, ai_service, aspect_ratio: str = "16:9", resolution: str = "2K"):
        """
        Args:
            ai_service: AIService instance (must provide ``edit_image``).
            aspect_ratio: Target aspect ratio.
            resolution: Target resolution.
        """
        self.ai_service = ai_service
        self.aspect_ratio = aspect_ratio
        self.resolution = resolution

    def inpaint_regions(
        self,
        image: Image.Image,
        bboxes: List[tuple],
        types: Optional[List[str]] = None,
        **kwargs
    ) -> Optional[Image.Image]:
        """
        Produce a clean background via generative editing.

        ``bboxes`` and ``types`` are ignored by this implementation.

        Supported kwargs:
            - aspect_ratio: str, defaults to the value given at construction
            - resolution: str, defaults to the value given at construction

        Returns:
            The edited PIL image, or None when the service returns nothing,
            returns an unsupported type, or any exception occurs (logged).
        """
        import os  # local import: only needed for temp-file cleanup

        aspect_ratio = kwargs.get('aspect_ratio', self.aspect_ratio)
        resolution = kwargs.get('resolution', self.resolution)

        tmp_path = None
        try:
            from services.prompts import get_clean_background_prompt

            edit_instruction = get_clean_background_prompt()

            # The AI service takes a file path, so persist the image first.
            # The handle is closed before saving by name.
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
                tmp_path = tmp_file.name
            image.save(tmp_path)

            logger.info("GenerativeEditInpaintProvider: 开始生成式编辑重绘...")

            clean_bg_image = self.ai_service.edit_image(
                prompt=edit_instruction,
                current_image_path=tmp_path,
                aspect_ratio=aspect_ratio,
                resolution=resolution,
                original_description=None,
                additional_ref_images=None
            )

            if not clean_bg_image:
                logger.error("GenerativeEditInpaintProvider: 生成式编辑返回空结果")
                return None

            if not isinstance(clean_bg_image, Image.Image):
                # Some SDK image wrappers expose the PIL image as _pil_image.
                if hasattr(clean_bg_image, '_pil_image'):
                    clean_bg_image = clean_bg_image._pil_image
                else:
                    logger.error(f"GenerativeEditInpaintProvider: 未知的图片类型: {type(clean_bg_image)}")
                    return None

            logger.info("GenerativeEditInpaintProvider: 重绘完成")
            return clean_bg_image

        except Exception as e:
            logger.error(f"GenerativeEditInpaintProvider处理失败: {e}", exc_info=True)
            return None
        finally:
            # The temp file is created with delete=False and was previously
            # never removed, leaking one PNG per call. Best-effort cleanup.
            # NOTE(review): assumes edit_image has finished reading the input
            # file by the time it returns — confirm against AIService.
            if tmp_path is not None:
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass
再使用生成式大模型(如Gemini)提升整体画质,保持内容不变 优点: - 百度修复快速精确地去除文字,不会遗漏 - 生成式模型提升画质,使修复痕迹更自然 适用场景: - 需要精确去除文字且保证高画质的场景 - 单独使用生成式模型容易遗漏文字的情况 """ def __init__( self, baidu_provider: BaiduInpaintProvider, generative_provider: 'GenerativeEditInpaintProvider', enhance_quality: bool = True ): """ 初始化混合Inpaint提供者 Args: baidu_provider: 百度图像修复提供者 generative_provider: 生成式编辑提供者(用于画质提升) enhance_quality: 是否在百度修复后使用生成式模型提升画质,默认True """ self._baidu_provider = baidu_provider self._generative_provider = generative_provider self._enhance_quality = enhance_quality def inpaint_regions( self, image: Image.Image, bboxes: List[tuple], types: Optional[List[str]] = None, **kwargs ) -> Optional[Image.Image]: """ 混合处理:先百度修复,再生成式画质提升 支持的kwargs参数: - expand_pixels: int, 百度修复的扩展像素数,默认2 - enhance_quality: bool, 是否提升画质,默认使用初始化时的值 - aspect_ratio: str, 画质提升的宽高比 - resolution: str, 画质提升的分辨率 """ expand_pixels = kwargs.get('expand_pixels', 2) enhance_quality = kwargs.get('enhance_quality', self._enhance_quality) try: # Step 1: 百度图像修复 - 精确去除文字 logger.info(f"HybridInpaintProvider Step 1: 百度修复 {len(bboxes)} 个区域...") repaired_image = self._baidu_provider.inpaint_regions( image=image, bboxes=bboxes, types=types, expand_pixels=expand_pixels ) if repaired_image is None: logger.error("HybridInpaintProvider: 百度修复失败") return None logger.info("HybridInpaintProvider: 百度修复完成") # Step 2: 生成式画质提升(可选) if enhance_quality and self._generative_provider: logger.info("HybridInpaintProvider Step 2: 生成式画质提升...") # 使用专门的画质提升prompt,传入被修复的区域信息 enhanced_image = self._enhance_image_quality( repaired_image, inpainted_bboxes=bboxes, # 传入被修复的区域 aspect_ratio=kwargs.get('aspect_ratio'), resolution=kwargs.get('resolution') ) if enhanced_image: logger.info("HybridInpaintProvider: 画质提升完成") return enhanced_image else: logger.warning("HybridInpaintProvider: 画质提升失败,返回百度修复结果") return repaired_image else: logger.info("HybridInpaintProvider: 跳过画质提升") return repaired_image except Exception as e: logger.error(f"HybridInpaintProvider处理失败: {e}", exc_info=True) 
def _enhance_image_quality(
    self,
    image: Image.Image,
    inpainted_bboxes: Optional[List[tuple]] = None,
    aspect_ratio: Optional[str] = None,
    resolution: Optional[str] = None
) -> Optional[Image.Image]:
    """
    Run the generative model over an already-repaired image to improve quality.

    Args:
        image: image to enhance
        inpainted_bboxes: repaired regions as (x0, y0, x1, y1) tuples; they are
            merged with ``merge_vertical_nearby_bboxes`` and converted to
            percentages of the image size before being put into the prompt
        aspect_ratio: optional override; falls back to the generative
            provider's ``aspect_ratio`` when falsy
        resolution: optional override; falls back to the generative
            provider's ``resolution`` when falsy

    Returns:
        The enhanced PIL image, or None when the service returns nothing,
        returns an unsupported type, or any exception occurs (logged).
    """
    # Local import: the module-level import block is outside this block.
    import os

    tmp_path = None
    try:
        # edit_image takes a file path, so persist the image first.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
            tmp_path = tmp_file.name
        image.save(tmp_path)

        # Describe the repaired regions as percentages of width/height.
        regions = None
        if inpainted_bboxes:
            # Merge vertically adjacent boxes first so fewer regions are
            # handed to the model.
            from utils.mask_utils import merge_vertical_nearby_bboxes
            original_count = len(inpainted_bboxes)
            merged_bboxes = merge_vertical_nearby_bboxes(inpainted_bboxes)
            if len(merged_bboxes) < original_count:
                logger.info(f"合并相邻文字行后:{original_count} -> {len(merged_bboxes)} 个区域")

            img_width, img_height = image.size
            regions = []
            for bbox in merged_bboxes:
                x0, y0, x1, y1 = bbox
                # Percent values in the 0-100 range, one decimal place.
                regions.append({
                    'left': round(x0 / img_width * 100, 1),
                    'top': round(y0 / img_height * 100, 1),
                    'right': round(x1 / img_width * 100, 1),
                    'bottom': round(y1 / img_height * 100, 1),
                    'width_percent': round((x1 - x0) / img_width * 100, 1),
                    'height_percent': round((y1 - y0) / img_height * 100, 1)
                })
            logger.info(f"传递 {len(regions)} 个被修复区域给生成式模型(百分比坐标)")

        # Quality-enhancement prompt, including the repaired regions.
        from services.prompts import get_quality_enhancement_prompt
        enhance_prompt = get_quality_enhancement_prompt(inpainted_regions=regions)

        # Explicit arguments win; otherwise use the provider's defaults.
        ar = aspect_ratio or self._generative_provider.aspect_ratio
        res = resolution or self._generative_provider.resolution

        enhanced_image = self._generative_provider.ai_service.edit_image(
            prompt=enhance_prompt,
            current_image_path=tmp_path,
            aspect_ratio=ar,
            resolution=res,
            original_description=None,
            additional_ref_images=None
        )

        if not enhanced_image:
            return None

        # Normalise to a PIL image.
        if not isinstance(enhanced_image, Image.Image):
            if hasattr(enhanced_image, '_pil_image'):
                enhanced_image = enhanced_image._pil_image
            else:
                logger.error(f"未知的图片类型: {type(enhanced_image)}")
                return None

        return enhanced_image

    except Exception as e:
        logger.error(f"画质提升失败: {e}", exc_info=True)
        return None
    finally:
        # Created with delete=False: remove it here so repeated calls do not
        # accumulate PNG files in the temp directory.
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError as cleanup_error:
                logger.warning(f"清理临时文件失败: {cleanup_error}")
@classmethod
def create_default(
    cls,
    mask_provider: Optional[InpaintProvider] = None,
    generative_provider: Optional[InpaintProvider] = None
) -> 'InpaintProviderRegistry':
    """
    Build a registry wired with the standard type-to-provider mapping.

    Mapping applied:
    - TEXT_TYPES and TABLE_TYPES -> mask_provider (only when it is given)
    - IMAGE_TYPES -> generative_provider, or mask_provider when no
      generative provider is given
    - default provider -> mask_provider, or generative_provider when no
      mask provider is given

    Args:
        mask_provider: mask-based inpaint provider
        generative_provider: generative inpaint provider

    Returns:
        The configured registry; an empty registry (with a warning logged)
        when neither provider is supplied.
    """
    registry = cls()

    # Nothing to register: hand back the empty registry.
    if not (mask_provider or generative_provider):
        logger.warning("创建InpaintProviderRegistry时未提供任何provider")
        return registry

    # Default provider prefers the mask-based one.
    registry.register_default(mask_provider or generative_provider)

    # Text and table element types are handled by the mask-based provider.
    if mask_provider:
        for type_group in (cls.TEXT_TYPES, cls.TABLE_TYPES):
            registry.register_types(list(type_group), mask_provider)

    # Image element types prefer the generative provider.
    image_provider = generative_provider or mask_provider
    if image_provider:
        registry.register_types(list(cls.IMAGE_TYPES), image_provider)

    text_table_name = mask_provider.__class__.__name__ if mask_provider else 'None'
    image_name = image_provider.__class__.__name__ if image_provider else 'None'
    logger.info(f"创建默认InpaintProviderRegistry: 文本/表格->{text_table_name}, 图片->{image_name}")

    return registry
class ImageEditabilityService:
    """
    Turns a single image into an editable structure (elements + clean background).

    All collaborators (extractors, inpaint providers, upload folder, limits)
    come from the ``ServiceConfig`` passed to the constructor; instance
    attributes are only assigned in ``__init__``.

    Example:
        >>> config = ServiceConfig.from_defaults(mineru_token="xxx")
        >>> service = ImageEditabilityService(config)
        >>>
        >>> # sequential
        >>> result = service.make_image_editable("image.png")
        >>>
        >>> # parallel (driven by the caller)
        >>> from concurrent.futures import ThreadPoolExecutor
        >>> with ThreadPoolExecutor() as executor:
        ...     futures = [executor.submit(service.make_image_editable, img)
        ...                for img in image_paths]
        ...     results = [f.result() for f in futures]
    """

    def __init__(self, config: ServiceConfig):
        """
        Copy the dependencies out of the configuration object.

        Args:
            config: ServiceConfig carrying the registries, upload folder and limits
        """
        self._upload_folder = config.upload_folder
        self._extractor_registry = config.extractor_registry
        self._inpaint_registry = config.inpaint_registry
        self._max_depth = config.max_depth
        self._min_image_size = config.min_image_size
        self._min_image_area = config.min_image_area
        self._max_child_coverage_ratio = 0.85

        extractors = self._extractor_registry.get_all_extractors()
        # make_image_editable treats the inpaint registry as optional, so do
        # not dereference it unconditionally here.
        inpaint_providers = (
            self._inpaint_registry.get_all_providers() if self._inpaint_registry else []
        )

        logger.info(
            f"ImageEditabilityService: {len(extractors)} extractors, "
            f"{len(inpaint_providers)} inpaint providers, "
            f"max_depth={self._max_depth}"
        )

    def make_image_editable(
        self,
        image_path: str,
        depth: int = 0,
        parent_id: Optional[str] = None,
        parent_bbox: Optional[BBox] = None,
        root_image_size: Optional[Tuple[int, int]] = None,
        element_type: Optional[str] = None,
        root_image_path: Optional[str] = None
    ) -> EditableImage:
        """
        Convert an image into an editable structure, recursing into children.

        Args:
            image_path: path of the image to process
            depth: current recursion depth (internal)
            parent_id: id of the parent image (internal)
            parent_bbox: position of this image inside its parent (internal)
            root_image_size: size of the root image (internal)
            element_type: element type used to pick extractor/provider (internal)
            root_image_path: path of the root image (internal)

        Returns:
            The resulting EditableImage.

        Raises:
            Exception: whatever ``Image.open`` raises when the image cannot be loaded
            RuntimeError: when extraction reports an error at depth 0
            ValueError: when no extractor is registered for ``element_type``
        """
        image_id = str(uuid.uuid4())[:8]
        logger.info(f"{' ' * depth}[{image_id}] 开始处理")

        # 1. Load the image only to read its size; close the handle right away.
        try:
            with Image.open(image_path) as img:
                width, height = img.size
        except Exception as e:
            logger.error(f"无法加载图片 {image_path}: {e}")
            raise

        # Remember the root image on the outermost call.
        if root_image_size is None:
            root_image_size = (width, height)
        if root_image_path is None:
            root_image_path = image_path

        # 2. Extract elements.
        extraction_result = self._extract_elements(
            image_path=image_path,
            element_type=element_type,
            depth=depth
        )

        # A failure at the root level is fatal; deeper levels carry on.
        if extraction_result.has_error and depth == 0:
            raise RuntimeError(f"版面分析失败: {extraction_result.error}")

        # The extractor may report its own image size through the context.
        extracted_image_size = extraction_result.context.metadata.get('image_size', (width, height))

        elements = self._convert_to_editable_elements(
            element_dicts=extraction_result.elements,
            image_id=image_id,
            parent_bbox=parent_bbox,
            image_size=extracted_image_size,
            root_image_size=root_image_size,
            source_image_path=image_path  # used to crop per-element images
        )

        logger.info(f"{' ' * depth}提取到 {len(elements)} 个元素")

        # 3. Clean background (provider chosen by element type).
        clean_background = None
        if self._inpaint_registry and elements:
            clean_background = self._generate_clean_background(
                image_path=image_path,
                elements=elements,
                image_id=image_id,
                depth=depth,
                parent_bbox=parent_bbox,
                root_image_path=root_image_path,
                image_size=(width, height),
                element_type=element_type
            )

        # 4. Recurse into children.
        # max_depth=1 processes one level without recursion, max_depth=2 recurses once.
        if depth + 1 < self._max_depth:
            self._process_children(
                elements=elements,
                current_image_path=image_path,
                depth=depth,
                image_id=image_id,
                root_image_size=root_image_size,
                current_image_size=(width, height),
                root_image_path=root_image_path
            )

        # 5. Assemble the result.
        editable_image = EditableImage(
            image_id=image_id,
            image_path=image_path,
            width=width,
            height=height,
            elements=elements,
            clean_background=clean_background,
            depth=depth,
            parent_id=parent_id
        )

        logger.info(f"{' ' * depth}[{image_id}] 处理完成")
        return editable_image

    def _extract_elements(
        self,
        image_path: str,
        element_type: Optional[str],
        depth: int
    ) -> ExtractionResult:
        """Pick the extractor for ``element_type`` and run it on the image."""
        logger.info(f"{' ' * depth}提取元素...")

        extractor = self._select_extractor(element_type)

        return extractor.extract(
            image_path=image_path,
            element_type=element_type,
            depth=depth
        )

    def _select_extractor(self, element_type: Optional[str]) -> ElementExtractor:
        """
        Look up the extractor for an element type in the registry.

        Raises:
            ValueError: when the registry returns None
        """
        extractor = self._extractor_registry.get_extractor(element_type)
        if extractor is None:
            raise ValueError(f"未找到元素类型 '{element_type}' 对应的提取器")
        return extractor

    def _convert_to_editable_elements(
        self,
        element_dicts: List[dict],
        image_id: str,
        parent_bbox: Optional[BBox],
        image_size: Tuple[int, int],
        root_image_size: Tuple[int, int],
        source_image_path: Optional[str] = None
    ) -> List[EditableElement]:
        """
        Turn extractor dictionaries into EditableElement objects.

        When ``source_image_path`` is given, every element is cropped out of
        that image by its bbox and saved under
        ``<upload_folder>/editable_images/<image_id>/elements``; the saved
        path becomes the element's ``image_path``. Crop failures are logged
        and leave ``image_path`` as None.
        """
        elements = []

        # Output directory and source image are only needed for cropping.
        output_dir = None
        source_img = None
        if source_image_path:
            output_dir = self._upload_folder / 'editable_images' / image_id / 'elements'
            output_dir.mkdir(parents=True, exist_ok=True)
            try:
                source_img = Image.open(source_image_path)
            except Exception as e:
                logger.warning(f"无法加载源图片进行裁剪: {e}")

        try:
            for idx, elem_dict in enumerate(element_dicts):
                bbox_list = elem_dict['bbox']
                local_bbox = BBox(
                    x0=bbox_list[0],
                    y0=bbox_list[1],
                    x1=bbox_list[2],
                    y1=bbox_list[3]
                )

                # Global coordinates: identical to local ones at the root.
                if parent_bbox is None:
                    global_bbox = local_bbox
                else:
                    global_bbox = CoordinateMapper.local_to_global(
                        local_bbox=local_bbox,
                        parent_bbox=parent_bbox,
                        local_image_size=image_size,
                        parent_image_size=root_image_size
                    )

                # Crop and save the element image.
                element_image_path = None
                if source_img and output_dir:
                    try:
                        # Clamp the crop box to the image bounds.
                        crop_box = (
                            max(0, int(local_bbox.x0)),
                            max(0, int(local_bbox.y0)),
                            min(source_img.width, int(local_bbox.x1)),
                            min(source_img.height, int(local_bbox.y1))
                        )

                        # Skip empty or inverted boxes.
                        if crop_box[2] > crop_box[0] and crop_box[3] > crop_box[1]:
                            cropped = source_img.crop(crop_box)
                            element_image_path = str(output_dir / f"{idx}_{elem_dict['type']}.png")
                            cropped.save(element_image_path)
                    except Exception as e:
                        logger.warning(f"裁剪元素 {idx} 失败: {e}")

                element = EditableElement(
                    element_id=f"{image_id}_{idx}",
                    element_type=elem_dict['type'],
                    bbox=local_bbox,
                    bbox_global=global_bbox,
                    content=elem_dict.get('content'),
                    image_path=element_image_path,
                    metadata=elem_dict.get('metadata', {})
                )

                elements.append(element)
        finally:
            # Close the source image even when an element dict is malformed
            # and the loop raises.
            if source_img:
                source_img.close()

        return elements

    def _generate_clean_background(
        self,
        image_path: str,
        elements: List[EditableElement],
        image_id: str,
        depth: int,
        parent_bbox: Optional[BBox],
        root_image_path: str,
        image_size: Tuple[int, int],
        element_type: Optional[str] = None
    ) -> Optional[str]:
        """
        Inpaint the element regions away and save the result.

        The provider is looked up in the inpaint registry by ``element_type``
        (the registry decides what None maps to). Boxes covering more than
        95% of the image are dropped before inpainting.

        Returns:
            Path of the saved ``clean_background.png``, or None when no
            provider is found, no box survives filtering, the provider
            returns None, or an exception occurs (logged).
        """
        logger.info(f"{' ' * depth}生成clean background (element_type={element_type})...")

        inpaint_provider = self._inpaint_registry.get_provider(element_type)

        if inpaint_provider is None:
            logger.warning(f"{' ' * depth}未找到重绘方法,跳过")
            return None

        img = None
        full_page_img = None
        try:
            bboxes = collect_bboxes_from_elements(elements)

            img = Image.open(image_path)
            img_width, img_height = img.size
            element_types = [elem.element_type for elem in elements]

            # crop_box locates this image inside the full page.
            if depth == 0:
                crop_box = (0, 0, img_width, img_height)
            elif parent_bbox:
                crop_box = (
                    int(parent_bbox.x0),
                    int(parent_bbox.y0),
                    int(parent_bbox.x1),
                    int(parent_bbox.y1)
                )
            else:
                crop_box = None

            # The full page is only loaded when this image is not the root.
            if root_image_path != image_path:
                full_page_img = Image.open(root_image_path)

            # Drop boxes that cover almost the entire image.
            filtered_bboxes = []
            filtered_types = []
            for bbox, elem_type in zip(bboxes, element_types):
                if isinstance(bbox, (tuple, list)) and len(bbox) == 4:
                    x0, y0, x1, y1 = bbox
                    coverage = ((x1 - x0) * (y1 - y0)) / (img_width * img_height)
                    if coverage > 0.95:
                        continue
                filtered_bboxes.append(bbox)
                filtered_types.append(elem_type)

            if not filtered_bboxes:
                return None

            output_dir = self._upload_folder / 'editable_images' / image_id
            output_dir.mkdir(parents=True, exist_ok=True)

            logger.info(f"{' ' * depth}使用 {inpaint_provider.__class__.__name__} 进行重绘")

            result_img = inpaint_provider.inpaint_regions(
                image=img,
                bboxes=filtered_bboxes,
                types=filtered_types,
                expand_pixels=10,
                save_mask_path=str(output_dir / 'mask.png'),
                full_page_image=full_page_img,
                crop_box=crop_box
            )

            if result_img is None:
                return None

            output_path = output_dir / 'clean_background.png'
            result_img.save(str(output_path))

            return str(output_path)

        except Exception as e:
            logger.error(f"生成clean background失败: {e}", exc_info=True)
            return None
        finally:
            # Release the file handles opened above. This runs after the
            # result has been saved, so it is safe even if the provider
            # returned one of the input images.
            for handle in (img, full_page_img):
                if handle is not None:
                    handle.close()

    def _process_children(
        self,
        elements: List[EditableElement],
        current_image_path: str,
        depth: int,
        image_id: str,
        root_image_size: Tuple[int, int],
        current_image_size: Tuple[int, int],
        root_image_path: str
    ):
        """
        Recurse into qualifying child elements using a thread pool.

        Each selected element is cropped out of the current image and passed
        back through ``make_image_editable`` at ``depth + 1``. On success the
        element's ``children`` and ``inpainted_background_path`` are filled
        in; failures are logged per element and do not stop the others.
        """
        logger.info(f"{' ' * depth}递归处理子元素...")

        # Select the elements worth recursing into.
        elements_to_process = []
        for element in elements:
            if should_recurse_into_element(
                element=element,
                parent_image_size=current_image_size,
                min_image_size=self._min_image_size,
                min_image_area=self._min_image_area,
                max_child_coverage_ratio=self._max_child_coverage_ratio
            ):
                elements_to_process.append(element)

        if not elements_to_process:
            return

        from concurrent.futures import ThreadPoolExecutor, as_completed

        def process_single_element(element):
            """Process one child; returns (element, result, error)."""
            try:
                # Crop the child region out of the current image.
                child_image_path = crop_element_from_image(
                    source_image_path=current_image_path,
                    bbox=element.bbox
                )

                child_editable = self.make_image_editable(
                    image_path=child_image_path,
                    depth=depth + 1,
                    parent_id=image_id,
                    parent_bbox=element.bbox_global,
                    root_image_size=root_image_size,
                    element_type=element.element_type,
                    root_image_path=root_image_path
                )

                return element, child_editable, None
            except Exception as e:
                return element, None, e

        logger.info(f"{' ' * depth} 并行处理 {len(elements_to_process)} 个子元素...")

        # Cap the worker count at 8.
        max_workers = min(8, len(elements_to_process))
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {executor.submit(process_single_element, elem): elem
                       for elem in elements_to_process}

            for future in as_completed(futures):
                element, child_editable, error = future.result()

                # Compare against None: exception truthiness is not guaranteed.
                if error is not None:
                    logger.error(f"{' ' * depth} ✗ {element.element_id} 失败: {error}")
                else:
                    element.children = child_editable.elements
                    element.inpainted_background_path = child_editable.clean_background
                    logger.info(f"{' ' * depth} ✓ {element.element_id} 完成: {len(child_editable.elements)} 个子元素")
@dataclass
class TextStyleResult:
    """
    Visual attributes extracted from a text-region image.

    Note: font size is deliberately not part of this structure (per the
    original author's note, it is meant to be derived from the bbox by the
    PPTX builder rather than estimated from a cropped sub-image).
    """

    # Default font colour as RGB (0-255); also the fallback colour.
    font_color_rgb: Tuple[int, int, int] = (0, 0, 0)

    # Per-segment text and colour; lets one line carry several colours.
    # When non-empty, get_full_text() is built from these segments.
    colored_segments: List[ColoredSegment] = field(default_factory=list)

    # Bold flag
    is_bold: bool = False

    # Italic flag
    is_italic: bool = False

    # Underline flag
    is_underline: bool = False

    # Text alignment ('left', 'center', 'right', 'justify') or None
    text_alignment: Optional[str] = None

    # Confidence in the 0.0-1.0 range
    confidence: float = 1.0

    # Free-form extra metadata
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-friendly dict (colour as list, segments via their to_dict)."""
        result = asdict(self)
        # Tuples are turned into lists for JSON serialisation.
        result['font_color_rgb'] = list(self.font_color_rgb)
        # Segments use their own compact representation instead of asdict's.
        result['colored_segments'] = [
            seg.to_dict() if isinstance(seg, ColoredSegment) else seg
            for seg in self.colored_segments
        ]
        return result

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'TextStyleResult':
        """
        Build an instance from a dict such as the one produced by to_dict().

        The input mapping is not modified: conversions are applied to a
        shallow copy. Unknown keys still raise TypeError from the dataclass
        constructor.
        """
        # Work on a copy so the caller's dict (often a parsed JSON payload
        # that is reused or logged afterwards) keeps its original values.
        fields = dict(data)

        rgb = fields.get('font_color_rgb')
        if isinstance(rgb, list):
            fields['font_color_rgb'] = tuple(rgb)

        if 'colored_segments' in fields:
            fields['colored_segments'] = [
                ColoredSegment.from_dict(seg) if isinstance(seg, dict) else seg
                for seg in fields['colored_segments']
            ]

        return cls(**fields)

    def get_hex_color(self) -> str:
        """Return the default colour as a lowercase ``#rrggbb`` string."""
        r, g, b = self.font_color_rgb
        return f"#{r:02x}{g:02x}{b:02x}"

    def get_full_text(self) -> str:
        """Return the concatenated segment texts, or "" when there are no segments."""
        if self.colored_segments:
            return ''.join(seg.text for seg in self.colored_segments)
        return ""

    def has_multi_color(self) -> bool:
        """Return True when at least two segments have different colours."""
        if not self.colored_segments or len(self.colored_segments) <= 1:
            return False
        colors = set(seg.color_rgb for seg in self.colored_segments)
        return len(colors) > 1
def extract(
    self,
    image: Union[str, Image.Image],
    text_content: Optional[str] = None,
    **kwargs
) -> TextStyleResult:
    """
    Extract text style attributes from one text-region image.

    Args:
        image: file path or PIL image of the text region
        text_content: optional known text, inserted into the prompt as a hint
        **kwargs:
            thinking_budget: int passed through to the vision-model call,
                default 500

    Returns:
        The parsed TextStyleResult. On any exception the error is logged and
        a result with ``confidence=0.0`` and ``metadata['error']`` is returned
        instead of raising.
    """
    thinking_budget = kwargs.get('thinking_budget', 500)

    # Only an image opened here is ours to close; a caller-supplied PIL
    # image is left untouched.
    opened_here = None
    try:
        if isinstance(image, str):
            opened_here = Image.open(image)
            pil_image = opened_here
        else:
            pil_image = image

        # Build the optional content hint for the prompt.
        if text_content:
            content_hint = f'图片中的文字内容是: "{text_content}"'
        else:
            content_hint = ""

        if self.prompt_template:
            # A custom template must contain the {content_hint} placeholder.
            prompt = self.prompt_template.format(content_hint=content_hint)
        else:
            prompt = get_text_attribute_extraction_prompt(content_hint=content_hint)

        result_json = self._call_vision_model(pil_image, prompt, thinking_budget)

        return self._parse_result(result_json)

    except Exception as e:
        logger.error(f"CaptionModelTextAttributeExtractor提取失败: {e}", exc_info=True)
        return TextStyleResult(confidence=0.0, metadata={'error': str(e)})
    finally:
        # Release the file handle for path inputs; without this each call
        # with a path leaves an open handle until garbage collection.
        if opened_here is not None:
            opened_here.close()
@staticmethod
def _hex_to_rgb(hex_color: str) -> Tuple[int, int, int]:
    """
    Convert a hexadecimal colour string into an (R, G, B) tuple.

    Leading '#' characters are stripped and the 3-digit shorthand is expanded
    (each digit doubled). Anything that is not 6 characters long after that,
    or that cannot be parsed as base-16, yields black.

    Args:
        hex_color: colour such as "#FF6B6B", "FF6B6B" or "#FFF"

    Returns:
        (R, G, B) tuple; (0, 0, 0) for invalid input
    """
    black = (0, 0, 0)

    digits = hex_color.lstrip('#')

    # Shorthand form: "FFF" -> "FFFFFF"
    if len(digits) == 3:
        digits = ''.join(ch + ch for ch in digits)

    if len(digits) != 6:
        return black

    try:
        # One channel per two-character slice: red, green, blue.
        return tuple(int(digits[start:start + 2], 16) for start in (0, 2, 4))
    except ValueError:
        return black
colored_segments=colored_segments, is_bold=is_bold, is_italic=is_italic, is_underline=is_underline, text_alignment=text_alignment, confidence=0.9, # 模型返回的结果给予较高置信度 metadata={'source': 'caption_model', 'raw_response': result_json} ) except Exception as e: logger.error(f"解析结果失败: {e}") return TextStyleResult(confidence=0.0, metadata={'error': str(e)}) def extract_batch_with_full_image( self, full_image: Union[str, Image.Image], text_elements: List[Dict[str, Any]], **kwargs ) -> Dict[str, TextStyleResult]: """ 【新逻辑】使用全图一次性提取所有文本元素的样式属性 优势:模型可以看到全局上下文,提高分析准确性 Args: full_image: 完整的页面图片,可以是文件路径或PIL Image对象 text_elements: 文本元素列表,每个元素包含: - element_id: 元素唯一标识 - bbox: 边界框 [x0, y0, x1, y1] - content: 文字内容 **kwargs: - thinking_budget: int, 思考预算,默认1000 Returns: 字典,key为element_id,value为TextStyleResult """ import json import tempfile from services.prompts import get_batch_text_attribute_extraction_prompt thinking_budget = kwargs.get('thinking_budget', 1000) if not text_elements: return {} try: # 准备图片 if isinstance(full_image, str): pil_image = Image.open(full_image) tmp_path = full_image # 如果已经是路径,直接使用 need_cleanup = False else: pil_image = full_image # 保存临时图片文件 with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file: tmp_path = tmp_file.name pil_image.save(tmp_path) need_cleanup = True # 构建文本元素的 JSON 描述 elements_for_prompt = [] for elem in text_elements: elements_for_prompt.append({ 'element_id': elem['element_id'], 'bbox': elem['bbox'], 'content': elem['content'] }) text_elements_json = json.dumps(elements_for_prompt, ensure_ascii=False, indent=2) # 构建 prompt prompt = get_batch_text_attribute_extraction_prompt(text_elements_json) # 调用 ai_service.generate_json_with_image(带重试机制) try: result = self.ai_service.generate_json_with_image( prompt=prompt, image_path=tmp_path, thinking_budget=thinking_budget ) # 确保结果是列表 if isinstance(result, list): result_list = result elif isinstance(result, dict): # 如果返回的是字典,尝试获取列表 result_list = result.get('results', [result]) else: 
def _parse_batch_result(
    self,
    result_list: List[Dict[str, Any]],
    original_elements: List[Dict[str, Any]]
) -> Dict[str, TextStyleResult]:
    """
    Parse the model's batch response into per-element style results.

    Args:
        result_list: list returned by the model; each entry is expected to be
            a dict carrying ``element_id`` plus style attributes
        original_elements: the elements that were sent to the model; only
            their count is used here, for the summary log line

    Returns:
        Dict mapping element_id to TextStyleResult. Entries that are not
        dicts, lack an ``element_id``, or fail to parse are skipped.
    """
    results = {}

    for item in result_list:
        # A malformed response may contain strings or nested lists. Skip
        # those here: calling item.get() on them would raise, and the error
        # handler below also calls item.get().
        if not isinstance(item, dict):
            logger.warning(f"跳过非字典类型的样式结果: {type(item).__name__}")
            continue

        try:
            element_id = item.get('element_id')
            if not element_id:
                continue

            # Colour arrives as a hexadecimal string.
            font_color_hex = item.get('font_color', '#000000')
            if isinstance(font_color_hex, str):
                font_color_rgb = self._hex_to_rgb(font_color_hex)
            else:
                font_color_rgb = (0, 0, 0)

            # Boolean style flags.
            is_bold = bool(item.get('is_bold', False))
            is_italic = bool(item.get('is_italic', False))
            is_underline = bool(item.get('is_underline', False))

            # Unknown alignment values are discarded.
            text_alignment = item.get('text_alignment')
            if text_alignment not in ('left', 'center', 'right', 'justify', None):
                text_alignment = None

            results[element_id] = TextStyleResult(
                font_color_rgb=font_color_rgb,
                is_bold=is_bold,
                is_italic=is_italic,
                is_underline=is_underline,
                text_alignment=text_alignment,
                confidence=0.9,
                metadata={'source': 'batch_caption_model', 'raw_response': item}
            )

        except Exception as e:
            logger.warning(f"解析元素 {item.get('element_id', 'unknown')} 的样式失败: {e}")
            continue

    logger.info(f"批量解析完成: 成功 {len(results)}/{len(original_elements)} 个元素")
    return results
>>> registry = TextAttributeExtractorRegistry() >>> registry.register('text', caption_extractor) >>> registry.register('title', title_extractor) >>> registry.register_default(caption_extractor) >>> >>> extractor = registry.get_extractor('text') >>> extractor = registry.get_extractor('unknown_type') # 返回默认提取器 """ # 预定义的元素类型分组 TEXT_TYPES = {'text', 'title', 'paragraph', 'heading', 'header', 'footer', 'list'} TABLE_TEXT_TYPES = {'table_cell'} def __init__(self): """初始化注册表""" self._type_mapping: Dict[str, TextAttributeExtractor] = {} self._default_extractor: Optional[TextAttributeExtractor] = None def register(self, element_type: str, extractor: TextAttributeExtractor) -> 'TextAttributeExtractorRegistry': """ 注册元素类型到提取器的映射 Args: element_type: 元素类型(如 'text', 'title' 等) extractor: 对应的提取器实例 Returns: self,支持链式调用 """ self._type_mapping[element_type] = extractor logger.debug(f"注册文字属性提取器: {element_type} -> {extractor.__class__.__name__}") return self def register_types(self, element_types: List[str], extractor: TextAttributeExtractor) -> 'TextAttributeExtractorRegistry': """ 批量注册多个元素类型到同一个提取器 Args: element_types: 元素类型列表 extractor: 对应的提取器实例 Returns: self,支持链式调用 """ for t in element_types: self.register(t, extractor) return self def register_default(self, extractor: TextAttributeExtractor) -> 'TextAttributeExtractorRegistry': """ 注册默认提取器(当没有特定类型映射时使用) Args: extractor: 默认提取器实例 Returns: self,支持链式调用 """ self._default_extractor = extractor logger.debug(f"注册默认文字属性提取器: {extractor.__class__.__name__}") return self def get_extractor(self, element_type: Optional[str]) -> Optional[TextAttributeExtractor]: """ 根据元素类型获取对应的提取器 Args: element_type: 元素类型,None表示使用默认提取器 Returns: 对应的提取器,如果没有注册则返回默认提取器 """ if element_type is None: return self._default_extractor # 先查找精确匹配 if element_type in self._type_mapping: return self._type_mapping[element_type] # 返回默认提取器 return self._default_extractor def get_all_extractors(self) -> List[TextAttributeExtractor]: """ 获取所有已注册的提取器(去重) Returns: 提取器列表 """ 
extractors = list(set(self._type_mapping.values())) if self._default_extractor and self._default_extractor not in extractors: extractors.append(self._default_extractor) return extractors @classmethod def create_default( cls, caption_extractor: Optional[TextAttributeExtractor] = None ) -> 'TextAttributeExtractorRegistry': """ 创建默认配置的注册表 默认配置: - 所有文本类型 → CaptionModelTextAttributeExtractor - 其他类型 → 默认提取器 Args: caption_extractor: Caption Model提取器实例 Returns: 配置好的注册表实例 """ registry = cls() if not caption_extractor: logger.warning("创建TextAttributeExtractorRegistry时未提供任何extractor") return registry # 设置默认提取器 registry.register_default(caption_extractor) # 所有文本类型使用相同的提取器 registry.register_types(list(cls.TEXT_TYPES), caption_extractor) registry.register_types(list(cls.TABLE_TEXT_TYPES), caption_extractor) logger.info(f"创建默认TextAttributeExtractorRegistry: " f"默认提取器->{caption_extractor.__class__.__name__}") return registry ================================================ FILE: backend/services/inpainting_service.py ================================================ """ Inpainting 服务 提供基于多种 provider 的图像区域消除和背景重新生成功能 支持的 provider: - volcengine: 火山引擎 Inpainting - gemini: Google Gemini 2.5 Flash Image Preview """ import logging from typing import List, Tuple, Union, Optional from PIL import Image from services.ai_providers.image.volcengine_inpainting_provider import VolcengineInpaintingProvider from services.ai_providers.image.gemini_inpainting_provider import GeminiInpaintingProvider from utils.mask_utils import ( create_mask_from_bboxes, create_inverse_mask_from_bboxes, create_mask_from_image_and_bboxes, merge_overlapping_bboxes, visualize_mask_overlay ) from config import get_config logger = logging.getLogger(__name__) class InpaintingService: """ Inpainting 服务类 主要功能: 1. 从 bbox 生成掩码图像 2. 调用 inpainting provider 消除指定区域 3. 
提供便捷的背景重生成接口 支持的 provider: - volcengine: 火山引擎 Inpainting - gemini: Google Gemini 2.5 Flash Image Preview """ def __init__(self, provider=None, provider_type: str = "volcengine"): """ 初始化 Inpainting 服务 Args: provider: Inpainting 提供者实例,如果为 None 则从配置创建 provider_type: Provider 类型 ('volcengine' 或 'gemini') """ if provider is None: config = get_config() if provider_type == "gemini": # 使用 Gemini Inpainting Provider api_key = config.GOOGLE_API_KEY api_base = config.GOOGLE_API_BASE timeout = config.GENAI_TIMEOUT if not api_key: raise ValueError("Google API Key 未配置") self.provider = GeminiInpaintingProvider( api_key=api_key, api_base=api_base, timeout=timeout ) self.provider_type = "gemini" else: # 使用火山引擎 Inpainting Provider(默认) access_key = config.VOLCENGINE_ACCESS_KEY secret_key = config.VOLCENGINE_SECRET_KEY timeout = config.VOLCENGINE_INPAINTING_TIMEOUT if not access_key or not secret_key: raise ValueError("火山引擎 Access Key 和 Secret Key 未配置") self.provider = VolcengineInpaintingProvider( access_key=access_key, secret_key=secret_key, timeout=timeout ) self.provider_type = "volcengine" else: self.provider = provider self.provider_type = provider_type self.config = get_config() def remove_regions_by_bboxes( self, image: Image.Image, bboxes: List[Union[Tuple[int, int, int, int], dict]], expand_pixels: int = 5, merge_bboxes: bool = False, merge_threshold: int = 10, save_mask_path: Optional[str] = None, full_page_image: Optional[Image.Image] = None, crop_box: Optional[tuple] = None ) -> Optional[Image.Image]: """ 根据边界框列表消除图像中的指定区域 Args: image: 原始图像(PIL Image) bboxes: 边界框列表,支持以下格式: - (x1, y1, x2, y2) 元组 - {"x1": x1, "y1": y1, "x2": x2, "y2": y2} 字典 - {"x": x, "y": y, "width": w, "height": h} 字典 expand_pixels: 扩展像素数,让掩码区域略微扩大(默认5像素) merge_bboxes: 是否合并重叠或相邻的边界框(默认False) merge_threshold: 合并阈值,边界框距离小于此值时会合并(默认10像素) save_mask_path: Mask 保存路径(可选) full_page_image: 完整的 PPT 页面图像(仅用于 Gemini provider) crop_box: 裁剪框 (x0, y0, x1, y1),从完整页面结果中裁剪的区域(仅用于 Gemini provider) Returns: 处理后的图像,失败返回 
None """ try: logger.info(f"开始处理图像消除,原始 bbox 数量: {len(bboxes)}") # 合并重叠的边界框(如果启用) if merge_bboxes and len(bboxes) > 1: # 先标准化所有 bbox 格式 normalized_bboxes = [] for bbox in bboxes: if isinstance(bbox, dict): if 'x1' in bbox: normalized_bboxes.append((bbox['x1'], bbox['y1'], bbox['x2'], bbox['y2'])) elif 'x' in bbox: normalized_bboxes.append((bbox['x'], bbox['y'], bbox['x'] + bbox['width'], bbox['y'] + bbox['height'])) else: normalized_bboxes.append(tuple(bbox)) bboxes = merge_overlapping_bboxes(normalized_bboxes, merge_threshold) logger.info(f"合并后 bbox 数量: {len(bboxes)}") # 生成掩码图像 mask = create_mask_from_image_and_bboxes( image, bboxes, expand_pixels=expand_pixels ) logger.info(f"掩码图像已生成,尺寸: {mask.size}") # 保存mask图像(如果指定了路径) if save_mask_path: try: mask.save(save_mask_path) logger.info(f"📷 Mask图像已保存: {save_mask_path}") except Exception as e: logger.warning(f"⚠️ 保存mask图像失败: {e}") # 调用 inpainting 服务(已内置重试逻辑) result = self.provider.inpaint_image( original_image=image, mask_image=mask, full_page_image=full_page_image, crop_box=crop_box ) if result is not None: logger.info(f"图像消除成功,结果尺寸: {result.size}") else: logger.error("图像消除失败") return result except Exception as e: logger.error(f"消除区域失败: {str(e)}", exc_info=True) return None def regenerate_background( self, image: Image.Image, foreground_bboxes: List[Union[Tuple[int, int, int, int], dict]], expand_pixels: int = 5 ) -> Optional[Image.Image]: """ 重新生成背景(保留前景对象,消除其他区域) 这个方法使用反向掩码:保留 bbox 区域,消除其他所有区域 Args: image: 原始图像 foreground_bboxes: 前景对象的边界框列表(这些区域会被保留) expand_pixels: 收缩像素数(负数表示扩展),让前景边缘更自然 Returns: 处理后的图像,失败返回 None """ try: logger.info(f"开始重新生成背景,前景对象数量: {len(foreground_bboxes)}") # 生成反向掩码(保留前景,消除背景) mask = create_inverse_mask_from_bboxes( image.size, foreground_bboxes, expand_pixels=expand_pixels ) logger.info(f"反向掩码已生成,尺寸: {mask.size}") # 调用 inpainting 服务(已内置重试逻辑) result = self.provider.inpaint_image( original_image=image, mask_image=mask ) if result is not None: logger.info(f"背景重生成成功,结果尺寸: {result.size}") else: 
def get_inpainting_service(provider_type: str = None) -> InpaintingService:
    """
    Return the cached InpaintingService for a provider type, creating it on first use.

    One instance is kept per provider type in the module-level
    ``_inpainting_service_instances`` dict.

    Args:
        provider_type: Provider type ('volcengine', 'gemini'). When None, the
            value of the configuration attribute ``INPAINTING_PROVIDER`` is
            used, falling back to 'gemini' if that attribute is absent.

    Returns:
        The InpaintingService instance for the resolved provider type.
    """
    resolved_type = provider_type
    if resolved_type is None:
        # No explicit choice: take the configured provider, 'gemini' by default.
        resolved_type = getattr(get_config(), 'INPAINTING_PROVIDER', 'gemini')

    service = _inpainting_service_instances.get(resolved_type)
    if service is None:
        # First request for this provider type: build and remember it.
        service = InpaintingService(provider_type=resolved_type)
        _inpainting_service_instances[resolved_type] = service

    return service
def split_pdf_to_pages(pdf_path: str, output_dir: str) -> List[str]:
    """
    Write every page of a PDF to its own single-page PDF file.

    The output directory is created if needed. Files are named
    ``page_<n>.pdf`` with ``n`` starting at 1.

    Args:
        pdf_path: Path of the PDF to split.
        output_dir: Directory that receives the single-page PDFs.

    Returns:
        Paths of the written files, in page order.
    """
    os.makedirs(output_dir, exist_ok=True)

    source = PdfReader(pdf_path)
    written: List[str] = []

    for page_number, page in enumerate(source.pages, start=1):
        target = os.path.join(output_dir, f"page_{page_number}.pdf")

        # A fresh writer per page keeps each output file to exactly one page.
        single_page = PdfWriter()
        single_page.add_page(page)
        with open(target, "wb") as handle:
            single_page.write(handle)

        written.append(target)

    logger.info(f"Split PDF into {len(written)} pages: {pdf_path}")
    return written
def _build_prompt(prompt_text: str, reference_files_content=None, *, tag: str = '') -> str:
    """
    Assemble the final prompt by putting the reference-files section first.

    Args:
        prompt_text: The prompt body.
        reference_files_content: Reference file entries passed on to
            ``_format_reference_files_xml``; its output is placed in front of
            ``prompt_text``.
        tag: Label used in the debug log line. When empty, nothing is logged.

    Returns:
        The formatted reference-files section followed by ``prompt_text``.
    """
    reference_section = _format_reference_files_xml(reference_files_content)
    final = reference_section + prompt_text

    if not tag:
        return final

    logger.debug(f"[{tag}] Final prompt:\n{final}")
    return final
'descriptions' and project_context.description_text: return f"用户提供的描述:\n{project_context.description_text}" return project_context.idea_prompt or "" def _get_original_input_labeled(project_context: 'ProjectContext') -> str: """Build labeled original input section for refinement prompts.""" text = "\n原始输入信息:\n" if project_context.creation_type == 'idea' and project_context.idea_prompt: text += f"- PPT构想:{project_context.idea_prompt}\n" elif project_context.creation_type == 'outline' and project_context.outline_text: text += f"- 用户提供的大纲文本:\n{project_context.outline_text}\n" elif project_context.creation_type == 'descriptions' and project_context.description_text: text += f"- 用户提供的页面描述文本:\n{project_context.description_text}\n" elif project_context.idea_prompt: text += f"- 用户输入:{project_context.idea_prompt}\n" return text def _get_previous_requirements_text(previous_requirements: Optional[List[str]]) -> str: """Format previous modification history.""" if not previous_requirements: return "" prev_list = "\n".join([f"- {req}" for req in previous_requirements]) return f"\n\n之前用户提出的修改要求:\n{prev_list}\n" def _format_extra_field_instructions(extra_fields: list | None) -> str: """将额外字段列表格式化为 prompt 中的输出要求。""" if not extra_fields: return '' parts = [f'{f}:[关于{f}的建议]' for f in extra_fields] return '\n'.join([''] + parts) # 前导换行 def _format_reference_files_xml(reference_files_content: Optional[List[Dict[str, str]]]) -> str: """Format reference files content as XML structure.""" if not reference_files_content: return "" xml_parts = [""] for file_info in reference_files_content: filename = file_info.get('filename', 'unknown') content = file_info.get('content', '') xml_parts.append(f' ') xml_parts.append(' ') xml_parts.append(content) xml_parts.append(' ') xml_parts.append(' ') xml_parts.append('') xml_parts.append('') # Empty line after XML return '\n'.join(xml_parts) def _format_requirements(requirements: str, context: str = "outline") -> str: """格式化用户提供的生成要求,返回可直接拼接到 prompt 
def get_language_instruction(language: str = None) -> str:
    """
    Return the output-language instruction sentence for a language code.

    Args:
        language: Language key of ``LANGUAGE_CONFIG``. When empty or None the
            value from ``get_default_output_language()`` is used. Unknown
            keys fall back to the 'zh' entry.

    Returns:
        The ``'instruction'`` text of the selected language entry.
    """
    selected = language or get_default_output_language()
    entry = LANGUAGE_CONFIG.get(selected, LANGUAGE_CONFIG['zh'])
    return entry['instruction']
Use parts when the PPT has clear major sections. Unless otherwise specified, the first page should be kept simplest, containing only the title, subtitle, and presenter information. The user's request: {idea_prompt}. {_format_requirements(project_context.outline_requirements)}Now generate the outline, don't include any other text. {get_language_instruction(language)} """) return _build_prompt(prompt, project_context.reference_files_content, tag='get_outline_generation_prompt') def get_outline_generation_prompt_markdown(project_context: 'ProjectContext', language: str = None) -> str: """生成 PPT 大纲的 prompt(Markdown 输出,用于流式生成)""" idea_prompt = project_context.idea_prompt or "" prompt = (f"""\ You are a helpful assistant that generates an outline for a ppt. You can organize the content in two ways: 1. Simple format (for short PPTs without major sections): ## title1 - point1 - point2 ## title2 - point1 - point2 2. Part-based format (for longer PPTs with major sections): # Part 1: Introduction ## Welcome - point1 - point2 ## Overview - point1 - point2 # Part 2: Main Content ## Topic 1 - point1 - point2 ## Topic 2 - point1 - point2 Constraints: - Title should not contain page number. - Choose the format that best fits the content. Use parts when the PPT has clear major sections. - Unless otherwise specified, the first page should be kept simplest, containing only the title, subtitle, and presenter information. The user's request: {idea_prompt}. {_format_requirements(project_context.outline_requirements)}Now generate the outline, strictly follow the format provided above, don't include any other text. Output `` on the last line when finished. 
{get_language_instruction(language)} """) return _build_prompt(prompt, project_context.reference_files_content, tag='get_outline_generation_prompt_markdown') def get_outline_parsing_prompt(project_context: 'ProjectContext', language: str = None) -> str: """解析用户提供的大纲文本的 prompt(JSON 输出)""" outline_text = project_context.outline_text or "" prompt = (f"""\ You are a helpful assistant that parses a user-provided PPT outline text into a structured format. The user has provided the following outline text: {outline_text} Your task is to analyze this text and convert it into a structured JSON format WITHOUT modifying any of the original text content. You should only reorganize and structure the existing content, preserving all titles, points, and text exactly as provided. You can organize the content in two ways: {_OUTLINE_JSON_FORMAT} Important rules: - DO NOT modify, rewrite, or change any text from the original outline - DO NOT add new content that wasn't in the original text - DO NOT remove any content from the original text - Only reorganize the existing content into the structured format - Preserve all titles, bullet points, and text exactly as they appear - If the text has clear sections/parts, use the part-based format - Extract titles and points from the original text, keeping them exactly as written Now parse the outline text above into the structured format. Return only the JSON, don't include any other text. {get_language_instruction(language)} """) return _build_prompt(prompt, project_context.reference_files_content, tag='get_outline_parsing_prompt') def get_outline_parsing_prompt_markdown(project_context: 'ProjectContext', language: str = None) -> str: """解析用户提供的大纲文本的 prompt(Markdown 输出,用于流式生成)""" outline_text = project_context.outline_text or "" prompt = (f"""\ You are a helpful assistant that parses a user-provided PPT outline text into a structured Markdown format. 
The user has provided the following outline text: {outline_text} Your task is to analyze this text and convert it into a structured Markdown outline WITHOUT modifying any of the original text content. Output rules: - Use `# Part Name` for major sections (only if the text has clear parts/chapters) - Use `## Page Title` for each page - Use `- ` bullet points for key points under each page - Preserve all titles, points, and text exactly as provided - Do NOT wrap in code blocks or add any extra text Now parse the outline text above into the Markdown format. Output `` on the last line when finished. {get_language_instruction(language)} """) return _build_prompt(prompt, project_context.reference_files_content, tag='get_outline_parsing_prompt_markdown') def get_description_to_outline_prompt(project_context: 'ProjectContext', language: str = None) -> str: """从描述文本解析出大纲的 prompt(JSON 输出)""" description_text = project_context.description_text or "" prompt = (f"""\ You are a helpful assistant that analyzes a user-provided PPT description text and extracts the outline structure from it. The user has provided the following description text: {description_text} Your task is to analyze this text and extract the outline structure (titles and key points) for each page. You should identify: 1. How many pages are described 2. The title for each page 3. The key points or content structure for each page You can organize the content in two ways: {_OUTLINE_JSON_FORMAT} Important rules: - Extract the outline structure from the description text - Identify page titles and key points - If the text has clear sections/parts, use the part-based format - Preserve the logical structure and organization from the original text - The points should be concise summaries of the main content for each page Now extract the outline structure from the description text above. Return only the JSON, don't include any other text. 
{get_language_instruction(language)} """) return _build_prompt(prompt, project_context.reference_files_content, tag='get_description_to_outline_prompt') def get_description_to_outline_prompt_markdown(project_context: 'ProjectContext', language: str = None) -> str: """从描述文本解析出大纲的 prompt(Markdown 输出,用于流式生成)""" description_text = project_context.description_text or "" prompt = (f"""\ You are a helpful assistant that analyzes a user-provided PPT description text and extracts the outline structure. The user has provided the following description text: {description_text} Your task is to extract the outline structure (titles and key points) for each page. Output rules: - Use `# Part Name` for major sections (only if the text has clear parts/chapters) - Use `## Page Title` for each page - Use `- ` bullet points for key points under each page - Preserve the logical structure from the original text - Do NOT wrap in code blocks or add any extra text Now extract the outline structure from the description text above. Output `` on the last line when finished. {get_language_instruction(language)} """) return _build_prompt(prompt, project_context.reference_files_content, tag='get_description_to_outline_prompt_markdown') def get_outline_refinement_prompt(current_outline: List[Dict], user_requirement: str, project_context: 'ProjectContext', previous_requirements: Optional[List[str]] = None, language: str = None) -> str: """根据用户要求修改已有大纲的 prompt""" if not current_outline or len(current_outline) == 0: outline_text = "(当前没有内容)" else: outline_text = json.dumps(current_outline, ensure_ascii=False, indent=2) prompt = (f"""\ You are a helpful assistant that modifies PPT outlines based on user requirements. 
{_get_original_input_labeled(project_context)} 当前的 PPT 大纲结构如下: {outline_text} {_get_previous_requirements_text(previous_requirements)} **用户现在提出新的要求:{user_requirement}** 请根据用户的要求修改和调整大纲。你可以: - 添加、删除或重新排列页面 - 修改页面标题和要点 - 调整页面的组织结构 - 添加或删除章节(part) - 合并或拆分页面 - 根据用户要求进行任何合理的调整 - 如果当前没有内容,请根据用户要求和原始输入信息创建新的大纲 输出格式可以选择: 1. 简单格式(适用于没有主要章节的短 PPT): [{{"title": "title1", "points": ["point1", "point2"]}}, {{"title": "title2", "points": ["point1", "point2"]}}] 2. 基于章节的格式(适用于有明确主要章节的长 PPT): [ {{ "part": "第一部分:引言", "pages": [ {{"title": "欢迎", "points": ["point1", "point2"]}}, {{"title": "概述", "points": ["point1", "point2"]}} ] }}, {{ "part": "第二部分:主要内容", "pages": [ {{"title": "主题1", "points": ["point1", "point2"]}}, {{"title": "主题2", "points": ["point1", "point2"]}} ] }} ] 选择最适合内容的格式。当 PPT 有清晰的主要章节时使用章节格式。 现在请根据用户要求修改大纲,只输出 JSON 格式的大纲,不要包含其他文字。 {get_language_instruction(language)} """) return _build_prompt(prompt, project_context.reference_files_content, tag='get_outline_refinement_prompt') # ═══════════════════════════════════════════════════════════════════════════════ # 3. 
描述 Prompts — 单页、流式、拆分、细化描述 # ═══════════════════════════════════════════════════════════════════════════════ def get_page_description_prompt(project_context: 'ProjectContext', outline: list, page_outline: dict, page_index: int, part_info: str = "", language: str = None, detail_level: str = "default", extra_fields: list = None) -> str: """生成单个页面描述的 prompt""" original_input = _get_original_input(project_context) # 单页版使用简短的 concise 描述(与流式版略有不同) detail_level_specs = { 'concise': '文字极致地压缩和精简', 'default': '清晰明了,每条要点控制在15-20字以内, 避免冗长的句子和复杂的表述', 'detailed': '忠于原文的基础上做到内容详实,逻辑清晰。', } prompt = (f"""\ 我们正在为PPT的每一页生成内容描述。 用户的原始需求是:\n{original_input}\n 我们已经有了完整的大纲:\n{outline}\n{part_info} {_format_requirements(project_context.description_requirements, "description")}现在请为第 {page_index} 页生成描述: {page_outline} {"**除非特殊要求,第一页的内容需要保持极简,只放标题副标题以及演讲人等(输出到标题后), 不添加任何素材。**" if page_index == 1 else ""} ## 重要提示 生成的"页面文字"部分会直接渲染到PPT页面上,因此请务必不要包含任何额外的说明性文字或注释。 ## 输出格式 页面文字: [此处使用markdown直接放置正文文字, 细致程度要求:{detail_level_specs[detail_level]}\n\n, 可包含latex公式、表格等内容, 不要重复添加] 图片素材: [如果文件中存在图片请积极添加; 否则忽略图片素材字段] {_format_extra_field_instructions(extra_fields)} ## 关于图片 如果参考文件中包含以 /files/ 开头的本地文件URL图片(例如 /files/mineru/xxx/image.png),请将这些图片以markdown格式输出,例如:![图片描述](/files/mineru/xxx/image.png)。这些图片会被包含在PPT页面中。 {get_language_instruction(language)} """) return _build_prompt(prompt, project_context.reference_files_content, tag='get_page_description_prompt') def get_all_descriptions_stream_prompt(project_context: 'ProjectContext', outline: list, flat_pages: list, language: str = None, detail_level: str = "default", extra_fields: list = None) -> str: """一次性生成所有页面描述的 prompt(用于流式生成)""" original_input = _get_original_input(project_context) # 构建页面大纲列表 outline_lines = [] for i, page in enumerate(flat_pages): part_str = f" [章节: {page['part']}]" if page.get('part') else "" points_str = ", ".join(page.get('points', [])) outline_lines.append(f"第 {i + 1} 页:{page.get('title', '')}{part_str}\n 要点:{points_str}") 
pages_outline_text = "\n".join(outline_lines) prompt = (f"""\ 我们正在为PPT的每一页生成内容描述。 用户的原始需求是:\n{original_input}\n 完整大纲如下: {pages_outline_text} {_format_requirements(project_context.description_requirements, "description")}请为每一页依次生成描述。先输出 `` 标记开始,然后逐页输出内容,每页用 `` 结束,全部完成后输出 ``。 ## 重要提示 - 生成的页面文字会直接渲染到PPT页面上,请务必不要包含任何额外的说明性文字或注释。 - **第一页(封面页)保持极简**,只放标题、副标题、演讲人等信息,不添加任何素材。 - 细致程度要求:{DETAIL_LEVEL_SPECS[detail_level]} ## 输出格式 每页默认包含"页面文字"和"图片素材"两个部分。图片素材用于引用参考文件中的图片(以 /files/ 开头的本地路径),如果参考文件中没有相关图片则省略该部分。 ``` 页面文字: [第1页文字内容,可包含标题、副标题、要点、latex公式、表格等,根据实际需求选择,避免堆砌和重复] 图片素材: [如果参考文件中存在相关图片,以markdown格式引用,如 ![描述](/files/xxx/image.png);否则省略此部分。如果用户上传了图片素材请积极地添加] {_format_extra_field_instructions(extra_fields)} 页面文字: [第2页文字内容] 图片素材: [同上] {_format_extra_field_instructions(extra_fields)} ... ``` 现在请开始生成,严格按照上述格式输出。 {get_language_instruction(language)} """) return _build_prompt(prompt, project_context.reference_files_content, tag='get_all_descriptions_stream_prompt') def get_description_split_prompt(project_context: 'ProjectContext', outline: List[Dict], language: str = None) -> str: """从描述文本切分出每页描述的 prompt""" outline_json = json.dumps(outline, ensure_ascii=False, indent=2) description_text = project_context.description_text or "" prompt = (f"""\ You are a helpful assistant that splits a complete PPT description text into individual page descriptions. The user has provided a complete description text: {description_text} We have already extracted the outline structure: {outline_json} Your task is to split the description text into individual page descriptions based on the outline structure. For each page in the outline, extract the corresponding description from the original text. Return a JSON array where each element corresponds to a page in the outline (in the same order). Each element should be a string containing the page description in the following format: 页面标题:[页面标题] 页面文字: - [要点1] - [要点2] ... 
其他页面素材(如果有排版、风格、素材等细节) Example output format: [ "页面标题:人工智能的诞生\\n页面文字:\\n- 1950 年,图灵提出"图灵测试"\\n- 奠定了AI的理论基础\\n\\n其他页面素材:\\n排版:标题居中,大字号\\n风格:科技感蓝色背景", "页面标题:AI 的发展历程\\n页面文字:\\n- 1950年代:符号主义...", ... ] Important rules: - Split the description text according to the outline structure - Each page description should match the corresponding page in the outline - Preserve all important content from the original text, including layout details (排版细节), style requirements (风格要求), material specifications (素材说明), and any other design requirements - If the user described layout, style, or materials for a page, include them in the "其他页面素材" section - Keep the format consistent with the example above - If a page in the outline doesn't have a clear description in the text, create a reasonable description based on the outline Now split the description text into individual page descriptions. Return only the JSON array, don't include any other text. {get_language_instruction(language)} """) logger.debug(f"[get_description_split_prompt] Final prompt:\n{prompt}") return prompt def get_descriptions_refinement_prompt(current_descriptions: List[Dict], user_requirement: str, project_context: 'ProjectContext', outline: List[Dict] = None, previous_requirements: Optional[List[str]] = None, language: str = None) -> str: """根据用户要求修改已有页面描述的 prompt""" # 构建大纲文本 outline_text = "" if outline: outline_json = json.dumps(outline, ensure_ascii=False, indent=2) outline_text = f"\n\n完整的 PPT 大纲:\n{outline_json}\n" # 构建所有页面描述的汇总 all_descriptions_text = "当前所有页面的描述:\n\n" has_any_description = False for desc in current_descriptions: page_num = desc.get('index', 0) + 1 title = desc.get('title', '未命名') content = desc.get('description_content', '') if isinstance(content, dict): content = content.get('text', '') if content: has_any_description = True all_descriptions_text += f"--- 第 {page_num} 页:{title} ---\n{content}\n\n" else: all_descriptions_text += f"--- 第 {page_num} 页:{title} ---\n(当前没有内容)\n\n" if not 
has_any_description: all_descriptions_text = "当前所有页面的描述:\n\n(当前没有内容,需要基于大纲生成新的描述)\n\n" prompt = (f"""\ You are a helpful assistant that modifies PPT page descriptions based on user requirements. {_get_original_input_labeled(project_context)}{outline_text} {all_descriptions_text} {_get_previous_requirements_text(previous_requirements)} **用户现在提出新的要求:{user_requirement}** 请根据用户的要求修改和调整所有页面的描述。你可以: - 修改页面标题和内容 - 调整页面文字的详细程度 - 添加或删除要点 - 调整描述的结构和表达 - 确保所有页面描述都符合用户的要求 - 如果当前没有内容,请根据大纲和用户要求创建新的描述 请为每个页面生成修改后的描述,格式如下: 页面标题:[页面标题] 页面文字: - [要点1] - [要点2] ... 其他页面素材(如果有请加上,包括markdown图片链接等) 提示:如果参考文件中包含以 /files/ 开头的本地文件URL图片(例如 /files/mineru/xxx/image.png),请将这些图片以markdown格式输出,例如:![图片描述](/files/mineru/xxx/image.png),而不是作为普通文本。 请返回一个 JSON 数组,每个元素是一个字符串,对应每个页面的修改后描述(按页面顺序)。 示例输出格式: [ "页面标题:人工智能的诞生\\n页面文字:\\n- 1950 年,图灵提出\\"图灵测试\\"...", "页面标题:AI 的发展历程\\n页面文字:\\n- 1950年代:符号主义...", ... ] 现在请根据用户要求修改所有页面描述,只输出 JSON 数组,不要包含其他文字。 {get_language_instruction(language)} """) return _build_prompt(prompt, project_context.reference_files_content, tag='get_descriptions_refinement_prompt') # ═══════════════════════════════════════════════════════════════════════════════ # 4. 
图片生成 Prompts — 文生图、图片编辑 # ═══════════════════════════════════════════════════════════════════════════════ def get_image_generation_prompt(page_desc: str, outline_text: str, current_section: str, has_material_images: bool = False, extra_requirements: str = None, language: str = None, has_template: bool = True, page_index: int = 1, aspect_ratio: str = "16:9") -> str: """生成图片生成 prompt""" material_images_note = "" if has_material_images: material_images_note = ( "\n\n提示:" + ("除了模板参考图片(用于风格参考)外,还提供了额外的素材图片。" if has_template else "用户提供了额外的素材图片。") + "这些素材图片是可供挑选和使用的元素,你可以从这些素材图片中选择合适的图片、图标、图表或其他视觉元素" "直接整合到生成的PPT页面中。请根据页面内容的需要,智能地选择和组合这些素材图片中的元素。" ) extra_req_text = "" if extra_requirements and extra_requirements.strip(): extra_req_text = f"\n\n额外要求(请务必遵循):\n{extra_requirements}\n" template_style_guideline = "- 配色和设计语言和模板图片严格相似。" if has_template else "- 严格按照风格描述进行设计。" forbidden_template_text_guidline = "- 只参考风格设计,禁止出现模板中的文字。\n" if has_template else "" prompt = (f"""\ 你是一位专家级UI UX演示设计师,专注于生成设计良好的PPT页面。 当前PPT页面的页面描述如下: {page_desc} - 要求文字清晰锐利, 画面为4K分辨率,{aspect_ratio}比例。 {template_style_guideline} - 根据内容和要求自动设计最完美的构图,不重不漏地渲染"页面文字"段落中的文本。 - 如非必要,禁止出现 markdown 格式符号(如 # 和 * 等)。 {forbidden_template_text_guidline} {get_ppt_language_instruction(language)} {material_images_note}{extra_req_text} {"**注意:当前页面为ppt的封面页,请你采用专业的封面设计美学技巧,务必凸显出页面标题,分清主次,确保一下就能抓住观众的注意力。**" if page_index == 1 else ""} """) logger.debug(f"[get_image_generation_prompt] Final prompt:\n{prompt}") return prompt def get_image_edit_prompt(edit_instruction: str, original_description: str = None) -> str: """生成图片编辑 prompt""" if original_description: if "其他页面素材" in original_description: original_description = original_description.split("其他页面素材")[0].strip() prompt = (f"""\ 该PPT页面的原始页面描述为: {original_description} 现在,根据以下指令修改这张PPT页面:{edit_instruction} 要求维持原有的文字内容和设计风格,只按照指令进行修改。提供的参考图中既有新素材,也有用户手动框选出的区域,请你根据原图和参考图的关系智能判断用户意图。 """) else: prompt = 
f"根据以下指令修改这张PPT页面:{edit_instruction}\n保持原有的内容结构和设计风格,只按照指令进行修改。提供的参考图中既有新素材,也有用户手动框选出的区域,请你根据原图和参考图的关系智能判断用户意图。" logger.debug(f"[get_image_edit_prompt] Final prompt:\n{prompt}") return prompt # ═══════════════════════════════════════════════════════════════════════════════ # 5. 图片处理 Prompts — 背景提取、画质修复 # ═══════════════════════════════════════════════════════════════════════════════ def get_clean_background_prompt() -> str: """生成纯背景图的 prompt(去除文字和插画)""" prompt = """\ 你是一位专业的图片文字&图片擦除专家。你的任务是从原始图片中移除文字和配图,输出一张无任何文字和图表内容、干净纯净的底板图。 - 彻底移除页面中的所有文字、插画、图表。必须确保所有文字都被完全去除。 - 保持原背景设计的完整性(包括渐变、纹理、图案、线条、色块等)。保留原图的文本框和色块。 - 对于被前景元素遮挡的背景区域,要智能填补,使背景保持无缝和完整,就像被移除的元素从来没有出现过。 - 输出图片的尺寸、风格、配色必须和原图完全一致。 - 请勿新增任何元素。 注意,**任意位置的, 所有的**文字和图表都应该被彻底移除,**输出不应该包含任何文字和图表。** """ logger.debug(f"[get_clean_background_prompt] Final prompt:\n{prompt}") return prompt def get_quality_enhancement_prompt(inpainted_regions: list = None) -> str: """生成画质提升的 prompt(用于百度图像修复后的画质修复)""" regions_info = "" if inpainted_regions and len(inpainted_regions) > 0: regions_json = json.dumps(inpainted_regions, ensure_ascii=False, indent=2) regions_info = f""" 以下是被抹除工具处理过的具体区域(共 {len(inpainted_regions)} 个矩形区域),请重点修复这些位置: ```json {regions_json} ``` 坐标说明(所有数值都是相对于图片宽高的百分比,范围0-100%): - left: 区域左边缘距离图片左边缘的百分比 - top: 区域上边缘距离图片上边缘的百分比 - right: 区域右边缘距离图片左边缘的百分比 - bottom: 区域下边缘距离图片上边缘的百分比 - width_percent: 区域宽度占图片宽度的百分比 - height_percent: 区域高度占图片高度的百分比 例如:left=10 表示区域从图片左侧10%的位置开始。 """ prompt = f"""\ 你是一位专业的图像修复专家。这张ppt页面图片刚刚经过了文字/对象抹除操作,抹除工具在指定区域留下了一些修复痕迹,包括: - 色块不均匀、颜色不连贯 - 模糊的斑块或涂抹痕迹 - 与周围背景不协调的区域,比如不和谐的渐变色块 - 可能的纹理断裂或图案不连续 {regions_info} 你的任务是修复这些抹除痕迹,让图片看起来像从未有过对象抹除操作一样自然。 要求: - **重点修复上述标注的区域**:这些区域刚刚经过抹除处理,需要让它们与周围背景完美融合 - 保持纹理、颜色、图案的连续性 - 提升整体画质,消除模糊、噪点、伪影 - 保持图片的原始构图、布局、色调风格 - 禁止添加任何文字、图表、插画、图案、边框等元素 - 除了上述区域,其他区域不要做任何修改,保持和原图像素级别地一致。 - 输出图片的尺寸必须与原图一致 请输出修复后的高清ppt页面背景图片,不要遗漏修复任何一个被涂抹的区域。 """ return prompt # ═══════════════════════════════════════════════════════════════════════════════ # 6. 
内容提取 Prompts — 文字属性、页面内容、排版分析、风格提取 # ═══════════════════════════════════════════════════════════════════════════════ def get_text_attribute_extraction_prompt(content_hint: str = "") -> str: """生成文字属性提取的 prompt(提取文字内容、颜色、公式等信息)""" prompt = """你的任务是精确识别这张图片中的文字内容和样式,返回JSON格式的结果。 {content_hint} ## 核心任务 请仔细观察图片,精确识别: 1. **文字内容** - 输出你实际看到的文字符号。 2. **颜色** - 每个字/词的实际颜色 3. **空格** - 精确识别文本中空格的位置和数量 4. **公式** - 如果是数学公式,输出 LaTeX 格式 ## 注意事项 - **空格识别**:必须精确还原空格数量,多个连续空格要完整保留,不要合并或省略 - **颜色分割**:一行文字可能有多种颜色,按颜色分割成片段,一般来说只有两种颜色。 - **公式识别**:如果片段是数学公式,设置 is_latex=true 并用 LaTeX 格式输出 - **相邻合并**:相同颜色的相邻普通文字应合并为一个片段 ## 输出格式 - colored_segments: 文字片段数组,每个片段包含: - text: 文字内容(公式时为 LaTeX 格式,如 "x^2"、"\\sum_{{i=1}}^n") - color: 颜色,十六进制格式 "#RRGGBB" - is_latex: 布尔值,true 表示这是一个 LaTeX 公式片段(可选,默认 false) 只返回JSON对象,不要包含任何其他文字。 示例输出: ```json {{ "colored_segments": [ {{"text": "· 创新合成", "color": "#000000"}}, {{"text": "1827个任务环境", "color": "#26397A"}}, {{"text": "与", "color": "#000000"}}, {{"text": "8.5万提示词", "color": "#26397A"}}, {{"text": "突破数据瓶颈", "color": "#000000"}}, {{"text": "x^2 + y^2 = z^2", "color": "#FF0000", "is_latex": true}} ] }} ``` """.format(content_hint=content_hint) return prompt def get_batch_text_attribute_extraction_prompt(text_elements_json: str) -> str: """生成批量文字属性提取的 prompt(给模型全图 + 所有文本元素的 bbox)""" prompt = f"""你是一位专业的 PPT/文档排版分析专家。请分析这张图片中所有标注的文字区域的样式属性。 我已经从图片中提取了以下文字元素及其位置信息: ```json {text_elements_json} ``` 请仔细观察图片,对比每个文字区域在图片中的实际视觉效果,为每个元素分析以下属性: 1. **font_color**: 字体颜色的十六进制值,格式为 "#RRGGBB" - 请仔细观察文字的实际颜色,不要只返回黑色 - 常见颜色如:白色 "#FFFFFF"、蓝色 "#0066CC"、红色 "#FF0000" 等 2. **is_bold**: 是否为粗体 (true/false) - 观察笔画粗细,标题通常是粗体 3. **is_italic**: 是否为斜体 (true/false) 4. **is_underline**: 是否有下划线 (true/false) 5. 
**text_alignment**: 文字对齐方式 - "left": 左对齐 - "center": 居中对齐 - "right": 右对齐 - "justify": 两端对齐 - 如果无法判断,根据文字在其区域内的位置推测 请返回一个 JSON 数组,数组中每个对象对应输入的一个元素(按相同顺序),包含以下字段: - element_id: 与输入相同的元素ID - text_content: 文字内容 - font_color: 颜色十六进制值 - is_bold: 布尔值 - is_italic: 布尔值 - is_underline: 布尔值 - text_alignment: 对齐方式字符串 只返回 JSON 数组,不要包含其他文字: ```json [ {{ "element_id": "xxx", "text_content": "文字内容", "font_color": "#RRGGBB", "is_bold": true/false, "is_italic": true/false, "is_underline": true/false, "text_alignment": "对齐方式" }}, ... ] ``` """ return prompt def get_ppt_page_content_extraction_prompt(markdown_text: str, language: str = None) -> str: """从 fileparser 解析出的 markdown 文本中提取页面内容(title, points, description)""" prompt = f"""\ You are a helpful assistant that extracts structured PPT page content from parsed document text. The following markdown text was extracted from a single PPT slide: {markdown_text} Your task is to extract the following structured information from this slide: 1. **title**: The main title/heading of the slide 2. **points**: A list of key bullet points or content items on the slide 3. **description**: A complete page description suitable for regenerating this slide, following this format: 页面标题:[title] 页面文字: - [point 1] - [point 2] ... 其他页面素材(如果有图表、表格、公式等描述,保留原文中的markdown图片完整形式) Rules: - Extract the title faithfully from the first heading in the markdown. 
Do NOT invent or rephrase it - Points must be extracted verbatim from the slide content, in their original order - In the description, 页面标题 and 页面文字 must be copied verbatim from the original text (punctuation may be normalized, but wording must be identical) - The description should capture ALL content on the slide including text, data, and visual element descriptions - If there are tables, charts, or formulas, describe them in the description under "其他页面素材" - Preserve the original language of the content Return a JSON object with exactly these three fields: "title", "points" (array of strings), "description" (string). Return only the JSON, no other text. {get_language_instruction(language)} """ logger.debug(f"[get_ppt_page_content_extraction_prompt] Final prompt:\n{prompt}") return prompt def get_layout_caption_prompt() -> str: """描述 PPT 页面的排版布局(给 caption model 用)""" prompt = """\ You are a professional PPT layout analyst. Describe the visual layout and composition of this PPT slide image in detail. Focus on: 1. **Overall layout**: How elements are arranged (e.g., title at top, content in two columns, image on the right) 2. **Text placement**: Where text blocks are positioned, their relative sizes, alignment 3. **Visual elements**: Position and size of images, charts, icons, decorative elements 4. **Spacing and proportions**: How space is distributed between elements Output a concise layout description in Chinese that can be used to recreate a similar layout. Format: 排版布局: - 整体结构:[描述] - 标题位置:[描述] - 内容区域:[描述] - 视觉元素:[描述] Only describe the layout and spatial arrangement. Do not describe colors, text content, or style. """ logger.debug(f"[get_layout_caption_prompt] Final prompt:\n{prompt}") return prompt def get_style_extraction_prompt() -> str: """从图片中提取风格描述(通用,可复用于所有创建模式)""" prompt = """\ You are a professional PPT design analyst. Analyze this image and extract a detailed style description that can be used to generate PPT slides with a similar visual style. 
Focus on: 1. **Color palette**: Primary colors, secondary colors, accent colors, background colors 2. **Typography style**: Font style impression (serif/sans-serif, weight, size hierarchy) 3. **Design elements**: Decorative patterns, shapes, icons style, borders, shadows 4. **Overall mood**: Professional, playful, minimalist, corporate, creative, etc. 5. **Layout tendencies**: How content is typically arranged, spacing preferences Output a concise style description in Chinese that can be directly used as a style prompt for PPT generation. Write it as a single paragraph, not a list. Example: "采用深蓝色渐变背景,搭配白色和金色文字。整体风格简约商务,使用无衬线字体,标题加粗突出。页面装饰以几何线条和半透明色块为主,配色统一协调。内容区域留白充足,视觉层次分明。" Only output the style description text, no other content. """ logger.debug(f"[get_style_extraction_prompt] Final prompt:\n{prompt}") return prompt ================================================ FILE: backend/services/task_manager.py ================================================ """ Task Manager - handles background tasks using ThreadPoolExecutor No need for Celery or Redis, uses in-memory task tracking """ import logging import os import threading from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Callable, List, Dict, Any, Optional from datetime import datetime from sqlalchemy import func from PIL import Image from models import db, Task, Page, Material, PageImageVersion from utils import get_filtered_pages from utils.image_utils import check_image_resolution def _get_image_prompt_field_names() -> set | None: """读取设置中允许进入文生图 prompt 的额外字段名。返回 None 表示全部允许。""" try: from models import Settings settings = Settings.get_settings() if settings.image_prompt_extra_fields is None: return None # 未配置 → 全部允许 return set(settings.get_image_prompt_extra_fields()) except Exception: return None def _append_extra_fields(desc_text: str, desc_content: dict) -> str: """将 extra_fields 拼接到描述文本末尾,供图片生成 prompt 使用。""" extra_fields = desc_content.get('extra_fields') if not 
class TaskManager:
    """In-process background task runner built on a ThreadPoolExecutor.

    Every submitted task is remembered under its ``task_id`` while it is in
    flight and dropped from the registry as soon as its future finishes.
    """

    def __init__(self, max_workers: int = 4):
        """Create the worker pool and the lock-guarded registry of running tasks."""
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
        self.active_tasks = {}  # task_id -> Future
        self.lock = threading.Lock()

    def submit_task(self, task_id: str, func: Callable, *args, **kwargs):
        """Run ``func(task_id, *args, **kwargs)`` on the pool and track it by id."""

        def _on_finished(finished_future):
            # Logs a failure (if any) and removes the task from the registry.
            self._task_done_callback(task_id, finished_future)

        submitted = self.executor.submit(func, task_id, *args, **kwargs)
        with self.lock:
            self.active_tasks[task_id] = submitted
        # Registered after the bookkeeping above, so cleanup always finds the entry.
        submitted.add_done_callback(_on_finished)

    def _task_done_callback(self, task_id: str, future):
        """Log the exception raised by a finished task, then forget the task."""
        try:
            raised = future.exception()
            if raised:
                logger.error(f"Task {task_id} failed with exception: {raised}", exc_info=raised)
        except Exception as e:
            logger.error(f"Error in task callback for {task_id}: {e}", exc_info=True)
        finally:
            self._cleanup_task(task_id)

    def _cleanup_task(self, task_id: str):
        """Remove ``task_id`` from the registry; a missing id is ignored."""
        with self.lock:
            self.active_tasks.pop(task_id, None)

    def is_task_active(self, task_id: str) -> bool:
        """Return True while ``task_id`` is still present in the registry."""
        with self.lock:
            still_tracked = task_id in self.active_tasks
        return still_tracked

    def shutdown(self):
        """Shut the pool down, blocking until queued and running tasks finish."""
        self.executor.shutdown(wait=True)
int]: """ 保存图片并创建历史版本记录的公共函数 Args: image: PIL Image 对象 project_id: 项目ID page_id: 页面ID file_service: FileService 实例 page_obj: Page 对象(可选,如果提供则更新页面状态) image_format: 图片格式,默认 PNG Returns: tuple: (image_path, version_number) - 图片路径和版本号 这个函数会: 1. 计算下一个版本号(使用 MAX 查询确保安全) 2. 标记所有旧版本为非当前版本 3. 保存图片到最终位置 4. 生成并保存压缩的缓存图片 5. 创建新版本记录 6. 如果提供了 page_obj,更新页面状态和图片路径 """ # 使用 MAX 查询确保版本号安全(即使有版本被删除也不会重复) max_version = db.session.query(func.max(PageImageVersion.version_number)).filter_by(page_id=page_id).scalar() or 0 next_version = max_version + 1 # 批量更新:标记所有旧版本为非当前版本(使用单条 SQL 更高效) PageImageVersion.query.filter_by(page_id=page_id).update({'is_current': False}) # 保存原图到最终位置(使用版本号) image_path = file_service.save_generated_image( image, project_id, page_id, version_number=next_version, image_format=image_format ) # 生成并保存压缩的缓存图片(用于前端快速显示) cached_image_path = file_service.save_cached_image( image, project_id, page_id, version_number=next_version, quality=85 ) # 创建新版本记录 new_version = PageImageVersion( page_id=page_id, image_path=image_path, version_number=next_version, is_current=True ) db.session.add(new_version) # 如果提供了 page_obj,更新页面状态和图片路径 if page_obj: page_obj.generated_image_path = image_path page_obj.cached_image_path = cached_image_path page_obj.status = 'COMPLETED' page_obj.updated_at = datetime.utcnow() # 提交事务 db.session.commit() logger.debug(f"Page {page_id} image saved as version {next_version}: {image_path}, cached: {cached_image_path}") return image_path, next_version def generate_descriptions_task(task_id: str, project_id: str, ai_service, project_context, outline: List[Dict], max_workers: int = 5, app=None, language: str = None, detail_level: str = 'default'): """ Background task for generating page descriptions Based on demo.py gen_desc() with parallel processing Note: app instance MUST be passed from the request context Args: task_id: Task ID project_id: Project ID ai_service: AI service instance project_context: ProjectContext object containing all project information 
outline: Complete outline structure max_workers: Maximum number of parallel workers app: Flask app instance language: Output language (zh, en, ja, auto) detail_level: Description detail level (concise/default/detailed) """ if app is None: raise ValueError("Flask app instance must be provided") # 在整个任务中保持应用上下文 with app.app_context(): try: # 重要:在后台线程开始时就获取task和设置状态 task = Task.query.get(task_id) if not task: logger.error(f"Task {task_id} not found") return task.status = 'PROCESSING' db.session.commit() logger.info(f"Task {task_id} status updated to PROCESSING") # Flatten outline to get pages pages_data = ai_service.flatten_outline(outline) # Get all pages for this project pages = Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all() if len(pages) != len(pages_data): raise ValueError("Page count mismatch") # Mark all pages as GENERATING_DESCRIPTION before starting for page in pages: page.status = 'GENERATING_DESCRIPTION' # Initialize progress task.set_progress({ "total": len(pages), "completed": 0, "failed": 0 }) db.session.commit() # Generate descriptions in parallel completed = 0 failed = 0 def generate_single_desc(page_id, page_outline, page_index): """ Generate description for a single page 注意:只传递 page_id(字符串),不传递 ORM 对象,避免跨线程会话问题 """ # 关键修复:在子线程中也需要应用上下文 with app.app_context(): try: # Get singleton AI service instance from services.ai_service_manager import get_ai_service ai_service = get_ai_service() desc_result = ai_service.generate_page_description( project_context, outline, page_outline, page_index, language=language, detail_level=detail_level ) # generate_page_description returns dict with text + optional extra_fields desc_content = { "text": desc_result['text'], "generated_at": datetime.utcnow().isoformat() } if desc_result.get('extra_fields'): desc_content['extra_fields'] = desc_result['extra_fields'] return (page_id, desc_content, None) except Exception as e: import traceback error_detail = traceback.format_exc() 
def generate_images_task(task_id: str, project_id: str, ai_service, file_service,
                         outline: List[Dict], use_template: bool = True,
                         max_workers: int = 8, aspect_ratio: str = "16:9",
                         resolution: str = "2K", app=None,
                         extra_requirements: str = None,
                         language: str = None,
                         page_ids: list = None):
    """
    Background task that generates page images in parallel.

    Note: the Flask app instance MUST be passed in by the caller, because this
    runs outside the request context.

    Args:
        task_id: ID of the Task row used for status/progress tracking.
        project_id: ID of the project whose pages are generated.
        ai_service: Service used to flatten the outline, build prompts and generate images.
        file_service: Service used to resolve the template image and store results.
        outline: Complete outline structure.
        use_template: Whether to look up the project's template image as a reference.
        max_workers: Size of the thread pool used for per-page generation.
        aspect_ratio: Aspect ratio forwarded to prompt building and image generation.
        resolution: Requested resolution; the result is checked against it.
        app: Flask app instance (required).
        extra_requirements: Extra user requirements forwarded to the prompt builder.
        language: Output language (zh, en, ja, auto).
        page_ids: Optional list of page IDs to generate (all pages when omitted).

    Raises:
        ValueError: If ``app`` is None.
    """
    if app is None:
        raise ValueError("Flask app instance must be provided")

    with app.app_context():
        try:
            # Update task status to PROCESSING
            task = Task.query.get(task_id)
            if not task:
                return

            task.status = 'PROCESSING'
            db.session.commit()

            # Get pages for this project (filtered by page_ids if provided)
            pages = get_filtered_pages(project_id, page_ids)
            all_pages_data = ai_service.flatten_outline(outline)
            # Map order_index -> outline entry so a filtered page is matched to
            # its own outline entry rather than to "the first N" entries.
            pages_data_by_index = {i: pd for i, pd in enumerate(all_pages_data)}

            # The template path is deliberately NOT resolved here: each worker
            # looks it up itself so a template uploaded right before generation
            # is picked up.

            # Initialize progress
            task.set_progress({
                "total": len(pages),
                "completed": 0,
                "failed": 0
            })
            db.session.commit()

            completed = 0
            failed = 0
            resolution_mismatched = 0  # Count of resolution mismatches

            def generate_single_image(page_id, page_data, page_index, batch_position):
                """
                Generate the image for one page inside a worker thread.

                Only the page id (a string) crosses the thread boundary; the ORM
                object is re-loaded here to avoid cross-thread session problems.

                ``page_index`` is the page's 1-based position in the deck and is
                what the prompt builder receives; ``batch_position`` is the
                1-based position inside this batch and is only used for logging.

                Returns:
                    (page_id, image_path, error_message, is_resolution_mismatched)
                """
                # Worker threads need their own application context.
                with app.app_context():
                    try:
                        logger.debug(f"Starting image generation for page {page_id}, index {batch_position}")
                        page_obj = Page.query.get(page_id)
                        if not page_obj:
                            raise ValueError(f"Page {page_id} not found")

                        page_obj.status = 'GENERATING'
                        db.session.commit()
                        logger.debug(f"Page {page_id} status updated to GENERATING")

                        desc_content = page_obj.get_description_content()
                        if not desc_content:
                            raise ValueError("No description content for page")

                        # The description text lives in 'text', or in the
                        # 'text_content' list when 'text' is empty.
                        desc_text = desc_content.get('text', '')
                        if not desc_text and desc_content.get('text_content'):
                            text_content = desc_content.get('text_content', [])
                            if isinstance(text_content, list):
                                desc_text = '\n'.join(text_content)
                            else:
                                desc_text = str(text_content)

                        # Append extra_fields so they reach the image prompt.
                        desc_text = _append_extra_fields(desc_text, desc_content)
                        logger.debug(f"Got description text for page {page_id}: {desc_text[:100]}...")

                        # Image URLs embedded in the description become extra references.
                        page_additional_ref_images = []
                        has_material_images = False
                        if desc_text:
                            image_urls = ai_service.extract_image_urls_from_markdown(desc_text)
                            if image_urls:
                                logger.info(f"Found {len(image_urls)} image(s) in page {page_id} description")
                                page_additional_ref_images = image_urls
                                has_material_images = True

                        # Resolve the template lazily so the latest upload is used.
                        page_ref_image_path = None
                        if use_template:
                            page_ref_image_path = file_service.get_template_path(project_id)
                            # A missing template image is allowed when a style
                            # description exists; that check is done by the controller.

                        prompt = ai_service.generate_image_prompt(
                            outline, page_data, desc_text, page_index,
                            has_material_images=has_material_images,
                            extra_requirements=extra_requirements,
                            language=language,
                            has_template=use_template,
                            aspect_ratio=aspect_ratio
                        )
                        logger.debug(f"Generated image prompt for page {page_id}")

                        logger.info(f"🎨 Calling AI service to generate image for page {batch_position}/{len(pages)}...")
                        image = ai_service.generate_image(
                            prompt, page_ref_image_path, aspect_ratio, resolution,
                            additional_ref_images=page_additional_ref_images if page_additional_ref_images else None
                        )

                        if not image:
                            raise ValueError("Failed to generate image")
                        # Only report success once we actually hold an image.
                        logger.info(f"✅ Image generated successfully for page {batch_position}")

                        # Check resolution for all providers
                        actual_res, is_match = check_image_resolution(image, resolution)
                        if not is_match:
                            logger.warning(f"Resolution mismatch for page {batch_position}: requested {resolution}, got {actual_res}")

                        # Save straight to the final location from the worker;
                        # the version record is created by the shared helper.
                        image_path, next_version = save_image_with_version(
                            image, project_id, page_id, file_service, page_obj=page_obj
                        )

                        return (page_id, image_path, None, not is_match)

                    except Exception as e:
                        import traceback
                        error_detail = traceback.format_exc()
                        logger.error(f"Failed to generate image for page {page_id}: {error_detail}")
                        return (page_id, None, str(e), None)

            # Extract plain values up front; ORM objects are not handed to workers.
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = [
                    executor.submit(
                        generate_single_image,
                        page.id,
                        pages_data_by_index.get(page.order_index, {}),
                        # Deck position, not batch position: otherwise the first
                        # page of a filtered batch would be reported as page 1.
                        page.order_index + 1 if page.order_index is not None else position,
                        position,
                    )
                    for position, page in enumerate(pages, 1)
                ]

                # Process results as they complete
                for future in as_completed(futures):
                    page_id, image_path, error, is_mismatched = future.result()

                    if is_mismatched:
                        resolution_mismatched += 1

                    db.session.expire_all()

                    # Mainly needed to persist the FAILED state; successful pages
                    # were already saved by the worker.
                    page = Page.query.get(page_id)
                    if page:
                        if error:
                            page.status = 'FAILED'
                            failed += 1
                            db.session.commit()
                        else:
                            completed += 1
                            # Pick up the state written by the worker thread.
                            db.session.refresh(page)

                    # Update task progress
                    task = Task.query.get(task_id)
                    if task:
                        progress = task.get_progress()
                        progress['completed'] = completed
                        progress['failed'] = failed
                        # Set the warning once, on the first detected mismatch.
                        if resolution_mismatched > 0 and 'warning_message' not in progress:
                            progress['warning_message'] = "图片返回分辨率与设置不符,建议使用gemini格式以避免此问题"
                        task.set_progress(progress)
                        db.session.commit()
                        logger.info(f"Image Progress: {completed}/{len(pages)} pages completed")

            # Mark task as completed
            task = Task.query.get(task_id)
            if task:
                task.status = 'COMPLETED'
                task.completed_at = datetime.utcnow()
                if resolution_mismatched > 0:
                    logger.warning(f"Task {task_id} has {resolution_mismatched} resolution mismatches")
                db.session.commit()
                logger.info(f"Task {task_id} COMPLETED - {completed} images generated, {failed} failed")

            # Update project status
            from models import Project
            project = Project.query.get(project_id)
            if project and failed == 0:
                project.status = 'COMPLETED'
                db.session.commit()
                logger.info(f"Project {project_id} status updated to COMPLETED")

        except Exception as e:
            import traceback
            error_detail = traceback.format_exc()
            logger.error(f"Task {task_id} FAILED: {error_detail}")

            # Discard any half-finished transaction so the queries below do not
            # run on a session that is in a failed state.
            db.session.rollback()

            # Mark task as failed
            task = Task.query.get(task_id)
            if task:
                task.status = 'FAILED'
                task.error_message = str(e)
                task.completed_at = datetime.utcnow()
                db.session.commit()
def edit_page_image_task(task_id: str, project_id: str, page_id: str,
                         edit_instruction: str, ai_service, file_service,
                         aspect_ratio: str = "16:9", resolution: str = "2K",
                         original_description: str = None,
                         additional_ref_images: List[str] = None,
                         temp_dir: str = None, app=None):
    """
    Background task that edits the current image of a page.

    Note: the Flask app instance MUST be passed in by the caller, because this
    runs outside the request context.

    Args:
        task_id: ID of the Task row used for status/progress tracking.
        project_id: ID of the project the page must belong to.
        page_id: ID of the page whose generated image is edited.
        edit_instruction: Instruction forwarded to ``ai_service.edit_image``.
        ai_service: Service providing ``edit_image``.
        file_service: Service used to resolve paths and store the result.
        aspect_ratio: Aspect ratio forwarded to ``edit_image``.
        resolution: Resolution forwarded to ``edit_image``.
        original_description: Optional original page description for the edit prompt.
        additional_ref_images: Optional extra reference images.
        temp_dir: Optional temporary directory; it is always removed when the
            task ends, whatever the outcome.
        app: Flask app instance (required).

    Raises:
        ValueError: If ``app`` is None.
    """
    if app is None:
        raise ValueError("Flask app instance must be provided")

    with app.app_context():
        try:
            # Update task status to PROCESSING
            task = Task.query.get(task_id)
            if not task:
                return

            task.status = 'PROCESSING'
            db.session.commit()

            # Get page from database
            page = Page.query.get(page_id)
            if not page or page.project_id != project_id:
                raise ValueError(f"Page {page_id} not found")

            if not page.generated_image_path:
                raise ValueError("Page must have generated image first")

            # Update page status
            page.status = 'GENERATING'
            db.session.commit()

            # Get current image path
            current_image_path = file_service.get_absolute_path(page.generated_image_path)

            # Edit image
            logger.info(f"🎨 Editing image for page {page_id}...")
            image = ai_service.edit_image(
                edit_instruction,
                current_image_path,
                aspect_ratio,
                resolution,
                original_description=original_description,
                additional_ref_images=additional_ref_images if additional_ref_images else None
            )

            if not image:
                raise ValueError("Failed to edit image")

            # Save the edited image and create a new history version record.
            image_path, next_version = save_image_with_version(
                image, project_id, page_id, file_service, page_obj=page
            )

            # Mark task as completed
            task.status = 'COMPLETED'
            task.completed_at = datetime.utcnow()
            task.set_progress({
                "total": 1,
                "completed": 1,
                "failed": 0
            })
            db.session.commit()

            logger.info(f"✅ Task {task_id} COMPLETED - Page {page_id} image edited")

        except Exception as e:
            import traceback
            error_detail = traceback.format_exc()
            logger.error(f"Task {task_id} FAILED: {error_detail}")

            # Discard any half-finished transaction so the queries below do not
            # run on a session that is in a failed state.
            db.session.rollback()

            # Mark task as failed
            task = Task.query.get(task_id)
            if task:
                task.status = 'FAILED'
                task.error_message = str(e)
                task.completed_at = datetime.utcnow()
                db.session.commit()

            # Update page status, but only for a page that belongs to this
            # project: the "not found" error above is also raised for a page of
            # another project, which must not be touched.
            page = Page.query.get(page_id)
            if page and page.project_id == project_id:
                page.status = 'FAILED'
                db.session.commit()

        finally:
            # Single cleanup point: also covers the early return when the task
            # row is missing, and can no longer abort the failure bookkeeping.
            if temp_dir:
                import shutil
                shutil.rmtree(temp_dir, ignore_errors=True)
logger.info(f"✅ Task {task_id} COMPLETED - Material {material.id} generated") except Exception as e: import traceback error_detail = traceback.format_exc() logger.error(f"Task {task_id} FAILED: {error_detail}") # Mark task as failed task = Task.query.get(task_id) if task: task.status = 'FAILED' task.error_message = str(e) task.completed_at = datetime.utcnow() db.session.commit() finally: # Clean up temp directory if temp_dir: import shutil temp_path = Path(temp_dir) if temp_path.exists(): shutil.rmtree(temp_dir, ignore_errors=True) def process_ppt_renovation_task(task_id: str, project_id: str, ai_service, file_service, file_parser_service, keep_layout: bool = False, max_workers: int = 5, app=None, language: str = 'zh'): """ Background task for PPT renovation: parse PDF pages → extract content → fill outline + description Flow: 1. Split PDF → per-page PDFs 2. Parallel: parse each page PDF → markdown via fileparser 3. Parallel: AI extract {title, points, description} from each markdown 4. If keep_layout: parallel caption model describe layout → append to description 5. Update page.outline_content + page.description_content 6. Concatenate descriptions → project.description_text 7. 
project.status = DESCRIPTIONS_GENERATED Args: task_id: Task ID project_id: Project ID ai_service: AI service instance file_service: FileService instance file_parser_service: FileParserService instance keep_layout: Whether to preserve original layout via caption model max_workers: Maximum parallel workers app: Flask app instance language: Output language """ if app is None: raise ValueError("Flask app instance must be provided") with app.app_context(): try: task = Task.query.get(task_id) if not task: logger.error(f"Task {task_id} not found") return task.status = 'PROCESSING' db.session.commit() from models import Project project = Project.query.get(project_id) if not project: raise ValueError(f"Project {project_id} not found") # Get the PDF path from project pdf_path = None project_dir = Path(app.config['UPLOAD_FOLDER']) / project_id # Look for the uploaded PDF file for f in (project_dir / "template").iterdir() if (project_dir / "template").exists() else []: if f.suffix.lower() == '.pdf': pdf_path = str(f) break if not pdf_path: raise ValueError("No PDF file found for renovation project") # Step 1: Split PDF into per-page PDFs split_dir = str(project_dir / "split_pages") page_pdfs = split_pdf_to_pages(pdf_path, split_dir) logger.info(f"Split PDF into {len(page_pdfs)} pages") # Get existing pages pages = Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all() # Ensure page count matches if len(pages) != len(page_pdfs): logger.warning(f"Page count mismatch: {len(pages)} pages vs {len(page_pdfs)} PDFs. 
Using min.") page_count = min(len(pages), len(page_pdfs)) if page_count == 0: raise ValueError("No pages to process") task.set_progress({ "total": page_count, "completed": 0, "failed": 0, "current_step": "parsing" }) db.session.commit() # Process each page as an independent pipeline: # parse markdown → AI extract content → (optional layout caption) → write to DB logger.info("Processing pages (parse → extract → save pipeline)...") import threading progress_lock = threading.Lock() completed = 0 failed = 0 extraction_errors = [] content_results = {} # index -> {title, points, description} def process_single_page(idx, page_pdf_path): nonlocal completed, failed with app.app_context(): try: # Step A: Parse page PDF → markdown filename = os.path.basename(page_pdf_path) _batch_id, md_text, extract_id, error_msg, _failed = file_parser_service.parse_file(page_pdf_path, filename) if error_msg: logger.warning(f"Page {idx} parse warning: {error_msg}") md_text = md_text or '' # Supplement with header/footer from layout.json if extract_id: hf_text = file_parser_service.extract_header_footer_from_layout(extract_id) if hf_text: md_text = hf_text + '\n\n' + md_text if not md_text.strip(): content = {'title': f'Page {idx + 1}', 'points': [], 'description': ''} error = 'empty_input' else: # Step B: AI extract structured content content = ai_service.extract_page_content(md_text, language=language) error = None # Step C: Optional layout caption if keep_layout and not error: try: page_obj = pages[idx] if idx < len(pages) else None if page_obj: image_path = None if page_obj.cached_image_path: image_path = file_service.get_absolute_path(page_obj.cached_image_path) elif page_obj.generated_image_path: image_path = file_service.get_absolute_path(page_obj.generated_image_path) if image_path and Path(image_path).exists(): caption = ai_service.generate_layout_caption(image_path) if caption: content['description'] += f"\n\n{caption}" except Exception as e: logger.error(f"Layout caption failed for 
page {idx}: {e}") # Step D: Write to DB immediately content_results[idx] = content page_obj = Page.query.get(pages[idx].id) if page_obj: title = content.get('title', f'Page {idx + 1}') points = content.get('points', []) description = content.get('description', '') page_obj.set_outline_content({ 'title': title, 'points': points }) page_obj.set_description_content({ "text": description, "generated_at": datetime.utcnow().isoformat() }) page_obj.status = 'DESCRIPTION_GENERATED' db.session.commit() with progress_lock: if error and error != 'empty_input': failed += 1 extraction_errors.append(error) else: completed += 1 task_obj = Task.query.get(task_id) if task_obj: task_obj.update_progress(completed=completed, failed=failed) db.session.commit() logger.info(f"Page {idx} pipeline done (completed={completed}, failed={failed})") except Exception as e: logger.error(f"Pipeline failed for page {idx}: {e}") with progress_lock: failed += 1 extraction_errors.append(str(e)) task_obj = Task.query.get(task_id) if task_obj: task_obj.update_progress(completed=completed, failed=failed) db.session.commit() with ThreadPoolExecutor(max_workers=max_workers) as executor: futures = [ executor.submit(process_single_page, i, page_pdfs[i]) for i in range(page_count) ] for future in as_completed(futures): future.result() # propagate any unexpected exceptions logger.info(f"All pages processed: {completed} completed, {failed} failed") # Fail-fast: any extraction failure aborts the entire task if failed > 0: reason = extraction_errors[0] if extraction_errors else "empty page content" raise ValueError(f"{failed}/{page_count} 页内容提取失败: {reason}") # Update project-level aggregated text project = Project.query.get(project_id) if project: all_outlines = [] all_descriptions = [] for i in range(page_count): content = content_results.get(i, {}) title = content.get('title', '') points = content.get('points', []) description = content.get('description', '') header = f"第{i + 1}页:{title}" if points: 
def export_editable_pptx_with_recursive_analysis_task(
    task_id: str,
    project_id: str,
    filename: str,
    file_service,
    page_ids: list = None,
    max_depth: int = 2,
    max_workers: int = 4,
    export_extractor_method: str = 'hybrid',
    export_inpaint_method: str = 'hybrid',
    app=None
):
    """
    Background task that exports an editable PPTX using recursive image analysis.

    The slide size is taken from the first generated page image rather than
    being assumed to be 16:9, and the actual layout analysis / element
    extraction is delegated to
    ``ExportService.create_editable_pptx_with_recursive_analysis``. No
    ``ai_service`` is needed by this task.

    Progress (a 0-100 percentage plus a rolling log of the most recent
    messages) is written to the Task row while the export runs. On success the
    Task is marked COMPLETED with a ``download_url``; on failure it is marked
    FAILED with an error message (and structured details for ``ExportError``).

    Args:
        task_id: Task ID.
        project_id: Project ID.
        filename: Output file name; ``.pptx`` is appended when missing and a
            timestamp is added when the file already exists.
        file_service: File service instance (``get_absolute_path`` is used).
        page_ids: Optional list of page IDs; when given only these are exported.
        max_depth: Maximum recursion depth.
        max_workers: Number of concurrent workers.
        export_extractor_method: Component extraction method ('mineru' or 'hybrid').
        export_inpaint_method: Background repair method ('generative', 'baidu', 'hybrid').
        app: Flask application instance.

    Raises:
        ValueError: If ``app`` is not provided.
    """
    logger.info(f"🚀 Task {task_id} started: export_editable_pptx_with_recursive_analysis (project={project_id}, depth={max_depth}, workers={max_workers}, extractor={export_extractor_method}, inpaint={export_inpaint_method})")

    if app is None:
        raise ValueError("Flask app instance must be provided")

    with app.app_context():
        import os
        from datetime import datetime
        from PIL import Image
        from models import Project
        from services.export_service import ExportService, ExportError

        logger.info(f"开始递归分析导出任务 {task_id} for project {project_id}")

        try:
            # Get project
            project = Project.query.get(project_id)
            if not project:
                raise ValueError(f'Project {project_id} not found')

            # Project export setting: whether a partial result may be returned
            export_allow_partial = project.export_allow_partial or False
            fail_fast = not export_allow_partial
            logger.info(f"导出设置: export_allow_partial={export_allow_partial}, fail_fast={fail_fast}")

            # IMPORTANT: Expire cached objects to ensure fresh data from database
            # This prevents reading stale generated_image_path after page regeneration
            db.session.expire_all()

            # Get pages (filtered by page_ids if provided)
            pages = get_filtered_pages(project_id, page_ids)
            if not pages:
                raise ValueError('No pages found for project')

            image_paths = []
            for page in pages:
                if page.generated_image_path:
                    img_path = file_service.get_absolute_path(page.generated_image_path)
                    if os.path.exists(img_path):
                        image_paths.append(img_path)

            if not image_paths:
                raise ValueError('No generated images found for project')

            logger.info(f"找到 {len(image_paths)} 张图片")

            # Initialise task progress (including the message log)
            task = Task.query.get(task_id)
            if not task:
                # Fail with a clear message instead of an AttributeError below.
                raise ValueError(f'Task {task_id} not found')
            task.set_progress({
                "total": 100,  # expressed as a percentage
                "completed": 0,
                "failed": 0,
                "current_step": "准备中...",
                "percent": 0,
                "messages": ["🚀 开始导出可编辑PPTX..."]  # message log
            })
            db.session.commit()

            # Progress callback - persists progress to the database
            progress_messages = ["🚀 开始导出可编辑PPTX..."]
            max_messages = 10  # keep only the 10 most recent messages

            def progress_callback(step: str, message: str, percent: int):
                """Best-effort write of the current progress to the Task row."""
                nonlocal progress_messages
                try:
                    # Append the new message and trim the log
                    new_message = f"[{step}] {message}"
                    progress_messages.append(new_message)
                    if len(progress_messages) > max_messages:
                        progress_messages = progress_messages[-max_messages:]

                    # Update the database
                    task = Task.query.get(task_id)
                    if task:
                        task.set_progress({
                            "total": 100,
                            "completed": percent,
                            "failed": 0,
                            "current_step": message,
                            "percent": percent,
                            "messages": progress_messages.copy()
                        })
                        db.session.commit()
                except Exception as e:
                    logger.warning(f"更新进度失败: {e}")
                    # A failed commit leaves the session unusable until it is
                    # rolled back; without this every later progress update
                    # and the final COMPLETED commit would fail too.
                    db.session.rollback()

            # Step 1: preparation
            logger.info("Step 1: 准备工作...")
            progress_callback("准备", f"找到 {len(image_paths)} 张幻灯片图片", 2)

            # Prepare the output path
            exports_dir = os.path.join(app.config['UPLOAD_FOLDER'], project_id, 'exports')
            os.makedirs(exports_dir, exist_ok=True)

            # Handle filename collision
            if not filename.endswith('.pptx'):
                filename += '.pptx'

            output_path = os.path.join(exports_dir, filename)
            if os.path.exists(output_path):
                base_name = filename.rsplit('.', 1)[0]
                timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
                filename = f"{base_name}_{timestamp}.pptx"
                output_path = os.path.join(exports_dir, filename)
                logger.info(f"文件名冲突,使用新文件名: {filename}")

            # Use the first image's size as the reference slide size; the
            # context manager closes the file even if reading the size fails.
            with Image.open(image_paths[0]) as first_img:
                slide_width, slide_height = first_img.size

            logger.info(f"幻灯片尺寸: {slide_width}x{slide_height}")
            logger.info(f"递归深度: {max_depth}, 并发数: {max_workers}")
            progress_callback("准备", f"幻灯片尺寸: {slide_width}×{slide_height}", 3)

            # Step 2: create the text attribute extractor
            from services.image_editability import TextAttributeExtractorFactory
            text_attribute_extractor = TextAttributeExtractorFactory.create_caption_model_extractor()
            progress_callback("准备", "文字属性提取器已初始化", 5)

            # Step 3: run the export with the project's export settings
            logger.info(f"Step 3: 创建可编辑PPTX (extractor={export_extractor_method}, inpaint={export_inpaint_method}, fail_fast={fail_fast})...")
            progress_callback("配置", f"提取方法: {export_extractor_method}, 背景修复: {export_inpaint_method}", 6)

            _, export_warnings = ExportService.create_editable_pptx_with_recursive_analysis(
                image_paths=image_paths,
                output_file=output_path,
                slide_width_pixels=slide_width,
                slide_height_pixels=slide_height,
                max_depth=max_depth,
                max_workers=max_workers,
                text_attribute_extractor=text_attribute_extractor,
                progress_callback=progress_callback,
                export_extractor_method=export_extractor_method,
                export_inpaint_method=export_inpaint_method,
                fail_fast=fail_fast
            )

            logger.info(f"✓ 可编辑PPTX已创建: {output_path}")

            # Step 4: mark the task as completed
            download_path = f"/files/{project_id}/exports/{filename}"

            # Completion message
            progress_messages.append("✅ 导出完成!")

            # Warnings, if any
            warning_messages = []
            if export_warnings and export_warnings.has_warnings():
                warning_messages = export_warnings.to_summary()
                progress_messages.extend(warning_messages)
                logger.warning(f"导出有 {len(warning_messages)} 条警告")

            task = Task.query.get(task_id)
            if task:
                task.status = 'COMPLETED'
                task.completed_at = datetime.utcnow()
                task.set_progress({
                    "total": 100,
                    "completed": 100,
                    "failed": 0,
                    "current_step": "✓ 导出完成",
                    "percent": 100,
                    "messages": progress_messages,
                    "download_url": download_path,
                    "filename": filename,
                    "method": "recursive_analysis",
                    "max_depth": max_depth,
                    "warnings": warning_messages,  # separate warning list
                    "warning_details": export_warnings.to_dict() if export_warnings else {}  # detailed warnings
                })
                db.session.commit()
                logger.info(f"✓ 任务 {task_id} 完成 - 递归分析导出成功(深度={max_depth})")

        except ExportError as e:
            # Export error (carries structured details in fail_fast mode)
            logger.error(f"✗ 任务 {task_id} 导出失败: {e.message}")
            logger.error(f"错误类型: {e.error_type}, 详情: {e.details}")

            # Drop any half-finished transaction before recording the failure.
            db.session.rollback()

            # Mark the task as failed, including the detailed error information
            task = Task.query.get(task_id)
            if task:
                task.status = 'FAILED'
                # Build the detailed error message
                error_message = f"{e.message}"
                if e.help_text:
                    error_message += f"\n\n💡 {e.help_text}"
                task.error_message = error_message
                task.completed_at = datetime.utcnow()
                # Keep the structured error details in the progress payload
                task.set_progress({
                    "total": 100,
                    "completed": 0,
                    "failed": 1,
                    "current_step": "导出失败",
                    "percent": 0,
                    "error_type": e.error_type,
                    "error_details": e.details,
                    "help_text": e.help_text
                })
                db.session.commit()

        except Exception as e:
            import traceback
            error_detail = traceback.format_exc()
            logger.error(f"✗ 任务 {task_id} 失败: {error_detail}")

            # Drop any half-finished transaction before recording the failure.
            db.session.rollback()

            # Mark the task as failed
            task = Task.query.get(task_id)
            if task:
                task.status = 'FAILED'
                task.error_message = str(e)
                task.completed_at = datetime.utcnow()
                db.session.commit()
def assert_success_response(response, status_code=200):
    """
    Assert that ``response`` is a successful API reply and return its JSON body.

    Args:
        response: Object exposing ``status_code`` and ``get_json()``.
        status_code: Expected HTTP status code (default 200).

    Returns:
        The decoded JSON payload.

    Raises:
        AssertionError: If the status code differs, the body is not JSON, or
            ``success`` is not ``True``. The message states what was received
            so a failing test can be diagnosed from the report alone.
    """
    assert response.status_code == status_code, (
        f"expected HTTP {status_code}, got {response.status_code}"
    )
    data = response.get_json()
    assert data is not None, "response body is not JSON"
    assert data.get('success') is True, f"expected success=True, got {data!r}"
    return data


def assert_error_response(response, expected_status=None):
    """
    Assert that ``response`` is an error reply and return its JSON body.

    Args:
        response: Object exposing ``status_code`` and ``get_json()``.
        expected_status: Expected HTTP status code; the status is not checked
            when this is ``None``.

    Returns:
        The decoded JSON payload.

    Raises:
        AssertionError: If the status code differs (when checked), the body is
            not JSON, or the payload neither has ``success == False`` nor an
            ``error`` key.
    """
    if expected_status is not None:
        assert response.status_code == expected_status, (
            f"expected HTTP {expected_status}, got {response.status_code}"
        )
    data = response.get_json()
    assert data is not None, "response body is not JSON"
    assert data.get('success') is False or 'error' in data, (
        f"expected an error payload, got {data!r}"
    )
    return data
Integration Tests ## 测试分类 ### 1. Flask Test Client 测试(不需要运行服务) **文件**: `test_full_workflow.py` 这些测试使用 Flask 的测试客户端(`client` fixture),不需要真实的服务运行。 **特点**: - ✅ 快速(毫秒级) - ✅ 不需要启动服务 - ✅ 在 CI 的 `backend-integration-test` 阶段运行 - ✅ 使用 mock 模式,不需要真实 API key **运行方式**: ```bash cd backend uv run pytest tests/integration/test_full_workflow.py -v ``` ### 2. Real Service 测试(需要运行服务) **文件**: `test_api_full_flow.py` 这些测试使用 `requests` 库直接调用 HTTP 端点,需要真实的后端服务运行。 **特点**: - ⏱️ 较慢(需要真实 HTTP 请求) - 🔧 需要服务运行在 `http://localhost:5000` - 🏗️ 在 CI 的 `docker-test` 阶段运行(服务已启动) - 🔑 完整流程测试需要真实 AI API key **标记**: `@pytest.mark.requires_service` **运行方式**: ```bash # 1. 启动服务 docker compose up -d # 2. 运行测试 cd backend uv run pytest tests/integration/test_api_full_flow.py -v -m "requires_service" ``` ## CI/CD 策略 ### Backend Integration Test 阶段 **何时运行**: 在每次 PR 和 push 时 **运行测试**: - ✅ 使用 Flask test client 的测试 - ❌ 跳过需要真实服务的测试 ```yaml # 跳过 @pytest.mark.requires_service 标记的测试 pytest tests/integration -v -m "not requires_service" ``` **环境变量**: ```yaml TESTING: true SKIP_SERVICE_TESTS: true GOOGLE_API_KEY: mock-api-key-for-testing ``` ### Docker Test 阶段 **何时运行**: 在 PR 添加 `ready-for-test` 标签时 **运行测试**: - ✅ 运行需要真实服务的测试 - ✅ 测试完整的 API 调用流程 ```yaml # 只运行 @pytest.mark.requires_service 标记的测试 pytest tests/integration/test_api_full_flow.py -v -m "requires_service" ``` **环境变量**: ```yaml SKIP_SERVICE_TESTS: false GOOGLE_API_KEY: ``` ## Pytest Markers 所有可用的 markers 定义在 `pytest.ini` 中: | Marker | 说明 | 示例 | |--------|------|------| | `unit` | 单元测试 | 测试单个函数或方法 | | `integration` | 集成测试 | 测试多个组件交互 | | `slow` | 慢速测试 | 需要 AI API 调用的测试 | | `requires_service` | 需要运行服务 | 使用 requests 调用 HTTP 端点 | | `mock` | 使用 mock | 不调用真实外部服务 | | `docker` | Docker 环境测试 | 需要 Docker 环境 | ## 运行示例 ### 运行所有集成测试(跳过需要服务的) ```bash cd backend SKIP_SERVICE_TESTS=true uv run pytest tests/integration/ -v -m "not requires_service" ``` ### 只运行需要服务的测试 ```bash # 确保服务已启动 docker compose up -d # 运行测试 cd backend SKIP_SERVICE_TESTS=false uv run pytest tests/integration/ 
-v -m "requires_service" ``` ### 运行所有集成测试(需要服务) ```bash # 确保服务已启动 docker compose up -d # 运行所有测试 cd backend uv run pytest tests/integration/ -v ``` ### 运行特定测试 ```bash # 运行快速 API 测试(需要服务) cd backend uv run pytest tests/integration/test_api_full_flow.py::TestAPIFullFlow::test_quick_api_flow_no_ai -v # 运行完整流程测试(需要服务和真实 API key) cd backend uv run pytest tests/integration/test_api_full_flow.py::TestAPIFullFlow::test_api_full_flow_create_to_export -v ``` ## 故障排除 ### 问题:`ConnectionRefusedError: [Errno 111] Connection refused` **原因**: 测试尝试连接 `localhost:5000`,但服务未运行。 **解决方案**: 1. 启动服务:`docker compose up -d` 2. 或者跳过这些测试:`pytest -m "not requires_service"` ### 问题:测试在 CI 的 backend-integration-test 阶段失败 **原因**: 该阶段不启动服务,应该跳过 `requires_service` 测试。 **解决方案**: 确保 CI 配置使用了正确的 pytest 命令: ```yaml pytest tests/integration -v -m "not requires_service" ``` ## 最佳实践 1. **新的集成测试**: - 如果测试可以使用 Flask test client → 添加到 `test_full_workflow.py` - 如果测试需要真实 HTTP 调用 → 添加到 `test_api_full_flow.py` 并标记 `@pytest.mark.requires_service` 2. **Marker 使用**: ```python @pytest.mark.integration @pytest.mark.requires_service def test_real_api_call(self): response = requests.post('http://localhost:5000/api/projects', ...) ``` 3. **环境检查**: - 文件级跳过:使用 `pytestmark = pytest.mark.skipif(...)` - 测试级跳过:使用 `@pytest.mark.skipif(...)` --- **更新日期**: 2025-12-22 **维护者**: Banana Slides Team ================================================ FILE: backend/tests/integration/__init__.py ================================================ # 后端集成测试模块 ================================================ FILE: backend/tests/integration/test_api_full_flow.py ================================================ """ API Full Flow Integration Test This test validates the complete API flow without UI: 1. Create project from idea 2. Upload template image 3. Generate outline 4. Generate descriptions 5. Generate images (using template) 6. 
Export PPT Note: - This test requires REAL running backend service (not Flask test client) - This test requires real AI API keys (GOOGLE_API_KEY) - These tests should only run in the docker-test stage of CI """ import pytest import requests import time import os import io from pathlib import Path from PIL import Image # Skip these tests if service is not running (for backend-integration-test stage) pytestmark = pytest.mark.skipif( os.environ.get('SKIP_SERVICE_TESTS', '').lower() == 'true', reason="Skipping tests that require running backend service" ) BASE_URL = "http://localhost:5000" API_TIMEOUT = 180 # 3 minutes timeout for AI operations def wait_for_project_status(project_id: str, expected_status: str, timeout: int = 180): """Wait for project to reach expected status with smart retry.""" start_time = time.time() check_interval = 2 # Start with 2 seconds max_interval = 10 consecutive_errors = 0 max_consecutive_errors = 3 while time.time() - start_time < timeout: try: response = requests.get(f"{BASE_URL}/api/projects/{project_id}", timeout=10) if not response.ok: consecutive_errors += 1 if consecutive_errors >= max_consecutive_errors: raise Exception(f"Failed to get project status after {max_consecutive_errors} consecutive errors") time.sleep(check_interval * 2) continue consecutive_errors = 0 data = response.json() current_status = data['data']['status'] elapsed = int(time.time() - start_time) print(f"[{elapsed}s] Project status: {current_status}, waiting for: {expected_status}") if current_status == expected_status: print(f"✓ Project reached status: {expected_status} (took {elapsed}s)") return if current_status == 'FAILED': error_msg = data['data'].get('error', 'Unknown error') raise Exception(f"Project generation failed. Expected: {expected_status}, Got: {current_status}. 
def wait_for_task_completion(project_id: str, task_id: str, timeout: int = 120):
    """
    Poll the task status endpoint until the task completes.

    Transient problems (non-2xx responses, transport errors, undecodable
    bodies) are retried; three in a row abort the wait. A task that reports
    ``FAILED`` aborts immediately. Terminal conditions are raised outside the
    retry handler, so they are never mistaken for transient errors (and vice
    versa) based on the wording of an exception message.

    Args:
        project_id: Project that owns the task.
        task_id: Task to wait for.
        timeout: Maximum time to wait, in seconds.

    Returns:
        None once the task status is ``COMPLETED``.

    Raises:
        Exception: If the task fails, three consecutive status checks fail, or
            the timeout elapses.
    """
    start_time = time.time()
    check_interval = 3
    max_interval = 10
    consecutive_errors = 0
    max_consecutive_errors = 3
    status_url = f"{BASE_URL}/api/projects/{project_id}/tasks/{task_id}"

    while time.time() - start_time < timeout:
        # Only the request and the decoding of its body are treated as
        # retryable; everything after this block is a definite outcome.
        try:
            response = requests.get(status_url, timeout=10)
            http_ok = bool(response.ok)
            if http_ok:
                task_info = response.json()['data']
                task_status = task_info['status']
        except Exception as e:
            consecutive_errors += 1
            if consecutive_errors >= max_consecutive_errors:
                raise Exception(f"Network error: {str(e)}") from e
            time.sleep(check_interval * 2)
            continue

        if not http_ok:
            consecutive_errors += 1
            if consecutive_errors >= max_consecutive_errors:
                raise Exception(f"Failed to get task status after {max_consecutive_errors} consecutive errors")
            time.sleep(check_interval * 2)
            continue

        # A fully decoded status response ends the error streak.
        consecutive_errors = 0

        elapsed = int(time.time() - start_time)
        print(f"[{elapsed}s] Task {task_id[:8]}... status: {task_status}")

        if task_status == 'COMPLETED':
            print(f"✓ Task {task_id[:8]}... completed (took {elapsed}s)")
            return

        if task_status == 'FAILED':
            error_msg = task_info.get('error_message', 'Unknown error')
            raise Exception(f"Task {task_id} failed: {error_msg}")

        # Adaptive interval: back off once the task has run for a while
        elapsed_time = time.time() - start_time
        if elapsed_time > 60:
            check_interval = min(max_interval, check_interval + 1)

        time.sleep(check_interval)

    raise Exception(f"Timeout: Task {task_id} did not complete within {timeout}s")
""" print('\n' + '=' * 40) print('🚀 Starting API full flow integration test') print('=' * 40 + '\n') # Step 1: Create project print('📝 Step 1: Creating project...') response = requests.post( f"{BASE_URL}/api/projects", json={ 'creation_type': 'idea', 'idea_prompt': '创建一份关于人工智能基础的简短PPT,包含3页内容:什么是AI、AI的应用、AI的未来' }, timeout=30 ) assert response.status_code in [200, 201] # 201 Created is also valid data = response.json() assert data['success'] is True assert 'project_id' in data['data'] pid = data['data']['project_id'] project_id(pid) # Register for cleanup print(f"✓ Project created successfully: {pid}\n") # Step 1.5: Upload template image print('🖼️ Step 1.5: Uploading template image...') # Create a simple test template image template_img = Image.new('RGB', (1920, 1080), color='lightblue') img_bytes = io.BytesIO() template_img.save(img_bytes, format='PNG') img_bytes.seek(0) response = requests.post( f"{BASE_URL}/api/projects/{pid}/template", files={'template_image': ('template.png', img_bytes, 'image/png')}, timeout=30 ) assert response.status_code in [200, 201] data = response.json() assert data['success'] is True print('✓ Template image uploaded successfully\n') # Step 2: Generate outline print('📋 Step 2: Triggering outline generation...') response = requests.post( f"{BASE_URL}/api/projects/{pid}/generate/outline", json={}, timeout=30 ) assert response.status_code == 200 data = response.json() assert data['success'] is True print('✓ Outline generation request submitted\n') # Step 3: Wait for outline completion print('⏳ Step 3: Waiting for outline generation to complete...') wait_for_project_status(pid, 'OUTLINE_GENERATED', timeout=API_TIMEOUT) # Verify pages were created response = requests.get(f"{BASE_URL}/api/projects/{pid}", timeout=10) data = response.json() pages = data['data']['pages'] assert pages is not None assert len(pages) > 0 print(f"✓ Outline generated successfully, contains {len(pages)} pages\n") # Step 4: Generate descriptions print('✍️ Step 4: 
Starting to generate page descriptions...') response = requests.post( f"{BASE_URL}/api/projects/{pid}/generate/descriptions", json={}, timeout=30 ) assert response.status_code == 202 # 202 Accepted for async operations data = response.json() assert data['success'] is True desc_task_id = data['data']['task_id'] print(f" Task ID: {desc_task_id}") # Wait for description generation wait_for_task_completion(pid, desc_task_id, timeout=API_TIMEOUT) wait_for_project_status(pid, 'DESCRIPTIONS_GENERATED', timeout=10) print('✓ All page descriptions generated\n') # Step 5: Generate images print('🎨 Step 5: Starting to generate page images...') response = requests.post( f"{BASE_URL}/api/projects/{pid}/generate/images", json={ 'use_template': True, # Use the uploaded template 'aspect_ratio': '16:9', 'resolution': '1080p' }, timeout=30 ) assert response.status_code == 202 # 202 Accepted for async operations data = response.json() assert data['success'] is True image_task_id = data['data']['task_id'] print(f" Task ID: {image_task_id}") # Wait for image generation (slower, 5 minutes timeout) wait_for_task_completion(pid, image_task_id, timeout=300) wait_for_project_status(pid, 'COMPLETED', timeout=10) print('✓ All page images generated\n') # Verify all pages have images response = requests.get(f"{BASE_URL}/api/projects/{pid}", timeout=10) data = response.json() pages = data['data'].get('pages', []) assert len(pages) > 0 for page in pages: assert page.get('generated_image_url') is not None assert page.get('status') == 'COMPLETED' print(f" ✓ Page {page['order_index'] + 1}: Image generated") print() # Step 6: Export PPT print('📦 Step 6: Exporting PPT file...') response = requests.get( f"{BASE_URL}/api/projects/{pid}/export/pptx?filename=integration-test.pptx", timeout=60 ) assert response.status_code == 200 data = response.json() assert data['success'] is True assert 'download_url' in data['data'] assert '.pptx' in data['data']['download_url'] print(f" Export URL: 
@pytest.mark.integration
@pytest.mark.requires_service
def test_quick_api_flow_no_ai(self):
    """Quick test: only verify that the project CRUD endpoints work (no AI generation).

    This test requires a running backend service. The project it creates is
    removed even when one of the intermediate assertions fails, so a failing
    run does not leave test data behind on the server.
    """
    print('\n🏃 Quick API flow test (skip AI generation)\n')

    # Create project
    response = requests.post(
        f"{BASE_URL}/api/projects",
        json={
            'creation_type': 'idea',
            'idea_prompt': 'API test project'
        },
        timeout=30
    )
    assert response.status_code in [200, 201]  # 201 Created is also valid
    data = response.json()
    assert data['success'] is True
    pid = data['data']['project_id']
    print(f"✓ Project created: {pid}")

    deleted = False
    try:
        # Get project info
        response = requests.get(f"{BASE_URL}/api/projects/{pid}", timeout=10)
        assert response.status_code == 200
        print('✓ Project query successful')

        # List all projects
        response = requests.get(f"{BASE_URL}/api/projects", timeout=10)
        assert response.status_code == 200
        data = response.json()
        assert 'projects' in data['data']
        print(f"✓ Project list query successful, total {len(data['data']['projects'])} projects")

        # Delete project
        response = requests.delete(f"{BASE_URL}/api/projects/{pid}", timeout=10)
        assert response.status_code == 200
        deleted = True
        print('✓ Project deleted successfully\n')
    finally:
        # Best-effort cleanup when the test stopped before (or during) the
        # delete step; never mask the original assertion failure.
        if not deleted:
            try:
                requests.delete(f"{BASE_URL}/api/projects/{pid}", timeout=10)
            except Exception as e:
                print(f"Failed to cleanup project {pid}: {e}")
创建项目 create_response = client.post('/api/projects', json={ 'creation_type': 'idea', 'idea_prompt': '测试模板上传' }) data = assert_success_response(create_response, 201) project_id = data['data']['project_id'] # 2. 上传模板 upload_response = client.post( f'/api/projects/{project_id}/template', data={'template_image': (sample_image_file, 'template.png')}, content_type='multipart/form-data' ) # 检查上传结果 assert upload_response.status_code in [200, 201] def test_project_lifecycle(self, client): """测试项目完整生命周期""" # 1. 创建 create_response = client.post('/api/projects', json={ 'creation_type': 'idea', 'idea_prompt': '生命周期测试' }) data = assert_success_response(create_response, 201) project_id = data['data']['project_id'] # 2. 读取 get_response = client.get(f'/api/projects/{project_id}') assert_success_response(get_response) # 3. 更新(如果API支持) # update_response = client.put(f'/api/projects/{project_id}', json={...}) # 4. 删除 delete_response = client.delete(f'/api/projects/{project_id}') assert_success_response(delete_response) # 5. 
确认删除 verify_response = client.get(f'/api/projects/{project_id}') assert verify_response.status_code == 404 class TestAPIErrorHandling: """API错误处理测试""" def test_invalid_json_body(self, client): """测试无效的JSON请求体""" response = client.post( '/api/projects', data='invalid json', content_type='application/json' ) assert response.status_code in [400, 415, 422] def test_missing_required_fields(self, client): """测试缺少必需字段""" response = client.post('/api/projects', json={}) assert response.status_code in [400, 422] def test_method_not_allowed(self, client): """测试不允许的HTTP方法""" response = client.patch('/api/projects') # PATCH可能不被支持 assert response.status_code in [404, 405] class TestConcurrentRequests: """并发请求测试""" def test_multiple_project_creation(self, client): """测试多个项目创建不冲突""" project_ids = [] for i in range(3): response = client.post('/api/projects', json={ 'creation_type': 'idea', 'idea_prompt': f'并发测试项目 {i}' }) data = assert_success_response(response, 201) project_ids.append(data['data']['project_id']) # 确保所有项目ID都不同 assert len(set(project_ids)) == 3 # 清理 for pid in project_ids: client.delete(f'/api/projects/{pid}') ================================================ FILE: backend/tests/pytest.ini ================================================ [pytest] # pytest配置文件 # 测试文件匹配模式 python_files = test_*.py python_classes = Test* python_functions = test_* # 测试目录 testpaths = tests # 输出选项 addopts = -v --strict-markers --tb=short --disable-warnings -p no:cacheprovider # 标记定义 markers = unit: 单元测试 integration: 集成测试 e2e: 端到端测试 slow: 慢速测试(需要API调用) mock: 使用mock的测试 docker: Docker环境测试 requires_service: 需要真实运行的后端服务(在docker-test阶段运行) # 覆盖率配置 [coverage:run] source = . 
omit = */tests/* */venv/* */.venv/* */migrations/* */config.py [coverage:report] precision = 2 show_missing = True skip_covered = False [coverage:html] directory = htmlcov ================================================ FILE: backend/tests/unit/__init__.py ================================================ # 后端单元测试模块 ================================================ FILE: backend/tests/unit/test_ai_mock.py ================================================ """ AI服务Mock测试 验证AI服务被正确mock,不会真正调用外部API """ import pytest from unittest.mock import patch, MagicMock class TestAIMock: """AI Mock测试""" def test_ai_service_is_mocked(self, mock_ai_service): """验证AI服务被正确mock""" # 调用mock的方法 outline = mock_ai_service.generate_outline("测试prompt") # 验证返回mock数据 assert len(outline) == 2 assert outline[0]['title'] == '测试页面1' # 验证方法被调用 mock_ai_service.generate_outline.assert_called_once_with("测试prompt") def test_description_generation_mocked(self, mock_ai_service): """验证描述生成被mock""" desc = mock_ai_service.generate_page_description( "idea", [], {}, 1 ) assert desc['title'] == '测试标题' assert 'text_content' in desc def test_image_generation_mocked(self, mock_ai_service): """验证图片生成被mock""" image = mock_ai_service.generate_image("prompt", "ref.png") # 应该返回一个PIL Image对象 assert image is not None assert image.size == (1920, 1080) def test_no_real_api_calls(self, mock_ai_service): """确保没有真实API调用""" # 多次调用 for _ in range(10): mock_ai_service.generate_outline("test") mock_ai_service.generate_page_description("idea", [], {}, 1) # 验证调用次数 assert mock_ai_service.generate_outline.call_count == 10 assert mock_ai_service.generate_page_description.call_count == 10 class TestEnvironmentFlags: """环境标志测试""" def test_testing_flag_is_set(self): """验证测试标志已设置""" import os assert os.environ.get('TESTING') == 'true' def test_mock_ai_flag_is_set(self): """验证mock AI标志已设置""" import os assert os.environ.get('USE_MOCK_AI') == 'true' ================================================ FILE: backend/tests/unit/test_api_health.py 
================================================ """ 健康检查API单元测试 """ import pytest class TestHealthEndpoint: """健康检查端点测试""" def test_health_check_returns_ok(self, client): """测试健康检查返回正常状态""" response = client.get('/health') assert response.status_code == 200 data = response.get_json() assert data['status'] == 'ok' assert 'message' in data def test_health_check_response_format(self, client): """测试健康检查响应格式""" response = client.get('/health') data = response.get_json() assert isinstance(data, dict) assert 'status' in data assert 'message' in data ================================================ FILE: backend/tests/unit/test_api_material.py ================================================ """ Material upload API tests - including caption generation """ import io import pytest from unittest.mock import patch, MagicMock from PIL import Image from conftest import assert_success_response, assert_error_response def _create_test_image(): """Helper to create a test PNG image bytes""" img = Image.new('RGB', (100, 100), color='red') img_bytes = io.BytesIO() img.save(img_bytes, format='PNG') img_bytes.seek(0) return img_bytes @pytest.mark.unit class TestMaterialUpload: """Material upload endpoint tests""" def test_upload_material_without_caption(self, client): """Upload without generate_caption param should not include caption in response""" img_bytes = _create_test_image() response = client.post( '/api/materials/upload', data={'file': (img_bytes, 'test.png')}, content_type='multipart/form-data' ) data = assert_success_response(response, 201) assert 'url' in data['data'] assert 'caption' not in data['data'] @patch('controllers.material_controller._generate_image_caption') def test_upload_material_with_caption(self, mock_caption, client): """Upload with generate_caption=true should include AI caption""" mock_caption.return_value = '一张红色方块图片' img_bytes = _create_test_image() response = client.post( '/api/materials/upload?generate_caption=true', data={'file': (img_bytes, 
'test.png')}, content_type='multipart/form-data' ) data = assert_success_response(response, 201) assert data['data']['caption'] == '一张红色方块图片' assert 'url' in data['data'] mock_caption.assert_called_once() @patch('controllers.material_controller._generate_image_caption') def test_upload_material_caption_failure_still_succeeds(self, mock_caption, client): """Caption failure should return empty string, upload still succeeds""" mock_caption.return_value = '' img_bytes = _create_test_image() response = client.post( '/api/materials/upload?generate_caption=true', data={'file': (img_bytes, 'test.png')}, content_type='multipart/form-data' ) data = assert_success_response(response, 201) assert data['data']['caption'] == '' assert 'url' in data['data'] @patch('controllers.material_controller._generate_image_caption') def test_upload_material_caption_false_param(self, mock_caption, client): """generate_caption=false should not trigger caption generation""" img_bytes = _create_test_image() response = client.post( '/api/materials/upload?generate_caption=false', data={'file': (img_bytes, 'test.png')}, content_type='multipart/form-data' ) data = assert_success_response(response, 201) assert 'caption' not in data['data'] mock_caption.assert_not_called() def test_upload_material_invalid_file_type(self, client): """Unsupported file type should return 400""" response = client.post( '/api/materials/upload', data={'file': (io.BytesIO(b'fake data'), 'test.txt')}, content_type='multipart/form-data' ) assert response.status_code == 400 def test_upload_material_no_file(self, client): """No file should return 400""" response = client.post( '/api/materials/upload', content_type='multipart/form-data' ) assert response.status_code == 400 @pytest.mark.unit class TestGenerateImageCaption: """Unit tests for _generate_image_caption function""" def test_caption_returns_empty_on_missing_gemini_key(self, app): """Caption returns empty when Gemini API key is not configured""" with app.app_context(): 
app.config['AI_PROVIDER_FORMAT'] = 'gemini' app.config['GOOGLE_API_KEY'] = '' from controllers.material_controller import _generate_image_caption # Create a temp image import tempfile with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f: img = Image.new('RGB', (10, 10), color='blue') img.save(f, format='PNG') tmp_path = f.name try: result = _generate_image_caption(tmp_path) assert result == '' finally: import os os.unlink(tmp_path) def test_caption_returns_empty_on_missing_openai_key(self, app): """Caption returns empty when OpenAI API key is not configured""" with app.app_context(): app.config['AI_PROVIDER_FORMAT'] = 'openai' app.config['OPENAI_API_KEY'] = '' from controllers.material_controller import _generate_image_caption import tempfile with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f: img = Image.new('RGB', (10, 10), color='blue') img.save(f, format='PNG') tmp_path = f.name try: result = _generate_image_caption(tmp_path) assert result == '' finally: import os os.unlink(tmp_path) def test_caption_returns_empty_on_invalid_file(self, app): """Caption returns empty on invalid image file""" with app.app_context(): app.config['AI_PROVIDER_FORMAT'] = 'gemini' app.config['GOOGLE_API_KEY'] = 'fake-key' from controllers.material_controller import _generate_image_caption result = _generate_image_caption('/nonexistent/path/image.png') assert result == '' @patch('google.genai.Client') def test_caption_gemini_success(self, mock_client_class, app): """Caption with Gemini provider returns expected text""" with app.app_context(): app.config['AI_PROVIDER_FORMAT'] = 'gemini' app.config['GOOGLE_API_KEY'] = 'test-key' app.config['GOOGLE_API_BASE'] = '' app.config['IMAGE_CAPTION_MODEL'] = 'test-model' mock_client = MagicMock() mock_client_class.return_value = mock_client mock_result = MagicMock() mock_result.text = ' 一张测试图片 ' mock_client.models.generate_content.return_value = mock_result from controllers.material_controller import 
_generate_image_caption import tempfile with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as f: img = Image.new('RGB', (10, 10), color='green') img.save(f, format='PNG') tmp_path = f.name try: result = _generate_image_caption(tmp_path) assert result == '一张测试图片' mock_client.models.generate_content.assert_called_once() finally: import os os.unlink(tmp_path) ================================================ FILE: backend/tests/unit/test_api_project.py ================================================ """ 项目管理API单元测试 """ import pytest from conftest import assert_success_response, assert_error_response class TestProjectCreate: """项目创建测试""" def test_create_project_idea_mode(self, client): """测试从想法创建项目""" response = client.post('/api/projects', json={ 'creation_type': 'idea', 'idea_prompt': '生成一份关于AI的PPT' }) data = assert_success_response(response, 201) assert 'project_id' in data['data'] assert data['data']['status'] == 'DRAFT' def test_create_project_outline_mode(self, client): """测试从大纲创建项目""" response = client.post('/api/projects', json={ 'creation_type': 'outline', 'outline': [ {'title': '第一页', 'points': ['要点1']}, {'title': '第二页', 'points': ['要点2']} ] }) data = assert_success_response(response, 201) assert 'project_id' in data['data'] def test_create_project_missing_type(self, client): """测试缺少creation_type参数""" response = client.post('/api/projects', json={ 'idea_prompt': '测试' }) # 应该返回错误 assert response.status_code in [400, 422] def test_create_project_invalid_type(self, client): """测试无效的creation_type""" response = client.post('/api/projects', json={ 'creation_type': 'invalid_type', 'idea_prompt': '测试' }) assert response.status_code in [400, 422] class TestProjectGet: """项目获取测试""" def test_get_project_success(self, client, sample_project): """测试获取项目成功""" if not sample_project: pytest.skip("项目创建失败") project_id = sample_project['project_id'] response = client.get(f'/api/projects/{project_id}') data = assert_success_response(response) assert 
data['data']['project_id'] == project_id def test_get_project_not_found(self, client): """测试获取不存在的项目""" response = client.get('/api/projects/non-existent-id') assert response.status_code == 404 def test_get_project_invalid_id_format(self, client): """测试无效的项目ID格式""" response = client.get('/api/projects/invalid!@#$%id') # 可能返回404或400 assert response.status_code in [400, 404] class TestProjectUpdate: """项目更新测试""" def test_update_project_status(self, client, sample_project): """测试更新项目状态""" if not sample_project: pytest.skip("项目创建失败") project_id = sample_project['project_id'] response = client.put(f'/api/projects/{project_id}', json={ 'status': 'GENERATING' }) # 状态更新应该成功 assert response.status_code == 200 data = response.get_json() assert data['success'] is True class TestProjectDelete: """项目删除测试""" def test_delete_project_success(self, client, sample_project): """测试删除项目成功""" if not sample_project: pytest.skip("项目创建失败") project_id = sample_project['project_id'] response = client.delete(f'/api/projects/{project_id}') data = assert_success_response(response) # 确认项目已删除 get_response = client.get(f'/api/projects/{project_id}') assert get_response.status_code == 404 def test_delete_project_not_found(self, client): """测试删除不存在的项目""" response = client.delete('/api/projects/non-existent-id') assert response.status_code == 404 ================================================ FILE: backend/tests/unit/test_api_settings_provider.py ================================================ """ Settings controller tests for provider format handling. 
""" from types import SimpleNamespace from unittest.mock import MagicMock, patch from flask import Flask from controllers.settings_controller import update_settings, verify_api_key def _build_settings(**overrides): defaults = { 'ai_provider_format': 'gemini', 'api_key': None, 'api_base_url': None, 'text_model': None, } defaults.update(overrides) settings = SimpleNamespace(**defaults) settings.to_dict = lambda: { 'ai_provider_format': settings.ai_provider_format, 'api_key_length': len(settings.api_key) if settings.api_key else 0, } return settings def test_update_settings_accepts_lazyllm_provider(): """`lazyllm` should be accepted as a valid provider format.""" app = Flask(__name__) settings = _build_settings() with app.app_context(): with app.test_request_context('/api/settings/', method='PUT', json={'ai_provider_format': 'lazyllm'}): with patch('controllers.settings_controller.Settings.get_settings', return_value=settings): with patch('controllers.settings_controller.db.session.commit'): with patch('controllers.settings_controller._sync_settings_to_config'): response, status_code = update_settings() assert status_code == 200 data = response.get_json() assert data['success'] is True assert data['data']['ai_provider_format'] == 'lazyllm' def test_verify_uses_configured_text_model(): """Verify endpoint should use configured text model, not a hardcoded gemini model.""" app = Flask(__name__) app.config.update( TEXT_MODEL='gemini-3-flash-preview', AI_PROVIDER_FORMAT='lazyllm', ) settings = _build_settings(ai_provider_format='lazyllm', text_model='deepseek-chat') mock_provider = MagicMock() mock_provider.generate_text.return_value = 'OK' with app.app_context(): with app.test_request_context('/api/settings/verify', method='POST'): with patch('controllers.settings_controller.Settings.get_settings', return_value=settings): with patch('services.ai_providers.get_text_provider', return_value=mock_provider) as mock_get_provider: response, status_code = verify_api_key() assert 
status_code == 200 data = response.get_json() assert data['success'] is True assert data['data']['available'] is True mock_get_provider.assert_called_once_with(model='deepseek-chat') mock_provider.generate_text.assert_called_once() ================================================ FILE: backend/tests/unit/test_editable_pptx_style_extraction.py ================================================ from pathlib import Path from services.export_service import ExportError, ExportService from services.image_editability.text_attribute_extractors import TextStyleResult class FailingExtractor: def extract_batch_with_full_image(self, full_image, text_elements, **kwargs): raise RuntimeError("caption_provider 不支持图片输入") def extract(self, image, text_content=None, **kwargs): return TextStyleResult(confidence=0.0, metadata={"error": "caption_provider 不支持图片输入"}) class EmptyGlobalExtractor: def extract_batch_with_full_image(self, full_image, text_elements, **kwargs): return {} def extract(self, image, text_content=None, **kwargs): return TextStyleResult(font_color_rgb=(255, 0, 0), confidence=0.9) class EditableImageStub: class BBox: def __init__(self): self.x0 = 0 self.y0 = 0 self.x1 = 100 self.y1 = 40 class Element: def __init__(self, image_path: str): self.element_type = "text" self.element_id = "text_0" self.content = "hello" self.image_path = image_path self.bbox = EditableImageStub.BBox() self.bbox_global = self.bbox self.children = [] def __init__(self, image_path: str): self.image_path = image_path self.elements = [EditableImageStub.Element(image_path)] def _make_editable_images(tmp_path): image_path = Path(tmp_path) / "text.png" image_path.write_bytes(b"png") return [EditableImageStub(str(image_path))] def test_hybrid_style_extraction_fails_fast_when_provider_has_no_image_input(tmp_path): editable_images = _make_editable_images(tmp_path) try: ExportService._batch_extract_text_styles_hybrid( editable_images=editable_images, text_attribute_extractor=FailingExtractor(), 
max_workers=2, fail_fast=True, ) assert False, "expected ExportError" except ExportError as exc: assert exc.error_type == "style_extraction" assert "不支持图片输入" in exc.message assert "image caption" in exc.help_text def test_hybrid_style_extraction_reports_missing_global_results_when_not_fail_fast(tmp_path): editable_images = _make_editable_images(tmp_path) results, failures = ExportService._batch_extract_text_styles_hybrid( editable_images=editable_images, text_attribute_extractor=EmptyGlobalExtractor(), max_workers=2, fail_fast=False, ) assert "text_0" in results assert failures == [("text_0", "全局识别未返回完整结果")] ================================================ FILE: backend/tests/unit/test_file_parser_service.py ================================================ """ Unit tests for FileParserService provider-specific behavior. """ import os import tempfile from pathlib import Path from unittest.mock import MagicMock, patch from PIL import Image from services.file_parser_service import FileParserService def _create_temp_image() -> str: with tempfile.NamedTemporaryFile(prefix='caption_test_', suffix='.png', delete=False) as tmp: Image.new('RGB', (20, 20), color='green').save(tmp.name) return tmp.name def test_generate_single_caption_openai_uses_configured_model(): """OpenAI caption generation should use `image_caption_model` from service config.""" image_path = _create_temp_image() try: service = FileParserService( mineru_token='test-token', openai_api_key='test-openai-key', image_caption_model='gpt-4.1-mini', provider_format='openai', ) mock_client = MagicMock() mock_response = MagicMock() mock_response.choices = [MagicMock(message=MagicMock(content='示例描述'))] mock_client.chat.completions.create.return_value = mock_response with patch('utils.path_utils.find_mineru_file_with_prefix', return_value=Path(image_path)): with patch.object(service, '_get_openai_client', return_value=mock_client): caption = service._generate_single_caption('/files/mineru/demo.png') assert caption == 
'示例描述' assert mock_client.chat.completions.create.call_args.kwargs['model'] == 'gpt-4.1-mini' finally: if os.path.exists(image_path): os.remove(image_path) def test_can_generate_captions_does_not_accept_legacy_prefixes(): """LazyLLM caption check should ignore legacy BANANA_*/LAZYLLM_* key prefixes.""" source = 'unit_test_source' with patch.dict( os.environ, { f'BANANA_{source.upper()}_API_KEY': 'test-key', f'LAZYLLM_{source.upper()}_API_KEY': 'test-key', f'BANANA_SLIDES_{source.upper()}_API_KEY': 'test-key', }, clear=False, ): service = FileParserService( mineru_token='test-token', provider_format='lazyllm', lazyllm_image_caption_source=source, ) assert service._can_generate_captions() is False def test_can_generate_captions_accepts_vendor_prefix_key(): """LazyLLM caption check should accept {SOURCE}_API_KEY vendor prefix.""" source = 'qwen' key_name = f'{source.upper()}_API_KEY' with patch.dict(os.environ, {key_name: 'test-key'}, clear=False): service = FileParserService( mineru_token='test-token', provider_format='lazyllm', lazyllm_image_caption_source=source, ) assert service._can_generate_captions() is True ================================================ FILE: backend/tests/unit/test_image_prompt_ratio.py ================================================ """Test that image generation prompt uses the correct aspect ratio.""" from services.prompts import get_image_generation_prompt class TestImagePromptAspectRatio: def test_default_ratio_is_16_9(self): prompt = get_image_generation_prompt( page_desc="Test page", outline_text="Test outline", current_section="Section 1", ) assert "16:9比例" in prompt def test_custom_ratio_4_3(self): prompt = get_image_generation_prompt( page_desc="Test page", outline_text="Test outline", current_section="Section 1", aspect_ratio="4:3", ) assert "4:3比例" in prompt assert "16:9比例" not in prompt def test_custom_ratio_1_1(self): prompt = get_image_generation_prompt( page_desc="Test page", outline_text="Test outline", 
current_section="Section 1", aspect_ratio="1:1", ) assert "1:1比例" in prompt assert "16:9比例" not in prompt ================================================ FILE: backend/tests/unit/test_lazyllm_image_content_type.py ================================================ """ Unit tests for LazyLLM image provider content-type fallback. Verifies that when LazyLLM raises a content-type error (S3 returns application/octet-stream), the provider falls back to manual download. """ import io import sys import types import pytest from unittest.mock import MagicMock, patch from PIL import Image def _make_png_bytes() -> bytes: img = Image.new('RGB', (100, 60), color=(255, 0, 0)) buf = io.BytesIO() img.save(buf, format='PNG') return buf.getvalue() def _inject_lazyllm_mock(): """Inject a fake lazyllm into sys.modules so the provider can be imported.""" lz = types.ModuleType('lazyllm') lz.namespace = MagicMock(return_value=MagicMock()) components = types.ModuleType('lazyllm.components') formatter = types.ModuleType('lazyllm.components.formatter') formatter.decode_query_with_filepaths = MagicMock(return_value={'files': []}) sys.modules.setdefault('lazyllm', lz) sys.modules.setdefault('lazyllm.components', components) sys.modules.setdefault('lazyllm.components.formatter', formatter) return lz, formatter class TestLazyLLMContentTypeFallback: def setup_method(self): self._lz, self._formatter = _inject_lazyllm_mock() # Remove cached provider module so it re-imports with our mock for key in ('services.ai_providers.image.lazyllm_provider', 'backend.services.ai_providers.image.lazyllm_provider'): sys.modules.pop(key, None) def _make_provider(self): with patch('services.ai_providers.image.lazyllm_provider.ensure_lazyllm_namespace_key'): from services.ai_providers.image.lazyllm_provider import LazyLLMImageProvider provider = LazyLLMImageProvider.__new__(LazyLLMImageProvider) provider._source = 'siliconflow' provider.client = MagicMock() return provider def 
def test_untrusted_host_is_not_fetched(self):
    """A content-type error naming an untrusted host must not trigger a download (SSRF prevention)."""
    evil_url = 'https://evil.example.com/steal.png'
    failure_text = (
        f'Failed to load image from {evil_url}\n'
        'Invalid content type for image: application/octet-stream'
    )

    provider = self._make_provider()
    provider.client.side_effect = Exception(failure_text)

    requests_get = 'services.ai_providers.image.lazyllm_provider.requests.get'
    with patch(requests_get) as mock_get:
        with pytest.raises(Exception):
            provider.generate_image(prompt='test prompt')

    # No HTTP request may have been attempted.
    mock_get.assert_not_called()

def test_non_content_type_error_is_reraised(self):
    """Errors unrelated to content type propagate to the caller unchanged."""
    provider = self._make_provider()
    provider.client.side_effect = RuntimeError('network timeout')

    with pytest.raises(RuntimeError, match='network timeout'):
        provider.generate_image(prompt='test prompt')
position-based logic with a minimal Flask app.""" import json import os import sys import tempfile import pytest # Ensure backend is on path sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) os.environ.setdefault('TESTING', 'true') os.environ.setdefault('GOOGLE_API_KEY', 'mock') @pytest.fixture(scope='module') def merge_app(): """Minimal Flask app for testing _smart_merge_pages.""" from flask import Flask from models import db, Page, Project app = Flask(__name__) tmp = tempfile.mkdtemp() app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:///{tmp}/test.db' app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False db.init_app(app) with app.app_context(): db.create_all() yield app import shutil shutil.rmtree(tmp, ignore_errors=True) @pytest.fixture def ctx(merge_app): with merge_app.app_context(): from models import db yield db.session.rollback() for t in reversed(db.metadata.sorted_tables): db.session.execute(t.delete()) db.session.commit() def _make_project(pid='test-proj'): from models import db, Project p = Project(id=pid, creation_type='idea', idea_prompt='test') db.session.add(p) db.session.commit() return pid def _make_page(project_id, title, order, desc=None, image_path=None, status='DRAFT'): from models import db, Page page = Page(project_id=project_id, order_index=order, status=status) page.set_outline_content({'title': title, 'points': ['p1']}) if desc: page.set_description_content({'text': desc}) if image_path: page.generated_image_path = image_path db.session.add(page) db.session.commit() return page class TestPositionBasedMerge: def test_equal_pages_preserves_description_and_image(self, ctx): """Same number of pages: outline updated, description/image kept.""" from controllers.project_controller import _smart_merge_pages from models import db pid = _make_project() old0 = _make_page(pid, 'Old Title A', 0, desc='desc A', image_path='/img/a.png', status='IMAGE_GENERATED') old1 = _make_page(pid, 'Old Title B', 1, desc='desc B', 
image_path='/img/b.png', status='IMAGE_GENERATED') result = _smart_merge_pages(pid, [ {'title': 'New Title A', 'points': ['new']}, {'title': 'New Title B', 'points': ['new']}, ]) db.session.flush() assert len(result) == 2 # Same page objects reused assert result[0].id == old0.id assert result[1].id == old1.id # Outline updated assert result[0].get_outline_content()['title'] == 'New Title A' assert result[1].get_outline_content()['title'] == 'New Title B' # Description and image preserved assert result[0].get_description_content()['text'] == 'desc A' assert result[0].generated_image_path == '/img/a.png' assert result[1].get_description_content()['text'] == 'desc B' assert result[1].generated_image_path == '/img/b.png' def test_more_pages_creates_new_ones(self, ctx): """New outline has more pages: old pages kept, new pages created.""" from controllers.project_controller import _smart_merge_pages from models import db pid = _make_project('proj-more') old0 = _make_page(pid, 'Page A', 0, desc='desc A') result = _smart_merge_pages(pid, [ {'title': 'Page A updated', 'points': []}, {'title': 'Page B new', 'points': ['b1']}, ]) db.session.flush() assert len(result) == 2 assert result[0].id == old0.id assert result[0].get_description_content()['text'] == 'desc A' assert result[1].status == 'DRAFT' assert result[1].get_description_content() is None def test_fewer_pages_deletes_trailing(self, ctx): """New outline has fewer pages: trailing old pages deleted.""" from controllers.project_controller import _smart_merge_pages from models import db, Page pid = _make_project('proj-fewer') old0 = _make_page(pid, 'Keep', 0, desc='keep me') old1 = _make_page(pid, 'Delete', 1, desc='gone') old2 = _make_page(pid, 'Also Delete', 2, desc='also gone') result = _smart_merge_pages(pid, [ {'title': 'Kept Page', 'points': []}, ]) db.session.flush() assert len(result) == 1 assert result[0].id == old0.id assert result[0].get_description_content()['text'] == 'keep me' assert Page.query.get(old1.id) 
is None assert Page.query.get(old2.id) is None def test_no_old_pages_creates_all_new(self, ctx): """No existing pages: all new pages created.""" from controllers.project_controller import _smart_merge_pages from models import db pid = _make_project('proj-empty') result = _smart_merge_pages(pid, [ {'title': 'Brand New', 'points': ['x']}, ]) db.session.flush() assert len(result) == 1 assert result[0].status == 'DRAFT' assert result[0].get_outline_content()['title'] == 'Brand New' def test_part_field_updated(self, ctx): """Part field is updated from new data.""" from controllers.project_controller import _smart_merge_pages from models import db pid = _make_project('proj-part') old0 = _make_page(pid, 'Page', 0) result = _smart_merge_pages(pid, [ {'title': 'Page', 'points': [], 'part': 'Chapter 2'}, ]) db.session.flush() assert result[0].part == 'Chapter 2' def test_order_index_updated(self, ctx): """Order indices are set sequentially.""" from controllers.project_controller import _smart_merge_pages from models import db pid = _make_project('proj-order') _make_page(pid, 'A', 0) _make_page(pid, 'B', 1) result = _smart_merge_pages(pid, [ {'title': 'X', 'points': []}, {'title': 'Y', 'points': []}, {'title': 'Z', 'points': []}, ]) db.session.flush() assert [p.order_index for p in result] == [0, 1, 2] ================================================ FILE: backend/utils/__init__.py ================================================ """Utils package""" from .response import ( success_response, error_response, bad_request, not_found, invalid_status, ai_service_error, rate_limit_error ) from .validators import validate_project_status, validate_page_status, allowed_file from .path_utils import convert_mineru_path_to_local, find_mineru_file_with_prefix, find_file_with_prefix from .pptx_builder import PPTXBuilder from .page_utils import parse_page_ids_from_query, parse_page_ids_from_body, get_filtered_pages __all__ = [ 'success_response', 'error_response', 'bad_request', 'not_found', 
def check_image_resolution(image: Image.Image, expected_resolution: str) -> Tuple[str, bool]:
    """
    Classify an image by its longest side and compare with the expected setting.

    Args:
        image: Object exposing ``width`` and ``height`` (a PIL image).
        expected_resolution: Expected setting such as "1K", "2K" or "4K";
            compared case-insensitively.

    Returns:
        Tuple of (actual_resolution_category, is_match).
    """
    longest_side = image.width if image.width >= image.height else image.height

    # Ordered (exclusive upper bound, label) pairs; anything larger is "4K".
    category = "4K"
    for upper_bound, label in ((1500, "1K"), (3000, "2K")):
        if longest_side < upper_bound:
            category = label
            break

    return category, expected_resolution.upper() == category
logger = logging.getLogger(__name__)

# LaTeX escape sequences and the plain text they stand for
LATEX_ESCAPES = {
    r'\%': '%',
    r'\$': '$',
    r'\&': '&',
    r'\#': '#',
    r'\_': '_',
    r'\{': '{',
    r'\}': '}',
    r'\ ': ' ',
    r'\,': ' ',  # thin space
    r'\;': ' ',  # thick space
    r'\!': '',   # negative thin space
    r'\quad': ' ',
    r'\qquad': ' ',
}

# Common LaTeX commands and their Unicode equivalents
LATEX_SYMBOLS = {
    # Greek letters
    r'\alpha': 'α', r'\beta': 'β', r'\gamma': 'γ', r'\delta': 'δ',
    r'\epsilon': 'ε', r'\zeta': 'ζ', r'\eta': 'η', r'\theta': 'θ',
    r'\iota': 'ι', r'\kappa': 'κ', r'\lambda': 'λ', r'\mu': 'μ',
    r'\nu': 'ν', r'\xi': 'ξ', r'\pi': 'π', r'\rho': 'ρ',
    r'\sigma': 'σ', r'\tau': 'τ', r'\upsilon': 'υ', r'\phi': 'φ',
    r'\chi': 'χ', r'\psi': 'ψ', r'\omega': 'ω',
    r'\Gamma': 'Γ', r'\Delta': 'Δ', r'\Theta': 'Θ', r'\Lambda': 'Λ',
    r'\Xi': 'Ξ', r'\Pi': 'Π', r'\Sigma': 'Σ', r'\Phi': 'Φ',
    r'\Psi': 'Ψ', r'\Omega': 'Ω',
    # Math operators
    r'\times': '×', r'\div': '÷', r'\pm': '±', r'\mp': '∓',
    r'\cdot': '·', r'\ast': '∗', r'\star': '☆',
    r'\leq': '≤', r'\geq': '≥', r'\neq': '≠', r'\approx': '≈',
    r'\equiv': '≡', r'\sim': '∼', r'\propto': '∝',
    r'\infty': '∞', r'\partial': '∂', r'\nabla': '∇',
    r'\sum': '∑', r'\prod': '∏', r'\int': '∫',
    r'\sqrt': '√', r'\angle': '∠', r'\degree': '°',
    # Arrows
    r'\leftarrow': '←', r'\rightarrow': '→', r'\leftrightarrow': '↔',
    r'\Leftarrow': '⇐', r'\Rightarrow': '⇒', r'\Leftrightarrow': '⇔',
    # Miscellaneous
    r'\ldots': '…', r'\cdots': '⋯', r'\vdots': '⋮',
    r'\forall': '∀', r'\exists': '∃', r'\in': '∈', r'\notin': '∉',
    r'\subset': '⊂', r'\supset': '⊃', r'\cup': '∪', r'\cap': '∩',
}

# Characters that have a Unicode superscript form
SUPERSCRIPT_MAP = {
    '0': '⁰', '1': '¹', '2': '²', '3': '³', '4': '⁴',
    '5': '⁵', '6': '⁶', '7': '⁷', '8': '⁸', '9': '⁹',
    '+': '⁺', '-': '⁻', '=': '⁼', '(': '⁽', ')': '⁾',
    'n': 'ⁿ', 'i': 'ⁱ',
}

# Characters that have a Unicode subscript form
SUBSCRIPT_MAP = {
    '0': '₀', '1': '₁', '2': '₂', '3': '₃', '4': '₄',
    '5': '₅', '6': '₆', '7': '₇', '8': '₈', '9': '₉',
    '+': '₊', '-': '₋', '=': '₌', '(': '₍', ')': '₎',
    'a': 'ₐ', 'e': 'ₑ', 'o': 'ₒ', 'x': 'ₓ',
    'i': 'ᵢ', 'j': 'ⱼ', 'n': 'ₙ', 'm': 'ₘ',
}

# Escaped literals that later passes would otherwise re-interpret ('_' as a
# subscript marker, braces as grouping). They are parked on private-use code
# points during conversion and restored at the very end.
_PROTECTED_ESCAPES = {r'\_': '\ue000', r'\{': '\ue001', r'\}': '\ue002'}
_PROTECTED_RESTORE = {'\ue000': '_', '\ue001': '{', '\ue002': '}'}

# Matches a known symbol command only when it is a complete command name, i.e.
# not followed by another letter. Without the lookahead, '\in' would match
# inside '\inf' and '\sim' inside '\simeq'.
_SYMBOL_PATTERN = re.compile(
    '(?:'
    + '|'.join(re.escape(cmd) for cmd in sorted(LATEX_SYMBOLS, key=len, reverse=True))
    + r')(?![A-Za-z])'
)

_SUPERSCRIPT_PATTERN = re.compile(r'\^\{([^{}]*)\}|\^([0-9a-zA-Z])')
_SUBSCRIPT_PATTERN = re.compile(r'_\{([^{}]*)\}|_([0-9a-zA-Z])')


def _script_replacer(char_map):
    """Build a re.sub callback that maps script content through ``char_map``."""
    def replace(match):
        # group(1) is the braced form and may legitimately be '' (e.g. "x^{}"),
        # so test against None rather than truthiness.
        content = match.group(1) if match.group(1) is not None else match.group(2)
        return ''.join(char_map.get(c, c) for c in content)
    return replace


def is_simple_latex(latex: str) -> bool:
    r"""
    Decide whether a LaTeX string can be rendered as plain Unicode text.

    "Simple" means that, after removing everything latex_to_text() understands,
    no backslash command is left over. The understood constructs are:
    - escaped characters (e.g. 10\%)
    - known symbol commands (e.g. \alpha)
    - simple super/subscripts (e.g. x^2, x_1, x^{10})

    Args:
        latex: LaTeX source string.

    Returns:
        True when no unhandled LaTeX command remains, False otherwise.
    """
    remaining = latex

    # Strip escaped characters
    for escape in LATEX_ESCAPES:
        remaining = remaining.replace(escape, '')

    # Strip known symbol commands (whole commands only)
    remaining = _SYMBOL_PATTERN.sub('', remaining)

    # Strip simple super/subscripts: ^{...}, ^x, _{...}, _x
    remaining = _SUPERSCRIPT_PATTERN.sub('', remaining)
    remaining = _SUBSCRIPT_PATTERN.sub('', remaining)

    # Any backslash still present belongs to a command we cannot express as
    # text (\frac, \sin, \left, ...), so the formula is not simple.
    return '\\' not in remaining


def latex_to_text(latex: str) -> str:
    r"""
    Convert simple LaTeX into Unicode text.

    Escaped ``\_``, ``\{`` and ``\}`` are kept as literal characters in the
    output; unknown commands are left untouched.

    Args:
        latex: LaTeX source string.

    Returns:
        The converted text with whitespace collapsed and grouping braces removed.
    """
    result = latex

    # 1. Escaped characters. Literals that would confuse later passes are
    #    replaced by placeholders for now.
    for escape, char in LATEX_ESCAPES.items():
        result = result.replace(escape, _PROTECTED_ESCAPES.get(escape, char))

    # 2. Symbol commands -> Unicode
    result = _SYMBOL_PATTERN.sub(lambda m: LATEX_SYMBOLS[m.group(0)], result)

    # 3. Superscripts ^{...} or ^x
    result = _SUPERSCRIPT_PATTERN.sub(_script_replacer(SUPERSCRIPT_MAP), result)

    # 4. Subscripts _{...} or _x
    result = _SUBSCRIPT_PATTERN.sub(_script_replacer(SUBSCRIPT_MAP), result)

    # 5. Unwrap text-style commands such as \text{...} and \mathrm{...}
    result = re.sub(r'\\(?:text|mathrm|mathbf|mathit|mathbb|mathcal){([^{}]*)}', r'\1', result)

    # 6. Drop grouping braces and collapse whitespace
    result = result.replace('{', '').replace('}', '')
    result = re.sub(r'\s+', ' ', result).strip()

    # 7. Put the protected literals back
    for placeholder, char in _PROTECTED_RESTORE.items():
        result = result.replace(placeholder, char)

    return result


def latex_to_mathml(latex: str) -> Optional[str]:
    """
    Convert LaTeX to MathML using the optional ``latex2mathml`` package.

    Args:
        latex: LaTeX source string.

    Returns:
        MathML string, or None when the package is missing or conversion fails.
    """
    try:
        import latex2mathml.converter
        return latex2mathml.converter.convert(latex)
    except Exception as e:
        logger.warning(f"LaTeX to MathML conversion failed: {e}")
        return None


def mathml_to_omml(mathml: str) -> Optional[str]:
    """
    Convert MathML to OMML (Office Math Markup Language).

    Applies the MML2OMML.xsl stylesheet expected next to this module.

    Args:
        mathml: MathML string.

    Returns:
        OMML string, or None when lxml or the stylesheet is unavailable or the
        transformation fails.
    """
    try:
        from lxml import etree
        import os

        # The stylesheet is looked up in this module's directory
        xsl_path = os.path.join(os.path.dirname(__file__), 'MML2OMML.xsl')
        if not os.path.exists(xsl_path):
            logger.warning(f"MML2OMML.xsl not found at {xsl_path}")
            return None

        mathml_tree = etree.fromstring(mathml.encode('utf-8'))
        transform = etree.XSLT(etree.parse(xsl_path))
        omml_tree = transform(mathml_tree)

        return etree.tostring(omml_tree, encoding='unicode')
    except ImportError:
        logger.warning("lxml not installed, cannot convert to OMML")
        return None
    except Exception as e:
        logger.warning(f"MathML to OMML conversion failed: {e}")
        return None


def convert_latex_for_pptx(latex: str) -> Tuple[str, Optional[str]]:
    """
    Convert a LaTeX formula for use in a PPTX file.

    Args:
        latex: LaTeX source string.

    Returns:
        (text_fallback, omml) tuple:
        - text_fallback: plain-text rendering (always present)
        - omml: OMML string, or None for simple formulas or failed conversion
    """
    # The text fallback is always produced
    text_fallback = latex_to_text(latex)

    # Simple formulas are fully represented by the text
    if is_simple_latex(latex):
        return text_fallback, None

    mathml = latex_to_mathml(latex)
    if mathml:
        omml = mathml_to_omml(mathml)
        if omml:
            return text_fallback, omml

    return text_fallback, None
logger = logging.getLogger(__name__)


# ============== Bbox helper functions ==============

def normalize_bbox(bbox: Union[Tuple, List, dict]) -> Tuple[int, int, int, int]:
    """
    Normalize a bbox in any supported format to an (x1, y1, x2, y2) tuple.

    Supported inputs:
    - tuple/list: (x1, y1, x2, y2)
    - dict: {"x1": x1, "y1": y1, "x2": x2, "y2": y2}
    - dict: {"x": x, "y": y, "width": w, "height": h}

    Raises:
        ValueError: if the format is not recognized, including a dict that
            starts like a known format but is missing one of its keys.
    """
    if isinstance(bbox, dict):
        try:
            if 'x1' in bbox:
                return (bbox['x1'], bbox['y1'], bbox['x2'], bbox['y2'])
            if 'x' in bbox:
                return (
                    bbox['x'],
                    bbox['y'],
                    bbox['x'] + bbox['width'],
                    bbox['y'] + bbox['height'],
                )
        except KeyError as exc:
            # An incomplete dict is just another unrecognized format; raising
            # ValueError lets normalize_bboxes() skip it like any other bad entry.
            raise ValueError(f"无法识别的bbox字典格式: {bbox}") from exc
        raise ValueError(f"无法识别的bbox字典格式: {bbox}")

    if isinstance(bbox, (tuple, list)) and len(bbox) == 4:
        return tuple(bbox)

    raise ValueError(f"无法识别的bbox格式: {bbox}")


def normalize_bboxes(bboxes: List[Union[Tuple, List, dict]]) -> List[Tuple[int, int, int, int]]:
    """Normalize a list of bboxes, logging and skipping entries that are invalid."""
    result = []
    for bbox in bboxes:
        try:
            result.append(normalize_bbox(bbox))
        except ValueError as e:
            logger.warning(str(e))
    return result


def merge_two_boxes(box1: Tuple, box2: Tuple) -> Tuple[int, int, int, int]:
    """Return the smallest bbox that contains both input bboxes."""
    return (
        min(box1[0], box2[0]),
        min(box1[1], box2[1]),
        max(box1[2], box2[2]),
        max(box1[3], box2[3]),
    )


def _iterative_merge(
    bboxes: List[Tuple[int, int, int, int]],
    should_merge_fn: Callable[[Tuple, Tuple], bool]
) -> List[Tuple[int, int, int, int]]:
    """
    Repeatedly merge bboxes until no pair satisfies ``should_merge_fn``.

    Args:
        bboxes: Normalized (x1, y1, x2, y2) bboxes.
        should_merge_fn: Predicate deciding whether two bboxes should be merged.

    Returns:
        A new list with the merged bboxes.
    """
    if not bboxes:
        return []

    boxes = list(bboxes)
    if len(boxes) == 1:
        return boxes

    changed = True
    while changed:
        changed = False
        consumed = set()
        next_round = []

        for i, seed in enumerate(boxes):
            if i in consumed:
                continue

            # Grow the seed with every later box it should merge with; the
            # predicate is evaluated against the grown box, not the seed.
            grown = seed
            for j in range(i + 1, len(boxes)):
                if j in consumed:
                    continue
                if should_merge_fn(grown, boxes[j]):
                    grown = merge_two_boxes(grown, boxes[j])
                    consumed.add(j)
                    changed = True

            next_round.append(grown)

        # A merge may enable further merges, so run another pass when anything changed
        boxes = next_round

    return boxes
def create_mask_from_bboxes(
    image_size: Tuple[int, int],
    bboxes: List[Union[Tuple[int, int, int, int], dict]],
    mask_color: Tuple[int, int, int] = (255, 255, 255),
    background_color: Tuple[int, int, int] = (0, 0, 0),
    expand_pixels: int = 0
) -> Image.Image:
    """
    Build a mask image from a list of bounding boxes.

    Args:
        image_size: Image size as (width, height).
        bboxes: Bounding boxes; each entry may be
            - a tuple/list (x1, y1, x2, y2) with top-left and bottom-right corners
            - a dict {"x": x, "y": y, "width": w, "height": h}
            - a dict {"x1": x1, "y1": y1, "x2": x2, "y2": y2}
        mask_color: Fill color of the bbox regions (default white).
        background_color: Color of everything else (default black).
        expand_pixels: Positive values grow each bbox, negative values shrink it.

    Returns:
        RGB PIL image of size ``image_size``.

    Raises:
        Exception: any failure is logged and re-raised unchanged.
    """
    def _corners(raw):
        """Return (x1, y1, x2, y2) for a supported bbox, or None after warning."""
        if isinstance(raw, dict):
            if all(key in raw for key in ('x1', 'y1', 'x2', 'y2')):
                return raw['x1'], raw['y1'], raw['x2'], raw['y2']
            if all(key in raw for key in ('x', 'y', 'width', 'height')):
                return raw['x'], raw['y'], raw['x'] + raw['width'], raw['y'] + raw['height']
            logger.warning(f"无法识别的 bbox 字典格式: {raw}")
            return None
        if isinstance(raw, (tuple, list)) and len(raw) == 4:
            first, second, third, fourth = raw
            return first, second, third, fourth
        logger.warning(f"无法识别的 bbox 格式: {raw}")
        return None

    try:
        # Start from a canvas filled with the background color
        mask = Image.new('RGB', image_size, background_color)
        draw = ImageDraw.Draw(mask)

        logger.info(f"创建掩码图像,尺寸: {image_size}, bbox数量: {len(bboxes)}")

        summaries = []  # one human-readable line per rectangle actually drawn
        for number, bbox in enumerate(bboxes, start=1):
            corners = _corners(bbox)
            if corners is None:
                continue
            x1, y1, x2, y2 = corners
            x1_orig, y1_orig, x2_orig, y2_orig = corners

            if expand_pixels > 0:
                # Grow outward, never past the image border
                x1 = max(0, x1 - expand_pixels)
                y1 = max(0, y1 - expand_pixels)
                x2 = min(image_size[0], x2 + expand_pixels)
                y2 = min(image_size[1], y2 + expand_pixels)
            elif expand_pixels < 0:
                # Shrink inward; a box that collapses is skipped
                inset = abs(expand_pixels)
                x1, y1 = x1 + inset, y1 + inset
                x2, y2 = x2 - inset, y2 - inset
                if x2 <= x1 or y2 <= y1:
                    logger.warning(f"bbox {number} 收缩后无效: ({x1}, {y1}, {x2}, {y2}),跳过")
                    continue

            # Clamp every coordinate into the image
            x1 = max(0, min(x1, image_size[0]))
            y1 = max(0, min(y1, image_size[1]))
            x2 = max(0, min(x2, image_size[0]))
            y2 = max(0, min(y2, image_size[1]))

            if x2 <= x1 or y2 <= y1:
                logger.warning(f"bbox {number} 最终坐标无效: ({x1}, {y1}, {x2}, {y2}),跳过")
                continue

            draw.rectangle([x1, y1, x2, y2], fill=mask_color)

            width, height = x2 - x1, y2 - y1
            if expand_pixels == 0:
                summaries.append(f" [{number}] ({x1}, {y1}, {x2}, {y2}) 尺寸: {width}x{height}")
            elif expand_pixels > 0:
                summaries.append(f" [{number}] 原始: ({x1_orig}, {y1_orig}, {x2_orig}, {y2_orig}) -> 扩展后: ({x1}, {y1}, {x2}, {y2}) 尺寸: {width}x{height}")
            else:
                summaries.append(f" [{number}] 原始: ({x1_orig}, {y1_orig}, {x2_orig}, {y2_orig}) -> 收缩后: ({x1}, {y1}, {x2}, {y2}) 尺寸: {width}x{height}")
            logger.debug(f"bbox {number}: ({x1}, {y1}, {x2}, {y2}) 尺寸: {width}x{height}")

        # Report every rectangle that made it onto the mask
        if summaries:
            logger.info(f"添加了 {len(summaries)} 个bbox的mask:")
            for summary in summaries:
                logger.info(summary)

        logger.info(f"掩码图像创建完成")
        return mask

    except Exception as e:
        logger.error(f"创建掩码图像失败: {str(e)}", exc_info=True)
        raise


def create_inverse_mask_from_bboxes(
    image_size: Tuple[int, int],
    bboxes: List[Union[Tuple[int, int, int, int], dict]],
    expand_pixels: int = 0
) -> Image.Image:
    """
    Build an inverse mask: bbox regions are black, everything else is white.

    Args:
        image_size: Image size as (width, height).
        bboxes: Bounding boxes (same formats as create_mask_from_bboxes).
        expand_pixels: Pixels to grow (positive) or shrink (negative) each bbox.

    Returns:
        RGB PIL image with the colors of the regular mask swapped.
    """
    black, white = (0, 0, 0), (255, 255, 255)
    return create_mask_from_bboxes(
        image_size,
        bboxes,
        mask_color=black,
        background_color=white,
        expand_pixels=expand_pixels,
    )


def create_mask_from_image_and_bboxes(
    image: Image.Image,
    bboxes: List[Union[Tuple[int, int, int, int], dict]],
    expand_pixels: int = 0
) -> Image.Image:
    """
    Convenience wrapper that takes the mask size from an existing image.

    Args:
        image: Source image; only its ``size`` is read.
        bboxes: Bounding boxes (same formats as create_mask_from_bboxes).
        expand_pixels: Pixels to grow (positive) or shrink (negative) each bbox.

    Returns:
        Mask image with the same size as ``image``.
    """
    target_size = image.size
    return create_mask_from_bboxes(target_size, bboxes, expand_pixels=expand_pixels)
def merge_vertical_nearby_bboxes(
    bboxes: List[Tuple[int, int, int, int]],
    vertical_gap_ratio: float = 0.8,
    horizontal_overlap_ratio: float = 0.3
) -> List[Tuple[int, int, int, int]]:
    """
    Merge bboxes that are stacked closely on top of each other (text lines).

    Merge decisions are made on the ORIGINAL boxes, sorted top to bottom, so a
    growing merged box cannot pull in ever more distant neighbours:
    - each pair of neighbours is tested first;
    - a pair qualifies when its vertical gap is at most ``vertical_gap_ratio``
      times the average box height, and it either overlaps horizontally by at
      least ``horizontal_overlap_ratio`` (relative to the narrower box) or has
      no overlap but a horizontal gap smaller than the average height;
    - the merges are then applied in one pass.

    Args:
        bboxes: Bounding boxes [(x1, y1, x2, y2), ...]; entries are normalized
            with normalize_bboxes(), invalid ones are dropped.
        vertical_gap_ratio: Vertical gap threshold relative to the average height.
        horizontal_overlap_ratio: Minimum horizontal overlap ratio.

    Returns:
        Merged bboxes as (x1, y1, x2, y2) tuples.
    """
    if not bboxes:
        return []

    # Normalize before the short-circuit so a single-element input comes back
    # in the same tuple form as the multi-element path.
    normalized = normalize_bboxes(bboxes)
    if len(normalized) <= 1:
        return normalized

    # Top-to-bottom order
    normalized.sort(key=lambda b: b[1])

    avg_height = sum(b[3] - b[1] for b in normalized) / len(normalized)
    max_vertical_gap = avg_height * vertical_gap_ratio

    def get_horizontal_overlap(box1, box2):
        """Horizontal overlap as a fraction of the narrower box's width."""
        overlap = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
        min_width = min(box1[2] - box1[0], box2[2] - box2[0])
        return overlap / min_width if min_width > 0 else 0

    def should_merge_adjacent(box1, box2):
        """Decide whether two neighbouring original boxes belong together."""
        # Vertical gap = top of the lower box minus bottom of the upper box
        if box2[1] - box1[3] > max_vertical_gap:
            return False

        h_overlap = get_horizontal_overlap(box1, box2)
        if h_overlap >= horizontal_overlap_ratio:
            return True

        # No overlap at all, but horizontally close: still the same block
        if h_overlap <= 0:
            h_gap = max(0, max(box2[0] - box1[2], box1[0] - box2[2]))
            if h_gap < avg_height:
                return True

        return False

    # Walk neighbouring pairs; each decision uses the original boxes, while the
    # running box accumulates the merged extent.
    result = []
    current_box = normalized[0]
    for upper, lower in zip(normalized, normalized[1:]):
        if should_merge_adjacent(upper, lower):
            current_box = merge_two_boxes(current_box, lower)
        else:
            result.append(current_box)
            current_box = lower
    result.append(current_box)

    logger.info(f"合并相邻文字行bbox:{len(bboxes)} -> {len(result)}")
    return result


def merge_overlapping_bboxes(
    bboxes: List[Tuple[int, int, int, int]],
    merge_threshold: int = 10
) -> List[Tuple[int, int, int, int]]:
    """
    Merge bboxes that overlap or lie within ``merge_threshold`` pixels of each other.

    Args:
        bboxes: Bounding boxes [(x1, y1, x2, y2), ...].
        merge_threshold: Maximum distance in pixels at which two boxes are merged.

    Returns:
        Merged bboxes.
    """
    if not bboxes:
        return []

    normalized = normalize_bboxes(bboxes)
    if not normalized:
        return []

    def should_merge(box1, box2):
        # Boxes merge when box1, padded by the threshold, intersects box2
        x1, y1, x2, y2 = box1
        bx1, by1, bx2, by2 = box2
        return (x1 - merge_threshold <= bx2 and bx1 <= x2 + merge_threshold and
                y1 - merge_threshold <= by2 and by1 <= y2 + merge_threshold)

    result = _iterative_merge(normalized, should_merge)
    logger.info(f"合并边界框:{len(bboxes)} -> {len(result)}")
    return result
def parse_page_ids_from_query(request: Request) -> List[str]:
    """
    Parse page_ids from query parameters (comma-separated string).

    Args:
        request: Flask request object

    Returns:
        List of page ID strings (empty list if none provided)
    """
    page_ids_param = request.args.get('page_ids', '')
    if not page_ids_param:
        return []
    return [pid.strip() for pid in page_ids_param.split(',') if pid.strip()]


def parse_page_ids_from_body(data: dict) -> List[str]:
    """
    Parse page_ids from request body (array of IDs).

    Mirrors parse_page_ids_from_query(): only non-empty strings are returned,
    with surrounding whitespace stripped.

    Args:
        data: Request JSON data dict; anything that is not a dict (e.g. None
            for a missing or invalid JSON body) yields an empty list

    Returns:
        List of page ID strings (empty list if invalid or none provided)
    """
    if not isinstance(data, dict):
        return []

    page_ids = data.get('page_ids', [])
    if not isinstance(page_ids, list):
        return []

    cleaned = []
    for pid in page_ids:
        # Non-string entries (null, numbers, nested values) cannot be page IDs
        if isinstance(pid, str) and pid.strip():
            cleaned.append(pid.strip())
    return cleaned


def get_filtered_pages(project_id: str, page_ids: Optional[List[str]] = None):
    """
    Fetch pages for a project, optionally filtered by page IDs.

    Args:
        project_id: Project ID
        page_ids: Optional list of page IDs to filter by; None or an empty
            list returns every page of the project

    Returns:
        List of Page objects ordered by order_index
    """
    # Imported here rather than at module level, as in the original
    from models import Page

    if page_ids:
        return Page.query.filter(
            Page.project_id == project_id,
            Page.id.in_(page_ids)
        ).order_by(Page.order_index).all()
    else:
        return Page.query.filter_by(project_id=project_id).order_by(Page.order_index).all()
logger = logging.getLogger(__name__)

# URL prefix under which MinerU extraction results are served
_MINERU_URL_PREFIX = '/files/mineru/'


def convert_mineru_path_to_local(mineru_path: str, project_root: Optional[Path] = None) -> Optional[Path]:
    """
    Convert a /files/mineru/{extract_id}/{rel_path} URL path to a local path.

    The result is {project_root}/uploads/mineru_files/{extract_id}/{rel_path}.
    Paths that would leave that directory (``..`` segments or an absolute
    remainder) are rejected. The check is lexical only; symlinks are not resolved.

    Args:
        mineru_path: MinerU URL path, /files/mineru/{extract_id}/{rel_path}
        project_root: Project root directory; when None it is derived from this
            file's location (assumed to be backend/utils/)

    Returns:
        Local filesystem path, or None if the input is not a MinerU path,
        escapes the storage directory, or cannot be converted
    """
    try:
        if not mineru_path.startswith(_MINERU_URL_PREFIX):
            return None

        # Strip only the leading prefix; later occurrences are part of the path
        rel_path = mineru_path[len(_MINERU_URL_PREFIX):]

        if project_root is None:
            # this file -> utils/ -> backend/ -> project root
            project_root = Path(__file__).resolve().parent.parent.parent

        base_dir = project_root / 'uploads' / 'mineru_files'
        local_path = base_dir / rel_path

        # Reject anything that normalizes to a location outside base_dir
        base_norm = os.path.normpath(base_dir)
        target_norm = os.path.normpath(local_path)
        if target_norm != base_norm and not target_norm.startswith(base_norm + os.sep):
            logger.warning(f"Rejected MinerU path outside storage directory: {mineru_path}")
            return None

        return local_path
    except Exception as e:
        logger.warning(f"Failed to convert MinerU path to local: {mineru_path}, error: {str(e)}")
        return None


def find_mineru_file_with_prefix(mineru_path: str, project_root: Optional[Path] = None) -> Optional[Path]:
    """
    Locate a MinerU file, falling back to prefix matching.

    The direct path is tried first. If it does not exist, find_file_with_prefix()
    looks for a file in the same directory whose name starts with the requested
    stem (stem length >= 5) and has the same extension.

    Args:
        mineru_path: MinerU URL path, /files/mineru/{extract_id}/{rel_path}
        project_root: Project root directory (derived automatically when None)

    Returns:
        Path of the file found, or None
    """
    local_path = convert_mineru_path_to_local(mineru_path, project_root)
    if local_path is None:
        return None

    if local_path.exists() and local_path.is_file():
        return local_path

    return find_file_with_prefix(local_path)


def find_file_with_prefix(file_path: Path) -> Optional[Path]:
    """
    Locate a file, falling back to prefix matching.

    If ``file_path`` itself is not a file and its name has the form
    "<stem>.<ext>" with a stem of at least 5 characters, the parent directory is
    searched for a file whose stem starts with the requested stem and whose
    extension matches (both case-insensitive). Candidates are checked in sorted
    name order so the result does not depend on directory listing order.

    Args:
        file_path: Path of the file to look for

    Returns:
        Path of the file found, or None
    """
    if file_path.exists() and file_path.is_file():
        return file_path

    filename = file_path.name
    dirpath = file_path.parent

    if '.' not in filename or not (dirpath.exists() and dirpath.is_dir()):
        return None

    prefix, ext = os.path.splitext(filename)
    if len(prefix) < 5:
        return None

    wanted_prefix, wanted_ext = prefix.lower(), ext.lower()
    try:
        for fname in sorted(os.listdir(dirpath)):
            stem, suffix = os.path.splitext(fname)
            if stem.lower().startswith(wanted_prefix) and suffix.lower() == wanted_ext:
                matched_path = dirpath / fname
                if matched_path.is_file():
                    logger.debug(f"Prefix match found: {file_path} -> {matched_path}")
                    return matched_path
    except OSError as e:
        logger.warning(f"Failed to list directory {dirpath}: {str(e)}")

    return None
class HTMLTableParser(HTMLParser):
    """
    Parse an HTML table into row/column data.

    Cell text is collected for <td> and <th> elements. End tags that HTML allows
    to be omitted (</td>, </th>, </tr>) are handled: opening a new cell or row
    implicitly closes the previous one. A new <table> tag discards any data
    collected so far.
    """

    def __init__(self):
        super().__init__()
        self.table_data = []
        self.current_row = []
        self.current_cell = []
        self.in_table = False
        self.in_row = False
        self.in_cell = False

    def _close_cell(self):
        """Finish the open cell, if any, and add its stripped text to the row."""
        if self.in_cell:
            self.current_row.append(''.join(self.current_cell).strip())
            self.current_cell = []
            self.in_cell = False

    def _close_row(self):
        """Finish the open row, if any; rows without cells are not recorded."""
        self._close_cell()
        if self.in_row:
            if self.current_row:
                self.table_data.append(self.current_row)
            # Start from a fresh list so a stray </tr> cannot re-append this row
            self.current_row = []
            self.in_row = False

    def handle_starttag(self, tag, attrs):
        if tag == 'table':
            self.in_table = True
            self.table_data = []
            self.current_row = []
            self.current_cell = []
            self.in_row = False
            self.in_cell = False
        elif tag == 'tr':
            # A new row implicitly ends the previous one
            self._close_row()
            self.in_row = True
        elif tag in ['td', 'th']:
            # A new cell implicitly ends the previous one
            self._close_cell()
            # A cell without an enclosing <tr> starts an implicit row
            self.in_row = True
            self.in_cell = True
            self.current_cell = []

    def handle_endtag(self, tag):
        if tag == 'table':
            self._close_row()
            self.in_table = False
        elif tag == 'tr':
            self._close_row()
        elif tag in ['td', 'th']:
            self._close_cell()

    def handle_data(self, data):
        if self.in_cell:
            self.current_cell.append(data)

    @staticmethod
    def parse_html_table(html: str) -> List[List[str]]:
        """
        Parse an HTML table string into a 2D array of cell texts.

        Args:
            html: HTML markup containing a table.

        Returns:
            List of rows, each a list of stripped cell strings.
        """
        parser = HTMLTableParser()
        parser.feed(html)
        # close() flushes text the parser is still buffering; the explicit row
        # close then records a trailing row of truncated markup.
        parser.close()
        parser._close_row()
        return parser.table_data
@classmethod
def _get_font(cls, size_pt: float) -> Optional[ImageFont.FreeTypeFont]:
    """
    Get the font object for a given size, loading it once and caching it.

    The font is loaded at ``int(size_pt)`` and cached under that same value, so
    every cache entry holds a font of exactly the size its key names.

    Args:
        size_pt: Requested font size; the fractional part is truncated.

    Returns:
        The loaded font, or None if it cannot be loaded.
    """
    load_size = int(size_pt)
    if load_size not in cls._font_cache:
        try:
            cls._font_cache[load_size] = ImageFont.truetype(cls.FONT_PATH, load_size)
        except Exception as e:
            logger.warning(f"Failed to load font {cls.FONT_PATH}: {e}")
            return None
    return cls._font_cache[load_size]

@classmethod
def _measure_text_width(cls, text: str, font_size_pt: float) -> Optional[float]:
    """
    Measure text width using the actual font.

    Args:
        text: Text to measure
        font_size_pt: Font size in points

    Returns:
        Width of the text's bounding box, or None if measurement failed. The
        font is loaded with size=font_size_pt, so the pixel width reported by
        the font is used directly as the width in points.
    """
    font = cls._get_font(font_size_pt)
    if font is None:
        return None

    try:
        # getbbox() returns (left, top, right, bottom)
        left, _top, right, _bottom = font.getbbox(text)
        return right - left
    except Exception as e:
        logger.warning(f"Failed to measure text: {e}")
        return None
core.modified = now core.last_printed = None except Exception as e: logger.warning(f"Failed to set core properties: {e}") def setup_presentation_size(self, width_pixels: int, height_pixels: int, dpi: int = None): """ Setup presentation size based on pixel dimensions Automatically clamps to python-pptx limits (1-56 inches) while preserving aspect ratio Args: width_pixels: Width in pixels height_pixels: Height in pixels dpi: DPI for conversion (default: 96) """ dpi = dpi or self.DEFAULT_DPI # Convert pixels to inches width_inches = width_pixels / dpi height_inches = height_pixels / dpi # Check if dimensions exceed python-pptx limits and scale down if needed # python-pptx enforces: 1 <= dimension <= 56 inches scale_factor = 1.0 if width_inches > self.MAX_SLIDE_WIDTH_INCHES: scale_factor = self.MAX_SLIDE_WIDTH_INCHES / width_inches logger.warning( f"Slide width {width_inches:.2f}\" exceeds python-pptx limit ({self.MAX_SLIDE_WIDTH_INCHES}\"), " f"scaling down by {scale_factor:.3f}x to maintain aspect ratio" ) if height_inches > self.MAX_SLIDE_HEIGHT_INCHES: height_scale = self.MAX_SLIDE_HEIGHT_INCHES / height_inches if height_scale < scale_factor: scale_factor = height_scale logger.warning( f"Slide height {height_inches:.2f}\" exceeds python-pptx limit ({self.MAX_SLIDE_HEIGHT_INCHES}\"), " f"scaling down by {scale_factor:.3f}x to maintain aspect ratio" ) # Apply scale factor if needed if scale_factor < 1.0: width_inches *= scale_factor height_inches *= scale_factor logger.info( f"Final slide dimensions after scaling: {width_inches:.2f}\" x {height_inches:.2f}\" " f"(from {width_pixels}x{height_pixels}px @ {dpi} DPI)" ) # Ensure minimum size constraints width_inches = max(self.MIN_SLIDE_WIDTH_INCHES, width_inches) height_inches = max(self.MIN_SLIDE_HEIGHT_INCHES, height_inches) self.slide_width_inches = width_inches self.slide_height_inches = height_inches if self.prs: self.prs.slide_width = Inches(self.slide_width_inches) self.prs.slide_height = 
def calculate_font_size(self, bbox: List[int], text: str, text_level: Any = None, dpi: int = None) -> float:
    """
    Calculate the largest font size at which the text fits its bounding box.

    Uses precise font measurement when the bundled font file exists and falls
    back to a character-count estimate otherwise. Explicit newlines and
    automatic wrapping are both taken into account.

    Args:
        bbox: Bounding box [x0, y0, x1, y1] in pixels
        text: Text content
        text_level: Text level (kept for compatibility, not used in calculation)
        dpi: DPI for pixel to inch conversion (default: DEFAULT_DPI)

    Returns:
        Font size in points, between MIN_FONT_SIZE and MAX_FONT_SIZE;
        MIN_FONT_SIZE when the box is empty or no size fits
    """
    dpi = dpi or self.DEFAULT_DPI

    width_px = bbox[2] - bbox[0]
    height_px = bbox[3] - bbox[1]

    # Pixels -> points (1 inch = 72 points). The bbox is used as-is because the
    # text box margins are set to 0 in add_text_element().
    usable_width_pt = (width_px / dpi) * 72
    usable_height_pt = (height_px / dpi) * 72

    if usable_width_pt <= 0 or usable_height_pt <= 0:
        logger.warning(f"Bbox too small for text: {width_px}x{height_px}px, text: '{text[:30]}...'")
        return self.MIN_FONT_SIZE

    text_length = len(text)

    # Line height ratio: 1.0 for tight bbox
    line_height_ratio = 1.0

    # Precise measurement needs the bundled font file
    use_precise = os.path.exists(self.FONT_PATH)

    # Loop invariants: the explicit lines, and the whole-point wrap width.
    # A box narrower than 1pt truncates to 0, which would divide by zero
    # below, so the wrap width is floored at 1.
    explicit_lines = text.split('\n')
    wrap_width_pt = max(1, int(usable_width_pt))

    # Linear scan from the largest size down; the first size that fits wins
    best_size = self.MIN_FONT_SIZE
    for candidate in range(int(self.MAX_FONT_SIZE), int(self.MIN_FONT_SIZE) - 1, -1):
        font_size = float(candidate)

        required_lines = 0
        for line in explicit_lines:
            if not line:
                # An empty explicit line still occupies one line of height
                required_lines += 1
                continue

            line_width_pt = None
            if use_precise:
                line_width_pt = self._measure_text_width(line, font_size)
                if line_width_pt is None:
                    # Measurement failed: use the estimate from here on
                    use_precise = False

            if line_width_pt is None:
                # Estimate: CJK characters are about 1em wide, others about 0.5em
                cjk_count = sum(1 for c in line if '\u4e00' <= c <= '\u9fff' or '\u3040' <= c <= '\u30ff' or '\uac00' <= c <= '\ud7af')
                non_cjk_count = len(line) - cjk_count
                line_width_pt = (cjk_count * 1.0 + non_cjk_count * 0.5) * font_size

            # Ceiling division: wrapped lines needed for this explicit line
            required_lines += max(1, -(-int(line_width_pt) // wrap_width_pt))

        total_height_pt = required_lines * (font_size * line_height_ratio)
        if total_height_pt <= usable_height_pt:
            best_size = font_size
            break

    if best_size == self.MIN_FONT_SIZE and text_length > 3:
        logger.warning(f"Text may overflow: '{text[:50]}...' in bbox {width_px}x{height_px}px")

    logger.debug(
        f"Font size calc: '{text[:20]}{'...' if len(text) > 20 else ''}' "
        f"bbox={width_px}x{height_px}px -> {best_size}pt "
        f"(usable: {usable_width_pt:.1f}x{usable_height_pt:.1f}pt)"
    )

    return best_size
or type string dpi: DPI for conversion (default: 96) align: Text alignment ('left', 'center', 'right') text_style: TextStyleResult object with font color, bold, italic etc. (optional) If text_style has colored_segments, those will be used for rendering and the text content will come from the segments. """ dpi = dpi or self.DEFAULT_DPI # Check if we have colored segments (multi-color text) has_colored_segments = ( text_style and hasattr(text_style, 'colored_segments') and text_style.colored_segments and len(text_style.colored_segments) > 0 ) # Determine the actual text to use # If we have colored_segments, use the text from segments (model's recognized text) if has_colored_segments: actual_text = ''.join(seg.text for seg in text_style.colored_segments) else: actual_text = text # Expand bbox slightly to prevent text overflow # MinerU bbox is tight, but font rendering may need extra space EXPAND_RATIO = 0.01 # 1% expansion bbox_width = bbox[2] - bbox[0] bbox_height = bbox[3] - bbox[1] expand_w = bbox_width * EXPAND_RATIO expand_h = bbox_height * EXPAND_RATIO # Convert expanded bbox to inches (expand evenly on all sides) left = Inches(self.pixels_to_inches(bbox[0] - expand_w / 2, dpi)) top = Inches(self.pixels_to_inches(bbox[1] - expand_h / 2, dpi)) width = Inches(self.pixels_to_inches(bbox_width + expand_w, dpi)) height = Inches(self.pixels_to_inches(bbox_height + expand_h, dpi)) # Add text box textbox = slide.shapes.add_textbox(left, top, width, height) text_frame = textbox.text_frame text_frame.word_wrap = True # Remove margins completely - bbox is tight, no extra space needed text_frame.margin_left = Inches(0) text_frame.margin_right = Inches(0) text_frame.margin_top = Inches(0) text_frame.margin_bottom = Inches(0) def replace_some_chars(text: str) -> str: # replace logic # replace · to • if starts with · text = text.replace('·', '•', 1) if text.lstrip().startswith('·') else text return text actual_text = replace_some_chars(actual_text) # Calculate font size 
font_size = self.calculate_font_size(bbox, actual_text, text_level, dpi) # Determine effective alignment - text_style优先,否则使用参数 effective_align = align if text_style and hasattr(text_style, 'text_alignment') and text_style.text_alignment: effective_align = text_style.text_alignment # Get style attributes is_bold = False is_italic = False is_underline = False if text_style: is_bold = getattr(text_style, 'is_bold', False) is_italic = getattr(text_style, 'is_italic', False) is_underline = getattr(text_style, 'is_underline', False) # Make title text bold (legacy behavior) if text_level == 1 or text_level == 'title': is_bold = True # Render text with colors if has_colored_segments: # Multi-color text: use runs for each segment paragraph = text_frame.paragraphs[0] paragraph.clear() latex_count = 0 for seg in text_style.colored_segments: run = paragraph.add_run() run.text = replace_some_chars(seg.text) run.font.size = Pt(font_size) run.font.bold = is_bold run.font.underline = is_underline # Set segment-specific color r, g, b = seg.color_rgb run.font.color.rgb = RGBColor(r, g, b) # Handle LaTeX formula segments if hasattr(seg, 'is_latex') and seg.is_latex: # For LaTeX formulas, use italic style as visual hint # TODO: In future, could render as actual equation using OMML run.font.italic = True latex_count += 1 logger.debug(f" LaTeX formula detected: '{seg.text}'") else: run.font.italic = is_italic latex_info = f", {latex_count} latex" if latex_count > 0 else "" style_info = f" | multi-color: {len(text_style.colored_segments)} segments{latex_info}" else: # Single color text: use simple text assignment text_frame.text = actual_text # IMPORTANT: Re-get paragraph after setting text_frame.text # because setting text_frame.text creates a new paragraph object paragraph = text_frame.paragraphs[0] paragraph.font.size = Pt(font_size) paragraph.font.bold = is_bold paragraph.font.italic = is_italic paragraph.font.underline = is_underline # Apply single font color if provided if 
text_style and hasattr(text_style, 'font_color_rgb') and text_style.font_color_rgb: r, g, b = text_style.font_color_rgb paragraph.font.color.rgb = RGBColor(r, g, b) style_info = f" | color={text_style.font_color_rgb if text_style else 'default'}" # Apply alignment after paragraph is finalized if effective_align == 'center': paragraph.alignment = PP_ALIGN.CENTER elif effective_align == 'right': paragraph.alignment = PP_ALIGN.RIGHT elif effective_align == 'justify': paragraph.alignment = PP_ALIGN.JUSTIFY else: paragraph.alignment = PP_ALIGN.LEFT # Calculate bbox dimensions for logging bbox_width = bbox[2] - bbox[0] bbox_height = bbox[3] - bbox[1] logger.debug(f"Text: '{actual_text[:35]}' | box: {bbox_width}x{bbox_height}px | font: {font_size:.1f}pt | chars: {len(actual_text)}{style_info}") def add_image_element( self, slide, image_path: str, bbox: List[int], dpi: int = None ): """ Add image element to slide Args: slide: Target slide image_path: Path to image file bbox: Bounding box [x0, y0, x1, y1] in pixels dpi: DPI for conversion (default: 96) """ dpi = dpi or self.DEFAULT_DPI # Check if image exists if not os.path.exists(image_path): logger.warning(f"Image not found: {image_path}, adding placeholder") self.add_image_placeholder(slide, bbox, dpi) return # Convert bbox to inches left = Inches(self.pixels_to_inches(bbox[0], dpi)) top = Inches(self.pixels_to_inches(bbox[1], dpi)) width = Inches(self.pixels_to_inches(bbox[2] - bbox[0], dpi)) height = Inches(self.pixels_to_inches(bbox[3] - bbox[1], dpi)) try: # Add image slide.shapes.add_picture(image_path, left, top, width, height) logger.debug(f"Added image: {image_path} at bbox {bbox}") except Exception as e: logger.error(f"Failed to add image {image_path}: {str(e)}") self.add_image_placeholder(slide, bbox, dpi) def add_image_placeholder( self, slide, bbox: List[int], dpi: int = None ): """ Add a placeholder for missing images Args: slide: Target slide bbox: Bounding box [x0, y0, x1, y1] in pixels dpi: DPI for 
def add_table_element(
    self,
    slide,
    html_table: str,
    bbox: List[int],
    dpi: int = None
):
    """
    Add an editable table to a slide from an HTML table string.

    Parse failures and table-creation failures are logged and swallowed, so a bad
    table never aborts the export; in those cases nothing (or a partially filled
    table) is left on the slide.

    Args:
        slide: Target slide
        html_table: HTML table string
        bbox: Bounding box [x0, y0, x1, y1] in pixels
        dpi: DPI for conversion (falls back to self.DEFAULT_DPI when falsy)
    """
    dpi = dpi or self.DEFAULT_DPI

    # Parse HTML table
    try:
        table_data = HTMLTableParser.parse_html_table(html_table)
    except Exception as e:
        logger.error(f"Failed to parse HTML table: {str(e)}")
        return

    if not table_data or not table_data[0]:
        logger.warning("Empty table data")
        return

    rows = len(table_data)
    # Size the grid by the widest row so cells of rows longer than the first
    # one are kept instead of being silently dropped.
    cols = max(len(row) for row in table_data)

    # Convert bbox to inches
    left = Inches(self.pixels_to_inches(bbox[0], dpi))
    top = Inches(self.pixels_to_inches(bbox[1], dpi))
    width = Inches(self.pixels_to_inches(bbox[2] - bbox[0], dpi))
    height = Inches(self.pixels_to_inches(bbox[3] - bbox[1], dpi))

    try:
        # Add table shape
        table_shape = slide.shapes.add_table(rows, cols, left, top, width, height)
        table = table_shape.table

        # Conservative font size derived from the per-row pixel height, clamped to
        # 8-18pt. It is the same for every cell, so compute it once.
        cell_height_px = (bbox[3] - bbox[1]) / rows
        font_size = min(18, max(8, cell_height_px * 0.3))

        # Fill table with data
        for row_idx, row_data in enumerate(table_data):
            is_header = row_idx == 0  # first row is treated as the header
            for col_idx, cell_text in enumerate(row_data):
                cell = table.cell(row_idx, col_idx)
                cell.text = cell_text

                # Style the cell
                text_frame = cell.text_frame
                text_frame.word_wrap = True

                for paragraph in text_frame.paragraphs:
                    paragraph.font.size = Pt(font_size)
                    paragraph.alignment = PP_ALIGN.CENTER

                    # Header row (first row) should be bold
                    if is_header:
                        paragraph.font.bold = True

        logger.info(f"Added editable table: {rows}x{cols} at bbox {bbox}")

    except Exception as e:
        logger.error(f"Failed to create table: {str(e)}")
# --- Aspect ratio validation ---
_ASPECT_RATIO_PATTERN = re.compile(r"^\d+:\d+$")
_ASPECT_RATIO_MIN = 0.2
_ASPECT_RATIO_MAX = 5.0


def normalize_aspect_ratio(raw_value) -> str:
    """
    Validate an aspect ratio and return it in reduced "W:H" form.

    The input is stringified and stripped, must look like "<digits>:<digits>",
    and both sides must be positive. The pair is divided by its gcd
    (e.g. "1920:1080" -> "16:9"), the resulting W/H quotient must lie within
    [_ASPECT_RATIO_MIN, _ASPECT_RATIO_MAX], and the final string may be at
    most 10 characters long.

    Raises:
        ValueError: if the value is missing/blank, malformed, non-positive,
            outside the allowed ratio range, or too long after reduction.
    """
    cleaned = None if raw_value is None else str(raw_value).strip()
    if not cleaned:
        raise ValueError("Image aspect ratio is required")

    if _ASPECT_RATIO_PATTERN.fullmatch(cleaned) is None:
        raise ValueError(
            "Image aspect ratio must match \\d+:\\d+ (e.g., 16:9, 4:3, 1:1)"
        )

    # The pattern guarantees exactly one ':' with digits on both sides.
    w_text, _, h_text = cleaned.partition(":")
    width = int(w_text)
    height = int(h_text)
    if min(width, height) <= 0:
        raise ValueError("Image aspect ratio must be positive integers (e.g., 16:9)")

    # Reduce to lowest terms.
    common = gcd(width, height)
    width, height = width // common, height // common

    quotient = width / height
    if not (_ASPECT_RATIO_MIN <= quotient <= _ASPECT_RATIO_MAX):
        raise ValueError(
            f"Image aspect ratio must be between {_ASPECT_RATIO_MIN:.1f} and {_ASPECT_RATIO_MAX:.1f} (e.g., 16:9)"
        )

    result = f"{width}:{height}"
    if len(result) > 10:
        raise ValueError("Image aspect ratio is too long")
    return result
import fetch from 'node-fetch';
import FormData from 'form-data';
import fs from 'fs';
import os from 'os';
import path from 'path';

const BASE_URL = process.env.BASE_URL || 'http://localhost:5401';

// Scratch directory for upload fixtures; os.tmpdir() keeps the script portable
// instead of assuming a POSIX /tmp.
const TEMP_DIR = path.join(os.tmpdir(), 'test-attachments');

// Pause between uploads so the backend is not hit with a burst of requests.
const UPLOAD_DELAY_MS = 200;

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Parse a JSON response, failing loudly (with status and body) on non-2xx answers.
 */
async function readJsonOrThrow(response, action) {
  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(`${action} failed: HTTP ${response.status} ${detail}`.trim());
  }
  return response.json();
}

/**
 * Create a project from an idea prompt and return its id.
 * Throws when the request fails or the response carries no project_id.
 */
async function createProject(title) {
  const response = await fetch(`${BASE_URL}/api/projects`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      creation_type: 'idea',
      idea_prompt: title,
      template_style: '简约商务风格',
      image_aspect_ratio: '16:9'
    })
  });
  const data = await readJsonOrThrow(response, `Creating project "${title}"`);
  const projectId = data && data.data && data.data.project_id;
  if (!projectId) {
    throw new Error(`Creating project "${title}" returned no project_id`);
  }
  return projectId;
}

/**
 * Write `content` to a temporary file and return its path.
 */
function createTempFile(filename, content) {
  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(TEMP_DIR, { recursive: true });
  const filepath = path.join(TEMP_DIR, filename);
  fs.writeFileSync(filepath, content);
  return filepath;
}

/**
 * Upload a reference file; pass a falsy projectId to upload it as a global file.
 * The temporary file is removed even when the upload fails.
 */
async function uploadFile(projectId, filename, content) {
  const filepath = createTempFile(filename, content);
  try {
    const formData = new FormData();
    formData.append('file', fs.createReadStream(filepath));
    if (projectId) {
      formData.append('project_id', projectId);
    }

    const response = await fetch(`${BASE_URL}/api/reference-files`, {
      method: 'POST',
      body: formData
    });
    return await readJsonOrThrow(response, `Uploading "${filename}"`);
  } finally {
    fs.rmSync(filepath, { force: true });
  }
}

async function main() {
  console.log('Creating test projects and attachments...\n');

  const projects = [
    '产品发布会演示',
    '季度业绩报告',
    '市场营销策略',
    '技术架构设计',
    '团队培训材料'
  ];

  for (const title of projects) {
    console.log(`Creating project: ${title}`);
    const projectId = await createProject(title);

    // Upload 2-3 files for each project
    const fileCount = Math.floor(Math.random() * 2) + 2;
    for (let i = 0; i < fileCount; i++) {
      const filename = `${title.substring(0, 4)}_文档${i + 1}.txt`;
      const content = `这是 ${title} 的参考文档 ${i + 1}\n创建时间: ${new Date().toISOString()}`;
      await uploadFile(projectId, filename, content);
      console.log(`  - Uploaded: ${filename}`);
      await sleep(UPLOAD_DELAY_MS);
    }
  }

  // Upload a few global files (not attached to any project)
  console.log('\nCreating global attachments...');
  const globalFiles = ['通用模板.txt', '公司Logo说明.txt', '品牌指南.txt'];
  for (const filename of globalFiles) {
    await uploadFile(null, filename, `全局文件: ${filename}\n${new Date().toISOString()}`);
    console.log(`  - Uploaded: ${filename}`);
    await sleep(UPLOAD_DELAY_MS);
  }

  console.log('\n✅ Test data created successfully!');
}

main().catch((error) => {
  console.error(error);
  // Signal failure to callers (CI, shell scripts) instead of exiting with 0.
  process.exitCode = 1;
});
# All-in-one container: nginx serves the built frontend and reverse-proxies
# API, file and health requests to the backend listening on 127.0.0.1:5000.
server {
    listen 80;
    server_name localhost;
    root /usr/share/nginx/html;
    index index.html;

    # Upper bound for uploaded request bodies.
    client_max_body_size 50M;

    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    # application/javascript and image/svg+xml are listed explicitly so that JS
    # bundles served with that MIME type and SVG assets are compressed as well.
    gzip_types text/plain text/css text/xml text/javascript application/javascript application/x-javascript application/xml+rss application/json image/svg+xml;

    # SPA fallback: unknown paths are answered with index.html.
    location / {
        try_files $uri $uri/ /index.html;
    }

    # `^~` stops regex matching, so the static-asset block below never
    # intercepts /api or /files URLs that happen to end in .png, .js, ...
    location ^~ /api {
        proxy_pass http://127.0.0.1:5000;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_cache_bypass $http_upgrade;
        proxy_read_timeout 300s;
        proxy_connect_timeout 300s;
    }

    location ^~ /files {
        proxy_pass http://127.0.0.1:5000;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_read_timeout 300s;
        proxy_connect_timeout 300s;
        # Backend-served files must never be cached by the browser.
        add_header Cache-Control "no-cache, no-store, must-revalidate";
    }

    location /health {
        proxy_pass http://127.0.0.1:5000/health;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
    }

    # Long-lived caching for static frontend assets.
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }
}
# Single-container deployment built from Dockerfile.allinone.
services:
  app:
    build:
      context: .
      dockerfile: Dockerfile.allinone
      # Optional registry/mirror overrides; each one is empty unless set in the
      # environment, except GHCR_REGISTRY which defaults to ghcr.io/.
      args:
        DOCKER_REGISTRY: ${DOCKER_REGISTRY:-}
        GHCR_REGISTRY: ${GHCR_REGISTRY:-ghcr.io/}
        APT_MIRROR: ${APT_MIRROR:-}
        PYPI_INDEX_URL: ${PYPI_INDEX_URL:-}
        NPM_REGISTRY: ${NPM_REGISTRY:-}
    container_name: banana-slides
    ports:
      # Host port (PORT, default 3000) -> container port 80
      - "${PORT:-3000}:80"
    # Load environment variables from the .env file
    env_file:
      - .env
    volumes:
      # Bind mounts so the backend instance directory and uploads live on the host
      - ./backend/instance:/app/backend/instance
      - ./uploads:/app/uploads
    restart: unless-stopped
    healthcheck:
      # Probes /health on container port 80
      test: ["CMD", "curl", "-f", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 15s
# Two-container setup built from local sources: backend API + frontend.
services:
  backend:
    build:
      context: .
      dockerfile: backend/Dockerfile
      # Optional registry/mirror overrides; each one is empty unless set in the
      # environment, except GHCR_REGISTRY which defaults to ghcr.io/.
      args:
        DOCKER_REGISTRY: ${DOCKER_REGISTRY:-}
        GHCR_REGISTRY: ${GHCR_REGISTRY:-ghcr.io/}
        APT_MIRROR: ${APT_MIRROR:-}
        PYPI_INDEX_URL: ${PYPI_INDEX_URL:-}
    container_name: banana-slides-backend
    ports:
      # host port:container port
      # The host port is controlled by BACKEND_PORT; the container port is fixed at 5000
      - "${BACKEND_PORT:-5000}:5000"
    # Automatically load all environment variables from the .env file
    env_file:
      - .env
    # Uncomment below to use Vertex AI with a GCP service-account key
    # environment:
    #   - GOOGLE_APPLICATION_CREDENTIALS=/app/gcp-sa-key.json
    volumes:
      # Persist the database
      - ./backend/instance:/app/backend/instance
      # Persist uploaded files
      - ./uploads:/app/uploads
      # Mount GCP service-account key (Vertex AI only — uncomment if needed)
      # - ./gcp-service-account.json:/app/gcp-sa-key.json:ro
    restart: unless-stopped
    healthcheck:
      # The health check uses the fixed in-container port 5000
      test: ["CMD", "curl", "-f", "http://localhost:5000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
    networks:
      - banana-slides-network

  frontend:
    build:
      context: .
      dockerfile: frontend/Dockerfile
      # Windows compatibility: disable symlink following + frontend build args
      # NOTE(review): DOCKER_BUILDKIT is passed as a build arg here; whether
      # frontend/Dockerfile actually consumes it is not visible — confirm.
      args:
        DOCKER_BUILDKIT: 1
        DOCKER_REGISTRY: ${DOCKER_REGISTRY:-}
        NPM_REGISTRY: ${NPM_REGISTRY:-}
    container_name: banana-slides-frontend
    ports:
      # Host port (FRONTEND_PORT, default 3000) -> container port 80
      - "${FRONTEND_PORT:-3000}:80"
    depends_on:
      - backend
    restart: unless-stopped
    networks:
      - banana-slides-network

networks:
  banana-slides-network:
    driver: bridge

# NOTE(review): these named volumes are declared but no service in this file
# references them (both services use bind mounts) — confirm whether they are
# still needed.
volumes:
  backend-data:
  uploads-data:
## LazyLLM (Multi-Vendor) Routes requests to different Chinese AI vendors for text, image, and caption tasks: ```env AI_PROVIDER_FORMAT=lazyllm TEXT_MODEL_SOURCE=deepseek IMAGE_MODEL_SOURCE=doubao IMAGE_CAPTION_MODEL_SOURCE=qwen ``` Set API keys for the vendors you use: ```env DOUBAO_API_KEY=your-key # Volcengine DEEPSEEK_API_KEY=your-key # DeepSeek QWEN_API_KEY=your-key # Alibaba Qwen GLM_API_KEY=your-key # Zhipu GLM SILICONFLOW_API_KEY=your-key # SiliconFlow SENSENOVA_API_KEY=your-key # SenseNova MINIMAX_API_KEY=your-key # MiniMax ``` ## AIHubMix (Recommended Proxy) [AIHubMix](https://aihubmix.com/?aff=17EC) is a recommended API proxy that supports both Gemini and OpenAI API formats, with stable high-concurrency performance for text-to-image generation. [Apply for an AIHubMix API key here](https://aihubmix.com/token?aff=17EC). ```env AI_PROVIDER_FORMAT=openai OPENAI_API_KEY=your-aihubmix-key OPENAI_API_BASE=https://aihubmix.com/v1 ``` ## MinerU (PDF Parsing) [MinerU](https://mineru.net) provides high-quality PDF parsing for reference file uploads. [Apply for a MinerU token here](https://mineru.net/apiManage/token). ```env MINERU_API_BASE=https://mineru.net MINERU_TOKEN=your-mineru-token ``` ## Baidu API Key For enhanced editable PPTX export with OCR-based text extraction, apply for an [IAM API Key](https://console.bce.baidu.com/iam/#/iam/apikey/list) from Baidu Cloud (generous free tier available): ```env BAIDU_API_KEY=your-baidu-api-key ``` ## Runtime Settings Override All of the above can also be configured via the web UI's Settings page. Settings configured there are stored in the database and override `.env` values. Use "Reset to Default" in Settings to revert to `.env` values. 
================================================ FILE: docs/docs.json ================================================ { "$schema": "https://mintlify.com/docs.json", "theme": "mint", "name": "Banana Slides", "colors": { "primary": "#F59E0B", "light": "#FBBF24", "dark": "#D97706" }, "favicon": "/logo/banana.svg", "navigation": { "languages": [ { "language": "zh", "groups": [ { "group": "快速上手", "pages": ["zh/index", "zh/quickstart", "zh/configuration"] }, { "group": "创作流程", "pages": [ "zh/features/creation", "zh/features/outline", "zh/features/descriptions", "zh/features/images" ] }, { "group": "素材与导出", "pages": [ "zh/features/materials", "zh/features/export", "zh/features/import-export" ] }, { "group": "其他", "pages": [ "zh/history", "zh/features/overview", "zh/faq" ] } ] }, { "language": "en", "groups": [ { "group": "Getting Started", "pages": ["index", "quickstart", "configuration"] }, { "group": "Creation Flow", "pages": [ "features/creation", "features/outline", "features/descriptions", "features/images" ] }, { "group": "Materials & Export", "pages": [ "features/materials", "features/export", "features/import-export" ] }, { "group": "More", "pages": [ "history", "features/overview", "faq" ] } ] } ] } } ================================================ FILE: docs/faq.mdx ================================================ --- title: "FAQ" description: "Frequently asked questions" --- ## Generated text is garbled or blurry - Check if you're on 1K low-resolution mode. Switch to 2K or 4K in **Project Settings → Global Settings** — the difference in text clarity is significant. Note: some OpenAI-format proxy services don't support higher resolutions; use Gemini format instead. - Include the specific text you want rendered in the page description — AI renders it more accurately when given explicit content. ## Editable PPTX export has overlapping text or missing styles Usually an API configuration issue. 
Make sure `BAIDU_API_KEY` is set correctly — see [Configuration](/configuration#baidu-api-key). You can also try adjusting the text extraction method in **Project Settings → Export Settings** (OCR / Vision model / Hybrid). Different methods perform differently depending on the layout. ## The regular PPTX export has text I can't edit in PowerPoint Regular PPTX embeds slides as images — text inside images is not editable. Use **Export Editable PPTX (Beta)** if you need editable text. ## Does the free Gemini API tier work? Text generation (outlines, descriptions) works on the free tier, but image generation requires a paid tier. ## Getting 503 errors or repeated retry failures Usually caused by incorrect model configuration. Run the **Service Test** at the bottom of the Settings page first to pinpoint the issue. Check backend logs for details: ```bash docker logs --tail 200 banana-slides-backend ``` ## Environment variable changes not taking effect - Docker deployments require a container restart after editing `.env`. - If you've configured settings via the web UI, those database values override `.env`. Click **Reset to Default** in Settings to clear them. ## PPT Renovation didn't parse correctly - Upload PDF instead of PPTX for more stable results. Direct PPTX upload requires server-side LibreOffice conversion, which may fail due to missing fonts (Arial, Calibri, etc.). - Docker users who need PPTX support inside the container: ```bash docker exec -it banana-slides-backend bash -c "apt-get update && apt-get install -y libreoffice-impress" ``` ## How do I change the aspect ratio? Select a ratio on the home page before creating, or change it in **Project Settings** on the preview page. Images must be regenerated after changing the ratio. Note: some image generation models don't support certain aspect ratios. If generation fails, try switching to 16:9. 
## Generation is slow - Increase **Max description workers** and **Max image workers** in Settings to speed up parallel generation. - If you're on a free API tier, rate limits may apply. Consider upgrading or using a relay service like [AIHubMix](https://aihubmix.com/?aff=17EC). ## How do I regenerate just one page? On the Slide Preview page, click the edit icon (pencil) on that slide, update the description or add an edit instruction, then click **Generate Image**. You can also use multi-select to batch regenerate several pages. ## Can I include bar charts, line charts, or tree diagrams in slides? Two approaches: **Option 1: Describe in text + generation requirements (recommended)** Write the chart content directly in the page description using plain text or Markdown, and specify the chart style in **Extra Requirements** or **Description Requirements**. AI can usually render it directly. Example: ``` Right side of the page shows a 5-year revenue bar chart with data: 2020: $1.2M, 2021: $1.8M, 2022: $2.4M, 2023: $3.1M, 2024: $4.2M. Bars in blue gradient, white background. ``` **Option 2: Create the chart as an image first** Generate the chart with Excel, Python (matplotlib), or similar tools, then paste it (Ctrl+V) into the description card as a reference image. AI will draw in a similar style. ## How do I keep chart styles consistent across all slides? Use **text-based style description** rather than uploading a reference image — tests show text gives better generation consistency. Write explicit style rules in **Extra Requirements** or **Style Description**, for example: - `All charts use blue color scheme (#2563EB as primary), white background, no borders` - `Chart fonts are always sans-serif, data labels always shown above bars` - `Line charts use circular data point markers, line weight 2px` The more specific the constraints, the more consistent the output across pages. If you care about letter spacing or font sizes, include those too. 
## Where is the AI input bar for editing outlines and descriptions? It's in the **top navigation bar** of the Outline Editor and Description Editor — fill in your instruction and press **Ctrl+Enter**. Example: `Add a case study section after page 3`. ================================================ FILE: docs/features/creation.mdx ================================================ --- title: "Creating a Presentation" description: "Four ways to start — pick the one that fits you" --- On the home page, choose a creation method, fill in your content, and click **Create New Project**. ## From an Idea (Simplest) Just one sentence describing your topic — AI handles the full pipeline: outline → descriptions → images. **Best for**: No existing material, want a PPT generated quickly **Steps:** 1. Select the **From Idea** tab 2. Enter your topic, e.g.: `Create an 8-slide presentation about carbon neutrality` 3. Choose a template and aspect ratio (optional) 4. Click **Create New Project** You'll land in the Outline Editor to review and adjust the AI-generated outline. ## From an Outline You have a structure and don't want AI to guess: **Best for**: You have an existing outline or table of contents **Steps:** 1. Select the **From Outline** tab 2. Paste your outline — supports title + bullet points, or titles only 3. Click **Create New Project** — AI automatically parses it into a structured outline **Format example:** ``` Slide 1: The Origins of AI - 1956 Dartmouth Conference - Vision of early researchers Slide 2: The Rise of Machine Learning The shift from rule-based to data-driven approaches ``` ## From Descriptions You already have detailed per-page descriptions and want to skip the outline step: **Best for**: Content is fully defined, or you've generated descriptions from another AI tool **Steps:** 1. Select the **From Description** tab 2. Paste your full page descriptions (including layout, color, content) 3. 
Click **Create New Project** — skips outline, goes straight to image generation **Format example:** ``` Slide 1: Cover Clean cover page, company logo centered, white bold title, dark blue gradient background. Slide 2: Market Overview 5-year market growth bar chart, key figures highlighted in cards. ``` ## PPT Renovation Upload an existing PDF or PPTX — AI parses the content and regenerates a visually refreshed version: **Best for**: Outdated PPT needing a visual refresh, or using an existing PPT as a starting point **Steps:** 1. Select the **PPT Renovation** tab 2. Upload a PDF or PPTX file 3. Optionally check **Keep original layout** to stay closer to the original structure 4. Click **Create New Project** We recommend converting PPTX to PDF locally before uploading. Direct PPTX upload requires server-side LibreOffice conversion, which may cause layout issues due to missing fonts. ## Aspect Ratio Choose an aspect ratio below the input area before creating the project: | Ratio | Best for | |-------|---------| | 16:9 | Standard widescreen (default) | | 4:3 | Traditional slides | | 21:9 | Ultrawide displays | | 1:1 | Square (social media) | | 9:16 | Portrait (mobile) | Some image generation models don't support certain aspect ratios. If generation fails, try switching to 16:9. The ratio can also be changed in Project Settings after creation — images must be regenerated to take effect. 
## Template and Style Set the visual style before creating: **Option 1: Choose a template** - Pick from the preset template gallery, or upload a custom template image **Option 2: Text style description** - Check **Use text description for style** - Describe the desired visual style, e.g.: `Modern tech aesthetic, dark background, blue-purple gradient tones` ## Upload Reference Files Attach reference files when creating — AI extracts the content as source material: - **Supported formats**: PDF, DOCX, PPTX, XLSX, CSV, TXT, MD - **Upload methods**: Click the paperclip icon, drag and drop, or paste (Ctrl+V) - **File size limit**: 200MB Files parse automatically after upload. Project creation is blocked until parsing completes. See [Materials & Files](/features/materials). ================================================ FILE: docs/features/descriptions.mdx ================================================ --- title: "Write Descriptions" description: "Generate or write detailed visual descriptions for each slide" --- The Description Editor is the third step. Each page's "description" tells the AI what to draw — layout, color palette, charts, text content. The more specific, the better the result. ## Batch Generate All Descriptions Click **Batch Generate Descriptions** — AI generates descriptions for every page automatically. Review and edit page by page when done. **Adjust detail level before generating** (dropdown next to the button): | Level | Best for | |-------|---------| | Concise | Quick prototypes, speed priority | | Default | Everyday use | | Detailed | Fine-tuning, quality priority | Include specific text content in descriptions (e.g., headline copy, data values) — this significantly improves text rendering quality on generated slides. ## Refine with AI The top input bar lets you adjust all descriptions at once. Press **Ctrl+Enter** to submit. 
**Common commands:** - `Make all descriptions more detailed with specific text content` - `Change everything to a dark blue tech aesthetic` - `Add a data comparison table to page 3's description` - `Remove all mentions of "gradient background" from every description` ## Regenerate a Single Page Each description card has a **Regenerate** button (🔄) in the top-right: 1. Click the regenerate icon 2. Confirm in the dialog 3. AI regenerates just that page's description and replaces the old one ## Edit a Page Manually Click directly on the text in any description card to edit it inline. Changes save automatically when you click elsewhere. ## Set Description Requirements Expand **Description Generation Requirements** in the toolbar to add overall constraints — applied on every generation. **Example requirements:** - `Keep each description under 100 words` - `Favor data and charts over plain text` - `Maintain a clean minimalist style` ## Add a Reference Image for a Page At the bottom of each description card: 1. Find the **Upload Images** section 2. Upload or select a reference image from the material library 3. Add an edit instruction (optional), then click **Generate Image** ## Import / Export Click **Import/Export**: - **Export Descriptions**: Export description content only - **Export Outline & Descriptions**: Full export including key points - **Import**: Load from file, appended to existing pages ## Next Step Once all pages have descriptions, the **Next →** button activates. Click it to go to the [Slide Preview](/features/images) and start generating images. 
================================================ FILE: docs/features/editing.mdx ================================================ --- title: "Editing Slides" description: "Modify presentations with natural language" --- ## Natural Language Editing Instead of navigating complex menus, describe your changes in plain language: - "Change the third page to a case study" - "Replace this chart with a pie chart" - "Make the background darker" - "Add a summary bullet list" ## Outline Editing In the Outline Editor, refine the structure by typing instructions like: - "Add a section about competitive analysis after the introduction" - "Merge the last two pages" - "Move the conclusion before the Q&A page" You can also manually drag and drop pages to reorder them, or edit chapter assignments. ## Region-Based Editing Select a specific area of a slide in the preview and describe what you want changed. The selected region is cropped and passed to the AI as a reference image, so the slide is redrawn with extra focus on that area. Steps: 1. Go to the Slide Preview page and click **Region Select** 2. Drag to select the area you want to modify on the slide 3. Describe the change in the input box 4. Click **Generate Image** — AI redraws the slide with extra focus on the selected region ## Description Detail Level Next to the **Batch Generate Descriptions** button in the Description Editor, you can choose a detail level: | Level | Description | |-------|-------------| | Concise | Brief descriptions, faster generation, good for quick prototypes | | Default | Balanced between detail and speed | | Detailed | Richer descriptions give AI more compositional reference, ideal for fine-tuning | ## Version History Every time you regenerate a slide image, the previous version is automatically saved. Click **History Versions** on the preview page to view and restore any previous version — no need to worry about accidentally overwriting a result you liked.
================================================ FILE: docs/features/export.mdx ================================================ --- title: "Export Options" description: "Export presentations as PPTX, PDF, or images" --- ## Formats | Format | Description | |--------|-------------| | PPTX | Slides are embedded as images in the PPT file. You can play and reorder pages in PowerPoint, but the text inside images cannot be directly edited | | Editable PPTX (Beta) | Extracts text from slide images and rebuilds it as native PowerPoint text boxes, enabling direct text editing in PowerPoint | | PDF | Pixel-perfect output, ready for presentation | | Images | Each page exported as an individual image file | ## Aspect Ratio Defaults to 16:9. You can change the ratio when creating a project or in **Project Settings** on the preview page. Supports 16:9, 4:3, 21:9, 1:1, 9:16, and more. Images must be regenerated after changing the ratio. ## Editable PPTX (Beta) The extraction process preserves font size, color, bold styling, text positioning, and table content. However, due to OCR accuracy limits and current model capabilities, complex layouts may have discrepancies from the original slide appearance. For best extraction results, configure `BAIDU_API_KEY` in your environment. See [Configuration](/configuration#baidu-api-key). ## Page Selection You can select specific pages to export rather than exporting the entire presentation. Enable **Multi-select** on the Slide Preview page, check the pages you want, then click **Export**. ================================================ FILE: docs/features/images.mdx ================================================ --- title: "Generate & Refine Images" description: "Generate slide images and fine-tune them page by page" --- The Slide Preview is the final editing step. Generate images, refine details, browse version history, then export. ## Batch Generate All Images Click **Batch Generate Images** — AI generates all pages in parallel. 
Each card shows a progress animation; previews appear as they complete. If the current resolution is 1K, a warning dialog recommends switching to 2K or 4K — the difference in text clarity is significant. Change this in **Project Settings → Global Settings**. ## Single-Page Refinement When one slide doesn't look right, no need to regenerate everything: 1. Click the **Edit** icon (pencil) on the slide card 2. Update the **page description** or add an **edit instruction** in the right panel 3. Click **Generate Image** — only this page regenerates **Edit instruction examples:** - `Change the background to a dark gradient` - `Move the title to the top and increase font size` - `Replace the right side with a bar chart, keep the data` - `Change the overall style to hand-drawn illustration` To save description changes without regenerating the image, click **Save Outline/Description Only**. ## Region Editing: Change Only Part of a Slide Want to adjust a specific area without touching the rest: 1. Enter single-page edit mode, then click **Region Select** above the image 2. **Click and drag** to select the area you want to modify 3. The selected region is cropped and added as a reference image 4. Describe the change in the edit instruction box, then click **Generate Image** AI redraws the full slide with extra focus on the selected region. ## Version History Every regeneration automatically saves the previous version — nothing is lost. 1. Enter single-page edit mode 2. Click **History Versions** in the bottom-left of the image 3. Select any historical version to restore it Experiment freely with different prompts — you can always roll back to a version you liked. ## Batch Regenerate Selected Pages To regenerate only a subset of pages: 1. Click **Multi-select** at the top 2. Check the pages you want to regenerate 3. 
Click **Generate Selected (N)** ## Project Settings Click the gear icon (**Project Settings**) in the top-right. The settings are organized into three tabs: ### Project Settings Tab | Setting | Notes | |---------|-------| | Aspect Ratio | Changes take effect after regenerating images | | Extra Requirements | Applied to all image generation, e.g. "avoid real human photos" | | Style Description | Visual constraints, e.g. "minimalist, white background, thin lines" | | Change Template | Affects only future generations, not existing images | ### Export Settings Tab Configure **Editable PPTX** extraction behavior: - **Text Extractor**: OCR / Vision model / Hybrid — each has quality vs. speed tradeoffs - **Image Inpainting**: Pixel-based / AI-based / Hybrid - **Error Handling**: Enable **Allow Partial Results** to get output even when errors occur ### Global Settings Tab Change API provider, model names, image resolution (1K / 2K / 4K), and max concurrent workers. Run **Service Test** here to verify your configuration is working. ================================================ FILE: docs/features/import-export.mdx ================================================ --- title: "Import & Export Format" description: "Markdown format for importing and exporting outlines and descriptions" --- Banana Slides uses Markdown files (`.md` / `.txt`) to import and export outlines and page descriptions. The format is human-readable and easy to edit in any text editor. ## Format Structure ```markdown # Project Title > 生成时间: 2025/1/1 12:00:00 --- ## 第 1 页: Cover Page > 章节: Introduction **大纲要点:** - Company name and logo - Presentation date **页面描述:** A clean cover page with the company logo centered, title in bold white text on a dark blue gradient background. --- ## 第 2 页: Market Overview > 章节: Analysis **大纲要点:** - Industry growth trends - Key market segments **页面描述:** A data-driven page showing a bar chart of market growth over the past 5 years, with key statistics highlighted in callout boxes.
--- ``` ## Field Reference | Field | Syntax | Required | |-------|--------|----------| | Page header | `## 第 N 页: Title` | Yes | | Chapter | `> 章节: Name` | No | | Key points | `**大纲要点:**` followed by `- point` lines | No | | Description | `**页面描述:**` followed by free text | No | Pages are separated by `---`. The field markers are literal Chinese keywords (`第 N 页` = "Page N", `章节` = "Chapter", `大纲要点` = "Key points", `页面描述` = "Page description"). Use them exactly as shown, even when the page content is in English. ## Export Options You can export from both the Outline Editor and the Description Editor: | Source | Content | |--------|---------| | Outline Editor | Key points only (no descriptions) | | Description Editor | Both key points and descriptions | ## Import Behavior - Imported pages are **appended** to the existing project — they do not replace current pages. - Both `**大纲要点:**` and `**页面描述:**` markers are optional. If omitted, bullet lines (`- ...`) are treated as key points. - HTML tags in imported content are automatically stripped. - Accepts `.md` and `.txt` files. ## Minimal Example A valid import file can be as simple as: ```markdown ## 第 1 页: Introduction - Welcome and agenda - Speaker introduction ## 第 2 页: Summary - Key takeaways ``` ## Generate with AI Copy the prompt below into any AI assistant (ChatGPT, Claude, etc.), replace the placeholders, and import the output directly into Banana Slides. For outline-only import, remove the `**页面描述:**` requirement from the prompt. ```text Generate a presentation outline in the following Markdown format. Topic: [YOUR TOPIC] Number of pages: [NUMBER, e.g. 8] Language: [Chinese / English] Style notes: [e.g. "professional and data-driven" or "playful and visual"] Rules: 1. Each page starts with "## 第 N 页: Title" 2. Group pages into chapters using "> 章节: Chapter Name" 3. Under "**大纲要点:**", list 2-4 key points as "- point" 4. Under "**页面描述:**", write 1-2 sentences describing the visual layout, colors, charts, or imagery for that slide 5. Separate pages with "---" 6. First page should be a cover, last page should be a closing/thank-you page Output ONLY the Markdown, no extra explanation.
``` ================================================ FILE: docs/features/materials.mdx ================================================ --- title: "Materials & Files" description: "Upload reference files, paste images, or generate custom materials with AI" --- ## Reference Files Upload documents as source material — AI references the extracted content when generating outlines, descriptions, and images. ### Supported Formats | Format | Extracted Content | |--------|-----------------| | PDF | Text + embedded images | | DOCX / DOC | Document content and structure | | PPTX / PPT | Slide content | | XLSX / XLS / CSV | Spreadsheet data | | Markdown / TXT | Plain text | PDF parsing uses [MinerU](https://mineru.net) for high-quality extraction. Configure a MinerU token for better results — see [Configuration](/configuration#mineru-pdf-parsing). ### Upload Methods - Click the **paperclip icon** next to the input area to open a file picker - **Drag and drop** files into the input area - **Paste** (Ctrl+V) files or images from clipboard - Click **Material Center** to select from previously uploaded files File size limit: **200MB**. Files parse automatically after upload and show "Parsing" status until complete. ### Where to Upload | Location | Notes | |----------|-------| | Home page | Upload before creating a project — applies to the whole project | | Outline Editor | Manage files in the left panel | | Description Editor | View and manage files in the top section | | Slide Preview (single-page edit) | Attach a reference image to one specific page | ## Pasting Images Paste an image anywhere (Ctrl+V) in the home page, Outline Editor, or Description Cards: 1. Copy an image (screenshot, copy from web, etc.) 2. Press **Ctrl+V** in the input area 3. 
The image uploads automatically — AI recognizes its content and inserts it as a Markdown image reference at the cursor position ## Style References Upload an image as a visual style reference — AI will match its color palette, layout patterns, and design language. Upload in the **Select Style Template** section on the home page, or in the **Upload Images** section of single-page editing. ## Material Generator Click **Generate Material** in the top navigation bar to create custom AI-generated images: 1. Describe the image you want, e.g.: `Blue-purple gradient background with geometric shapes and tech-style lines` 2. Select an aspect ratio 3. Optionally upload a reference image to guide the style 4. Click **Generate Material** Generated images are automatically saved to the material library and available for reuse in projects. ## Material Center Click **Material Center** in the top navigation bar to manage all materials: - **Filter**: By project, or view all / unassociated materials - **Preview**: Click the eye icon for fullscreen preview - **Download**: Select items and download — multiple files are packaged as ZIP - **Delete**: Individual or batch delete - **Upload**: Add new images directly from Material Center ================================================ FILE: docs/features/outline.mdx ================================================ --- title: "Edit the Outline" description: "Review and adjust the AI-generated outline, or build one from scratch" --- The Outline Editor is the second step. Each slide is a card with a **title** and **key points** — no visuals yet. ## Refine with AI The top input bar is the fastest way to edit the outline. Describe what you want in plain language and press **Ctrl+Enter**. 
**Common commands:** - `Add a competitive analysis section after the introduction` - `Merge pages 4 and 5, keep the key points` - `Move the conclusion before the Q&A page` - `Change the title of page 2 to "Core Advantages"` - `Delete page 6` You can chain multiple instructions — AI remembers context. For example: "Add a case study section", then "Move it to page 3". ## Edit Cards Manually Click any card to edit its title or key points inline: 1. Click the title or bullet text on a card 2. Edit directly 3. Click elsewhere to auto-save The delete button is in the top-right corner of each card — requires confirmation. ## Add a New Page Click **Add Page** in the toolbar to insert a blank page at the end of the list. ## Reorder by Dragging Grab the drag handle (⠿ icon) on the left side of a card and drag it to the target position — the order updates immediately. ## Regenerate the Outline Not happy with the current outline? Click **Regenerate Outline** to have AI start fresh. Regenerating overwrites all existing outline content. A confirmation dialog appears before proceeding. This cannot be undone. ## Set Generation Requirements Expand **Outline Generation Requirements** in the toolbar to add overall constraints. These apply to every generation and regeneration. **Example requirements:** - `No more than 4 bullet points per page` - `Use a timeline structure ordered by year` - `Emphasize data and case studies` ## Import / Export Click the **Import/Export** button: - **Export Outline**: Save the current outline as a `.md` file — edit it in any text editor and reimport - **Import**: Load an outline from a `.md` or `.txt` file, **appended** to the end of existing pages See [Import/Export Format](/features/import-export) for the file format spec. ## Next Step Once the outline looks good, click **Next →** in the top-right to go to the [Description Editor](/features/descriptions). 
================================================ FILE: docs/features/overview.mdx ================================================ --- title: "Features Overview" description: "What Banana Slides can do" --- ## The Workflow ``` Create Project → Edit Outline → Write Descriptions → Generate Images → Export ``` Every step supports AI assistance and manual fine-tuning. ## Four Creation Paths | Path | Best for | |------|---------| | From an idea | Just a topic — let AI generate everything | | From an outline | You have an existing structure | | From descriptions | Content is already clear, generate images directly | | PPT Renovation | Upload an old PPT, AI regenerates a fresh version | See [Creating a Presentation](/features/creation). ## AI-Assisted Editing Every editing step has a top input bar — tell AI what to do in plain language: - **Outline Editor**: `Add a competitive analysis section after page 3` - **Description Editor**: `Make all descriptions more detailed with specific text content` - **Slide Preview** (single page): `Change the background to dark blue, increase title font size` See [Edit Outline](/features/outline), [Write Descriptions](/features/descriptions), [Generate & Refine Images](/features/images). ## Material Support - Upload reference files (PDF, DOCX, PPTX, spreadsheets, etc.) — AI extracts content to inform generation - Paste images — AI recognizes content and inserts them - Use the **Material Generator** to create custom backgrounds or illustrations with AI - **Material Center** manages all your materials for easy reuse See [Materials & Files](/features/materials). ## Export - **PPTX**: Slides embedded as images, playable and reorderable in PowerPoint - **Editable PPTX (Beta)**: Text extracted as native text boxes, editable in PowerPoint - **PDF**: Pixel-perfect output - **Images**: Each page as an individual file Multiple aspect ratios supported, with selective page export. See [Export Options](/features/export). 
================================================ FILE: docs/history.mdx ================================================ --- title: "Manage Projects" description: "View, rename, and delete past projects" --- Click **History** in the top navigation bar to see all your presentations. ## Continue Editing a Project Click any project card to jump back into that project and continue where you left off. ## Rename a Project Hover over the project title — when the edit icon appears, click it, type the new name, then press **Ctrl+Enter** to save or **Esc** to cancel. ## Delete Projects - **Delete one**: Click the delete icon on the card and confirm - **Delete multiple**: Check several projects and click **Batch Delete** Deletion is permanent and cannot be undone — all pages, descriptions, and generated images are removed. ================================================ FILE: docs/index.mdx ================================================ --- title: "Banana Slides" description: "AI-native presentation generation — Vibe your slides like vibe coding" --- Banana Slides is an AI-native PPT generation app. Enter an idea or upload an existing file, and AI automatically generates the outline, descriptions, and slide images — with natural language refinement at every step. 
## Get Started Deploy and generate your first PPT in 5 minutes Set up your AI provider and API keys ## Core Features Four creation paths: from idea, outline, descriptions, or renovate an existing PPT Refine structure with natural language or drag-and-drop reordering Batch generate, refine with AI, or manually edit page by page Batch generate, single-page editing, region editing, version history Upload reference files, generate materials with AI, manage your library Export as PPTX, PDF, or images — multiple aspect ratios supported ================================================ FILE: docs/logo/.gitkeep ================================================ ================================================ FILE: docs/quickstart.mdx ================================================ --- title: "Quick Start" description: "Try the demo or deploy Banana Slides locally" --- ## Online Demo No installation needed — try it instantly: [bananaslides.online](https://bananaslides.online/) --- ## Self-Hosting ### Step 1: Install Docker Download and install [Docker Desktop](https://www.docker.com/products/docker-desktop/). After installation, launch it and confirm the Docker icon appears in the system tray (Windows) or menu bar (macOS). 
```bash curl -fsSL https://get.docker.com | sh ``` ### Step 2: Download and Start Create a new directory, enter it, and run the following command to download the config files and start the service: ```bash mkdir banana-slides && cd banana-slides && \ curl -O https://raw.githubusercontent.com/Anionex/banana-slides/main/docker-compose.prod.yml && \ curl -O https://raw.githubusercontent.com/Anionex/banana-slides/main/.env.example && \ cp .env.example .env ``` Edit the `.env` file to fill in your API key (see [Configuration](/configuration)), then start: ```bash docker compose -f docker-compose.prod.yml up -d ``` [AIHubMix](https://aihubmix.com/?aff=17EC) is recommended for API keys — it supports both Gemini and OpenAI formats and handles high-concurrency image generation reliably. ### Step 3: Open the App Visit [http://localhost:3000](http://localhost:3000) in your browser. Default ports are 3000 (frontend) and 5000 (backend). To change them, set `FRONTEND_PORT` and `BACKEND_PORT` in `.env`. 
--- ## Update ```bash docker compose -f docker-compose.prod.yml pull && \ docker compose -f docker-compose.prod.yml up -d ``` ## View Logs ```bash docker logs -f --tail 100 banana-slides-backend ``` --- ## Deploy from Source ### Requirements - Python 3.10+ - [uv](https://github.com/astral-sh/uv) package manager - Node.js 16+ and npm ### Backend ```bash git clone https://github.com/Anionex/banana-slides cd banana-slides cp .env.example .env # Edit .env and add your API key cd backend uv run alembic upgrade head && uv run python app.py ``` ### Frontend ```bash cd frontend npm install npm run dev ``` ================================================ FILE: docs/zh/configuration.mdx ================================================ --- title: "配置" description: "环境变量与服务商设置" --- ## AI 服务商 在 `.env` 中设置 `AI_PROVIDER_FORMAT` 选择服务商: | 格式 | 说明 | |------|------| | `gemini` | Google Gemini API(默认) | | `openai` | OpenAI 兼容 API | | `vertex` | Google Cloud Vertex AI | | `lazyllm` | 多厂商国产模型路由 | ## Gemini(默认) ```env AI_PROVIDER_FORMAT=gemini GOOGLE_API_KEY=your-api-key GOOGLE_API_BASE=https://generativelanguage.googleapis.com ``` Gemini API 免费层仅支持文本生成,不支持图片生成。 ## OpenAI 兼容 ```env AI_PROVIDER_FORMAT=openai OPENAI_API_KEY=your-api-key OPENAI_API_BASE=https://api.openai.com/v1 ``` ## Vertex AI ```env AI_PROVIDER_FORMAT=vertex VERTEX_PROJECT_ID=your-gcp-project-id VERTEX_LOCATION=global GOOGLE_APPLICATION_CREDENTIALS=./gcp-service-account.json ``` `gemini-3-*` 系列模型需要 `VERTEX_LOCATION=global`。 ## LazyLLM(多厂商路由) 将请求路由到不同国产 AI 厂商: ```env AI_PROVIDER_FORMAT=lazyllm TEXT_MODEL_SOURCE=deepseek IMAGE_MODEL_SOURCE=doubao IMAGE_CAPTION_MODEL_SOURCE=qwen ``` 设置对应厂商的 API Key: ```env DOUBAO_API_KEY=your-key # 火山引擎/豆包 DEEPSEEK_API_KEY=your-key # DeepSeek QWEN_API_KEY=your-key # 阿里云/通义千问 GLM_API_KEY=your-key # 智谱 GLM SILICONFLOW_API_KEY=your-key # 硅基流动 SENSENOVA_API_KEY=your-key # 商汤日日新 MINIMAX_API_KEY=your-key # MiniMax ``` ## AIHubMix(推荐中转) [AIHubMix](https://aihubmix.com/?aff=17EC) 是推荐的 API 中转服务,同时支持 
Gemini 和 OpenAI 两种接口格式,且能稳定进行高并发文生图操作。[点击此处申请 AIHubMix API Key](https://aihubmix.com/token?aff=17EC)。 ```env AI_PROVIDER_FORMAT=openai OPENAI_API_KEY=your-aihubmix-key OPENAI_API_BASE=https://aihubmix.com/v1 ``` ## MinerU(PDF 解析) [MinerU](https://mineru.net) 提供高质量的 PDF 解析服务,用于参考文件上传时的内容提取。[点击此处申请 MinerU Token](https://mineru.net/apiManage/token)。 ```env MINERU_API_BASE=https://mineru.net MINERU_TOKEN=your-mineru-token ``` ## 百度 API Key 配置百度 API Key 以获得更好的可编辑 PPTX 导出效果(有充足免费额度): ```env BAIDU_API_KEY=your-baidu-api-key ``` 从百度智能云[申请 IAM API Key](https://console.bce.baidu.com/iam/#/iam/apikey/list)。 ## 运行时设置覆盖 以上所有配置也可通过网页设置页面进行配置。通过设置页面配置的参数会存储在数据库中,优先级高于 `.env`。点击"还原默认设置"可恢复为 `.env` 中的值。 ================================================ FILE: docs/zh/faq.mdx ================================================ --- title: "常见问题" description: "常见问题解答" --- ## 生成的文字有乱码或模糊 - 检查是否处于 1K 低分辨率模式。在「**项目设置 → 全局设置**」中切换到 2K 或 4K 分辨率,文字清晰度差异明显。注意:部分 OpenAI 格式中转服务不支持调高分辨率,建议改用 Gemini 格式。 - 在页面描述中包含具体要渲染的文字内容,AI 更容易正确渲染。 ## 可编辑 PPTX 导出效果不好,文字错位或缺失 通常是 API 配置问题。确保 `BAIDU_API_KEY` 设置正确,详见[配置说明](/zh/configuration#百度-api-key)。 也可以在「**项目设置 → 导出设置**」中调整文本提取方法(OCR / 视觉模型 / 混合),不同模型对不同排版效果差异较大。 ## 普通 PPTX 导出后文字无法在 PowerPoint 中编辑 普通 PPTX 是将幻灯片图片嵌入 PPT,图片内的文字不可编辑。需要可编辑文字请使用「**导出可编辑 PPTX(Beta)**」。 ## 免费版 Gemini API 能用吗? 文本生成(大纲、描述)可以用,但图片生成需要付费层级。 ## 出现 503 错误或持续重试 通常是模型配置错误。建议先在设置页底部点击「**服务测试**」验证配置,再排查具体问题。 也可以查看后端日志: ```bash docker logs --tail 200 banana-slides-backend ``` ## 修改 .env 后没有生效 - Docker 部署需重启容器才生效。 - 如果在网页设置页配置过参数,数据库中的值优先级高于 `.env`,点击「**还原默认设置**」可以清除。 ## PPT 翻新上传后解析结果不对 - 推荐先在本地将 PPTX 转为 PDF 再上传,效果更稳定。 - 直接上传 PPTX 需要服务端 LibreOffice 转换,可能因缺少字体(微软雅黑、Calibri 等)导致排版偏差。 - Docker 用户需要在容器内安装 LibreOffice 才支持 PPTX 上传: ```bash docker exec -it banana-slides-backend bash -c "apt-get update && apt-get install -y libreoffice-impress" ``` ## 如何更改画面比例? 
在首页创建项目前选择比例,或在幻灯片预览页的「**项目设置**」中修改。修改后需要重新生成图片才会生效。 注意:部分图像生成模型不支持某些比例,若生成报错可尝试切换到 16:9。 ## 生成速度很慢 - 在「**设置**」中调大「**描述生成最大并发数**」和「**图像生成最大并发数**」,提高并行度。 - 如果使用免费 API 额度,可能有速率限制,考虑使用付费额度或 [AIHubMix](https://aihubmix.com/?aff=17EC) 等中转服务。 ## 如何只重新生成某一页? 在幻灯片预览页,点击该页的编辑图标(铅笔),在右侧修改描述或填写修改指令,点击「**生成图片**」,只重新生成这一页。 ## 幻灯片里能放柱状图、折线图、树状图这些图表吗? 有两种方案: **方案一:描述 + 生成要求(推荐)** 在页面描述中直接用 Markdown 或文字描述图表内容,并在「描述生成要求」或「额外要求」中注明图表风格,AI 大概率能直接生成出来。示例描述: ``` 页面右侧展示过去5年营收柱状图,数据为:2020: 120万、2021: 180万、2022: 240万、2023: 310万、2024: 420万,柱子为蓝色渐变,背景白色。 ``` **方案二:先做成图片再粘贴** 用 Excel、Python(matplotlib)等工具提前生成图表图片,然后直接粘贴(Ctrl+V)到描述卡片中作为参考图,AI 会参考图片风格进行绘制。 ## 如何保证每页图表风格统一? 推荐使用**文字描述风格**而非上传参考图片——根据测试,文字方式对生成一致性的控制效果更好。 在「额外要求」或「风格描述」中明确写下统一规范,例如: - `所有图表使用蓝色系配色(#2563EB 为主色),白色背景,无边框` - `图表字体统一为无衬线字体,数据标签始终显示在柱子顶部` - `折线图使用圆形数据点标记,线条粗细 2px` 风格约束写得越具体,各页一致性越高。如果对字间距、字号这类细节有要求,也可以直接写进去。 ## AI 修改大纲/描述的输入框在哪里? 在**大纲编辑器**和**描述编辑器**的顶部导航栏中,有一个输入框,填写指令后按 **Ctrl+Enter** 提交。例如:`在第三页后加一页案例分析`。 ================================================ FILE: docs/zh/features/creation.mdx ================================================ --- title: "创建演示文稿" description: "四种方式开始创作,选一个最适合你的" --- 在首页选择创建方式,填写内容,点击「**创建新项目**」。 ## 从想法开始(最简单) 只需一句话描述主题,AI 自动完成大纲→描述→图片的全流程。 **适合**:没有现成材料,想快速生成一份 PPT **步骤:** 1. 选择「**一句话生成**」标签 2. 输入主题,例如:`生成一份关于碳中和的汇报 PPT,8 页` 3. 选择模板和画面比例(可选) 4. 点击「**创建新项目**」 之后会进入大纲编辑器,可以检查和修改 AI 生成的大纲。 ## 从大纲开始 已有结构思路,不想让 AI 凭空猜测: **适合**:有现成大纲或目录,想让 AI 在你的框架内填充内容 **步骤:** 1. 选择「**从大纲生成**」标签 2. 粘贴大纲内容,支持标题 + 要点格式,也可以只写标题 3. 点击「**创建新项目**」,AI 自动切分为结构化大纲 **格式示例:** ``` 第一页:AI 的起源 - 1956 年达特茅斯会议 - 早期研究者的愿景 第二页:机器学习的发展 从规则驱动到数据驱动的转变 ``` ## 从描述开始 已经有详细的每页描述,想跳过大纲步骤直接生图: **适合**:内容已经很明确,或从其他 AI 工具生成了完整描述 **步骤:** 1. 选择「**从描述生成**」标签 2. 粘贴每页的完整描述(包含布局、配色、内容等) 3. 点击「**创建新项目**」,直接进入图片生成 **格式示例:** ``` 第一页:封面 简洁封面,公司 Logo 居中,标题白色粗体,深蓝渐变背景。 第二页:市场概况 展示过去 5 年市场增长柱状图,关键数据用高亮卡片突出。 ``` ## PPT 翻新 上传已有 PDF 或 PPTX,AI 解析内容并重新生成风格焕新的版本: **适合**:老旧 PPT 需要视觉翻新,或想以已有 PPT 为基础重新创作 **步骤:** 1. 选择「**PPT 翻新**」标签 2. 上传 PDF 或 PPTX 文件 3. 
可选勾选「**保留原始排版布局**」,让 AI 更贴近原版结构 4. 点击「**创建新项目**」 推荐先在本地将 PPTX 转为 PDF 再上传。直接上传 PPTX 需要服务端 LibreOffice 转换,可能因缺少字体导致排版偏差。 ## 画面比例 创建项目前,在输入区域下方选择画面比例: | 比例 | 适用场景 | |------|---------| | 16:9 | 标准宽屏(默认) | | 4:3 | 传统幻灯片 | | 21:9 | 超宽屏展示 | | 1:1 | 正方形(社交媒体) | | 9:16 | 竖屏(手机展示) | 部分图像生成模型不支持某些比例,若生成报错可尝试切换到 16:9。项目创建后也可在「项目设置」中修改,修改后需重新生成图片才生效。 ## 选择模板和风格 创建前可以设定视觉风格,有两种方式: **方式一:选择模板** - 从预设模板库中点选一个,或上传自定义模板图片 **方式二:文字描述风格** - 勾选「**使用文字描述风格**」 - 在文本框中描述期望的视觉风格,例如:`现代科技风,深色背景,蓝紫渐变色调` ## 上传参考文件 创建时可以附加参考文件,AI 会提取内容作为创作素材: - **支持格式**:PDF、DOCX、PPTX、XLSX、CSV、TXT、MD - **上传方式**:点击回形针图标、直接拖拽、或粘贴(Ctrl+V) - **文件大小**:最大 200MB 文件上传后会自动解析,解析完成前无法创建项目。详见[素材与文件](/zh/features/materials)。 ================================================ FILE: docs/zh/features/descriptions.mdx ================================================ --- title: "完善描述" description: "为每页幻灯片生成或编写详细的视觉描述" --- 描述编辑器是第三步。每页的「描述」告诉 AI 这张幻灯片画什么——包括布局、配色、图表、文字内容。描述越具体,生成效果越好。 ## 批量生成所有描述 点击「**批量生成描述**」,AI 自动为每页生成描述,全部完成后可逐页查看和修改。 **生成前可调整详细程度**(按钮旁边的下拉): | 级别 | 适用场景 | |------|---------| | 精简 | 快速原型,速度优先 | | 默认 | 日常使用 | | 详细 | 精细打磨,效果优先 | 描述中加入具体文字内容(如标题文案、数据),可以显著提升幻灯片上的文字渲染质量。 ## 用 AI 批量修改描述 顶部输入栏支持对所有页面的描述批量调整,按 **Ctrl+Enter** 提交。 **常用指令示例:** - `让所有描述更详细,加入具体的文字内容` - `统一改成深蓝色科技风格` - `第 3 页的描述加上一个数据对比表格` - `删除所有描述中提到"渐变背景"的部分` ## 单独重新生成某一页 每张描述卡片右上角有「**重新生成**」按钮: 1. 点击重新生成图标(🔄) 2. 弹出确认框,确认后 AI 单独重新生成这一页的描述 3. 生成完成后自动替换原内容 ## 手动编辑单页描述 直接点击描述卡片中的文字区域,即可手动编辑该页描述,点击其他地方自动保存。 ## 设置描述生成要求 操作栏展开「**描述生成要求**」,填写对描述的整体约束,生成时 AI 会遵守。 **示例要求:** - `每页描述控制在 100 字以内` - `多使用数据和图表,少用纯文字` - `保持简洁的极简主义风格` ## 为某页指定参考图片 在描述卡片底部的编辑区域,可以上传该页的参考图片: 1. 滚动到描述卡片底部,找到「**上传图片**」区域 2. 上传或从素材库选择参考图片 3. 
填写修改指令(可选),点击「**生成图片**」 ## 导入 / 导出描述 点击「**导入/导出**」: - **导出描述**:仅导出描述内容 - **导出大纲和描述**:导出完整内容(含要点) - **导入**:从文件加载描述,追加到现有页面 ## 完成后 所有页面有描述后,右上角「**下一步 →**」会激活,点击进入[幻灯片预览](/zh/features/images),开始生成图片。 ================================================ FILE: docs/zh/features/editing.mdx ================================================ --- title: "编辑幻灯片" description: "用自然语言修改演示文稿" --- ## 自然语言编辑 直接用口头描述修改,无需菜单操作: - "把第三页改成案例分析" - "把这个图换成饼图" - "背景调暗一点" - "加一个要点列表" ## 大纲编辑 在大纲编辑器中输入指令调整结构: - "在引言后面加一节竞品分析" - "合并最后两页" - "把总结移到问答页前面" 也可以手动拖拽页面重新排序,或编辑章节归属。 ## 区域编辑 在幻灯片预览中框选特定区域,描述修改内容。AI 只重新生成选中区域,其余部分保持不变。 操作步骤: 1. 进入幻灯片预览页,点击「区域选图」按钮 2. 在幻灯片上拖拽框选要修改的区域 3. 在输入框中描述修改内容 4. 点击「生成图片」,AI 只重绘选中区域 ## 描述详细程度 在详细编辑器的「批量生成描述」按钮旁,可选择描述的详细程度: | 级别 | 说明 | |------|------| | 精简 | 简洁描述,生成速度更快,适合快速原型 | | 默认 | 平衡详细程度与生成速度 | | 详细 | 更丰富的描述,AI 有更多构图参考,适合精细打磨 | ## 历史版本 每次重新生成幻灯片图片时,旧版本会自动保留。在预览页点击「历史版本」可查看和切换到任意历史版本,不必担心误操作覆盖满意的效果。 ================================================ FILE: docs/zh/features/export.mdx ================================================ --- title: "导出选项" description: "导出为 PPTX、PDF 或图片" --- ## 格式 | 格式 | 说明 | |------|------| | PPTX | 幻灯片以图片形式嵌入 PPT,可在 PowerPoint 中播放和调整页面顺序,但图片内的文字无法直接编辑 | | 可编辑 PPTX(Beta) | 从图片中提取文字重建为原生文本框,可在 PowerPoint 中直接编辑文字内容 | | PDF | 像素级精确输出,可直接演示 | | 图片 | 每页导出为独立图片文件 | ## 画面比例 默认 16:9,也可在创建项目时或项目预览页的「项目设置」中更改。支持 16:9、4:3、21:9、1:1、9:16 等多种比例。比例变更后需重新生成图片才会生效。 ## 可编辑 PPTX(Beta) 提取过程保留字号、颜色、加粗样式、文字定位和表格内容,但受限于 OCR 精度和当前模型能力,复杂排版可能存在偏差,与原图效果有出入。 配置 `BAIDU_API_KEY` 可获得最佳提取效果,详见[配置说明](/zh/configuration#百度-api-key)。 ## 选页导出 可选择特定页面导出,无需导出整个演示文稿。在幻灯片预览页开启「多选」,勾选要导出的页面后点击「导出」即可。 ================================================ FILE: docs/zh/features/images.mdx ================================================ --- title: "生成与精修图片" description: "生成幻灯片图片,并对单页进行精细调整" --- 幻灯片预览是最后一个编辑步骤。在这里生成图片、精修细节、查看历史版本,最后导出。 ## 批量生成所有图片 点击「**批量生成图片**」,AI 并行生成所有页面的图片。 生成时每张卡片显示进度动画,完成后立即预览。 如果当前分辨率为 1K,系统会弹出提示建议切换到 2K 或 4K,文字清晰度差异明显。在「项目设置 → 全局设置」中调整。 ## 单页精修 对某张幻灯片不满意时,无需重新生成全部: 
1. 点击幻灯片卡片上的「**编辑**」图标(铅笔) 2. 在右侧面板修改**页面描述**或填写**修改指令** 3. 点击「**生成图片**」,AI 只重新生成这一页 **修改指令示例:** - `把背景改成深蓝色渐变` - `标题字体加大,移到画面上方` - `右侧换成柱状图,数据不变` - `整体风格改成手绘插画风` 如果只想保存描述的修改而不重新生图,点击「**仅保存大纲/描述**」。 ## 区域编辑:只改幻灯片的某个部分 不想改整页,只想调整某个局部区域: 1. 进入单页编辑模式后,点击左侧图片上方的「**区域选图**」 2. 在幻灯片预览上**拖拽框选**要修改的区域 3. 选中区域会被裁剪并添加为参考图 4. 在修改指令中描述要做什么,点击「**生成图片**」 AI 会重绘整张幻灯片,但以选中区域的内容作为重点参考。 ## 查看和恢复历史版本 每次重新生成,旧版本自动保留,不会丢失。 1. 进入单页编辑模式 2. 图片左下角点击「**历史版本**」 3. 选择任意历史版本,点击切换 大胆尝试不同提示词,不满意随时回到之前版本。 ## 多选批量生成 只想重新生成其中几页: 1. 点击顶部「**多选**」 2. 勾选要重新生成的页面 3. 点击「**生成选中页面 (N)**」 ## 项目设置 点击右上角齿轮图标「**项目设置**」,进入三个配置标签: ### 项目设置标签 | 配置项 | 说明 | |--------|------| | 画面比例 | 修改后重新生成图片才生效 | | 额外要求 | 所有图片生成都会遵守,如"避免使用真人照片" | | 风格描述 | 视觉风格约束,如"极简主义、白色背景、细线条" | | 更换模板 | 切换参考模板,只影响后续新生成的页面 | ### 导出设置标签 配置「**可编辑 PPTX**」的提取方式: - **文本提取方法**:OCR / 视觉模型 / 混合(效果和速度各有权衡) - **图像修复方法**:像素级 / AI / 混合 - **错误处理**:开启「返回半成品」可在报错时仍尝试输出结果 ### 全局设置标签 修改 API 提供商、模型名称、图像分辨率(1K / 2K / 4K)、并发数等。也可以在这里运行**服务测试**验证配置是否正常。 ================================================ FILE: docs/zh/features/import-export.mdx ================================================ --- title: "导入导出格式" description: "大纲和描述的 Markdown 导入导出格式说明" --- Banana Slides 使用 Markdown 文件(`.md` / `.txt`)导入导出大纲和页面描述,格式可读性强,可用任意文本编辑器编辑。 ## 格式结构 ```markdown # 项目标题 > 生成时间: 2025/1/1 12:00:00 --- ## 第 1 页: 封面 > 章节: 引言 **大纲要点:** - 公司名称和 Logo - 演示日期 **页面描述:** 简洁的封面页,公司 Logo 居中,标题使用白色粗体字,深蓝渐变背景。 --- ## 第 2 页: 市场概况 > 章节: 分析 **大纲要点:** - 行业增长趋势 - 主要市场细分 **页面描述:** 数据驱动的页面,展示过去 5 年市场增长柱状图,关键数据用高亮卡片突出。 --- ``` ## 字段说明 | 字段 | 语法 | 必填 | |------|------|------| | 页面标题 | `## 第 N 页: 标题` | 是 | | 章节 | `> 章节: 名称` | 否 | | 大纲要点 | `**大纲要点:**` 后跟 `- 要点` 行 | 否 | | 页面描述 | `**页面描述:**` 后跟自由文本 | 否 | 页面之间用 `---` 分隔。 ## 导出选项 可从大纲编辑器和详细编辑器导出: | 来源 | 内容 | |------|------| | 大纲编辑器 | 仅大纲要点 | | 详细编辑器 | 大纲要点 + 页面描述 | ## 导入行为 - 导入的页面会**追加**到现有项目,不会替换已有页面。 - `**大纲要点:**` 和 `**页面描述:**` 标记可省略,省略时列表行(`- ...`)自动识别为要点。 - 导入内容中的 HTML 标签会被自动清除。 - 支持 `.md` 和 `.txt` 文件。 ## 最简示例 一个有效的导入文件可以很简单: ```markdown ## 第 1 页: 开场介绍 - 
欢迎与议程 - 演讲者介绍 ## 第 2 页: 总结 - 核心要点回顾 ``` ## 用 AI 生成 将下面的提示词复制到任意 AI 助手(ChatGPT、Claude、Kimi 等),替换占位符,输出结果可直接导入 Banana Slides。 如果只需要大纲(不含描述),从提示词中删除 `**页面描述:**` 相关要求即可。 ```text 请按以下 Markdown 格式生成一份演示文稿大纲。 主题:[你的主题] 页数:[数量,如 8] 语言:[中文 / 英文] 风格备注:[如"专业数据风"或"活泼插画风"] 格式要求: 1. 每页以 "## 第 N 页: 标题" 开头 2. 用 "> 章节: 章节名" 对页面分组 3. 在 "**大纲要点:**" 下用 "- 要点" 列出 2-4 个要点 4. 在 "**页面描述:**" 下用 1-2 句话描述该页的视觉布局、配色、图表或画面 5. 页面之间用 "---" 分隔 6. 第一页为封面,最后一页为结束/致谢页 只输出 Markdown,不要额外解释。 ``` ================================================ FILE: docs/zh/features/materials.mdx ================================================ --- title: "素材与文件" description: "上传参考文件、粘贴图片,或用 AI 生成专属素材" --- ## 上传参考文件 上传文档后,AI 在生成大纲、描述和图片时会参考其中的内容。 ### 支持的格式 | 格式 | 提取内容 | |------|---------| | PDF | 文本 + 嵌入图片 | | DOCX / DOC | 文档内容和结构 | | PPTX / PPT | 幻灯片内容 | | XLSX / XLS / CSV | 表格数据 | | Markdown / TXT | 纯文本 | PDF 解析使用 [MinerU](https://mineru.net) 提供高质量内容提取。配置 MinerU Token 后效果更好,详见[配置说明](/zh/configuration#mineru-pdf-解析)。 ### 上传方式 - 点击输入区域旁的**回形针图标**,打开文件选择器 - 直接**拖拽文件**到输入区域 - **粘贴**(Ctrl+V)文件或图片 - 点击「**素材中心**」从历史上传的文件中选择 文件大小上限:**200MB**。上传后自动开始解析,解析期间显示「解析中」状态。 ### 在哪里上传 | 位置 | 说明 | |------|------| | 首页 | 创建项目前上传,作为整个项目的参考素材 | | 大纲编辑器 | 左侧面板可管理已上传的文件 | | 描述编辑器 | 顶部区域可查看和管理参考文件 | | 幻灯片预览(单页编辑) | 可为单独一页指定参考图片 | ## 粘贴图片 在首页、大纲编辑器或描述卡片中,直接粘贴图片(Ctrl+V): 1. 复制图片(截图、从网页复制等) 2. 在输入框中按 **Ctrl+V** 3. 图片自动上传,AI 识别内容后以 Markdown 格式插入到光标位置 识别出的图片内容会作为上下文参考。 ## 风格参考图 上传图片作为视觉风格参考,AI 会匹配其配色、布局和设计语言。 在首页「选择风格模板」区域或单页编辑的「上传图片」中上传参考图。 ## 素材生成工具 点击顶部导航栏的「**素材生成**」,用 AI 生成自定义图片: 1. 在提示词框中描述想要的图片,例如:`蓝紫渐变背景,带几何图形和科技感线条` 2. 选择画面比例 3. 可选上传参考图引导风格 4. 
点击「**生成素材**」 生成完成后自动保存到素材库,可在项目中复用。 ## 素材中心 点击顶部导航栏的「**素材中心**」,查看和管理所有历史素材: - **筛选**:按项目筛选,或查看全部/未关联项目的素材 - **预览**:点击眼睛图标全屏预览 - **下载**:选中后点击下载,多个文件自动打包为 ZIP - **删除**:单个删除或批量删除 - **上传**:直接在素材中心上传新图片 ================================================ FILE: docs/zh/features/outline.mdx ================================================ --- title: "编辑大纲" description: "检查、调整 AI 生成的大纲,或从头手动搭建" --- 大纲编辑器是第二步。每张幻灯片对应一张卡片,卡片包含**标题**和**要点**,不涉及视觉效果。 ## 用 AI 修改大纲 顶部输入栏是最高效的大纲编辑方式。用自然语言告诉 AI 要做什么,按 **Ctrl+Enter** 提交。 **常用指令示例:** - `在引言后面加一节竞品分析` - `合并第 4、5 页,保留关键要点` - `把总结移到问答页前面` - `第 2 页标题改为"核心优势"` - `删除第 6 页` 可以连续发多条指令,AI 会记住上下文。比如先说"加一节案例分析",再说"把它移到第三页"。 ## 手动编辑页面 点击任意卡片可以直接编辑标题和要点: 1. 点击卡片上的标题或要点文字 2. 直接修改内容 3. 点击其他地方自动保存 每张卡片右上角有**删除**按钮,点击后需确认。 ## 添加新页面 点击操作栏的「**添加页面**」,在列表末尾插入一张空白页。 ## 拖拽调整顺序 拖住卡片左侧的拖拽把手(⠿ 图标),上下拖动到目标位置后松手,顺序立即生效。 ## 重新生成大纲 如果对当前大纲不满意,点击「**重新生成大纲**」让 AI 重新生成。 重新生成会覆盖所有现有大纲内容,操作前会弹出确认框。已有内容无法恢复。 ## 设置生成要求 在操作栏展开「**大纲生成要求**」,填写对大纲的整体要求,之后每次生成/重新生成都会遵守。 **示例要求:** - `每页要点不超过 4 条` - `使用时间线结构,按年份排列` - `重点突出数据和案例` ## 导入 / 导出大纲 点击「**导入/导出**」按钮: - **导出大纲**:将当前大纲保存为 `.md` 文件,可用文本编辑器修改后再导入 - **导入**:从 `.md` 或 `.txt` 文件加载大纲,**追加**到现有页面末尾 导入的格式要求见[导入导出格式](/zh/features/import-export)。 ## 完成后 大纲确认后,点击右上角「**下一步 →**」进入[描述编辑器](/zh/features/descriptions)。 ================================================ FILE: docs/zh/features/overview.mdx ================================================ --- title: "功能总览" description: "Banana Slides 能做什么" --- ## 完整工作流程 ``` 创建项目 → 编辑大纲 → 完善描述 → 生成图片 → 导出 ``` 每步都可以用 AI 辅助,也可以手动精细调整。 ## 四种创作路径 | 路径 | 适合场景 | |------|---------| | 从想法开始 | 只有主题,让 AI 全部生成 | | 从大纲开始 | 有现成目录结构 | | 从描述开始 | 每页内容已很明确,直接生图 | | PPT 翻新 | 上传旧 PPT,AI 重新生成新版 | 详见[创建演示文稿](/zh/features/creation)。 ## AI 辅助编辑 每个编辑步骤都有顶部 AI 输入栏,用自然语言告诉 AI 要做什么: - **大纲编辑器**:`加一节竞品分析,放在第三页后面` - **描述编辑器**:`让所有描述更详细,加入具体文字内容` - **幻灯片预览**(单页):`把背景改成深色,标题字号加大` 详见[编辑大纲](/zh/features/outline)、[完善描述](/zh/features/descriptions)、[生成与精修图片](/zh/features/images)。 ## 素材支持 - 上传参考文件(PDF、DOCX、PPTX、表格等),AI 提取内容辅助生成 - 
### 第一步:安装 Docker

**Windows / macOS**:下载并安装 [Docker Desktop](https://www.docker.com/products/docker-desktop/),安装完成后启动它,确保系统托盘(Windows)或菜单栏(macOS)中出现 Docker 图标。

**Linux**:使用官方安装脚本安装 Docker:

```bash
curl -fsSL https://get.docker.com | sh
```
// ESLint configuration for the frontend package.
module.exports = {
  // Marks this as the root config so ESLint does not look in parent directories.
  root: true,
  env: { browser: true, es2020: true },
  extends: [
    'eslint:recommended',
    'plugin:@typescript-eslint/recommended',
    'plugin:react-hooks/recommended',
  ],
  // Build output and this config file itself are not linted.
  ignorePatterns: ['dist', '.eslintrc.cjs'],
  parser: '@typescript-eslint/parser',
  plugins: ['react-refresh'],
  rules: {
    // The three rules below are switched off entirely for this project.
    'react-refresh/only-export-components': 'off',
    'react-hooks/exhaustive-deps': 'off',
    '@typescript-eslint/no-explicit-any': 'off',
    // Unused names only warn; a leading underscore exempts an argument or variable.
    '@typescript-eslint/no-unused-vars': ['warn', { argsIgnorePattern: '^_', varsIgnorePattern: '^_' }],
  },
}
# Mirror configuration (override with --build-arg)
ARG DOCKER_REGISTRY=
ARG NPM_REGISTRY=

# ---- Build stage ----
# Pulls from the mirror when DOCKER_REGISTRY is set, otherwise from the official registry
FROM ${DOCKER_REGISTRY:-}node:18-alpine AS builder

# ARGs declared before FROM go out of scope after it, so re-declare
ARG NPM_REGISTRY=

WORKDIR /app

# Copy package.json
COPY frontend/package.json ./

# Install dependencies (point npm at the mirror first when NPM_REGISTRY is set).
# `--frozen-lockfile` is a yarn/pnpm flag that npm does not implement; `npm ci`
# is npm's lockfile-exact install. It needs package-lock.json, which is copied
# with a glob and may be absent, so fall back to `npm install` when the lockfile
# is missing or `npm ci` fails.
COPY frontend/package-lock.json* ./
RUN if [ -n "$NPM_REGISTRY" ]; then \
        npm config set registry "$NPM_REGISTRY"; \
    fi && \
    if [ -f package-lock.json ]; then \
        npm ci || npm install; \
    else \
        npm install; \
    fi

# Copy the frontend sources
COPY frontend/ ./

# Build the application
RUN npm run build

# ---- Production stage ----
# Pulls from the mirror when DOCKER_REGISTRY is set, otherwise from the official registry
FROM ${DOCKER_REGISTRY:-}nginx:alpine

# Copy the build output into nginx's web root
COPY --from=builder /app/dist /usr/share/nginx/html

# Copy the nginx site configuration
COPY frontend/nginx.conf /etc/nginx/conf.d/default.conf

# Expose the HTTP port
EXPOSE 80

# Run nginx in the foreground
CMD ["nginx", "-g", "daemon off;"]
构建生产版本 ```bash npm run build ``` ## 项目结构 ``` src/ ├── api/ # API 封装 │ ├── client.ts # Axios 实例配置 │ └── endpoints.ts # API 端点 ├── components/ # 组件 │ ├── shared/ # 通用组件 │ ├── outline/ # 大纲编辑组件 │ └── preview/ # 预览组件 ├── pages/ # 页面 │ ├── Home.tsx # 首页 │ ├── OutlineEditor.tsx # 大纲编辑页 │ ├── DetailEditor.tsx # 详细描述编辑页 │ └── SlidePreview.tsx # 预览页 ├── store/ # 状态管理 │ └── useProjectStore.ts ├── types/ # TypeScript 类型 │ └── index.ts ├── utils/ # 工具函数 │ └── index.ts ├── App.tsx # 应用入口 ├── main.tsx # React 挂载点 └── index.css # 全局样式 ``` ## 主要功能 ### 1. 首页 (/) - 三种创建方式:一句话生成、从大纲生成、从描述生成 - 风格模板选择和上传 ### 2. 大纲编辑页 (/project/:id/outline) - 拖拽排序页面 - 编辑大纲内容 - 自动生成大纲 ### 3. 详细描述编辑页 (/project/:id/detail) - 批量生成页面描述 - 编辑单页描述 - 网格展示所有页面 ### 4. 预览页 (/project/:id/preview) - 查看生成的图片 - 编辑单页(自然语言修改) - 导出为 PPTX/PDF ## 开发注意事项 ### 状态管理 - 使用 Zustand 进行全局状态管理 - 关键状态会同步到 localStorage - 页面刷新后自动恢复项目 ### 异步任务 - 使用轮询机制监控长时间任务 - 显示实时进度 - 完成后自动刷新数据 ### 图片处理 - 所有图片路径需通过 `getImageUrl()` 处理 - 支持相对路径和绝对路径 ### 拖拽功能 - 使用 @dnd-kit 实现 - 支持键盘操作 - 乐观更新 UI ## 与后端集成 确保后端服务运行在配置的端口(默认 5000): ```bash cd ../backend python app.py ``` ## 浏览器支持 - Chrome (推荐) - Firefox - Safari - Edge ================================================ FILE: frontend/e2e/README.md ================================================ # E2E 测试说明 ## 📋 测试策略 本项目采用**单一真正的 E2E 测试**策略,避免"伪 E2E"测试造成混淆。 ### 测试金字塔 ``` ┌──────────────────┐ │ E2E 测试 │ ← 少量,测试完整流程,需要真实 API │ (api-full-flow) │ └──────────────────┘ ▲ │ ┌───────────────────┐ │ 集成测试 │ ← 中等,测试 API 端点,使用 mock │ (backend/tests/) │ └───────────────────┘ ▲ │ ┌─────────────────────┐ │ 单元测试 │ ← 大量,快速,独立 │ (前端 + 后端) │ └─────────────────────┘ ``` --- ## 🎯 E2E 测试文件 ### 1. **api-full-flow.spec.ts** ⭐ 主要 E2E 测试 **特点**: - ✅ 真正的端到端测试(完整流程) - ✅ 使用真实的 AI API(Google Gemini) - ✅ 测试从创建到导出的完整链路 - ✅ 在 CI 中自动运行(如果配置了 API key) **测试流程**: ``` 1. 创建项目(从想法/大纲/描述) ↓ 2. 等待 AI 生成大纲 ↓ 3. 生成页面描述 ↓ 4. 生成页面图片 ↓ 5. 
导出 PPT 文件 ``` **运行条件**: - ⚠️ 需要真实的 `GOOGLE_API_KEY` - ⚠️ 需要约 10-15 分钟 - ⚠️ 会消耗 API 配额(约 $0.01-0.05/次) **本地运行**: ```bash # 1. 确保 .env 中配置了真实的 GOOGLE_API_KEY # 2. 启动服务 docker compose up -d # 3. 等待服务就绪(使用智能等待脚本) ./scripts/wait-for-health.sh http://localhost:5000/health 60 2 ./scripts/wait-for-health.sh http://localhost:3000 60 2 # 4. 运行测试 npx playwright test api-full-flow.spec.ts --workers=1 ``` **CI 运行**: - 自动运行:在 `docker-test` job 中 - 条件:`GOOGLE_API_KEY` 已在 GitHub Secrets 中配置 - 跳过:如果没有配置 API key,会跳过并显示说明 --- ### 2. **ui-full-flow.spec.ts** 🎨 UI 驱动的完整测试 **特点**: - ✅ 从浏览器 UI 开始操作(模拟真实用户) - ✅ 测试完整的用户交互流程 - ✅ 需要真实的 AI API(Google Gemini) - ⚠️ 运行时间更长(15-20 分钟) - ✅ 在 CI 中自动运行(如果有 API key) **用途**: - 发布前的最终验证 - 验证真实用户体验 - CI/CD 完整流程测试 **本地运行**: ```bash # 1. 确保 .env 中配置了真实的 GOOGLE_API_KEY # 2. 启动服务 docker compose up -d # 3. 等待服务就绪 ./scripts/wait-for-health.sh http://localhost:5000/health 60 2 ./scripts/wait-for-health.sh http://localhost:3000 60 2 # 4. 运行测试 npx playwright test ui-full-flow.spec.ts --workers=1 ``` **CI 运行**: - 自动运行:在 `docker-test` job 中 - 条件:`GOOGLE_API_KEY` 已在 GitHub Secrets 中配置 - 跳过:如果没有配置 API key 或是 Fork PR,会跳过并显示说明 --- ## 🚫 已删除的测试 以下测试文件已被删除(避免混淆): - ~~`home.spec.ts`~~ - 基础 UI 测试(不是真正的 E2E) - ~~`create-ppt.spec.ts`~~ - API 集成测试(不是真正的 E2E) **原因**: - 它们不调用真实 AI API,不是真正的端到端测试 - 测试的内容已被其他测试覆盖: - UI 交互 → 前端单元测试 - API 端点 → 后端集成测试 - 完整流程 → `api-full-flow.spec.ts` --- ## 🔧 CI 配置 ### 在 GitHub Actions 中的运行逻辑 ```yaml # .github/workflows/ci-test.yml docker-test job: ├─ 构建 Docker 镜像 ├─ 启动服务 ├─ 健康检查 ├─ Docker 环境测试 └─ E2E 测试 (api-full-flow.spec.ts) ├─ 如果有 GOOGLE_API_KEY → 运行完整 E2E └─ 如果没有 API key → 跳过,显示说明 ``` ### 配置 GitHub Secrets 要在 CI 中运行 E2E 测试,需要配置: 1. 进入仓库 → **Settings** → **Secrets and variables** → **Actions** 2. 
添加 Secret: - Name: `GOOGLE_API_KEY` - Value: 你的 Google Gemini API 密钥 - 获取地址:https://aistudio.google.com/app/apikey ### 如果没有配置 API key CI 会跳过 E2E 测试,并显示: ``` ⚠️ Skipping E2E tests Reason: GOOGLE_API_KEY not configured or using mock key Note: Other tests already passed: ✅ Backend unit tests ✅ Backend integration tests (with mock AI) ✅ Frontend unit tests ✅ Docker environment tests E2E tests require a real Google API key to test the complete AI generation workflow. ``` **这是正常的!** 其他测试已经覆盖了大部分功能。 --- ## 📊 测试覆盖范围 | 测试层级 | 测试内容 | 需要真实 API | 运行时间 | CI 运行 | |---------|---------|-------------|---------|---------| | **前端单元测试** | React 组件、hooks、工具函数 | ❌ | < 1 分钟 | ✅ 总是 | | **后端单元测试** | Services、Utils、Models | ❌ | < 2 分钟 | ✅ 总是 | | **后端集成测试** | API 端点(mock AI) | ❌ | < 3 分钟 | ✅ 总是 | | **Docker 环境测试** | 容器启动、健康检查 | ❌ | < 5 分钟 | ✅ 总是 | | **E2E 测试** | 完整 AI 生成流程 | ✅ | 10-15 分钟 | ⚠️ 有 API key 时 | --- ## 🎯 最佳实践 ### 开发时 1. **日常开发**:运行单元测试和集成测试 ```bash # 后端 cd backend && uv run pytest tests/ # 前端 cd frontend && npm test ``` 2. **提交 PR 前**:确保 CI 的所有测试通过 - Light Check(自动运行) - Full Test(添加 `ready-for-test` 标签触发) 3. **大功能完成后**:本地运行一次 E2E 测试 ```bash # 确保 .env 配置了真实 API key npx playwright test api-full-flow.spec.ts ``` ### 发布前 1. **最终验证**:运行完整的 UI E2E 测试 ```bash npx playwright test ui-full-flow.spec.ts ``` 2. 
## 📚 相关文档

- [Playwright 文档](https://playwright.dev)
- [CI 配置说明](../../.github/CI_SETUP.md)
- [项目 README](../../README.md)
expect(page.getByText('请输入访问口令')).not.toBeVisible({ timeout: 10000 }); }); test('shows error on wrong code', async ({ page }) => { await page.route('**/api/access-code/check', route => route.fulfill({ json: { data: { enabled: true } } }) ); await page.route('**/api/access-code/verify', route => route.fulfill({ status: 403, json: { error: 'Invalid access code' } }) ); await page.goto('/'); await page.locator('input[type="password"]').fill('wrong'); await page.getByRole('button', { name: '确认' }).click(); await expect(page.getByText('口令错误')).toBeVisible({ timeout: 5000 }); }); test('shows connection error with retry when backend is unreachable', async ({ page }) => { await page.route('**/api/access-code/check', route => route.abort('connectionrefused') ); await page.goto('/'); await expect(page.getByText('无法连接到后端服务')).toBeVisible({ timeout: 10000 }); await expect(page.getByText('请检查后端服务是否正常运行')).toBeVisible(); await expect(page.getByRole('button', { name: '重试' })).toBeVisible(); // No access code input should be shown await expect(page.locator('input[type="password"]')).not.toBeVisible(); }); test('retry button re-checks access after connection error', async ({ page }) => { let shouldSucceed = false; await page.route('**/api/access-code/check', route => { if (!shouldSucceed) return route.abort('connectionrefused'); return route.fulfill({ json: { data: { enabled: false } } }); }); await page.goto('/'); await expect(page.getByText('无法连接到后端服务')).toBeVisible({ timeout: 10000 }); shouldSucceed = true; await page.getByRole('button', { name: '重试' }).click(); await expect(page.getByText('无法连接到后端服务')).not.toBeVisible({ timeout: 10000 }); }); test('auto-verifies saved code from localStorage', async ({ page }) => { let verified = false; await page.route('**/api/access-code/check', route => route.fulfill({ json: { data: { enabled: true } } }) ); await page.route('**/api/access-code/verify', route => { verified = true; return route.fulfill({ json: { data: { valid: true } } }); }); 
test.describe('Aspect ratio lock (integration)', () => {
  // Per-test timeout for this suite: 30 seconds.
  test.setTimeout(30_000)

  test('aspect ratio locked after images generated', async ({ page }) => {
    // Seed a project through the backend API with a single page that has an image.
    const { projectId } = await seedProjectWithImages(API, 1)

    await page.goto(`/project/${projectId}/preview`)
    await page.waitForLoadState('networkidle')

    // Open project settings: the first button labelled 设置 / Settings.
    await page
      .locator('button')
      .filter({ hasText: /设置|Settings/ })
      .first()
      .click()

    // The locked-state notice must be shown...
    const lockedNotice = page.getByText(/已生成图片的项目无法调整|Cannot change aspect ratio/)
    await expect(lockedNotice).toBeVisible()

    // ...and every ratio button must be disabled.
    const ratioLabels = ['16:9', '4:3', '1:1', '9:16', '3:2']
    for (const label of ratioLabels) {
      const ratioButton = page.locator(`button:has-text("${label}")`).first()
      await expect(ratioButton).toBeDisabled()
    }
  })
})
/**
 * Stub the project-detail endpoint for PROJECT_ID.
 *
 * Every matching request is answered with HTTP 200 and a JSON body of the form
 * `{ success: true, data: { ...baseMockProject, pages } }`.
 *
 * @param page  object exposing `route(url, handler)` (the Playwright page in these tests)
 * @param pages page records to embed in the mocked project
 * @returns whatever `page.route` returns
 */
function mockRoutes(page: any, pages: any[]) {
  const projectUrl = '**/api/projects/' + PROJECT_ID

  // Serialised per request, exactly when the handler runs.
  const buildBody = () =>
    JSON.stringify({
      success: true,
      data: { ...baseMockProject, pages },
    })

  const respond = async (route: any) => {
    await route.fulfill({
      status: 200,
      contentType: 'application/json',
      body: buildBody(),
    })
  }

  return page.route(projectUrl, respond)
}
test.describe('Attachment Sorting and Filtering', () => {
  // NOTE(review): the integration spec in this suite defaults to port 3000 — confirm 3401 is intended.
  const BASE_URL = process.env.BASE_URL || 'http://localhost:3401';

  /** Build `route.fulfill` options: HTTP 200 with `data` wrapped in the `{ data }` JSON envelope. */
  const jsonOk = (data: unknown) => ({
    status: 200,
    contentType: 'application/json',
    body: JSON.stringify({ data }),
  });

  /** Build a reference-file record whose `parse_status` is `completed`. */
  const parsedFile = (id: string, filename: string, created_at: string, file_size: number) => ({
    id,
    filename,
    created_at,
    file_size,
    parse_status: 'completed',
  });

  test('should sort attachments by newest first (default)', async ({ page }) => {
    await page.route('**/api/reference-files?project_id=all', async (route) => {
      await route.fulfill(jsonOk({
        files: [
          parsedFile('1', 'old.pdf', '2024-01-01T00:00:00Z', 1000),
          parsedFile('2', 'new.pdf', '2024-12-01T00:00:00Z', 2000),
          parsedFile('3', 'middle.pdf', '2024-06-01T00:00:00Z', 1500),
        ],
      }));
    });

    await page.goto(BASE_URL);
    await page.click('button:has-text("上传文件")');

    const fileItems = page.locator('.divide-y > div');
    await expect(fileItems.nth(0)).toContainText('new.pdf');
    await expect(fileItems.nth(1)).toContainText('middle.pdf');
    await expect(fileItems.nth(2)).toContainText('old.pdf');
  });

  test('should sort attachments by oldest first', async ({ page }) => {
    await page.route('**/api/reference-files?project_id=all', async (route) => {
      await route.fulfill(jsonOk({
        files: [
          parsedFile('1', 'old.pdf', '2024-01-01T00:00:00Z', 1000),
          parsedFile('2', 'new.pdf', '2024-12-01T00:00:00Z', 2000),
        ],
      }));
    });

    await page.goto(BASE_URL);
    await page.click('button:has-text("上传文件")');
    // The second <select> in the dialog is the sort-order control.
    await page.selectOption('select >> nth=1', 'oldest');

    const fileItems = page.locator('.divide-y > div');
    await expect(fileItems.nth(0)).toContainText('old.pdf');
    await expect(fileItems.nth(1)).toContainText('new.pdf');
  });

  test('should sort attachments by name A-Z', async ({ page }) => {
    await page.route('**/api/reference-files?project_id=all', async (route) => {
      await route.fulfill(jsonOk({
        files: [
          parsedFile('1', 'zebra.pdf', '2024-01-01T00:00:00Z', 1000),
          parsedFile('2', 'apple.pdf', '2024-01-01T00:00:00Z', 2000),
          parsedFile('3', 'banana.pdf', '2024-01-01T00:00:00Z', 1500),
        ],
      }));
    });

    await page.goto(BASE_URL);
    await page.click('button:has-text("上传文件")');
    await page.selectOption('select >> nth=1', 'name-asc');

    const fileItems = page.locator('.divide-y > div');
    await expect(fileItems.nth(0)).toContainText('apple.pdf');
    await expect(fileItems.nth(1)).toContainText('banana.pdf');
    await expect(fileItems.nth(2)).toContainText('zebra.pdf');
  });

  test('should sort attachments by name Z-A', async ({ page }) => {
    await page.route('**/api/reference-files?project_id=all', async (route) => {
      await route.fulfill(jsonOk({
        files: [
          parsedFile('1', 'apple.pdf', '2024-01-01T00:00:00Z', 1000),
          parsedFile('2', 'zebra.pdf', '2024-01-01T00:00:00Z', 2000),
        ],
      }));
    });

    await page.goto(BASE_URL);
    await page.click('button:has-text("上传文件")');
    await page.selectOption('select >> nth=1', 'name-desc');

    const fileItems = page.locator('.divide-y > div');
    await expect(fileItems.nth(0)).toContainText('zebra.pdf');
    await expect(fileItems.nth(1)).toContainText('apple.pdf');
  });

  test('should show all projects in filter dropdown', async ({ page }) => {
    await page.route('**/api/projects*', async (route) => {
      await route.fulfill(jsonOk({
        projects: [
          { id: 'proj1', title: 'Project Alpha' },
          { id: 'proj2', title: 'Project Beta' },
          { id: 'proj3', title: 'Project Gamma' },
        ],
        total: 3,
      }));
    });

    await page.route('**/api/reference-files?project_id=all', async (route) => {
      await route.fulfill(jsonOk({ files: [] }));
    });

    await page.goto(BASE_URL);
    await page.click('button:has-text("上传文件")');

    // The first <select> is the project filter: 3 mocked projects plus 2 further options.
    const filterSelect = page.locator('select').first();
    await expect(filterSelect.locator('option')).toHaveCount(5);
    await expect(filterSelect).toContainText('Project Alpha');
    await expect(filterSelect).toContainText('Project Beta');
    await expect(filterSelect).toContainText('Project Gamma');
  });

  test('should filter by specific project with one click', async ({ page }) => {
    // Records the project_id query value of the most recent reference-files request.
    let requestedProjectId = '';

    await page.route('**/api/projects*', async (route) => {
      await route.fulfill(jsonOk({
        projects: [
          { id: 'proj1', title: 'Project Alpha' },
          { id: 'proj2', title: 'Project Beta' },
        ],
        total: 2,
      }));
    });

    await page.route('**/api/reference-files**', async (route) => {
      const url = route.request().url();
      const match = url.match(/project_id=([^&]+)/);
      requestedProjectId = match ? match[1] : '';

      await route.fulfill(jsonOk({
        files: requestedProjectId === 'proj2'
          ? [parsedFile('f1', 'beta-file.pdf', '2024-01-01T00:00:00Z', 1000)]
          : [],
      }));
    });

    await page.goto(BASE_URL);
    await page.click('button:has-text("上传文件")');

    const filterSelect = page.locator('select').first();
    await filterSelect.selectOption('proj2');

    // Poll until the filtered request has been observed instead of sleeping a fixed 500 ms,
    // which is both slower than necessary and flaky on a loaded machine.
    await expect.poll(() => requestedProjectId).toBe('proj2');
    await expect(page.locator('.divide-y > div')).toContainText('beta-file.pdf');
  });
});
/**
 * Wrap mock pages in the `{ success, data }` envelope used for the mocked
 * `GET /api/projects/:id` response.
 *
 * @param pages         page records as produced by `makePage`
 * @param projectStatus value for the project's `status` field; defaults to `'COMPLETED'`
 * @returns the response payload, ready to pass to `JSON.stringify`
 */
function projectJson(pages: ReturnType<typeof makePage>[], projectStatus = 'COMPLETED') {
  return {
    success: true,
    data: {
      id: PROJECT_ID,
      creation_type: 'idea',
      idea_prompt: 'test',
      status: projectStatus,
      template_style: 'default',
      image_aspect_ratio: '16:9',
      pages,
      created_at: '2026-01-01T00:00:00',
      updated_at: '2026-01-01T00:00:00',
    },
  }
}
badges.count() expect(count).toBe(3) for (let i = 0; i < count; i++) { await expect(badges.nth(i)).toHaveAttribute('data-status', 'COMPLETED') } }) test('badges transition from GENERATING to COMPLETED after sync', async ({ page }) => { await mockCommonRoutes(page) // Phase 1: pages are GENERATING let phase: 'generating' | 'completed' = 'generating' await page.route(`**/api/projects/${PROJECT_ID}`, (r) => { if (r.request().method() !== 'GET') return r.continue() if (phase === 'generating') { const pages = PAGE_IDS.map((id, i) => makePage(id, i, 'GENERATING', false)) return r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(projectJson(pages, 'GENERATING_IMAGES')) }) } const pages = PAGE_IDS.map((id, i) => makePage(id, i, 'COMPLETED', true)) return r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(projectJson(pages)) }) }) await page.goto(`/project/${PROJECT_ID}/preview`) const badges = page.locator('[data-testid="status-badge"]') await expect(badges.first()).toBeVisible({ timeout: 10000 }) // Verify initial state: GENERATING for (let i = 0; i < 3; i++) { await expect(badges.nth(i)).toHaveAttribute('data-status', 'GENERATING') } // Switch to completed phase and trigger a re-sync via navigation phase = 'completed' await page.evaluate(() => location.reload()) // Verify final state: COMPLETED await expect(badges.first()).toBeVisible({ timeout: 10000 }) for (let i = 0; i < 3; i++) { await expect(badges.nth(i)).toHaveAttribute('data-status', 'COMPLETED') } }) }) // ─── Integration test (real backend) ─── test.describe('Badge status (integration)', () => { test.beforeEach(async ({ page }) => { await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true')) }) test('seeded project shows COMPLETED badges on preview page', async ({ page, baseURL }) => { const { projectId } = await seedProjectWithImages(baseURL!, 3) await page.goto(`/project/${projectId}/preview`) const badges = 
page.locator('[data-testid="status-badge"]') await expect(badges.first()).toBeVisible({ timeout: 10000 }) const count = await badges.count() expect(count).toBe(3) for (let i = 0; i < count; i++) { await expect(badges.nth(i)).toHaveAttribute('data-status', 'COMPLETED') } }) }) ================================================ FILE: frontend/e2e/desc-regeneration-skeleton.spec.ts ================================================ /** * Mock E2E test: Skeleton stays visible during batch description RE-generation. * * When re-generating descriptions, the backend sets page.status to GENERATING_DESCRIPTION. * The skeleton must stay until the status changes to DESCRIPTION_GENERATED with new content. */ import { test, expect } from '@playwright/test' const PROJECT_ID = 'mock-proj-regen-skeleton' function makePage(id: string, index: number, title: string, opts?: { description?: string, status?: string }) { return { id, page_id: id, title, sort_order: index, order_index: index, status: opts?.status || (opts?.description ? 'DESCRIPTION_GENERATED' : 'DRAFT'), outline_content: { title, points: [`Point for ${title}`] }, description_content: opts?.description ? 
{ text: opts.description } : null, generated_image_path: null, } } const OLD_DESC_1 = 'Old description for page one' const OLD_DESC_2 = 'Old description for page two' const NEW_DESC_1 = 'Brand new description for page one' const NEW_DESC_2 = 'Brand new description for page two' test.describe('Skeleton during description re-generation', () => { test('skeleton stays visible until page status changes from GENERATING_DESCRIPTION', async ({ page }) => { let regenerationStarted = false let syncCountAfterRegen = 0 // Mock GET project await page.route(`**/api/projects/${PROJECT_ID}`, async (route) => { if (route.request().method() !== 'GET') { await route.continue(); return } let pages if (!regenerationStarted) { // Before re-generation: all pages already have descriptions pages = [ makePage('p1', 0, 'Page One', { description: OLD_DESC_1 }), makePage('p2', 1, 'Page Two', { description: OLD_DESC_2 }), ] } else { syncCountAfterRegen++ if (syncCountAfterRegen <= 2) { // Still processing: backend has set status to GENERATING_DESCRIPTION pages = [ makePage('p1', 0, 'Page One', { description: OLD_DESC_1, status: 'GENERATING_DESCRIPTION' }), makePage('p2', 1, 'Page Two', { description: OLD_DESC_2, status: 'GENERATING_DESCRIPTION' }), ] } else if (syncCountAfterRegen <= 4) { // Page 1 done (status changed + new content), page 2 still generating pages = [ makePage('p1', 0, 'Page One', { description: NEW_DESC_1 }), makePage('p2', 1, 'Page Two', { description: OLD_DESC_2, status: 'GENERATING_DESCRIPTION' }), ] } else { // All done pages = [ makePage('p1', 0, 'Page One', { description: NEW_DESC_1 }), makePage('p2', 1, 'Page Two', { description: NEW_DESC_2 }), ] } } await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { project_id: PROJECT_ID, id: PROJECT_ID, status: 'DESCRIPTIONS_GENERATED', creation_type: 'idea', pages, }, }), }) }) // Mock POST generate descriptions await page.route('**/api/projects/*/generate/descriptions', 
async (route) => { regenerationStarted = true await route.fulfill({ status: 202, contentType: 'application/json', body: JSON.stringify({ success: true, data: { task_id: 'mock-regen-task' } }), }) }) // Mock task polling let taskCallCount = 0 await page.route(`**/api/projects/${PROJECT_ID}/tasks/*`, async (route) => { taskCallCount++ const completed = taskCallCount >= 4 await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { task_id: 'mock-regen-task', status: completed ? 'COMPLETED' : 'PROCESSING', progress: { total: 2, completed: Math.min(taskCallCount, 2) }, }, }), }) }) // Mock reference files await page.route('**/api/projects/*/files*', async (route) => { await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: [] }), }) }) // Navigate to detail editor const baseUrl = process.env.BASE_URL || 'http://localhost:3000' await page.goto(`${baseUrl}/project/${PROJECT_ID}/detail`) // Verify old descriptions are visible before re-generation await expect(page.getByText(OLD_DESC_1)).toBeVisible({ timeout: 10000 }) await expect(page.getByText(OLD_DESC_2)).toBeVisible() // Click batch generate (triggers re-generation confirmation dialog) await page.getByRole('button', { name: /批量生成描述|Batch Generate/i }).click() // Confirm the regeneration dialog (may or may not appear) try { await page.getByRole('button', { name: /确认|确定/ }).click({ timeout: 2000 }) } catch { // Dialog may not appear, which is expected } // After clicking generate, skeleton should appear — old descriptions should NOT be visible await expect(page.getByText(/生成中|Generating/).first()).toBeVisible({ timeout: 5000 }) // Old descriptions should be hidden while skeleton is showing await expect(page.getByText(OLD_DESC_1)).not.toBeVisible() await expect(page.getByText(OLD_DESC_2)).not.toBeVisible() // Wait for page 1 new description to appear (status changed to DESCRIPTION_GENERATED) await 
expect(page.getByText(NEW_DESC_1)).toBeVisible({ timeout: 15000 }) // Wait for page 2 new description await expect(page.getByText(NEW_DESC_2)).toBeVisible({ timeout: 15000 }) // Verify final state: both new descriptions visible, old ones gone await expect(page.getByText(OLD_DESC_1)).not.toBeVisible() await expect(page.getByText(OLD_DESC_2)).not.toBeVisible() }) }) ================================================ FILE: frontend/e2e/description-detail-level.spec.ts ================================================ /** * E2E tests for description detail level selector. * * Mock tests: verify UI rendering, default selection, click behavior, * and that the correct detail_level is sent in API requests. * * Integration test: verify selector works with real backend. */ import { test, expect } from '@playwright/test' const PROJECT_ID = 'mock-proj-detail-level' function makePage(id: string, index: number, title: string, description?: string) { return { id, page_id: id, title, sort_order: index, order_index: index, status: description ? 'DESCRIPTION_GENERATED' : 'DRAFT', outline_content: { title, points: [`Point for ${title}`] }, description_content: description ? 
{ text: description } : null, generated_image_path: null, } } const pages = [ makePage('p1', 0, 'Title Page'), makePage('p2', 1, 'Introduction'), makePage('p3', 2, 'Conclusion'), ] async function setupMockRoutes(page: import('@playwright/test').Page) { // Mock access code check (required — AccessCodeGuard blocks rendering) await page.route('**/api/access-code/check', async (route) => { await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { enabled: false } }), }) }) // Mock project GET await page.route(`**/api/projects/${PROJECT_ID}`, async (route) => { if (route.request().method() !== 'GET') { await route.continue(); return } await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { project_id: PROJECT_ID, id: PROJECT_ID, status: 'OUTLINE_GENERATED', creation_type: 'idea', pages, }, }), }) }) // Mock reference files await page.route('**/api/projects/*/files*', async (route) => { await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: [] }), }) }) } test.describe('Detail level selector — mock tests', () => { test('renders selector with "standard" selected by default', async ({ page }) => { await setupMockRoutes(page) await page.goto(`/project/${PROJECT_ID}/detail`) await page.waitForSelector('text=批量生成描述') // The selector should be visible with 3 buttons const buttons = page.locator('button', { hasText: /精简|标准|详细/ }) await expect(buttons).toHaveCount(3) // "标准" should have the active style (bg-banana-500) const standardBtn = page.locator('button', { hasText: '标准' }) await expect(standardBtn).toHaveClass(/bg-banana-500/) }) test('clicking a level option changes the selection', async ({ page }) => { await setupMockRoutes(page) await page.goto(`/project/${PROJECT_ID}/detail`) await page.waitForSelector('text=批量生成描述') // Click "精简" const conciseBtn = page.locator('button', { hasText: '精简' }) await 
conciseBtn.click() await expect(conciseBtn).toHaveClass(/bg-banana-500/) // "标准" should no longer be active const standardBtn = page.locator('button', { hasText: '标准' }) await expect(standardBtn).not.toHaveClass(/bg-banana-500/) // Click "详细" const detailedBtn = page.locator('button', { hasText: '详细' }) await detailedBtn.click() await expect(detailedBtn).toHaveClass(/bg-banana-500/) await expect(conciseBtn).not.toHaveClass(/bg-banana-500/) }) test('batch generate sends correct detail_level in request', async ({ page }) => { await setupMockRoutes(page) // Capture the POST body let capturedBody: any = null await page.route('**/api/projects/*/generate/descriptions', async (route) => { capturedBody = JSON.parse(route.request().postData() || '{}') await route.fulfill({ status: 202, contentType: 'application/json', body: JSON.stringify({ success: true, data: { task_id: 'mock-task-1' } }), }) }) // Mock task polling — immediately complete await page.route(`**/api/projects/${PROJECT_ID}/tasks/*`, async (route) => { await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { task_id: 'mock-task-1', status: 'COMPLETED', progress: { total: 3, completed: 3 } }, }), }) }) await page.goto(`/project/${PROJECT_ID}/detail`) await page.waitForSelector('text=批量生成描述') // Select "详细" then click batch generate await page.locator('button', { hasText: '详细' }).click() await page.locator('button', { hasText: '批量生成描述' }).click() // Wait for the request to be captured await expect.poll(() => capturedBody).toBeTruthy() expect(capturedBody.detail_level).toBe('detailed') }) test('default detail_level is "default" when not changed', async ({ page }) => { await setupMockRoutes(page) let capturedBody: any = null await page.route('**/api/projects/*/generate/descriptions', async (route) => { capturedBody = JSON.parse(route.request().postData() || '{}') await route.fulfill({ status: 202, contentType: 'application/json', body: JSON.stringify({ 
success: true, data: { task_id: 'mock-task-2' } }), }) }) await page.route(`**/api/projects/${PROJECT_ID}/tasks/*`, async (route) => { await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { task_id: 'mock-task-2', status: 'COMPLETED', progress: { total: 3, completed: 3 } }, }), }) }) await page.goto(`/project/${PROJECT_ID}/detail`) await page.waitForSelector('text=批量生成描述') await page.locator('button', { hasText: '批量生成描述' }).click() await expect.poll(() => capturedBody).toBeTruthy() expect(capturedBody.detail_level).toBe('default') }) }) ================================================ FILE: frontend/e2e/description-no-flicker.spec.ts ================================================ /** * Mock E2E test: Description cards should not flicker during batch generation. * * Simulates incremental description generation via polling. * Verifies that already-completed cards keep their content stable * while other pages are still generating. */ import { test, expect } from '@playwright/test' const PROJECT_ID = 'mock-proj-flicker' function makePage(id: string, index: number, title: string, description?: string) { return { id, page_id: id, title, sort_order: index, order_index: index, status: description ? 'COMPLETED' : 'DRAFT', outline_content: { title, points: [`Point for ${title}`] }, description_content: description ? 
{ text: description } : null, generated_image_path: null, } } test.describe('Description cards stability during generation', () => { test('already-completed cards stay stable while others generate', async ({ page }) => { // Flag: set to true after generation starts, controls which stage to return let generationStarted = false let syncCountAfterGen = 0 // Mock GET project await page.route(`**/api/projects/${PROJECT_ID}`, async (route) => { if (route.request().method() !== 'GET') { await route.continue(); return } let pages if (!generationStarted) { // Before generation: all pages have no description pages = [ makePage('p1', 0, 'Page One'), makePage('p2', 1, 'Page Two'), makePage('p3', 2, 'Page Three'), ] } else { syncCountAfterGen++ if (syncCountAfterGen <= 2) { // Stage 1: page 1 done pages = [ makePage('p1', 0, 'Page One', '# Description for Page One\n\nThis is page one content.'), makePage('p2', 1, 'Page Two'), makePage('p3', 2, 'Page Three'), ] } else if (syncCountAfterGen <= 4) { // Stage 2: pages 1+2 done pages = [ makePage('p1', 0, 'Page One', '# Description for Page One\n\nThis is page one content.'), makePage('p2', 1, 'Page Two', '# Description for Page Two\n\nThis is page two content.'), makePage('p3', 2, 'Page Three'), ] } else { // Stage 3: all done pages = [ makePage('p1', 0, 'Page One', '# Description for Page One\n\nThis is page one content.'), makePage('p2', 1, 'Page Two', '# Description for Page Two\n\nThis is page two content.'), makePage('p3', 2, 'Page Three', '# Description for Page Three\n\nThis is page three content.'), ] } } await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { project_id: PROJECT_ID, id: PROJECT_ID, status: 'OUTLINE_GENERATED', creation_type: 'idea', pages, }, }), }) }) // Mock POST generate descriptions await page.route('**/api/projects/*/generate/descriptions', async (route) => { generationStarted = true await route.fulfill({ status: 202, contentType: 
'application/json', body: JSON.stringify({ success: true, data: { task_id: 'mock-desc-task' } }), }) }) // Mock task polling let taskCallCount = 0 await page.route(`**/api/projects/${PROJECT_ID}/tasks/*`, async (route) => { taskCallCount++ const completed = taskCallCount >= 4 await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { task_id: 'mock-desc-task', status: completed ? 'COMPLETED' : 'PROCESSING', progress: { total: 3, completed: Math.min(taskCallCount, 3) }, }, }), }) }) // Mock reference files await page.route('**/api/projects/*/files*', async (route) => { await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: [] }), }) }) // Navigate to detail editor const baseUrl = process.env.BASE_URL || 'http://localhost:3000' await page.goto(`${baseUrl}/project/${PROJECT_ID}/detail`) // Wait for page cards to appear (no descriptions yet) await expect(page.locator('text=第 1 页')).toBeVisible({ timeout: 10000 }) await expect(page.locator('text=第 3 页')).toBeVisible() // Click batch generate await page.locator('button:has-text("批量生成描述")').click() // Wait for page 1 description to appear await expect(page.locator('text=This is page one content')).toBeVisible({ timeout: 15000 }) // Wait for page 2 description await expect(page.locator('text=This is page two content')).toBeVisible({ timeout: 15000 }) // Verify page 1 is STILL visible (not flickered away) await expect(page.locator('text=This is page one content')).toBeVisible() // Wait for page 3 (all done) await expect(page.locator('text=This is page three content')).toBeVisible({ timeout: 15000 }) // Final: all three descriptions visible simultaneously await expect(page.locator('text=This is page one content')).toBeVisible() await expect(page.locator('text=This is page two content')).toBeVisible() await expect(page.locator('text=This is page three content')).toBeVisible() }) }) 
================================================
FILE: frontend/e2e/editable-export-failure.spec.ts
================================================
/**
 * Mock E2E test: a failed editable-PPTX export task must surface its error
 * message in the UI.
 *
 * Every `/api/` request is answered by an in-test mock; the export task is
 * reported as FAILED on each poll.
 */
import { expect, test } from '@playwright/test';

test.describe('Editable export failure UI', () => {
  test('shows toast and task panel error when style extraction fails', async ({ page }) => {
    const projectId = 'mock-editable-export-failure';
    const projectPath = `/api/projects/${projectId}`;
    let pollCount = 0;

    await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true'));

    // Mirror browser-side failures into the test output to ease debugging.
    page.on('pageerror', error => console.log('pageerror:', error.message));
    page.on('console', message => {
      if (message.type() !== 'error') {
        return;
      }
      console.log('console-error:', message.text());
    });

    // Selects the `data` payload for a mocked API path. Paths that match no
    // case fall back to an empty object. Polling the task path bumps
    // `pollCount` so the test can tell that polling actually happened.
    const payloadFor = (pathname: string): unknown => {
      switch (pathname) {
        case '/api/access-code/check':
          return { enabled: false };
        case projectPath:
          return {
            project_id: projectId,
            id: projectId,
            status: 'COMPLETED',
            template_style: 'default',
            export_allow_partial: false,
            pages: [
              {
                id: 'p1',
                page_id: 'p1',
                order_index: 0,
                generated_image_path: '/files/mock/slide-1.png',
                outline_content: { title: 'Slide 1', points: [] },
                description_content: { text: 'desc' },
                status: 'COMPLETED',
              },
            ],
          };
        case `${projectPath}/export/editable-pptx`:
          return { task_id: 'editable-export-task-1' };
        case `${projectPath}/tasks/editable-export-task-1`:
          pollCount += 1;
          return {
            task_id: 'editable-export-task-1',
            task_type: 'EXPORT_EDITABLE_PPTX',
            status: 'FAILED',
            error_message: '文本样式提取失败: 当前图片样式提取模型不支持图片输入: caption_provider 不支持图片输入',
            progress: {
              total: 100,
              completed: 0,
              failed: 1,
              percent: 0,
              help_text: '当前用于图片样式提取的 caption/image_caption 模型不支持图片输入。',
            },
          };
        case '/api/settings':
          return {};
        case '/api/output-language':
          return { language: 'zh' };
        case '/api/user-templates':
          return { templates: [] };
        default:
          return pathname.includes('/image-versions') ? { versions: [] } : {};
      }
    };

    // A function matcher is used so that only real `/api/` paths are mocked.
    await page.route(
      requestUrl => new URL(requestUrl).pathname.startsWith('/api/'),
      async route => {
        const { pathname } = new URL(route.request().url());
        await route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify({ success: true, data: payloadFor(pathname) }),
        });
      },
    );

    // Slide images are served as a blank buffer.
    await page.route('**/files/**', async route => {
      await route.fulfill({ status: 200, contentType: 'image/png', body: Buffer.alloc(256) });
    });

    await page.goto(`/project/${projectId}/preview`);
    await page.waitForFunction(() => document.body.innerText.length > 50, { timeout: 15000 });

    // Open the export menu and start the editable export.
    const exportMenuButton = page.locator('button:has-text("导出")').first();
    await exportMenuButton.click();
    const editableExportButton = page.getByRole('button', { name: /导出可编辑 PPTX/ });
    await editableExportButton.click();

    // The task endpoint must have been polled at least once.
    await expect
      .poll(() => pollCount, { timeout: 10000 })
      .toBeGreaterThan(0);

    const errorText = page.getByText('当前图片样式提取模型不支持图片输入');
    await expect(errorText).toBeVisible({ timeout: 10000 });
    const counterButton = page.getByRole('button', { name: /^1$/ });
    await expect(counterButton).toBeVisible({ timeout: 10000 });
  });
});

================================================
FILE: frontend/e2e/export-aspect-ratio.spec.ts
================================================
/**
 * Export Aspect Ratio - Integration E2E
Test
 *
 * Verifies that PDF and PPTX exports use the project's aspect ratio
 * instead of hardcoding 16:9.
 */
import { test, expect } from '@playwright/test'
import type { APIRequestContext } from '@playwright/test'
import { execFileSync } from 'child_process'
import * as fs from 'fs'
import * as path from 'path'
import { fileURLToPath } from 'url'

const BASE = process.env.BASE_URL || 'http://localhost:3000'
// Derive backend URL from frontend URL (frontend 3xxx → backend 5xxx, same offset).
// NOTE(review): when BASE_URL carries no explicit port, `URL.port` is '' and the
// derived backend port becomes 2000 — confirm BASE_URL always includes a port.
const API = `http://localhost:${Number(new URL(BASE).port) + 2000}`

// Minimal 1x1 PNG used as the stand-in slide image
const TINY_PNG = Buffer.from(
  'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==',
  'base64'
)

// Worktree root (two levels up from frontend/e2e/)
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
const WORKTREE_ROOT = path.resolve(__dirname, '..', '..')
const UPLOADS_DIR = path.join(WORKTREE_ROOT, 'uploads')
const DB_PATH = path.join(WORKTREE_ROOT, 'backend', 'instance', 'database.db')

const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
// MediaBox format: [0 0 width height] in points (1 inch = 72 pt)
const MEDIA_BOX_RE = /\/MediaBox\s*\[\s*([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*\]/
// Slide size attributes inside ppt/presentation.xml
const SLIDE_SIZE_RE = /sldSz\s+cx="(\d+)"\s+cy="(\d+)"/

/**
 * Throws unless `val` is a canonical UUID string.
 *
 * IDs are interpolated into SQL text and filesystem paths below, so they are
 * validated before any such use.
 *
 * @param val - candidate identifier
 * @param label - name used in the error message
 * @throws Error when `val` does not match the UUID pattern
 */
function assertUUID(val: string, label: string) {
  if (!UUID_RE.test(val)) throw new Error(`Invalid ${label}: ${val}`)
}

/**
 * Runs SQL against the backend SQLite database through the `sqlite3` CLI.
 *
 * Arguments are passed as an argv array, so no shell parses the database
 * path or the SQL text.
 */
function runSql(sql: string) {
  execFileSync('sqlite3', [DB_PATH, sql])
}

/** Asserts that a width/height ratio is 4:3 and clearly not 16:9 (1.778). */
function expectFourByThree(ratio: number) {
  // 4:3 = 1.333...
  expect(ratio).toBeCloseTo(4 / 3, 1)
  // Should NOT be 16:9 (1.778)
  expect(Math.abs(ratio - 16 / 9)).toBeGreaterThan(0.1)
}

interface ProjectData {
  projectId: string
  pageId: string
  imagePath: string
}

/**
 * Creates a project with one page through the API, writes a placeholder
 * image for that page under the uploads directory and points the page row
 * at it directly in the database.
 *
 * @param request - Playwright API request context
 * @param aspectRatio - value sent as `image_aspect_ratio` (e.g. '4:3')
 * @returns the created ids and the absolute path of the image on disk
 * @throws Error when the API returns an id that is not a UUID
 */
async function setupProject(
  request: APIRequestContext,
  aspectRatio: string
): Promise<ProjectData> {
  // Create project
  const projRes = await request.post(`${API}/api/projects`, {
    data: {
      creation_type: 'idea',
      idea_prompt: 'test export aspect ratio',
      image_aspect_ratio: aspectRatio,
    },
  })
  expect(projRes.ok()).toBeTruthy()
  const proj = await projRes.json()
  const projectId: string = proj.data.project_id
  // Validate before the id is used in any path or SQL text.
  assertUUID(projectId, 'projectId')

  // Create page
  const pageRes = await request.post(`${API}/api/projects/${projectId}/pages`, {
    data: { order_index: 0 },
  })
  expect(pageRes.ok()).toBeTruthy()
  const page = await pageRes.json()
  const pageId: string = page.data.page_id
  assertUUID(pageId, 'pageId')

  // Place test image on disk
  const pagesDir = path.join(UPLOADS_DIR, projectId, 'pages')
  fs.mkdirSync(pagesDir, { recursive: true })
  const imgFile = `test_${pageId}.png`
  const imgAbsPath = path.join(pagesDir, imgFile)
  fs.writeFileSync(imgAbsPath, TINY_PNG)

  // Update DB to set generated_image_path (ids were validated above)
  const relPath = `${projectId}/pages/${imgFile}`
  runSql(
    `UPDATE pages SET generated_image_path='${relPath}', status='IMAGE_GENERATED' WHERE id='${pageId}';`
  )

  return { projectId, pageId, imagePath: imgAbsPath }
}

/**
 * Removes the upload directory and database rows of a test project.
 *
 * Database cleanup is best effort: a failing `sqlite3` call is ignored.
 *
 * @throws Error when `projectId` is not a UUID
 */
function cleanup(projectId: string) {
  assertUUID(projectId, 'projectId')
  // `force: true` makes this a no-op when the directory does not exist.
  fs.rmSync(path.join(UPLOADS_DIR, projectId), { recursive: true, force: true })
  try {
    runSql(
      `DELETE FROM pages WHERE project_id='${projectId}'; DELETE FROM projects WHERE id='${projectId}';`
    )
  } catch { /* best effort */ }
}

test.describe.serial('Export aspect ratio', () => {
  test.setTimeout(30_000)

  const createdProjects: string[] = []

  test.afterAll(async () => {
    for (const id of createdProjects) cleanup(id)
  })

  test('PDF export uses 4:3 page dimensions', async ({ request }) => {
    const { projectId } = await setupProject(request, '4:3')
    createdProjects.push(projectId)

    const res = await request.get(
      `${API}/api/projects/${projectId}/export/pdf`
    )
    expect(res.ok()).toBeTruthy()
    const body = await res.json()
    const downloadUrl = body.data.download_url_absolute

    // Download the PDF
    const pdfRes = await request.get(downloadUrl)
    expect(pdfRes.ok()).toBeTruthy()
    const pdfBuf = Buffer.from(await pdfRes.body())

    // Parse PDF MediaBox to verify aspect ratio
    const pdfStr = pdfBuf.toString('latin1')
    const match = pdfStr.match(MEDIA_BOX_RE)
    if (!match) throw new Error('No /MediaBox entry found in exported PDF')

    const pdfW = parseFloat(match[3])
    const pdfH = parseFloat(match[4])
    expectFourByThree(pdfW / pdfH)
  })

  test('PPTX export uses 4:3 slide dimensions', async ({ request }) => {
    const { projectId } = await setupProject(request, '4:3')
    createdProjects.push(projectId)

    const res = await request.get(
      `${API}/api/projects/${projectId}/export/pptx`
    )
    expect(res.ok()).toBeTruthy()
    const body = await res.json()

    // Extract slide dimensions from PPTX (ZIP containing XML)
    // The download_url is like /files/{id}/exports/file.pptx
    const pptxPath = path.join(UPLOADS_DIR, body.data.download_url.replace('/files/', ''))
    // Containment check via a relative path: a plain prefix test would also
    // accept sibling directories such as `<root>/uploads-other`.
    const relToUploads = path.relative(UPLOADS_DIR, pptxPath)
    if (
      relToUploads === '' ||
      relToUploads === '..' ||
      relToUploads.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relToUploads)
    ) {
      throw new Error('Invalid pptx path')
    }

    const xml = execFileSync('unzip', ['-p', pptxPath, 'ppt/presentation.xml']).toString()
    const sldSzMatch = xml.match(SLIDE_SIZE_RE)
    if (!sldSzMatch) throw new Error('No sldSz element found in ppt/presentation.xml')

    const cx = Number.parseInt(sldSzMatch[1], 10)
    const cy = Number.parseInt(sldSzMatch[2], 10)
    expectFourByThree(cx / cy)
  })
})

================================================
FILE: frontend/e2e/export-images.spec.ts
================================================
/**
 * E2E tests for image export feature.
 *
 * 1. Backend API tests: error case + happy path (single & multi-image export)
 * 2.
Mock UI tests: verify the export menu renders the image export option */ import { test, expect } from '@playwright/test' import { seedProjectWithImages } from './helpers/seed-project' test.describe('Export Images - Backend API', () => { test('returns 400 when project has no images', async ({ request }) => { // Create a project const createResp = await request.post('/api/projects', { data: { creation_type: 'idea', idea_prompt: 'test', template_style: 'default' }, }) if (!createResp.ok()) { test.skip(true, 'Backend unavailable'); return } const projectId = (await createResp.json()).data?.project_id if (!projectId) { test.skip(true, 'No project_id'); return } const resp = await request.get(`/api/projects/${projectId}/export/images`) expect(resp.ok()).toBe(false) expect(resp.status()).toBe(400) }) test('exports single image successfully', async ({ request, baseURL }) => { const { projectId } = await seedProjectWithImages(baseURL!, 1) const resp = await request.get(`/api/projects/${projectId}/export/images`) expect(resp.ok()).toBe(true) const data = (await resp.json()).data expect(data.download_url).toContain(`/files/${projectId}/exports/`) expect(data.download_url).toContain('.jpg') // Verify the file is downloadable const fileResp = await request.get(data.download_url) expect(fileResp.ok()).toBe(true) expect(fileResp.headers()['content-type']).toContain('image/jpeg') }) test('exports multiple images as ZIP', async ({ request, baseURL }) => { const { projectId } = await seedProjectWithImages(baseURL!, 2) const resp = await request.get(`/api/projects/${projectId}/export/images`) expect(resp.ok()).toBe(true) const data = (await resp.json()).data expect(data.download_url).toContain('.zip') // Verify the ZIP is downloadable const fileResp = await request.get(data.download_url) expect(fileResp.ok()).toBe(true) }) }) test.describe('Export Images - UI Mock', () => { test.setTimeout(60_000) test('export dropdown contains image export option', async ({ page }) => { const PID = 
'mock-img-export' // Intercept API requests (use function matcher to avoid catching Vite source files like /src/api/...) await page.route(url => new URL(url).pathname.startsWith('/api/'), async (route) => { const url = new URL(route.request().url()) if (url.pathname === `/api/projects/${PID}`) { return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { project_id: PID, id: PID, status: 'IMAGES_GENERATED', template_style: 'default', pages: [ { id: 'p1', page_id: 'p1', title: 'Slide 1', order_index: 0, generated_image_path: '/files/x/1.png', page_number: 1, outline_content: { title: 'Slide 1' }, status: 'COMPLETED' }, ], }, }), }) } if (url.pathname === '/api/settings') { return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: {} }) }) } if (url.pathname === '/api/output-language') { return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { language: 'zh' } }) }) } if (url.pathname === '/api/user-templates') { return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { templates: [] } }) }) } // Default: 200 empty return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: {} }) }) }) // Mock image files await page.route('**/files/**', async (route) => { await route.fulfill({ status: 200, contentType: 'image/png', body: Buffer.alloc(100) }) }) await page.goto(`/project/${PID}/preview`) // Wait for page content to render (the preview title or page count text) await page.waitForFunction(() => document.body.innerText.length > 50, { timeout: 15000 }) // Find and click the export button using text content const exportBtn = page.locator('button:has-text("导出")').first() await expect(exportBtn).toBeVisible({ timeout: 10000 }) await exportBtn.click() // Verify image export option appears in the 
dropdown const imgExportBtn = page.locator('button:has-text("导出为图片")') await expect(imgExportBtn).toBeVisible({ timeout: 5000 }) }) test('image export calls correct API endpoint', async ({ page }) => { const PID = 'mock-img-export2' let imageExportCalled = false await page.route(url => new URL(url).pathname.startsWith('/api/'), async (route) => { const url = new URL(route.request().url()) if (url.pathname === `/api/projects/${PID}/export/images`) { imageExportCalled = true return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { download_url: '/files/x/slides.zip', download_url_absolute: 'http://localhost/files/x/slides.zip' } }), }) } if (url.pathname === `/api/projects/${PID}`) { return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { project_id: PID, id: PID, status: 'IMAGES_GENERATED', template_style: 'default', pages: [ { id: 'p1', page_id: 'p1', title: 'S1', order_index: 0, generated_image_path: '/files/x/1.png', page_number: 1, outline_content: { title: 'S1' }, status: 'COMPLETED' }, ], }, }), }) } if (url.pathname === '/api/settings') { return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: {} }) }) } if (url.pathname === '/api/output-language') { return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { language: 'zh' } }) }) } if (url.pathname === '/api/user-templates') { return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { templates: [] } }) }) } return route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: {} }) }) }) await page.route('**/files/**', async (route) => { await route.fulfill({ status: 200, contentType: 'image/png', body: Buffer.alloc(100) }) }) await page.goto(`/project/${PID}/preview`) await 
page.waitForFunction(() => document.body.innerText.length > 50, { timeout: 15000 })

    // Open the export menu, pick the image entry, then wait for the mocked endpoint to be hit
    await page.locator('button:has-text("导出")').first().click()
    await page.locator('button:has-text("导出为图片")').click()
    await expect.poll(() => imageExportCalled).toBe(true)
  })
})

================================================
FILE: frontend/e2e/extract-style-caption.spec.ts
================================================
/**
 * E2E tests for extract-style using caption_provider.
 *
 * Mock test: verify frontend handles the extract-style API correctly.
 */
import { test, expect } from '@playwright/test'

const BASE_URL = process.env.BASE_URL || 'http://localhost:3000'

const TINY_PNG = Buffer.from(
  'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
  'base64',
)

/**
 * Switch the page into text-style mode and hand a tiny PNG to the
 * style-extraction file input.
 */
async function triggerStyleExtract(page: import('@playwright/test').Page) {
  // The text-style checkbox is off by default, so enable it first
  const styleModeToggle = page.getByText(/使用文字描述风格|Use text description for style/)
  await styleModeToggle.scrollIntoViewIfNeeded()
  await styleModeToggle.click()

  // The extract button appears once text-style mode is on
  await expect(page.getByText(/从图片提取风格|Extract from image/)).toBeVisible()

  // Feed the hidden single-file image input (not multiple, not disabled)
  await page
    .locator('input[type="file"][accept="image/*"]:not([multiple]):not([disabled])')
    .setInputFiles({ name: 'style.png', mimeType: 'image/png', buffer: TINY_PNG })
}

test.describe('Extract style - Mock tests', () => {
  /** Answer a routed request with a JSON payload and the given status. */
  const replyJson = (route: import('@playwright/test').Route, status: number, payload: unknown) =>
    route.fulfill({ status, contentType: 'application/json', body: JSON.stringify(payload) })

  test.beforeEach(async ({ page }) => {
    await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true'))
  })

  test('should extract style and show success toast', async ({ page }) => {
    const mockStyle = 'Modern minimalist blue gradient'
    await page.route('**/api/extract-style', (route) =>
      replyJson(route, 200, { success: true, data: { style_description: mockStyle } }))

    await page.goto(BASE_URL)
    await triggerStyleExtract(page)

    const successToast = page.getByText(/风格提取成功|Style extracted successfully/)
    await expect(successToast).toBeVisible({ timeout: 5000 })
  })

  test('should show error toast when extract-style fails', async ({ page }) => {
    await page.route('**/api/extract-style', (route) =>
      replyJson(route, 503, {
        success: false,
        error: { code: 'AI_SERVICE_ERROR', message: 'caption_provider error' },
      }))

    await page.goto(BASE_URL)
    await triggerStyleExtract(page)

    const failureToast = page.getByText(/风格提取失败|Style extraction failed/)
    await expect(failureToast).toBeVisible({ timeout: 5000 })
  })

  test('should send multipart POST to /api/extract-style', async ({ page }) => {
    let requestOk = false
    await page.route('**/api/extract-style', async (route) => {
      // Inspect the outgoing request before answering it
      const outgoing = route.request()
      expect(outgoing.method()).toBe('POST')
      expect(outgoing.headers()['content-type'] || '').toContain('multipart/form-data')
      requestOk = true
      await replyJson(route, 200, { success: true, data: { style_description: 'ok' } })
    })

    await page.goto(BASE_URL)
    await triggerStyleExtract(page)

    await expect(page.getByText(/风格提取成功|Style extracted successfully/)).toBeVisible({ timeout: 5000 })
    expect(requestOk).toBe(true)
  })
})

================================================
FILE: frontend/e2e/failed-file-reselect.spec.ts
================================================
/**
 * E2E test: Failed files can be re-selected in selector and re-parsed from card
 */
import { test, expect } from '@playwright/test'

test.use({ baseURL: process.env.BASE_URL || 'http://localhost:3000' })

const FILE_FAILED = 'file-failed-001'
const FILE_COMPLETED = 'file-completed-002'

// Mocked reference-file listing: one failed file and one completed file
const mockFileList = () => ({
  success: true,
  data: {
    files: [
      {
        id: FILE_FAILED,
        filename: 'broken.pdf',
        file_size: 1000,
        file_type: 'application/pdf',
parse_status: 'failed', error_message: 'MinerU timeout' },
      { id: FILE_COMPLETED, filename: 'good.pdf', file_size: 2000, file_type: 'application/pdf', parse_status: 'completed' },
    ]
  }
})

// Mocked settings payload returned for every /api/settings request in this file
const mockSettings = () => ({ success: true, data: { ai_provider_format: 'gemini', google_api_key: 'fake' } })

test.describe('Failed file re-selection (mocked)', () => {
  test.setTimeout(60_000)

  /** Fulfil a routed request with a 200 JSON response. */
  const okJson = (r: import('@playwright/test').Route, payload: unknown) =>
    r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(payload) })

  // Response body of the mocked re-parse endpoint: the file flips to "parsing"
  const parsingResponse = () => ({
    success: true,
    data: {
      file: { id: FILE_FAILED, filename: 'broken.pdf', file_size: 1000, file_type: 'application/pdf', parse_status: 'parsing' },
      message: 'ok',
    },
  })

  test('selecting a failed file in selector triggers re-parse on confirm', async ({ page }) => {
    let parseCalled = false

    await page.route('**/api/settings', r => okJson(r, mockSettings()))
    await page.route('**/api/reference-files/project/**', r => okJson(r, mockFileList()))
    await page.route(`**/api/reference-files/${FILE_FAILED}/parse`, r => {
      parseCalled = true
      // Return the promise so a failing fulfill is not left floating
      return okJson(r, parsingResponse())
    })

    await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true'))
    await page.goto('/')

    // Click paperclip button to open file selector
    const paperclip = page.locator('button[title]').filter({ has: page.locator('svg.lucide-paperclip') })
    await paperclip.click()

    // Wait for file selector modal (by title)
    const modal = page.getByRole('dialog', { name: '选择参考文件' })
    await expect(modal).toBeVisible({ timeout: 5_000 })

    // Click the failed file row to select it
    await modal.locator('text=broken.pdf').first().click()

    // Click confirm button
    await modal.getByRole('button', { name: /确定/ }).click()

    // Verify parse was triggered for the failed file. The request caused by the click
    // may not have reached the route handler yet, so poll instead of asserting once.
    await expect.poll(() => parseCalled).toBe(true)
  })

  test('failed file card shows reparse button', async ({ page }) => {
    const PROJECT_ID = 'mock-proj-reparse'
    let parseCalled = false

    // The same failed-file record is served by the project, list and detail endpoints
    const failedFile = {
      id: FILE_FAILED,
      filename: 'broken.pdf',
      file_size: 1000,
      file_type: 'application/pdf',
      parse_status: 'failed',
      error_message: 'MinerU timeout',
    }

    await page.route('**/api/settings', r => okJson(r, mockSettings()))
    await page.route(`**/api/projects/${PROJECT_ID}`, r => okJson(r, {
      success: true,
      data: {
        id: PROJECT_ID,
        project_id: PROJECT_ID,
        title: 'Test',
        status: 'OUTLINE_GENERATED',
        creation_type: 'idea',
        pages: [{ id: 'p1', page_id: 'p1', title: 'Page 1', order_index: 0, outline_content: { title: 'Page 1', points: ['p'] } }],
        reference_files: [failedFile],
      },
    }))
    await page.route(`**/api/projects/${PROJECT_ID}/pages`, r => okJson(r, { success: true, data: { pages: [] } }))
    await page.route(`**/api/reference-files/project/${PROJECT_ID}`, r => okJson(r, { success: true, data: { files: [failedFile] } }))
    await page.route(`**/api/reference-files/${FILE_FAILED}`, r => okJson(r, { success: true, data: { file: failedFile } }))
    await page.route(`**/api/reference-files/${FILE_FAILED}/parse`, r => {
      parseCalled = true
      return okJson(r, parsingResponse())
    })

    await page.goto(`/project/${PROJECT_ID}/outline`)

    // Find the failed file card
    const card = page.locator('text=broken.pdf').first()
    await card.waitFor({ state: 'visible', timeout: 10_000 })

    // The reparse button (RefreshCw icon) should be visible on the card
    const cardContainer = card.locator('xpath=ancestor::div[contains(@class,"w-72")]')
    const reparseBtn = cardContainer.locator('button').filter({ has: page.locator('svg.lucide-refresh-cw') })
    await expect(reparseBtn).toBeVisible({ timeout: 3_000 })

    // Click reparse, then poll: the routed request is handled asynchronously after the click
    await reparseBtn.click()
    await expect.poll(() => parseCalled).toBe(true)
  })
})

================================================
FILE: frontend/e2e/file-preview-scrollbar.spec.ts
================================================
/**
 * E2E test: FilePreviewModal scrollbar fix
 *
 * Verifies that the PDF/file preview modal does not have nested scroll containers
 * (which caused double vertical scrollbars and a horizontal scrollbar).
 */
import { test, expect } from '@playwright/test'

test.use({ baseURL: process.env.BASE_URL || 'http://localhost:3000' })

// Long prose plus a wide code block, so the preview content overflows in both directions
const LONG_MARKDOWN = '# Test Document\n\n' + 'Lorem ipsum dolor sit amet. '.repeat(200) + '\n\n```\n' + 'const x = 1; // a very long code line '.repeat(5) + '\n```\n'
const PROJECT_ID = 'mock-proj-001'
const FILE_ID = 'mock-file-001'

test.describe('FilePreviewModal scrollbar fix (mocked)', () => {
  test.setTimeout(60_000)

  test('modal should not have nested scroll containers', async ({ page }) => {
    // Mock settings API (prevents help modal from blocking)
    await page.route('**/api/settings', async (route) => {
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({ success: true, data: { ai_provider_format: 'gemini', google_api_key: 'fake' } }),
      })
    })

    // Mock project API
    await page.route(`**/api/projects/${PROJECT_ID}`, async (route) => {
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({ success: true, data: { id: PROJECT_ID, project_id: PROJECT_ID, title: 'Test Project', status: 'OUTLINE_GENERATED', creation_type: 'idea', pages: [{ id: 'p1', page_id: 'p1', title: 'Page 1', order_index:
0, outline_content: { title: 'Page 1', points: ['point 1'] } }] } }) }) })

    /** Answer a routed request with a 200 JSON body wrapping the given data. */
    const fulfillOk = (route: import('@playwright/test').Route, data: unknown) =>
      route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({ success: true, data }),
      })

    // Mock pages API
    await page.route(`**/api/projects/${PROJECT_ID}/pages`, (route) =>
      fulfillOk(route, { pages: [{ id: 'p1', title: 'Page 1', order_index: 0 }] }))

    // Mock reference files list (actual endpoint: /api/reference-files/project/:id)
    await page.route(`**/api/reference-files/project/${PROJECT_ID}`, (route) =>
      fulfillOk(route, {
        files: [{
          id: FILE_ID,
          filename: 'test-document.pdf',
          file_size: 12345,
          file_type: 'application/pdf',
          parse_status: 'completed',
        }],
      }))

    // Mock single file detail (for preview)
    await page.route(`**/api/reference-files/${FILE_ID}`, (route) =>
      fulfillOk(route, {
        file: {
          id: FILE_ID,
          filename: 'test-document.pdf',
          file_size: 12345,
          file_type: 'application/pdf',
          parse_status: 'completed',
          markdown_content: LONG_MARKDOWN,
        },
      }))

    // Navigate to outline editor (correct route: /project/:id/outline)
    await page.goto(`/project/${PROJECT_ID}/outline`)

    // Click the file card to open preview
    const fileCard = page.locator('text=test-document.pdf').first()
    await fileCard.waitFor({ state: 'visible', timeout: 10_000 })
    await fileCard.click()

    // Wait for the file preview modal (second dialog, after any help modal)
    const modal = page.locator('[role="dialog"]').last()
    await expect(modal).toBeVisible({ timeout: 5_000 })

    // KEY ASSERTION: between the dialog and the prose, there should be only ONE
    // scrollable ancestor (the Modal's own content area), not two (which was the bug).
    const proseDiv = modal.locator('.prose')
    await expect(proseDiv).toBeVisible()

    const scrollableAncestorCount = await proseDiv.evaluate(el => {
      // Walk up from the prose element until an element carrying a role attribute is reached
      let scrollable = 0
      for (let cur = el.parentElement; cur && !cur.hasAttribute('role'); cur = cur.parentElement) {
        const overflowY = getComputedStyle(cur).overflowY
        if (overflowY === 'auto' || overflowY === 'scroll') scrollable += 1
      }
      return scrollable
    })
    expect(scrollableAncestorCount).toBe(1)

    // Prose itself should hide horizontal overflow
    await expect(proseDiv).toHaveCSS('overflow-x', 'hidden')
  })
})

================================================
FILE: frontend/e2e/generation-fail.spec.ts
================================================
import { test, expect, Page } from '@playwright/test'

/**
 * Install the mocks for a failing generation flow.
 *
 * @param page - page to install the routes on
 * @param projectId - id returned by the mocked project creation
 * @param failUrl - route pattern that is answered with a 503 error
 */
async function setupFailureMocks(page: Page, projectId: string, failUrl: string) {
  const sendJson = (route: import('@playwright/test').Route, status: number, payload: unknown) =>
    route.fulfill({ status, contentType: 'application/json', body: JSON.stringify(payload) })

  // Routes don't overlap in practice; order doesn't matter here

  // DELETE on the project succeeds; every other method passes through
  await page.route(`**/api/projects/${projectId}`, async (route) => {
    if (route.request().method() !== 'DELETE') {
      await route.continue()
      return
    }
    await sendJson(route, 200, { success: true })
  })

  // The generation endpoint under test always fails
  await page.route(failUrl, async (route) => {
    await sendJson(route, 503, { error: { message: 'AI service unavailable' } })
  })

  // Project creation (POST) returns the fixed id; other methods pass through
  await page.route('**/api/projects', async (route) => {
    if (route.request().method() !== 'POST') {
      await route.continue()
      return
    }
    await sendJson(route, 201, { success: true, data: { project_id: projectId } })
  })
}

test.describe('Generation failure handling', () => {
  test.beforeEach(async ({ page }) => {
    await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true'))
    await page.goto('/')
  })

  test('outline: stays on Home when generateOutline fails', async ({ page }) => {
    await setupFailureMocks(page, 'test-outline-fail', '**/api/projects/*/generate/outline')

    await page.locator('button').filter({
hasText: /从大纲生成|From Outline/i }).click()

    const editor = page.locator('[role="textbox"][contenteditable="true"]').first()
    await editor.click()
    await editor.pressSequentially('Slide 1: Intro\nSlide 2: Content\nSlide 3: Summary', { delay: 10 })
    await page.locator('button').filter({ hasText: /下一步|Next/i }).click()

    // The mocked 503 message must be shown and the URL must not change to an editor route
    await expect(page.getByText(/AI service unavailable/i)).toBeVisible({ timeout: 15000 })
    expect(page.url()).not.toContain('/outline')
    expect(page.url()).not.toContain('/detail')
  })

  test('description: stays on Home when generateFromDescription fails', async ({ page }) => {
    await setupFailureMocks(page, 'test-desc-fail', '**/api/projects/*/generate/from-description')

    await page.locator('button').filter({ hasText: /从描述生成|From Description/i }).click()
    const editor = page.locator('[role="textbox"][contenteditable="true"]').first()
    await editor.click()
    await editor.pressSequentially('page1 intro page2 content page3 summary', { delay: 10 })
    await page.locator('button').filter({ hasText: /下一步|Next/i }).click()

    await expect(page.getByText(/AI service unavailable/i)).toBeVisible({ timeout: 15000 })
    expect(page.url()).not.toContain('/detail')
    expect(page.url()).not.toContain('/outline')
  })
})

// --- Integration tests (real backend) ---
test.describe('Generation failure rollback (integration)', () => {
  test.beforeEach(async ({ page }) => {
    await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true'))
    await page.goto('/')
  })

  /**
   * Run one creation flow without mocks and check whichever outcome happens.
   *
   * If an error element appears first, the URL must not contain an editor route and,
   * when the created project id was captured, fetching it must return 404.
   * If navigation wins, the URL must match an editor route.
   *
   * @param page - page already on Home
   * @param modeButton - text matcher for the creation-mode button to click
   * @param prompt - text typed into the editor
   */
  async function expectRollbackOrNavigation(page: Page, modeButton: RegExp, prompt: string) {
    // Capture project_id from creation response (no mock — real backend)
    let projectId: string | null = null
    page.on('response', async (resp) => {
      if (resp.url().includes('/api/projects') && resp.request().method() === 'POST' && resp.status() === 201) {
        const body = await resp.json().catch(() => null)
        projectId = body?.data?.project_id ?? null
      }
    })

    await page.locator('button').filter({ hasText: modeButton }).click()
    const editor = page.locator('[role="textbox"][contenteditable="true"]').first()
    await editor.click()
    await editor.pressSequentially(prompt, { delay: 10 })
    await page.locator('button').filter({ hasText: /下一步|Next/i }).click()

    // Wait for either error toast (failure) or navigation (success)
    const errorLocator = page.locator('[class*="error"], [class*="toast"], [role="alert"]').first()
    const navigated = page.waitForURL(/\/(outline|detail)/, { timeout: 60000 }).then(() => 'navigated' as const)
    const errored = errorLocator.waitFor({ timeout: 60000 }).then(() => 'errored' as const)

    // Only one of the two waits can win; the other rejects later (timeout or page close).
    // Attach no-op handlers so that late rejection is not reported as unhandled.
    // Promise.race below still observes the original promises, so its outcome is unchanged.
    navigated.catch(() => {})
    errored.catch(() => {})

    const result = await Promise.race([navigated, errored])

    if (result === 'errored') {
      // Generation failed → should stay on Home
      expect(page.url()).not.toContain('/outline')
      expect(page.url()).not.toContain('/detail')

      // Verify project was rolled back (deleted) — fetch inside browser goes through Vite proxy
      if (projectId) {
        const status = await page.evaluate(async (id) => {
          const r = await fetch(`/api/projects/${id}`)
          return r.status
        }, projectId)
        expect(status).toBe(404)
      }
    } else {
      expect(page.url()).toMatch(/\/(outline|detail)/)
    }
  }

  test('outline: failed generation deletes project and stays on Home', async ({ page }) => {
    await expectRollbackOrNavigation(page, /从大纲生成|From Outline/i, 'Slide 1: Intro\nSlide 2: Body\nSlide 3: End')
  })

  test('description: failed generation deletes project and stays on Home', async ({ page }) => {
    await expectRollbackOrNavigation(page, /从描述生成|From Description/i, 'page1 intro page2 content page3 summary')
  })
})

================================================
FILE: frontend/e2e/generation-requirements.spec.ts
================================================
import { test, expect } from '@playwright/test'

const PROJECT_ID = 'mock-gen-req-project'

/**
 * Build the mocked project payload.
 *
 * @param overrides - fields spread last, so they replace the defaults; null adds nothing
 */
const mockProject = (overrides: Record<string, unknown> | null = {}) => ({
  project_id: PROJECT_ID,
  status: 'OUTLINE_GENERATED',
  idea_prompt: 'Test idea',
  creation_type: 'idea',
  outline_requirements: '',
  description_requirements: '',
  pages: [
    {
      page_id: 'page-1',
      order_index: 0,
      outline_content: { title: 'Page One', points: ['Point A'] },
      description_content: { text: 'Page one description', generated_at: '2025-01-01' },
      status: 'DESCRIPTION_GENERATED',
    },
  ],
  created_at: '2025-01-01T00:00:00',
  updated_at: '2025-01-01T00:00:00',
  ...overrides,
})

/** Locate the outline requirements editor (contentEditable inside data-testid wrapper) */
const outlineReqEditor = (page: import('@playwright/test').Page) =>
page.locator('[data-testid="outline-requirements-textarea"] [contenteditable="true"]').first()

/** Locate the description requirements editor */
const descReqEditor = (page: import('@playwright/test').Page) =>
  page.locator('[data-testid="desc-requirements-textarea"] [contenteditable="true"]').first()

/** Locate the outline requirements toggle button */
const outlineReqToggle = (page: import('@playwright/test').Page) =>
  page.locator('[data-testid="outline-requirements-toggle"]').first()

/** Locate the description requirements toggle button */
const descReqToggle = (page: import('@playwright/test').Page) =>
  page.locator('[data-testid="desc-requirements-toggle"]').first()

/**
 * Clear and type into a contentEditable element.
 * An empty `text` selects all and deletes the selection instead of inserting.
 */
async function clearAndType(editor: import('@playwright/test').Locator, text: string) {
  await editor.focus()
  await editor.press('Control+a')
  if (text) {
    await editor.page().keyboard.insertText(text)
  } else {
    await editor.press('Backspace')
  }
}

/** Answer a routed request with a successful project payload built by mockProject. */
const fulfillWithProject = (route: import('@playwright/test').Route, overrides: Record<string, unknown> = {}) =>
  route.fulfill({
    status: 200,
    contentType: 'application/json',
    body: JSON.stringify({ success: true, data: mockProject(overrides) }),
  })

/**
 * Mock the project endpoint for the auto-save tests.
 * PUT: reports the request body through `onSave`, then echoes it back merged into the project.
 * Any other method: returns the default mocked project.
 */
async function mockProjectWithSaveCapture(
  page: import('@playwright/test').Page,
  onSave: (payload: Record<string, unknown> | null) => void,
) {
  await page.route(`**/api/projects/${PROJECT_ID}`, async (route) => {
    if (route.request().method() === 'PUT') {
      const payload: Record<string, unknown> | null = route.request().postDataJSON()
      onSave(payload)
      await fulfillWithProject(route, payload ?? {})
    } else {
      await fulfillWithProject(route)
    }
  })
}

// ── Mock tests ──────────────────────────────────────────────────────
test.describe('Generation requirements - OutlineEditor (mock)', () => {
  test('shows collapsible requirements section that auto-saves', async ({ page }) => {
    let savedPayload: Record<string, unknown> | null = null
    await mockProjectWithSaveCapture(page, (payload) => { savedPayload = payload })

    await page.goto(`/project/${PROJECT_ID}/outline`)
    await page.waitForLoadState('networkidle')

    // Requirements toggle should exist
    const toggle = outlineReqToggle(page)
    await expect(toggle).toBeVisible()

    // Expand it
    await toggle.click()

    // Editor should now be visible
    const editor = outlineReqEditor(page)
    await expect(editor).toBeVisible()

    // Type requirements
    await clearAndType(editor, '限制在5页以内')

    // Blur to trigger save
    await page.locator('header').first().click()

    // Wait for save
    await expect.poll(() => savedPayload, { timeout: 5000 }).not.toBeNull()
    expect(savedPayload).toHaveProperty('outline_requirements', '限制在5页以内')
  })

  test('auto-expands when requirements exist', async ({ page }) => {
    await page.route(`**/api/projects/${PROJECT_ID}`, (route) =>
      fulfillWithProject(route, { outline_requirements: 'Some requirement' }))

    await page.goto(`/project/${PROJECT_ID}/outline`)
    await page.waitForLoadState('networkidle')

    // Should auto-expand when there are existing requirements
    const editor = outlineReqEditor(page)
    await expect(editor).toBeVisible()
    await expect(editor).toContainText('Some requirement')
  })
})

test.describe('Generation requirements - DetailEditor (mock)', () => {
  test('shows collapsible requirements section that auto-saves', async ({ page }) => {
    let savedPayload: Record<string, unknown> | null = null
    await mockProjectWithSaveCapture(page, (payload) => { savedPayload = payload })

    await page.goto(`/project/${PROJECT_ID}/detail`)
    await page.waitForLoadState('networkidle')

    // Requirements toggle should exist
    const toggle = descReqToggle(page)
    await expect(toggle).toBeVisible()

    // Expand it
    await toggle.click()

    // Editor should be visible
    const editor = descReqEditor(page)
    await expect(editor).toBeVisible()

    // Type requirements
    await clearAndType(editor, '每页不超过50字')

    // Blur to trigger save
    await page.locator('header').first().click()

    // Wait for save
    await expect.poll(() => savedPayload, { timeout: 5000 }).not.toBeNull()
    expect(savedPayload).toHaveProperty('description_requirements', '每页不超过50字')
  })

  test('auto-expands when requirements exist', async ({ page }) => {
    await page.route(`**/api/projects/${PROJECT_ID}`, (route) =>
      fulfillWithProject(route, { description_requirements: 'Existing desc requirement' }))

    await page.goto(`/project/${PROJECT_ID}/detail`)
    await page.waitForLoadState('networkidle')

    // Should auto-expand with existing content
    const editor = descReqEditor(page)
    await expect(editor).toBeVisible()
    await expect(editor).toContainText('Existing desc requirement')
  })
})

// ── Integration tests ───────────────────────────────────────────────
test.describe('Generation requirements (integration)', () => {
  let projectId: string

  /** True for the PUT response that saves the current project. */
  const isProjectSave = (resp: import('@playwright/test').Response) =>
    resp.url().includes(`/api/projects/${projectId}`) && resp.request().method() === 'PUT'

  test.beforeEach(async ({ request }) => {
    const res = await request.post('/api/projects', {
      data: { idea_prompt: 'Integration test for requirements', creation_type: 'idea' },
    })
    // Fail here with a clear assertion instead of a TypeError on body.data below
    expect(res.ok()).toBeTruthy()
    const body = await res.json()
    projectId = body.data.project_id
  })

  test('outline requirements: save, reload, verify persisted', async ({ page }) => {
    await page.goto(`/project/${projectId}/outline`)
    await page.waitForLoadState('networkidle')

    // Expand requirements section
    const toggle = outlineReqToggle(page)
    await toggle.click()

    // Type requirements
    const editor = outlineReqEditor(page)
    await expect(editor).toBeVisible()
    await clearAndType(editor, '限制在8页以内')

    // Blur and wait for save (start waiting before the click that triggers it)
    const savePromise = page.waitForResponse(isProjectSave)
    await page.locator('header').first().click()
    await savePromise

    // Reload and verify persisted
    await page.reload()
    await page.waitForLoadState('networkidle')

    // Should auto-expand since there's content
    const editorAfter = outlineReqEditor(page)
    await expect(editorAfter).toBeVisible()
    await expect(editorAfter).toContainText('限制在8页以内')
  })

  test('description requirements: save, reload, verify persisted', async ({ page, request }) => {
    // Create a page so detail editor has content
    const outlineRes = await request.post(`/api/projects/${projectId}/pages`, {
      data: {
        outline_content: { title: 'Test Page', points: ['Point 1'] },
        order_index: 0,
      },
    })
    expect(outlineRes.ok()).toBeTruthy()

    await page.goto(`/project/${projectId}/detail`)
    await page.waitForLoadState('networkidle')

    // Expand requirements section
    const toggle = descReqToggle(page)
    await toggle.click()

    // Type requirements
    const editor = descReqEditor(page)
    await expect(editor).toBeVisible()
    await clearAndType(editor, '多使用数据和案例')

    // Blur and wait for save
    const savePromise = page.waitForResponse(isProjectSave)
    await page.locator('header').first().click()
    await savePromise

    // Reload and verify persisted
    await page.reload()
    await page.waitForLoadState('networkidle')

    const editorAfter = descReqEditor(page)
    await expect(editorAfter).toBeVisible()
    await expect(editorAfter).toContainText('多使用数据和案例')
  })

  test('clearing requirements saves empty string', async ({ page }) => {
    // First set a requirement via API
    const setRes = await page.request.put(`/api/projects/${projectId}`, {
      data: { outline_requirements: 'Temporary requirement' },
    })
    expect(setRes.ok()).toBeTruthy()

    await page.goto(`/project/${projectId}/outline`)
    await page.waitForLoadState('networkidle')

    // Should auto-expand with existing content
    const editor = outlineReqEditor(page)
    await expect(editor).toBeVisible()
    await expect(editor).toContainText('Temporary requirement')

    // Clear it
    await clearAndType(editor, '')

    // Blur and wait for save
    const savePromise = page.waitForResponse(isProjectSave)
    await
page.locator('header').first().click()
    await savePromise

    // Reload and verify cleared
    await page.reload()
    await page.waitForLoadState('networkidle')

    // After clearing, the toggle should still exist
    const toggle = outlineReqToggle(page)
    await expect(toggle).toBeVisible()
  })
})

================================================
FILE: frontend/e2e/helpers/seed-project.ts
================================================
/**
 * Shared helper to create projects with real images for E2E testing.
 * Bypasses AI image generation by placing fixture images on disk + updating DB directly.
 *
 * Usage:
 * - Playwright: import { seedProjectWithImages } from './helpers/seed-project'
 * - CLI: npx tsx frontend/e2e/helpers/seed-project.ts [PAGE_COUNT]
 */
import { execFileSync } from 'child_process'
import * as fs from 'fs'
import * as path from 'path'

// Paths are resolved from the working directory, which may be the repo root or frontend/
const cwd = process.cwd()
const FRONTEND_DIR = cwd.endsWith('frontend') ? cwd : path.join(cwd, 'frontend')
const PROJECT_ROOT = path.resolve(FRONTEND_DIR, '..')
const DB_PATH = path.join(PROJECT_ROOT, 'backend', 'instance', 'database.db')
const UPLOADS = path.join(PROJECT_ROOT, 'uploads')
const FIXTURES = path.join(FRONTEND_DIR, 'e2e', 'fixtures')

/**
 * Run one SQL statement against the backend SQLite database via the sqlite3 CLI.
 * Arguments are passed as an argv array (no shell), so quotes, `$` and backticks
 * in the query or the database path reach sqlite3 unchanged.
 */
function sql(query: string) {
  execFileSync('sqlite3', ['-cmd', '.timeout 5000', DB_PATH, query])
}

/** Quote a value as a SQL string literal (embedded single quotes are doubled). */
function sqlLiteral(value: string): string {
  return `'${value.replace(/'/g, "''")}'`
}

/** Get fixture image path (cycles through slide_1.jpg, slide_2.jpg, slide_3.jpg) */
function getFixtureImage(index: number): string {
  const num = (index % 3) + 1
  return path.join(FIXTURES, `slide_${num}.jpg`)
}

export interface SeededProject {
  projectId: string
  pageIds: string[]
}

/**
 * Create a project with N pages, each having a real image on disk.
 * @param baseUrl - Backend base URL, e.g. "http://localhost:5441"
 * @param pageCount - number of pages to create (default 1)
 * @returns the created project id and its page ids, in creation order
 * @throws Error when the backend response carries no project_id / page_id
 */
export async function seedProjectWithImages(
  baseUrl: string,
  pageCount = 1
): Promise<SeededProject> {
  const post = async (urlPath: string, body: object) => {
    const resp = await fetch(`${baseUrl}${urlPath}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    })
    return resp.json()
  }

  const projectId: string | undefined = (await post('/api/projects', {
    creation_type: 'idea',
    idea_prompt: 'e2e test',
    template_style: 'default',
  })).data?.project_id
  // Without this guard the failure would be an opaque TypeError from path.join below
  if (!projectId) {
    throw new Error(`seedProjectWithImages: POST ${baseUrl}/api/projects returned no project_id`)
  }

  const pageIds: string[] = []
  fs.mkdirSync(path.join(UPLOADS, projectId, 'pages'), { recursive: true })

  for (let i = 0; i < pageCount; i++) {
    const pageId: string | undefined = (await post(`/api/projects/${projectId}/pages`, {
      order_index: i,
      outline_content: { title: `Slide ${i + 1}` },
    })).data?.page_id
    // A missing id would otherwise seed a file named "undefined_v1.jpg" and update no row
    if (!pageId) {
      throw new Error(`seedProjectWithImages: creating page ${i} of project ${projectId} returned no page_id`)
    }
    pageIds.push(pageId)

    // Copy the fixture to the uploads tree and point the page row at it
    const rel = `${projectId}/pages/${pageId}_v1.jpg`
    fs.copyFileSync(getFixtureImage(i), path.join(UPLOADS, rel))
    sql(`UPDATE pages SET generated_image_path=${sqlLiteral(rel)}, status='COMPLETED' WHERE id=${sqlLiteral(pageId)}`)
  }

  sql(`UPDATE projects SET status='IMAGES_GENERATED' WHERE id=${sqlLiteral(projectId)}`)
  return { projectId, pageIds }
}

// CLI entry point: npx tsx frontend/e2e/helpers/seed-project.ts [PAGE_COUNT]
if (process.argv[1]?.includes('seed-project')) {
  const { createHash } = await import('crypto')
  const pageCount = parseInt(process.argv[2] || '3', 10)
  // A non-numeric argument would parse to NaN and silently seed zero pages
  if (!Number.isInteger(pageCount) || pageCount < 0) {
    throw new Error(`PAGE_COUNT must be a non-negative integer, got "${process.argv[2]}"`)
  }

  // Auto-detect backend port (same MD5 logic as app.py)
  const envFile = path.join(PROJECT_ROOT, '.env')
  let port = '5000'
  if (fs.existsSync(envFile)) {
    const m = fs.readFileSync(envFile, 'utf8').match(/^BACKEND_PORT=(\d+)/m)
    if (m) port = m[1]
  }
  if (port === '5000') {
    // Derive a port offset from the first 8 hex digits of the MD5 of the project directory name
    const basename = path.basename(PROJECT_ROOT)
    const offset = parseInt(createHash('md5').update(basename).digest('hex').slice(0, 8), 16) % 500
    port = String(5000 + offset)
  }

  const baseUrl = `http://localhost:${port}`
  const res = await seedProjectWithImages(baseUrl, pageCount)
  // The printed preview URL uses the backend port minus 2000
  const fport = parseInt(port, 10) - 2000
  console.log(`Project: ${res.projectId}`)
  console.log(`Preview: http://localhost:${fport}/project/${res.projectId}/preview`)
}

================================================
FILE: frontend/e2e/history-pagination.spec.ts
================================================
/**
 * E2E tests for history page pagination.
 *
 * Mock tests: verify pagination UI renders correctly, page navigation works,
 * and correct API params are sent.
 *
 * Integration test: create enough projects to span multiple pages,
 * verify pagination controls appear and navigate correctly.
 */
import { test, expect } from '@playwright/test'

const PAGE_SIZE = 5

/**
 * Build one mocked project. `index` determines the id (proj-NNN), the label (P-NN)
 * and the timestamps, which move one minute into the past per index.
 */
function makeProject(index: number) {
  const id = `proj-${String(index).padStart(3, '0')}`
  const label = `P-${String(index).padStart(2, '0')}`
  // Compute the timestamp once so created_at and updated_at are always identical
  const timestamp = new Date(Date.now() - index * 60000).toISOString()
  return {
    id,
    project_id: id,
    idea_prompt: label,
    status: 'DRAFT',
    created_at: timestamp,
    updated_at: timestamp,
    pages: [
      {
        id: `page-${id}`,
        page_id: `page-${id}`,
        title: label,
        order_index: 0,
        status: 'DRAFT',
        outline_content: { title: label, points: [] },
      },
    ],
  }
}

/**
 * Install the mocks used by the pagination tests.
 * The projects list is sliced according to the `limit` and `offset` query parameters.
 *
 * @param page - page to install the routes on
 * @param totalProjects - total number of mocked projects
 */
async function setupMockRoutes(
  page: import('@playwright/test').Page,
  totalProjects: number
) {
  // Mock access code check
  await page.route('**/api/access-code/check', async (route) => {
    await route.fulfill({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify({ success: true, data: { enabled: false } }),
    })
  })

  // Mock projects list API — handles both with and without query string
  await page.route('**/api/projects**', async (route) => {
    const req = route.request()
    // Only the GET list endpoint is mocked; sub-resource URLs fall through to other handlers
    if (req.method() !== 'GET' || req.url().includes('/api/projects/')) {
      await route.fallback()
      return
    }

    const url = new URL(req.url())
    const limit = parseInt(url.searchParams.get('limit') || String(PAGE_SIZE), 10)
    const offset = parseInt(url.searchParams.get('offset') || '0', 10)
    const allProjects = Array.from({ length: totalProjects }, (_, i) =>
      makeProject(i + 1)
    )
    const sliced = allProjects.slice(offset, offset + limit)

    await route.fulfill({
      status: 200,
contentType: 'application/json',
      body: JSON.stringify({
        success: true,
        data: { projects: sliced, total: totalProjects, limit, offset },
      }),
    })
  })
}

// ───────────────── Mock tests ─────────────────
test.describe('History pagination — mock', () => {
  type PwPage = import('@playwright/test').Page

  /** Heading of the project card with exactly this label. */
  const projectHeading = (page: PwPage, label: string) =>
    page.getByRole('heading', { name: label, exact: true })

  /** The pagination navigation element. */
  const paginationNav = (page: PwPage) => page.locator('nav[aria-label="Pagination"]')

  /** The numbered page button with exactly this text. */
  const pageButton = (page: PwPage, pageNumber: number) =>
    paginationNav(page).locator(`button:text-is("${pageNumber}")`)

  /** The button marked as the current page. */
  const currentPageButton = (page: PwPage) =>
    paginationNav(page).locator('button[aria-current="page"]')

  test('should not show pagination when projects fit on one page', async ({ page }) => {
    await setupMockRoutes(page, 3) // 3 < PAGE_SIZE, no pagination
    await page.goto('/history')

    await expect(projectHeading(page, 'P-01')).toBeVisible()
    await expect(paginationNav(page)).not.toBeVisible()
  })

  test('should show pagination when projects exceed one page', async ({ page }) => {
    await setupMockRoutes(page, 12) // 3 pages: 5 + 5 + 2
    await page.goto('/history')

    await expect(projectHeading(page, 'P-01')).toBeVisible()
    await expect(paginationNav(page)).toBeVisible()
    await expect(currentPageButton(page)).toHaveText('1')
    await expect(pageButton(page, 3)).toBeVisible()
  })

  test('should navigate to next page and load correct projects', async ({ page }) => {
    await setupMockRoutes(page, 12)
    await page.goto('/history')
    await expect(projectHeading(page, 'P-01')).toBeVisible()

    await pageButton(page, 2).click()

    // Page 2: P-06 to P-10
    await expect(projectHeading(page, 'P-06')).toBeVisible()
    await expect(projectHeading(page, 'P-01')).not.toBeVisible()
    await expect(currentPageButton(page)).toHaveText('2')
  })

  test('should navigate to last page with fewer items', async ({ page }) => {
    await setupMockRoutes(page, 12) // last page has 2 items (P-11, P-12)
    await page.goto('/history')
    await expect(projectHeading(page, 'P-01')).toBeVisible()

    await pageButton(page, 3).click()

    await expect(projectHeading(page, 'P-11')).toBeVisible()
    await expect(projectHeading(page, 'P-12')).toBeVisible()
    await expect(currentPageButton(page)).toHaveText('3')
  })

  test('previous/next buttons should work correctly', async ({ page }) => {
    await setupMockRoutes(page, 15) // 3 pages
    await page.goto('/history')
    await expect(projectHeading(page, 'P-01')).toBeVisible()

    const prevButton = paginationNav(page).locator('button[aria-label="Previous page"]')
    const nextButton = paginationNav(page).locator('button[aria-label="Next page"]')

    // On the first page there is nothing to go back to
    await expect(prevButton).toBeDisabled()

    await nextButton.click()
    await expect(projectHeading(page, 'P-06')).toBeVisible()
    await expect(prevButton).not.toBeDisabled()

    await prevButton.click()
    await expect(projectHeading(page, 'P-01')).toBeVisible()
  })

  test('should send correct limit and offset params in API request', async ({ page }) => {
    const requests: string[] = []
    await setupMockRoutes(page, 12)

    // Record every request URL that targets the projects API
    page.on('request', (req) => {
      const requestUrl = req.url()
      if (requestUrl.includes('/api/projects')) {
        requests.push(requestUrl)
      }
    })

    const firstPaginatedRequest = () => requests.find((r) => r.includes('limit='))

    await page.goto('/history')
    await expect(projectHeading(page, 'P-01')).toBeVisible()

    const firstReq = firstPaginatedRequest()
    expect(firstReq).toContain('limit=5')
    expect(firstReq).toContain('offset=0')

    // Drop the recorded URLs so the next lookup only sees page-2 traffic
    requests.length = 0
    await pageButton(page, 2).click()
    await expect(projectHeading(page, 'P-06')).toBeVisible()

    const secondReq = firstPaginatedRequest()
    expect(secondReq).toContain('limit=5')
    expect(secondReq).toContain('offset=5')
  })
})
// ───────────────── Integration test ─────────────────

test.describe('History pagination — integration', () => {
  const frontendUrl = process.env.BASE_URL || 'http://localhost:3000'
  // The backend port is derived from the frontend port (frontend + 2000).
  const frontendPort = parseInt(new URL(frontendUrl).port || '3000', 10)
  const BACKEND_URL = `http://localhost:${frontendPort + 2000}`

  /**
   * Creates one idea-type project directly against the backend.
   *
   * @param index sequence number used to build the `PagTest-NN` prompt
   * @returns the new project's id, or `undefined` when the response carries no `data.project_id`
   */
  async function createSimpleProject(index: number): Promise<string | undefined> {
    const resp = await fetch(`${BACKEND_URL}/api/projects`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        creation_type: 'idea',
        idea_prompt: `PagTest-${String(index).padStart(2, '0')}`,
      }),
    })
    const json = await resp.json()
    return json.data?.project_id
  }

  /** Deletes a project directly against the backend. */
  async function deleteProject(projectId: string) {
    await fetch(`${BACKEND_URL}/api/projects/${projectId}`, { method: 'DELETE' })
  }

  test('pagination works with real backend data', async ({ page }) => {
    const projectIds: string[] = []

    // Creation and the count assertion live inside `try` so that projects created
    // before a failure are still removed by the `finally` block.
    try {
      // Create 8 projects (enough for 2 pages with PAGE_SIZE=5)
      for (let i = 0; i < 8; i++) {
        const id = await createSimpleProject(i + 1)
        if (id) projectIds.push(id)
      }
      expect(projectIds.length).toBe(8)

      await page.goto('/history')
      await page.waitForLoadState('networkidle')
      await expect(page.locator('text=/历史项目|Project History/')).toBeVisible({ timeout: 10000 })

      const pagination = page.locator('nav[aria-label="Pagination"]')
      await expect(pagination).toBeVisible({ timeout: 10000 })
      await expect(
        pagination.locator('button[aria-current="page"]')
      ).toHaveText('1')

      await pagination.locator('button:text-is("2")').click()
      await page.waitForLoadState('networkidle')
      await expect(
        pagination.locator('button[aria-current="page"]')
      ).toHaveText('2')
    } finally {
      await Promise.all(projectIds.map(id => deleteProject(id)))
    }
  })
})
================================================
FILE: frontend/e2e/image-prompt-ratio.spec.ts
================================================
/**
 * Image Prompt Aspect Ratio - Integration E2E Test
 *
 * Verifies that the project's aspect ratio is correctly stored, updated,
* and passed through to image generation tasks.
 */
import { test, expect } from '@playwright/test'

const BASE = process.env.BASE_URL || 'http://localhost:3000'
const API = `http://localhost:${Number(new URL(BASE).port) + 2000}`

test.describe('Image prompt aspect ratio', () => {
  let projectId: string

  // Collection endpoint, and the endpoint of the project created by the running test.
  const projectsUrl = `${API}/api/projects`
  const projectUrl = () => `${API}/api/projects/${projectId}`

  // Remove whatever project the test created; a test that never got an id leaves nothing behind.
  test.afterEach(async ({ request }) => {
    if (!projectId) return
    await request.delete(projectUrl())
    projectId = ''
  })

  test('project stores and returns custom aspect ratio (4:3)', async ({
    request,
  }) => {
    // Create project with 4:3 ratio
    const created = await request.post(projectsUrl, {
      data: {
        creation_type: 'idea',
        idea_prompt: 'test ratio storage',
        image_aspect_ratio: '4:3',
        page_count: 1,
      },
    })
    expect(created.ok()).toBeTruthy()
    const createdBody = await created.json()
    projectId = createdBody.data.project_id

    // Verify it persists on GET
    const readBack = await request.get(projectUrl())
    expect(readBack.ok()).toBeTruthy()
    const readBackBody = await readBack.json()
    expect(readBackBody.data.image_aspect_ratio).toBe('4:3')
  })

  test('project aspect ratio can be updated from 16:9 to 1:1', async ({
    request,
  }) => {
    // Create with default
    const created = await request.post(projectsUrl, {
      data: {
        creation_type: 'idea',
        idea_prompt: 'test ratio update',
        page_count: 1,
      },
    })
    expect(created.ok()).toBeTruthy()
    const createdBody = await created.json()
    projectId = createdBody.data.project_id

    // Verify default is 16:9
    const beforeUpdate = await (await request.get(projectUrl())).json()
    expect(beforeUpdate.data.image_aspect_ratio).toBe('16:9')

    // Update to 1:1
    const updated = await request.put(projectUrl(), {
      data: { image_aspect_ratio: '1:1' },
    })
    expect(updated.ok()).toBeTruthy()

    // Verify update persisted
    const afterUpdate = await (await request.get(projectUrl())).json()
    expect(afterUpdate.data.image_aspect_ratio).toBe('1:1')
  })
})
================================================
FILE: frontend/e2e/image-queued-status.spec.ts
================================================
import { test, expect, Page } from '@playwright/test'

const PROJECT_ID = 'queued-status-mock'
const PAGE_IDS = ['p-1', 'p-2', 'p-3', 'p-4']

/**
 * Builds one mock page payload.
 *
 * @param id       page id
 * @param idx      zero-based position; titles and descriptions use `idx + 1`
 * @param status   page status reported to the UI (e.g. 'QUEUED', 'GENERATING', 'COMPLETED')
 * @param hasImage when true the page points at a generated image under `/files/…`, otherwise null
 */
function makePage(id: string, idx: number, status: string, hasImage: boolean) {
  return {
    page_id: id,
    order_index: idx,
    outline_content: { title: `Slide ${idx + 1}`, points: ['pt'] },
    description_content: { text: `Desc ${idx + 1}` },
    generated_image_url: hasImage ? `/files/${PROJECT_ID}/pages/${id}_v1.jpg` : null,
    status,
    created_at: '2026-01-01T00:00:00',
    updated_at: '2026-01-01T00:00:00',
  }
}

/** Wraps `pages` in the project response envelope returned by the mocked GET project route. */
function projectJson(pages: ReturnType<typeof makePage>[], projectStatus = 'COMPLETED') {
  return {
    success: true,
    data: {
      id: PROJECT_ID,
      creation_type: 'idea',
      idea_prompt: 'test',
      status: projectStatus,
      template_style: 'default',
      image_aspect_ratio: '16:9',
      pages,
      created_at: '2026-01-01T00:00:00',
      updated_at: '2026-01-01T00:00:00',
    },
  }
}

/** Mocks the endpoints every preview-page test needs: access code, templates, image versions, files. */
async function mockCommonRoutes(page: Page) {
  await page.route('**/api/access-code/check', (r) =>
    r.fulfill({ status: 200, contentType: 'application/json', body: '{"success":true,"data":{"enabled":false}}' }))
  await page.route('**/api/user-templates', (r) =>
    r.fulfill({ status: 200, contentType: 'application/json', body: '{"success":true,"data":{"templates":[]}}' }))
  await page.route('**/api/projects/*/pages/*/image-versions', (r) =>
    r.fulfill({ status: 200, contentType: 'application/json', body: '{"success":true,"data":{"versions":[]}}' }))
  // Any file request is answered with a 4-byte JPEG header stub.
  await page.route('**/files/**', (r) =>
    r.fulfill({ status: 200, contentType: 'image/jpeg', body: Buffer.from([0xff, 0xd8, 0xff, 0xe0]) }))
}

// ─── Mock tests ───

test.describe('QUEUED status during batch image generation (mock)', () => {
  test.beforeEach(async ({ page }) => {
    await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true'))
  })

  test('badges show QUEUED status for pages waiting in queue', async ({ page }) => {
    await mockCommonRoutes(page)

    // Mix of statuses: 1 generating, 3 queued (simulating batch generation with concurrency limit)
    const pages = [
      makePage('p-1', 0, 'GENERATING', false),
      makePage('p-2', 1, 'QUEUED', false),
      makePage('p-3', 2, 'QUEUED', false),
      makePage('p-4', 3, 'QUEUED', false),
    ]

    await page.route(`**/api/projects/${PROJECT_ID}`, (r) => {
      if (r.request().method() === 'GET') {
        return r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(projectJson(pages, 'GENERATING_IMAGES')) })
      }
      return r.continue()
    })

    await page.goto(`/project/${PROJECT_ID}/preview`)

    const badges = page.locator('[data-testid="status-badge"]')
    await expect(badges.first()).toBeVisible({ timeout: 10000 })

    // First page should be GENERATING
    await expect(badges.nth(0)).toHaveAttribute('data-status', 'GENERATING')

    // Remaining pages should be QUEUED
    for (let i = 1; i < 4; i++) {
      await expect(badges.nth(i)).toHaveAttribute('data-status', 'QUEUED')
    }
  })

  test('badges transition from QUEUED → GENERATING → COMPLETED', async ({ page }) => {
    await mockCommonRoutes(page)

    // Phase 1: all pages QUEUED
    // Phase 2: 2 generating, 2 queued
    // Phase 3: all completed
    let phase: 'queued' | 'partial' | 'completed' = 'queued'

    await page.route(`**/api/projects/${PROJECT_ID}`, (r) => {
      if (r.request().method() !== 'GET') return r.continue()

      if (phase === 'queued') {
        const pages = PAGE_IDS.map((id, i) => makePage(id, i, 'QUEUED', false))
        return r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(projectJson(pages, 'GENERATING_IMAGES')) })
      }
      if (phase === 'partial') {
        const pages = [
          makePage('p-1', 0, 'GENERATING', false),
          makePage('p-2', 1, 'GENERATING', false),
          makePage('p-3', 2, 'QUEUED', false),
          makePage('p-4', 3, 'QUEUED', false),
        ]
        return r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(projectJson(pages, 'GENERATING_IMAGES')) })
      }
      const pages = PAGE_IDS.map((id, i) => makePage(id, i, 'COMPLETED', true))
      return r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(projectJson(pages)) })
    })

    await page.goto(`/project/${PROJECT_ID}/preview`)

    const badges = page.locator('[data-testid="status-badge"]')
    await expect(badges.first()).toBeVisible({ timeout: 10000 })

    // Phase 1: all QUEUED
    for (let i = 0; i < 4; i++) {
      await expect(badges.nth(i)).toHaveAttribute('data-status', 'QUEUED')
    }

    // Phase 2: partial progress
    phase = 'partial'
    // page.reload() waits for the navigation itself; reloading via page.evaluate()
    // races with the teardown of the execution context it runs in.
    await page.reload()
    await expect(badges.first()).toBeVisible({ timeout: 10000 })

    await expect(badges.nth(0)).toHaveAttribute('data-status', 'GENERATING')
    await expect(badges.nth(1)).toHaveAttribute('data-status', 'GENERATING')
    await expect(badges.nth(2)).toHaveAttribute('data-status', 'QUEUED')
    await expect(badges.nth(3)).toHaveAttribute('data-status', 'QUEUED')

    // Phase 3: all completed
    phase = 'completed'
    await page.reload()
    await expect(badges.first()).toBeVisible({ timeout: 10000 })

    for (let i = 0; i < 4; i++) {
      await expect(badges.nth(i)).toHaveAttribute('data-status', 'COMPLETED')
    }
  })

  test('QUEUED pages show skeleton in slide cards', async ({ page }) => {
    await mockCommonRoutes(page)

    const pages = [
      makePage('p-1', 0, 'QUEUED', false),
      makePage('p-2', 1, 'QUEUED', false),
    ]

    await page.route(`**/api/projects/${PROJECT_ID}`, (r) => {
      if (r.request().method() === 'GET') {
        return r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(projectJson(pages, 'GENERATING_IMAGES')) })
      }
      return r.continue()
    })

    await page.goto(`/project/${PROJECT_ID}/preview`)

    // Skeleton elements (animate-shimmer) should be visible for QUEUED pages
    const skeletons = page.locator('.animate-shimmer')
    await expect(skeletons.first()).toBeVisible({ timeout: 10000 })
    const count = await skeletons.count()
    expect(count).toBeGreaterThanOrEqual(2)
  })
})

// ─── Integration test (real backend) ───

test.describe('QUEUED status (integration)', () => {
  test.beforeEach(async ({ page }) => {
    await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true'))
  })

  test('batch generate sets pages to QUEUED status in backend', async ({ baseURL }) => {
    const jsonHeaders = { 'Content-Type': 'application/json' }

    // Create a project with description content via API
    const createRes = await fetch(`${baseURL}/api/projects`, {
      method: 'POST',
      headers: jsonHeaders,
      body: JSON.stringify({
        idea_prompt: 'QUEUED status integration test',
        creation_type: 'idea',
      }),
    })
    // Fail with a clear message instead of a TypeError on the property access below.
    expect(createRes.ok).toBeTruthy()
    const createData = await createRes.json()
    const projectId = createData.data.project_id || createData.data.id

    try {
      // Add description content to pages so they can be generated
      const projectRes = await fetch(`${baseURL}/api/projects/${projectId}`)
      const projectData = await projectRes.json()
      const pages = projectData.data.pages || []

      for (const p of pages) {
        const pageId = p.page_id || p.id
        await fetch(`${baseURL}/api/projects/${projectId}/pages/${pageId}`, {
          method: 'PUT',
          headers: jsonHeaders,
          body: JSON.stringify({
            description_content: { text: `Test description for page ${pageId}` },
          }),
        })
      }

      // Trigger batch generation — this should set pages to QUEUED immediately
      // We use a template_style since we don't have a template image
      await fetch(`${baseURL}/api/projects/${projectId}`, {
        method: 'PUT',
        headers: jsonHeaders,
        body: JSON.stringify({ template_style: 'modern minimalist' }),
      })

      const genRes = await fetch(`${baseURL}/api/projects/${projectId}/generate/images`, {
        method: 'POST',
        headers: jsonHeaders,
        body: JSON.stringify({ max_workers: 1 }), // Use 1 worker so most pages stay QUEUED
      })

      if (genRes.status !== 202) {
        // May fail if no AI key configured — skip gracefully
        test.skip(true, 'Image generation not available (missing API key or config)')
        return
      }

      // Immediately check page statuses — they should be QUEUED
      const checkRes = await fetch(`${baseURL}/api/projects/${projectId}`)
      const checkData = await checkRes.json()
      const checkPages: { status: string }[] = checkData.data.pages || []

      // At least some pages should be in QUEUED status
      const queuedPages = checkPages.filter((p) => p.status === 'QUEUED')
      const generatingPages = checkPages.filter((p) => p.status === 'GENERATING')

      // All pages should be either QUEUED or GENERATING (the task may have already started for 1 page)
      for (const p of checkPages) {
        expect(['QUEUED', 'GENERATING']).toContain(p.status)
      }

      // With max_workers=1, at most 1 page should be GENERATING
      expect(generatingPages.length).toBeLessThanOrEqual(1)
      // The rest should be QUEUED
      expect(queuedPages.length).toBeGreaterThanOrEqual(checkPages.length - 1)
    } finally {
      // Best-effort cleanup so repeated runs do not pile up projects in the real backend.
      // NOTE(review): generation may still be running when this fires — confirm the backend
      // tolerates deleting a project with in-flight image tasks.
      await fetch(`${baseURL}/api/projects/${projectId}`, { method: 'DELETE' }).catch(() => undefined)
    }
  })
})
================================================
FILE: frontend/e2e/import-markdown.spec.ts
================================================
/**
 * E2E test: Import outline / description from Markdown files
 */
import { test, expect } from '@playwright/test'
import * as path from 'path'
import * as fs from 'fs'

test.use({ baseURL: process.env.BASE_URL || 'http://localhost:3000' })

const PROJECT_ID = 'mock-import-proj'

/** Project response envelope for the mocked GET project route. */
const mockProject = (pages: any[] = []) => ({
  success: true,
  data: {
    id: PROJECT_ID,
    project_id: PROJECT_ID,
    title: 'Test',
    status: 'OUTLINE_GENERATED',
    creation_type: 'idea',
    idea_prompt: 'test',
    pages,
  }
})

/** Settings response envelope for the mocked settings route. */
const mockSettings = () => ({
  success: true,
  data: { ai_provider_format: 'gemini', google_api_key: 'fake' }
})

// NOTE(review): the Markdown fixtures below had been collapsed onto a single line, which
// leaves headings, quotes and list items without a line start of their own. Line breaks are
// reconstructed from the Markdown structure; blank-line placement is a best guess — confirm
// against the exporter's output format.

// Unified format fixture (outline + description in one file)
const UNIFIED_MD = `# 项目

## 第 1 页: AI简介
> 章节: 引言

**大纲要点:**
- 什么是人工智能
- AI的历史

**页面描述:**
这是关于AI简介的描述内容。

---

## 第 2 页: AI应用
> 章节: 正文

**大纲要点:**
- 医疗领域
- 教育领域

**页面描述:**
这是关于AI应用的描述内容。

---
`

// Legacy format (no markers) — should still parse
const LEGACY_MD = `# 大纲

## 第 1 页: 旧格式页面
> 章节: 测试

- 要点一
- 要点二
`

const EMPTY_MD = `# 空文件

没有任何页面内容
`

test.describe('Import Markdown (mocked)', () => {
  test.setTimeout(60_000)

  let addPageCalls: any[]
  let projectPages: any[]

  test.beforeEach(async ({ page }) => {
    addPageCalls = []
    projectPages = []

    await
page.route('**/api/settings', r => r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(mockSettings()) }))
    await page.route('**/api/access-code/check', r => r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { required: false } }) }))
    await page.route('**/api/user-templates', r => r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: [] }) }))
    await page.route(`**/api/reference-files/project/${PROJECT_ID}`, r => r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { files: [] } }) }))

    // Project endpoint: returns current pages state
    await page.route(`**/api/projects/${PROJECT_ID}`, r => r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(mockProject(projectPages)) }))

    // Add page endpoint: capture calls and grow projectPages
    await page.route(`**/api/projects/${PROJECT_ID}/pages`, async (route) => {
      if (route.request().method() === 'POST') {
        const body = route.request().postDataJSON()
        addPageCalls.push(body)
        const newPage = {
          id: `page-${addPageCalls.length}`,
          page_id: `page-${addPageCalls.length}`,
          order_index: body.order_index ?? projectPages.length,
          outline_content: body.outline_content || { title: '', points: [] },
          description_content: body.description_content || null,
          part: body.part || null,
          status: 'DRAFT',
        }
        projectPages.push(newPage)
        await route.fulfill({ status: 201, contentType: 'application/json', body: JSON.stringify({ success: true, data: newPage }) })
      } else {
        // Every non-POST request is answered with the current page list.
        await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { pages: projectPages } }) })
      }
    })
  })

  /**
   * Writes `content` to a scratch file and returns its path.
   *
   * The file goes into the running test's own output directory rather than a fixed
   * `/tmp/<name>`: that path does not exist on every platform, and a fixed name can be
   * overwritten by another worker running the same spec. Must be called from inside a test.
   */
  function writeTempFile(name: string, content: string): string {
    const filePath = test.info().outputPath(name)
    fs.mkdirSync(path.dirname(filePath), { recursive: true })
    fs.writeFileSync(filePath, content, 'utf-8')
    return filePath
  }

  /** Feeds `mdPath` to the first Markdown file input on the current page. */
  async function uploadMarkdown(page: import('@playwright/test').Page, mdPath: string) {
    const fileInput = page.locator('input[type="file"][accept=".md,.txt"]').first()
    await fileInput.setInputFiles(mdPath)
  }

  /** Opens the given view of the mock project, waits for the import button, then uploads `mdPath`. */
  async function openAndImport(
    page: import('@playwright/test').Page,
    view: 'outline' | 'detail',
    mdPath: string
  ) {
    await page.goto(`/project/${PROJECT_ID}/${view}`)
    await page.waitForSelector('button:has-text("导入")', { timeout: 10_000 })
    await uploadMarkdown(page, mdPath)
  }

  test('import unified markdown on outline page — preserves outline + description', async ({ page }) => {
    const mdPath = writeTempFile('test-unified.md', UNIFIED_MD)

    await openAndImport(page, 'outline', mdPath)

    await expect(page.locator('text=导入成功').first()).toBeVisible({ timeout: 5_000 })
    expect(addPageCalls).toHaveLength(2)

    // Page 1: outline + description + part
    expect(addPageCalls[0].outline_content.title).toBe('AI简介')
    expect(addPageCalls[0].outline_content.points).toContain('什么是人工智能')
    expect(addPageCalls[0].outline_content.points).toContain('AI的历史')
    expect(addPageCalls[0].part).toBe('引言')
    expect(addPageCalls[0].description_content).toEqual({ text: '这是关于AI简介的描述内容。' })

    // Page 2
    expect(addPageCalls[1].outline_content.title).toBe('AI应用')
    expect(addPageCalls[1].outline_content.points).toContain('医疗领域')
    expect(addPageCalls[1].part).toBe('正文')
    expect(addPageCalls[1].description_content).toEqual({ text: '这是关于AI应用的描述内容。' })
  })

  test('import unified markdown on detail page — same result', async ({ page }) => {
    const mdPath = writeTempFile('test-unified-detail.md', UNIFIED_MD)

    await openAndImport(page, 'detail', mdPath)

    await expect(page.locator('text=导入成功').first()).toBeVisible({ timeout: 5_000 })
    expect(addPageCalls).toHaveLength(2)
    expect(addPageCalls[0].outline_content.title).toBe('AI简介')
    expect(addPageCalls[0].description_content).toEqual({ text: '这是关于AI简介的描述内容。' })
  })

  test('import legacy format still works', async ({ page }) => {
    const mdPath = writeTempFile('test-legacy.md', LEGACY_MD)

    await openAndImport(page, 'outline', mdPath)

    await expect(page.locator('text=导入成功').first()).toBeVisible({ timeout: 5_000 })
    expect(addPageCalls).toHaveLength(1)
    expect(addPageCalls[0].outline_content.title).toBe('旧格式页面')
    expect(addPageCalls[0].outline_content.points).toContain('要点一')
    expect(addPageCalls[0].part).toBe('测试')
  })

  test('import empty markdown shows error toast', async ({ page }) => {
    const mdPath = writeTempFile('test-empty.md', EMPTY_MD)

    await openAndImport(page, 'outline', mdPath)

    await expect(page.locator('text=文件中未找到有效页面').first()).toBeVisible({ timeout: 5_000 })
    expect(addPageCalls).toHaveLength(0)
  })

  test('export→import round-trip preserves data', async ({ page }) => {
    // Pre-populate project with pages that have outline + description
    const existingPages = [
      {
        id: 'p1', page_id: 'p1', order_index: 0, part: '第一章',
        outline_content: { title: '导论', points: ['背景介绍', '研究目的'] },
        description_content: { text: '这是导论页面的详细描述。' },
        status: 'DESCRIPTION_GENERATED',
      },
      {
        id: 'p2', page_id: 'p2', order_index: 1, part: null,
        outline_content: { title: '方法论', points: ['实验设计'] },
        description_content: { text: '方法论的描述内容。' },
        status: 'DESCRIPTION_GENERATED',
      },
    ]

    // Override project route to return pages
    await page.route(`**/api/projects/${PROJECT_ID}`, r => r.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(mockProject(existingPages)) }))

    // Use detail page "导出大纲+描述" for full export
    await page.goto(`/project/${PROJECT_ID}/detail`)
    await page.waitForSelector('button:has-text("导出大纲+描述")', { timeout: 10_000 })

    const [download] = await Promise.all([
      page.waitForEvent('download'),
      page.click('button:has-text("导出大纲+描述")'),
    ])
    const downloadPath = await download.path()
    // Guard explicitly instead of asserting non-null: a missing file should fail with a clear message.
    if (!downloadPath) throw new Error('export download produced no local file')
    const exportedContent = fs.readFileSync(downloadPath, 'utf-8')

    // Verify exported content has both markers
    expect(exportedContent).toContain('**大纲要点:**')
    expect(exportedContent).toContain('**页面描述:**')
    expect(exportedContent).toContain('导论')
    expect(exportedContent).toContain('背景介绍')
    expect(exportedContent).toContain('这是导论页面的详细描述。')

    // Import the exported file back
    addPageCalls = []
    const reimportPath = writeTempFile('roundtrip.md', exportedContent)
    await uploadMarkdown(page, reimportPath)

    await expect(page.locator('text=导入成功').first()).toBeVisible({ timeout: 5_000 })

    // Verify round-trip fidelity
    expect(addPageCalls).toHaveLength(2)
    expect(addPageCalls[0].outline_content.title).toBe('导论')
    expect(addPageCalls[0].outline_content.points).toEqual(['背景介绍', '研究目的'])
    expect(addPageCalls[0].part).toBe('第一章')
    expect(addPageCalls[0].description_content).toEqual({ text: '这是导论页面的详细描述。' })
    expect(addPageCalls[1].outline_content.title).toBe('方法论')
    expect(addPageCalls[1].outline_content.points).toEqual(['实验设计'])
    expect(addPageCalls[1].description_content).toEqual({ text: '方法论的描述内容。' })
    expect(addPageCalls[1].part).toBeUndefined()
  })
})
================================================
FILE: frontend/e2e/lazyllm-global-vendor.spec.ts
================================================
/**
 * E2E tests for lazyllm global vendor fix.
 *
 * Bug: selecting a lazyllm vendor (e.g., "doubao") as global provider converted
 * it to "lazyllm" on save, losing vendor info. The backend then defaulted to
 * hardcoded 'deepseek' for text source, causing API key lookup failures.
 *
 * Fix: vendor name is now stored directly in ai_provider_format (e.g., "doubao").
 */
import { test, expect } from '@playwright/test'

/** Fields that differ between the mocked settings payloads. */
interface SettingsOverrides {
  ai_provider_format?: string
  lazyllm_api_keys_info?: Record<string, number>
}

/**
 * Builds the mocked GET /api/settings response.
 * Overridden keys keep their position in the object, so the serialized payload has the
 * same field order regardless of which fields a test overrides.
 */
function mockSettingsResponse(overrides: SettingsOverrides = {}) {
  return {
    success: true,
    message: 'Success',
    data: {
      id: 1,
      ai_provider_format: 'gemini',
      api_base_url: '',
      api_key_length: 0,
      text_model: '',
      image_model: '',
      image_caption_model: '',
      image_resolution: '2K',
      image_aspect_ratio: '16:9',
      max_description_workers: 5,
      max_image_workers: 8,
      output_language: 'zh',
      enable_text_reasoning: false,
      text_thinking_budget: 1024,
      enable_image_reasoning: false,
      image_thinking_budget: 1024,
      mineru_api_base: '',
      mineru_token_length: 0,
      baidu_api_key_length: 0,
      text_model_source: '',
      text_api_key_length: 0,
      text_api_base_url: null,
      image_model_source: '',
      image_api_key_length: 0,
      image_api_base_url: null,
      image_caption_model_source: '',
      image_caption_api_key_length: 0,
      image_caption_api_base_url: null,
      lazyllm_api_keys_info: {} as Record<string, number>,
      ...overrides,
    },
  }
}

/**
 * On an already-open settings page: picks `vendor` in the first select, types `apiKey` into
 * the first password input, saves, and waits for the success toast.
 */
async function selectVendorAndSave(
  page: import('@playwright/test').Page,
  vendor: string,
  apiKey: string
) {
  await page.locator('select').first().selectOption(vendor)
  // Vendor key input appears for lazyllm vendors
  await page.locator('input[type="password"]').first().fill(apiKey)
  await page.getByRole('button', { name: /保存|Save/ }).click()
  await expect(page.locator('text=保存成功').or(page.locator('text=saved'))).toBeVisible({ timeout: 5000 })
}

// ─── Mock tests ────────────────────────────────────────────────────

test.describe('Global lazyllm vendor — mock tests', () => {
  test.setTimeout(30_000)

  test('save sends vendor name directly, not "lazyllm"', async ({ page }) => {
    // Mock GET settings
    const mockSettings = mockSettingsResponse({ ai_provider_format: 'gemini' })

    // `any` on purpose: the value is only assigned inside the route callback.
    let capturedPayload: any = null

    await page.route('**/api/settings', async route => {
      const method = route.request().method()
      if (method === 'GET') {
        await route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify(mockSettings),
        })
      } else if (method === 'PUT') {
        capturedPayload = route.request().postDataJSON()
        await route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify({
            success: true,
            data: { ...mockSettings.data, ai_provider_format: 'doubao' },
          }),
        })
      } else {
        // A route handler must always resolve the request; leaving other methods
        // unanswered would make them hang until the test times out.
        await route.fallback()
      }
    })

    await page.goto('/settings')

    // Select "doubao" as global provider, fill its API key, and save
    await selectVendorAndSave(page, 'doubao', 'test-doubao-key-123')

    // Key assertion: payload should send "doubao", NOT "lazyllm"
    expect(capturedPayload).not.toBeNull()
    expect(capturedPayload.ai_provider_format).toBe('doubao')
  })

  test('loading vendor name from backend displays correct dropdown value', async ({ page }) => {
    const mockSettings = mockSettingsResponse({
      ai_provider_format: 'qwen',
      lazyllm_api_keys_info: { qwen: 15 },
    })

    await page.route('**/api/settings', route =>
      route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(mockSettings) })
    )

    await page.goto('/settings')

    // Global provider dropdown should show "qwen"
    const globalSelect = page.locator('select').first()
    await expect(globalSelect).toHaveValue('qwen')

    // Vendor key input should be visible (not Gemini/OpenAI base URL fields)
    await expect(page.locator('text=API Base URL').first()).toBeHidden()
  })

  test('backward compat: "lazyllm" format resolves to first configured vendor', async ({ page }) => {
    // Old data with generic "lazyllm" format
    const mockSettings = mockSettingsResponse({
      ai_provider_format: 'lazyllm',
      lazyllm_api_keys_info: { doubao: 20 },
    })

    await page.route('**/api/settings', route =>
      route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(mockSettings) })
    )

    await page.goto('/settings')

    // resolveLazyllmVendor should resolve "lazyllm" to "doubao" (first configured vendor)
    const globalSelect = page.locator('select').first()
    await expect(globalSelect).toHaveValue('doubao')
  })
})

// ─── Integration tests ─────────────────────────────────────────────

test.describe('Global lazyllm vendor — integration tests', () => {
  test.describe.configure({ mode: 'serial' })
  test.setTimeout(30_000)

  test('save doubao as global provider, reload shows doubao', async ({ page }) => {
    await page.goto('/settings')

    // Select doubao as global provider, fill a test doubao API key, and save
    await selectVendorAndSave(page, 'doubao', 'test-doubao-integration-key')

    // Reload page
    await page.goto('/settings')

    // Should still show doubao (not fall back to generic lazyllm / deepseek)
    await expect(page.locator('select').first()).toHaveValue('doubao')
  })

  test('save qwen as global provider, verify backend stores vendor name', async ({ page }) => {
    await page.goto('/settings')

    // Select qwen, fill qwen API key, and save
    await selectVendorAndSave(page, 'qwen', 'test-qwen-key')

    // Verify via API that backend stored "qwen", not "lazyllm"
    const response = await page.request.get('/api/settings')
    const data = await response.json()
    expect(data.data.ai_provider_format).toBe('qwen')
  })

  test('reset after vendor save restores default format', async ({ page }) => {
    await page.goto('/settings')

    // First save doubao
    await selectVendorAndSave(page, 'doubao', 'test-key')

    // Reset
    await page.getByRole('button', { name: /重置|Reset/ }).click()
    await page.getByRole('button', { name: /确定重置|Confirm/ }).click()
    await expect(page.locator('text=设置已重置').or(page.locator('text=reset successfully'))).toBeVisible({ timeout: 5000 })

    // After reset, format should revert to .env default (typically gemini)
    const response = await
page.request.get('/api/settings')
    const data = await response.json()
    // Format should no longer be "doubao"
    expect(data.data.ai_provider_format).not.toBe('doubao')
  })
})
================================================
FILE: frontend/e2e/lazyllm-image-content-type.spec.ts
================================================
/**
 * E2E tests for LazyLLM image content-type fallback.
 *
 * Mock test: verifies the frontend handles image generation errors gracefully
 * and that the generate-images API endpoint is called correctly.
 *
 * Integration test: verifies the generate-images endpoint returns a proper
 * response (success or known error) without crashing.
 */
import { test, expect } from '@playwright/test'
import { seedProjectWithImages } from './helpers/seed-project'

const BASE = process.env.BASE_URL ?? 'http://localhost:3000'

/**
 * Waits up to `timeout` ms for `locator` to become visible.
 *
 * `locator.isVisible()` returns immediately and ignores its `timeout` option, so it cannot
 * be used to wait for an element that renders after navigation.
 *
 * @returns true when the element became visible in time, false otherwise
 */
async function becomesVisible(
  locator: import('@playwright/test').Locator,
  timeout: number
): Promise<boolean> {
  try {
    await locator.waitFor({ state: 'visible', timeout })
    return true
  } catch {
    return false
  }
}

// ---------------------------------------------------------------------------
// Mock test — frontend behaviour when image generation fails
// ---------------------------------------------------------------------------

test.describe('Image generation error handling (mock)', () => {
  test('shows error state when generate-images returns 503', async ({ page }) => {
    const projectId = 'mock-img-err-proj'

    // Mock project fetch
    await page.route(`**/api/projects/${projectId}`, async (route) => {
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: {
            project_id: projectId,
            idea_prompt: 'test',
            pages: [{ page_id: 'p1', title: 'Page 1', description_content: 'desc', image_url: null }],
          },
        }),
      })
    })

    // Mock generate-images to return error
    // NOTE(review): image-queued-status.spec.ts posts to `/generate/images` and other specs
    // navigate to `/project/:id/detail`; confirm `/generate-images` and `/detail/:id` are the
    // paths this app actually serves.
    await page.route(`**/api/projects/${projectId}/generate-images`, async (route) => {
      await route.fulfill({
        status: 503,
        contentType: 'application/json',
        body: JSON.stringify({ error: { message: 'LazyLLM content-type error' } }),
      })
    })

    await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true'))
    await page.goto(`${BASE}/detail/${projectId}`)

    // Trigger image generation
    const genBtn = page.locator('button').filter({ hasText: /生成图片|Generate Images/i }).first()
    // The rest of the test only runs when the button renders within 5 s; otherwise it
    // passes without exercising anything.
    if (await becomesVisible(genBtn, 5000)) {
      await genBtn.click()

      // Should show some error feedback (toast / alert). Its appearance is not asserted:
      // the wait only gives the UI time to react before the final check.
      const errorFeedback = page
        .locator('[class*="error"], [class*="toast"], [role="alert"]')
        .first()
      await becomesVisible(errorFeedback, 10000)

      // Either error shown or page still functional (no crash)
      expect(page.url()).toContain('/detail/')
    }
  })
})

// ---------------------------------------------------------------------------
// Integration test — generate-images endpoint smoke test
// ---------------------------------------------------------------------------

test.describe('Image generation endpoint (integration)', () => {
  test('generate-images endpoint responds without server crash', async ({ page }) => {
    // Seed a project with real images so we have a valid project_id
    const { projectId } = await seedProjectWithImages(BASE, 1)

    // Navigate first so relative URLs resolve through Vite proxy
    await page.goto(BASE)

    // Call generate-images via browser fetch (goes through Vite proxy)
    const resp = await page.evaluate(async (id) => {
      const r = await fetch(`/api/projects/${id}/generate-images`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ page_ids: [] }),
      })
      return { status: r.status, ok: r.ok }
    }, projectId)

    // Endpoint should return 2xx (task queued) or 4xx (validation), never 5xx crash
    expect(resp.status).toBeLessThan(500)
  })
})
================================================
FILE: frontend/e2e/markdown-card-style.spec.ts
================================================
import { test, expect } from '@playwright/test';

const BASE = process.env.BASE_URL || 'http://localhost:3000';
const PROJECT_ID = 'mock-style-test';

// 1x1 transparent PNG as data URL (always loads successfully)
const TINY_PNG =
'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';

/** Build one mock page record carrying both outline and description content. */
function makePage(id: string, index: number, title: string, description: string) {
  return {
    id,
    page_id: id,
    title,
    sort_order: index,
    order_index: index,
    status: 'COMPLETED',
    outline_content: { title, points: [`Point for ${title}`] },
    description_content: { text: description },
    generated_image_path: null,
  };
}

/**
 * Register the API mocks the detail page needs: the project itself (GET only,
 * other methods pass through) and an empty project file list.
 */
function setupMocks(page: import('@playwright/test').Page, pages: ReturnType<typeof makePage>[]) {
  return Promise.all([
    page.route(`**/api/projects/${PROJECT_ID}`, async (route) => {
      if (route.request().method() !== 'GET') {
        await route.continue();
        return;
      }
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: {
            project_id: PROJECT_ID,
            id: PROJECT_ID,
            status: 'DESCRIPTIONS_GENERATED',
            creation_type: 'idea',
            pages,
          },
        }),
      });
    }),
    page.route('**/api/projects/*/files*', async (route) => {
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({ success: true, data: [] }),
      });
    }),
  ]);
}

test.describe('Markdown image size and DescriptionCard max height', () => {
  test('markdown images should have constrained size classes', async ({ page }) => {
    const pages = [
      makePage('p1', 0, 'Cover', `Some text\n\n![test image](${TINY_PNG})\n\nMore text`),
    ];
    await setupMocks(page, pages);

    await page.goto(`${BASE}/project/${PROJECT_ID}/detail`);
    await expect(page.locator('text=第 1 页')).toBeVisible({ timeout: 10000 });

    const img = page.locator('.markdown-content img').first();
    await expect(img).toBeVisible({ timeout: 10000 });

    // Verify the image has constrained size classes
    await expect(img).toHaveClass(/max-w-48/);
    await expect(img).toHaveClass(/max-h-36/);
  });

  test('description card content area should have max height with scroll', async ({ page }) => {
    const longText = Array(50).fill('This is a long line of description text for testing overflow.').join('\n\n');
    const pages = [
      makePage('p1', 0, 'Cover', longText),
    ];
    await setupMocks(page, pages);

    await page.goto(`${BASE}/project/${PROJECT_ID}/detail`);
    await expect(page.locator('text=第 1 页')).toBeVisible({ timeout: 10000 });

    const contentArea = page.getByTestId('description-card-content');
    await expect(contentArea).toHaveClass(/max-h-96/);
    await expect(contentArea).toHaveClass(/overflow-y-auto/);

    // Verify actual computed max-height (max-h-96 = 24rem = 384px)
    const maxHeight = await contentArea.evaluate((el) => getComputedStyle(el).maxHeight);
    expect(maxHeight).toBe('384px');
  });
});

================================================
FILE: frontend/e2e/material-aspect-ratio.spec.ts
================================================
import { test, expect, type Page } from '@playwright/test';
import { ASPECT_RATIO_OPTIONS } from '../src/config/aspectRatio';

/**
 * E2E tests for material generation aspect ratio selector.
 * Tests both UI rendering (mock) and API payload (mock).
 */
test.describe('Material generation aspect ratio selector', () => {
  test.beforeEach(async ({ page }) => {
    // Disable access code guard
    await page.route('**/api/access-code/check', (route) =>
      route.fulfill({ json: { data: { enabled: false } } })
    );
    // Mark help modal as already seen to prevent it from blocking interactions
    await page.addInitScript(() => {
      localStorage.setItem('hasSeenHelpModal', 'true');
    });
    await page.goto('/');
    await page.waitForLoadState('networkidle');
  });

  /** Open the material generator dialog from the home page and wait until it is visible. */
  async function openMaterialGeneratorModal(page: Page) {
    // Use dispatchEvent to reliably trigger the click on the 素材生成 button
    // (regular click may be blocked by overlay elements)
    const materialBtn = page.locator('button', { hasText: /素材生成/ }).first();
    await expect(materialBtn).toBeAttached({ timeout: 5000 });
    await materialBtn.dispatchEvent('click');

    // Wait for the MaterialGeneratorModal dialog to appear (identified by its title)
    await expect(page.getByRole('dialog', { name: /素材生成|Generate Material/ })).toBeVisible({ timeout: 5000 });
  }
test('should render aspect ratio selector with all options in material generator modal', async ({ page }) => {
    await openMaterialGeneratorModal(page);
    const modal = page.getByRole('dialog', { name: /素材生成|Generate Material/ });

    // The label for the ratio picker must be shown
    await expect(modal.getByText(/生成比例|Aspect Ratio/)).toBeVisible();

    // Every ratio declared in the shared config must have a visible button in the dialog
    for (const option of ASPECT_RATIO_OPTIONS) {
      const ratioButton = modal.locator('button', { hasText: option.value });
      await expect(ratioButton).toBeVisible();
    }
  });

  test('should default to 16:9 and allow changing aspect ratio selection', async ({ page }) => {
    await openMaterialGeneratorModal(page);
    const modal = page.getByRole('dialog', { name: /素材生成|Generate Material/ });

    const wideButton = modal.locator('button', { hasText: '16:9' }).first();
    const classicButton = modal.locator('button', { hasText: '4:3' }).first();

    // 16:9 carries the "selected" border by default
    await expect(wideButton).toHaveClass(/border-banana-500/);

    // Switch the selection to 4:3
    await classicButton.click();

    // The selected border moves from 16:9 to 4:3
    await expect(classicButton).toHaveClass(/border-banana-500/);
    await expect(wideButton).not.toHaveClass(/border-banana-500/);
  });

  test('should send selected aspect_ratio in material generation API request', async ({ page }) => {
    let capturedAspectRatio: string | null = null;
    let requestIntercepted = false;

    // Intercept the material generation call (global, projectId=none)
    await page.route('**/api/projects/none/materials/generate', async (route) => {
      const formBody = route.request().postData() || '';

      // Multipart form: pull the aspect_ratio field value out of the raw body
      const found = /name="aspect_ratio"\r\n\r\n([^\r\n]*)/.exec(formBody);
      if (found) {
        capturedAspectRatio = found[1].trim();
      }
      requestIntercepted = true;

      await route.fulfill({
        status: 202,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: { task_id: 'mock-task-id', status: 'PENDING' },
        }),
      });
    });

    // Mock task status poll
    await page.route('**/api/projects/global/tasks/mock-task-id', async (route) => {
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: {
            id: 'mock-task-id',
            status: 'COMPLETED',
            progress: { image_url: '/files/materials/test.png', total: 1, completed: 1, failed: 0 },
          },
        }),
      });
    });

    await openMaterialGeneratorModal(page);
    const modal = page.getByRole('dialog', { name: /素材生成|Generate Material/ });

    // Pick the 1:1 ratio and provide a prompt
    await modal.locator('button', { hasText: '1:1' }).first().click();
    await modal.locator('textarea').first().fill('test material prompt');

    // Start listening for the response before the click that triggers the request
    const generateResponse = page.waitForResponse('**/api/projects/none/materials/generate');
    await modal.locator('button', { hasText: /生成素材|Generate Material/ }).first().click();
    const response = await generateResponse;

    expect(response.status()).toBe(202);
    expect(requestIntercepted).toBe(true);
    expect(capturedAspectRatio).toBe('1:1');
  });
});

================================================
FILE: frontend/e2e/outline-autosave-blur.spec.ts
================================================
import { test, expect } from '@playwright/test'

const PROJECT_ID = 'mock-autosave-project'

// Minimal project fixture: a single draft page with a generated outline
const mockProject = {
  project_id: PROJECT_ID,
  status: 'OUTLINE_GENERATED',
  idea_prompt: 'Original idea text',
  creation_type: 'idea',
  pages: [
    {
      page_id: 'page-1',
      order_index: 0,
      outline_content: { title: 'Page One', points: ['Point A'] },
      status: 'DRAFT',
    },
  ],
  created_at: '2025-01-01T00:00:00',
  updated_at: '2025-01-01T00:00:00',
}

// Mock test: verify blur triggers save API call
test.describe('Outline auto-save on blur (mock)', () => {
  test('saves input text when textarea loses focus', async ({ page }) => {
    let savePayload: { idea_prompt?: string } | null = null

    await page.route(`**/api/projects/${PROJECT_ID}`, async (route) => {
      if (route.request().method() === 'PUT') {
        // Remember what the editor tried to save so the test can inspect it
        savePayload = route.request().postDataJSON()
        await
route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify({ success: true, data: { ...mockProject, idea_prompt: savePayload?.idea_prompt } }),
        })
      } else {
        await route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify({ success: true, data: mockProject }),
        })
      }
    })

    await page.goto(`/project/${PROJECT_ID}/outline`)
    await page.waitForLoadState('networkidle')

    // Find the contenteditable editor in the left panel (desktop)
    const editor = page.locator('[contenteditable="true"]').first()
    await expect(editor).toBeVisible()

    // Type new text
    await editor.click()
    await editor.pressSequentially(' updated content')

    // Blur by clicking outside
    await page.locator('header').first().click()

    // Wait for the save API call
    await expect.poll(() => savePayload, { timeout: 5000 }).not.toBeNull()
    expect(savePayload).toHaveProperty('idea_prompt')
    // The payload is assigned inside the route callback, which control-flow
    // analysis cannot see; widen it back to its declared type before reading it.
    const saved = savePayload as { idea_prompt?: string } | null
    expect(saved?.idea_prompt).toContain('updated content')
  })

  test('does not save when content is unchanged', async ({ page }) => {
    let putCalled = false

    await page.route(`**/api/projects/${PROJECT_ID}`, async (route) => {
      if (route.request().method() === 'PUT') {
        putCalled = true
        await route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify({ success: true, data: mockProject }),
        })
      } else {
        await route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify({ success: true, data: mockProject }),
        })
      }
    })

    await page.goto(`/project/${PROJECT_ID}/outline`)
    await page.waitForLoadState('networkidle')

    // Click editor then blur without changing content
    const editor = page.locator('[contenteditable="true"]').first()
    await editor.click()
    await page.locator('header').first().click()

    // Verify no save is triggered after blur.
    // expect.poll() succeeds on the first matching sample and putCalled starts
    // out false, so polling would pass before the blur handler had any chance
    // to run. Give a (wrongly) triggered save time to arrive, then assert once.
    await page.waitForTimeout(2000)
    expect(putCalled).toBe(false)
  })
})

// Integration test: verify data persists after blur
test.describe('Outline auto-save on blur (integration)', () => {
  let projectId: string

  test.beforeEach(async ({ request }) => {
    const res = await request.post('/api/projects', {
      data: { idea_prompt: 'Integration test idea', creation_type: 'idea' },
    })
    const body = await res.json()
    projectId = body.data.project_id
  })

  test('persists edited text after blur and page reload', async ({ page }) => {
    await page.goto(`/project/${projectId}/outline`)
    await page.waitForLoadState('networkidle')

    const editor = page.locator('[contenteditable="true"]').first()
    await expect(editor).toBeVisible()

    // Edit the text
    await editor.click()
    await page.keyboard.press('End')
    await editor.pressSequentially(' - auto saved')

    // Blur to trigger save and wait for the PUT request to complete
    const savePromise = page.waitForResponse(
      (resp) => resp.url().includes(`/api/projects/${projectId}`) && resp.request().method() === 'PUT'
    )
    await page.locator('header').first().click()
    await savePromise

    // Reload and verify text persisted
    await page.reload()
    await page.waitForLoadState('networkidle')
    const editorAfter = page.locator('[contenteditable="true"]').first()
    await expect(editorAfter).toContainText('auto saved', { timeout: 5000 })
  })
})

================================================
FILE: frontend/e2e/outline-null-crash.spec.ts
================================================
import { test, expect } from '@playwright/test'

const PROJECT_ID = 'mock-null-outline'

// Two pages: one with a normal outline, one whose outline_content is null
const mockProject = {
  project_id: PROJECT_ID,
  status: 'OUTLINE_GENERATED',
  idea_prompt: 'Test project',
  pages: [
    {
      page_id: 'page-1',
      order_index: 0,
      outline_content: { title: 'Normal Page', points: ['Point A', 'Point B'] },
      status: 'DRAFT',
    },
    {
      page_id: 'page-2',
      order_index: 1,
      outline_content: null,
      status: 'DRAFT',
    },
  ],
  created_at: '2025-01-01T00:00:00',
  updated_at: '2025-01-01T00:00:00',
}

test.describe('OutlineCard null outline_content', () => {
  test('renders without crash when outline_content is null', async ({ page }) => {
    await page.route('**/api/projects/' + PROJECT_ID, async (route) => {
      await route.fulfill({
        status: 200,
        contentType:
'application/json',
        body: JSON.stringify({ success: true, data: mockProject }),
      })
    })

    // Open the outline editor for the mocked project
    await page.goto(`/project/${PROJECT_ID}/outline`)

    // The page with a regular outline shows its title
    await expect(page.getByText('Normal Page')).toBeVisible()

    // The page whose outline is null must still render — its page-number label proves it
    await expect(page.getByText(/Page 2|第 2 页/)).toBeVisible()
  })
})

================================================
FILE: frontend/e2e/parsing-preview-toast.spec.ts
================================================
/**
 * E2E test: Clicking a parsing attachment shows toast instead of preview modal
 */
import { test, expect } from '@playwright/test'

test.use({ baseURL: process.env.BASE_URL || 'http://localhost:3000' })

const PROJECT_ID = 'mock-proj-parse'
const FILE_PARSING = 'file-parsing-001'
const FILE_COMPLETED = 'file-completed-002'

/** Settings payload returned by the mocked settings endpoint. */
function mockSettings() {
  return { success: true, data: { ai_provider_format: 'gemini', google_api_key: 'fake' } }
}

/** Project payload with a single outlined page. */
function mockProject() {
  return {
    success: true,
    data: {
      id: PROJECT_ID,
      project_id: PROJECT_ID,
      title: 'Test',
      status: 'OUTLINE_GENERATED',
      creation_type: 'idea',
      pages: [
        { id: 'p1', page_id: 'p1', title: 'Page 1', order_index: 0, outline_content: { title: 'Page 1', points: ['p'] } },
      ],
    },
  }
}

/** Reference-file list: one file still parsing, one already completed. */
function mockFiles() {
  return {
    success: true,
    data: {
      files: [
        { id: FILE_PARSING, filename: 'parsing-doc.pdf', file_size: 1000, file_type: 'application/pdf', parse_status: 'parsing' },
        { id: FILE_COMPLETED, filename: 'done-doc.pdf', file_size: 2000, file_type: 'application/pdf', parse_status: 'completed' },
      ],
    },
  }
}

/** Wrap a payload into the 200/JSON fulfill options shared by every mock in this file. */
function okJson(payload: unknown) {
  return { status: 200, contentType: 'application/json', body: JSON.stringify(payload) }
}

test.describe('Parsing attachment preview toast (mocked)', () => {
  test.setTimeout(60_000)

  test.beforeEach(async ({ page }) => {
    await page.route('**/api/settings', (route) => route.fulfill(okJson(mockSettings())))
    await page.route(`**/api/projects/${PROJECT_ID}`, (route) => route.fulfill(okJson(mockProject())))
    await page.route(`**/api/projects/${PROJECT_ID}/pages`, (route) =>
      route.fulfill(okJson({ success: true, data: { pages: [] } }))
    )
    await page.route(`**/api/reference-files/project/${PROJECT_ID}`, (route) => route.fulfill(okJson(mockFiles())))
    await page.route(`**/api/reference-files/${FILE_COMPLETED}`, (route) =>
      route.fulfill(
        okJson({
          success: true,
          data: {
            file: {
              id: FILE_COMPLETED,
              filename: 'done-doc.pdf',
              file_size: 2000,
              file_type: 'application/pdf',
              parse_status: 'completed',
              markdown_content: '# Done',
            },
          },
        })
      )
    )
  })

  test('clicking parsing file shows toast, not preview modal', async ({ page }) => {
    // Flipped if the UI ever requests the detail of the still-parsing file
    let parsingFileFetched = false
    await page.route(`**/api/reference-files/${FILE_PARSING}`, async (route) => {
      parsingFileFetched = true
      await route.fulfill(
        okJson({
          success: true,
          data: {
            file: {
              id: FILE_PARSING,
              filename: 'parsing-doc.pdf',
              file_size: 1000,
              file_type: 'application/pdf',
              parse_status: 'parsing',
              markdown_content: null,
            },
          },
        })
      )
    })

    await page.goto(`/project/${PROJECT_ID}/outline`)

    const parsingCard = page.locator('text=parsing-doc.pdf').first()
    await parsingCard.waitFor({ state: 'visible', timeout: 10_000 })
    await parsingCard.click()

    const hint = page.locator('text=解析完成后可预览').or(page.locator('text=Preview available after parsing'))
    await expect(hint.first()).toBeVisible({ timeout: 3_000 })
    expect(parsingFileFetched).toBe(false)
  })

  test('clicking completed file still opens preview modal', async ({ page }) => {
    await page.goto(`/project/${PROJECT_ID}/outline`)

    const completedCard = page.locator('text=done-doc.pdf').first()
    await completedCard.waitFor({ state: 'visible', timeout: 10_000 })
    await completedCard.click()

    const previewDialog = page.locator('[role="dialog"]').last()
    await expect(previewDialog).toBeVisible({ timeout: 5_000 })
    await expect(previewDialog.locator('.prose h1')).toBeVisible({ timeout: 3_000 })
  })
})

================================================
FILE: frontend/e2e/pdf-export-metadata.spec.ts
================================================
/**
 * E2E tests for PDF export with author metadata
 */
import { test, expect } from '@playwright/test'
import { seedProjectWithImages } from './helpers/seed-project'

test.describe('PDF Export - Backend API', () => {
  test('exports PDF with author metadata', async ({ request, baseURL }) => {
    const { projectId } = await seedProjectWithImages(baseURL!, 2)

    const exportResp = await request.get(`/api/projects/${projectId}/export/pdf`)
    expect(exportResp.ok()).toBe(true)

    const exportBody = await exportResp.json()
    const data = exportBody.data
    expect(data.download_url).toContain('.pdf')

    // The exported file must be downloadable and served as a PDF
    const fileResp = await request.get(data.download_url)
    expect(fileResp.ok()).toBe(true)
    expect(fileResp.headers()['content-type']).toContain('application/pdf')

    // The PDF must have real content (non-trivial size)
    const pdfBuffer = await fileResp.body()
    expect(pdfBuffer.length).toBeGreaterThan(1000)

    // The metadata is checked by looking for "banana-slides" in the raw PDF text
    const pdfContent = pdfBuffer.toString('utf-8')
    expect(pdfContent).toContain('banana-slides')
  })
})

================================================
FILE: frontend/e2e/per-model-startup-creds.spec.ts
================================================
/**
 * Integration E2E test for issue #284:
 * Per-model API credentials must be loaded into app.config on backend startup.
 *
 * Strategy: save per-model settings → restart backend → verify startup logs
 * contain the loaded credentials, proving _load_settings_to_config() works.
*/
import { test, expect } from '@playwright/test'
import { execSync } from 'child_process'
import path from 'path'
import { fileURLToPath } from 'url'

const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

const backendPort = (() => {
  const m = (process.env.BASE_URL ?? '').match(/:(\d+)$/)
  // backend base=5000, frontend base=3000, same offset → backend = frontend + 2000
  return m ? Number(m[1]) + 2000 : 5000
})()
const BACKEND_URL = `http://localhost:${backendPort}`
const PROJECT_ROOT = path.resolve(__dirname, '..', '..')
const LOG_FILE = path.join('/tmp', `startup-creds-backend-${process.pid}.log`)

/**
 * Kill the backend listening on `backendPort`, start a fresh one whose output
 * goes to LOG_FILE, and block until `/api/settings` answers.
 *
 * @throws Error when the backend does not answer within the retry budget.
 */
function restartBackend() {
  // Kill existing backend. Only the LISTENing process is targeted: a bare
  // `lsof -i:PORT` also reports clients that hold a connection to the port
  // (e.g. a dev-server proxy), and those must survive the restart.
  try {
    execSync(`lsof -t -iTCP:${backendPort} -sTCP:LISTEN | xargs kill -9 2>/dev/null`, { timeout: 5000 })
  } catch { /* may already be dead */ }
  execSync('sleep 1')

  // Truncate (or create) the log so we only see fresh startup output.
  // A plain shell redirect avoids depending on the `truncate` binary.
  execSync(`: > "${LOG_FILE}"`)

  // Start backend fresh; paths are quoted so a checkout path with spaces works
  execSync(
    `cd "${PROJECT_ROOT}/backend" && nohup uv run python app.py >> "${LOG_FILE}" 2>&1 &`,
    { timeout: 10000 },
  )

  // Wait for backend to be ready
  for (let i = 0; i < 20; i++) {
    try {
      execSync(`curl -sf --noproxy localhost ${BACKEND_URL}/api/settings`, { timeout: 3000 })
      return
    } catch {
      execSync('sleep 1')
    }
  }
  throw new Error('Backend did not start within 20s')
}

// Clean up after all tests: reset settings and remove temp log
test.afterAll(async ({ browser }) => {
  const page = await browser.newPage()
  await page.goto('/settings')
  await page.getByRole('button', { name: /重置/ }).click()
  await page.getByRole('button', { name: /确定重置/ }).click()
  await expect(page.locator('text=已重置').or(page.locator('text=reset'))).toBeVisible({ timeout: 5000 })
  await page.close()
  try { execSync(`rm -f "${LOG_FILE}"`) } catch { /* ignore */ }
})

test.describe('Per-model API credentials loaded on startup (#284)', () => {
  // The second test relies on the restart performed by the first one
  test.describe.configure({ mode: 'serial' })
  test.setTimeout(60_000)

  test('saved per-model credentials appear in startup logs after restart', async ({ request }) => {
    // 1. Save per-model settings via API (through Vite proxy)
    const payload = {
      text_model_source: 'openai',
      text_api_base_url: 'https://startup-test.example.com/v1',
      text_api_key: 'sk-startup-test-key-284',
    }
    const saveRes = await request.put('/api/settings', { data: payload })
    expect(saveRes.ok()).toBeTruthy()

    // 2. Restart backend
    restartBackend()

    // 3. Read startup logs and verify per-model credentials were loaded
    const logs = execSync(`cat "${LOG_FILE}"`).toString()
    expect(logs).toContain('Loaded TEXT_API_BASE from settings: https://startup-test.example.com/v1')
    expect(logs).toContain('Loaded TEXT_API_KEY from settings')
    expect(logs).toContain('Loaded TEXT_MODEL_SOURCE from settings: openai')
  })

  test('settings page shows correct values after backend restart', async ({ page }) => {
    // Navigate to settings — backend was restarted in previous test
    await page.goto('/settings')

    // Find the text model group (first one with a select)
    const textGroup = page.locator('.space-y-4 > div').filter({ has: page.locator('select') }).nth(0)

    // Verify provider is still openai
    await expect(textGroup.locator('select')).toHaveValue('openai')

    // Verify API Base URL persisted
    const baseUrlInput = textGroup.locator('input[type="text"]').nth(1)
    await expect(baseUrlInput).toHaveValue('https://startup-test.example.com/v1')

    // Verify API Key shows placeholder indicating it's set
    const apiKeyInput = textGroup.locator('input[type="password"]')
    const placeholder = await apiKeyInput.getAttribute('placeholder')
    expect(placeholder).toMatch(/长度|length/i)
  })
})

================================================
FILE: frontend/e2e/preset-capsules.spec.ts
================================================
import { test, expect } from '@playwright/test'

const PROJECT_ID = 'mock-preset-project'

// Project fixture factory; `overrides` is spread last so a PUT body can replace any field
const mockProject = (overrides: Record<string, unknown> = {}) => ({
  project_id: PROJECT_ID,
  status: 'OUTLINE_GENERATED',
  idea_prompt: 'Test idea',
  creation_type: 'idea',
  outline_requirements: '',
description_requirements: '',
  pages: [
    {
      page_id: 'page-1',
      order_index: 0,
      outline_content: { title: 'Page One', points: ['Point A'] },
      description_content: { text: 'Page one description', generated_at: '2025-01-01' },
      status: 'DESCRIPTION_GENERATED',
    },
  ],
  created_at: '2025-01-01T00:00:00',
  updated_at: '2025-01-01T00:00:00',
  ...overrides,
})

/**
 * Mock the project endpoint: a PUT echoes the fixture merged with the request
 * body, any other method returns the plain fixture.
 */
async function setupProjectMock(page: import('@playwright/test').Page) {
  await page.route(`**/api/projects/${PROJECT_ID}`, async (route) => {
    const isUpdate = route.request().method() === 'PUT'
    const project = isUpdate ? mockProject(route.request().postDataJSON()) : mockProject()
    await route.fulfill({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify({ success: true, data: project }),
    })
  })
}

// ── Mock tests: Outline presets ──────────────────────────────────────

test.describe('Preset capsules - OutlineEditor (mock)', () => {
  test.beforeEach(async ({ page }) => {
    // Start from a clean preset store with the requirements panel expanded
    await page.addInitScript(() => {
      localStorage.removeItem('presetCapsules_outline')
      localStorage.setItem('outlineReqOpen', 'true')
    })
    await setupProjectMock(page)
  })

  test('displays preset area with add button', async ({ page }) => {
    await page.goto(`/project/${PROJECT_ID}/outline`)
    await page.waitForLoadState('networkidle')

    await expect(page.locator('[data-testid="outline-presets"]')).toBeVisible()
    await expect(page.locator('[data-testid="outline-add-preset"]')).toBeVisible()
  })

  test('can add custom preset', async ({ page }) => {
    await page.goto(`/project/${PROJECT_ID}/outline`)
    await page.waitForLoadState('networkidle')

    // Fill in the add-preset form and confirm
    await page.locator('[data-testid="outline-add-preset"]').click()
    await page.locator('[data-testid="outline-preset-name-input"]').fill('我的预设')
    await page.locator('[data-testid="outline-preset-content-input"]').fill('自定义提示词内容')
    await page.locator('[data-testid="outline-preset-confirm"]').click()

    // The new capsule appears with its name and a delete control
    const createdPreset = page.locator('[data-testid="outline-user-preset-0"]')
    await expect(createdPreset).toBeVisible()
    await expect(createdPreset).toContainText('我的预设')
    await expect(page.locator('[data-testid="outline-delete-preset-0"]')).toBeVisible()
  })

  test('clicking custom preset appends content', async ({ page }) => {
    await page.addInitScript(() => {
      const seeded = [{ name: '测试预设', content: '测试提示词' }]
      localStorage.setItem('presetCapsules_outline', JSON.stringify(seeded))
    })
    await page.goto(`/project/${PROJECT_ID}/outline`)
    await page.waitForLoadState('networkidle')

    const requirements = page.locator('[data-testid="outline-requirements-textarea"]')
    const seededPreset = page.locator('[data-testid="outline-user-preset-0"]')
    await expect(seededPreset).toBeVisible()

    await seededPreset.locator('button').first().click()
    await expect(requirements).toHaveValue('测试提示词')
  })

  test('can delete custom preset', async ({ page }) => {
    await page.addInitScript(() => {
      const seeded = [{ name: '待删除', content: '内容' }]
      localStorage.setItem('presetCapsules_outline', JSON.stringify(seeded))
    })
    await page.goto(`/project/${PROJECT_ID}/outline`)
    await page.waitForLoadState('networkidle')

    const seededPreset = page.locator('[data-testid="outline-user-preset-0"]')
    await expect(seededPreset).toBeVisible()

    await page.locator('[data-testid="outline-delete-preset-0"]').click()
    await expect(seededPreset).not.toBeVisible()
  })

  test('can cancel adding preset with Escape', async ({ page }) => {
    await page.goto(`/project/${PROJECT_ID}/outline`)
    await page.waitForLoadState('networkidle')

    const addButton = page.locator('[data-testid="outline-add-preset"]')
    const nameInput = page.locator('[data-testid="outline-preset-name-input"]')

    await addButton.click()
    await expect(nameInput).toBeVisible()

    // Escape closes the form and brings the add button back
    await nameInput.press('Escape')
    await expect(nameInput).not.toBeVisible()
    await expect(addButton).toBeVisible()
  })

  test('add button is disabled when fields are empty', async ({ page }) => {
    await page.goto(`/project/${PROJECT_ID}/outline`)
    await page.waitForLoadState('networkidle')

    await page.locator('[data-testid="outline-add-preset"]').click()
    const confirm = page.locator('[data-testid="outline-preset-confirm"]')

    // Nothing filled in → disabled
    await expect(confirm).toBeDisabled()

    // Name only → still disabled
    await page.locator('[data-testid="outline-preset-name-input"]').fill('名称')
    await expect(confirm).toBeDisabled()

    // Name and content → enabled
    await page.locator('[data-testid="outline-preset-content-input"]').fill('内容')
    await expect(confirm).toBeEnabled()
  })
})

// ── Mock tests: Description presets ──────────────────────────────────

test.describe('Preset capsules - DetailEditor (mock)', () => {
  test.beforeEach(async ({ page }) => {
    // Start from a clean preset store with the requirements panel expanded
    await page.addInitScript(() => {
      localStorage.removeItem('presetCapsules_description')
      localStorage.setItem('descReqOpen', 'true')
    })
    await setupProjectMock(page)
  })

  test('displays preset area with add button', async ({ page }) => {
    await page.goto(`/project/${PROJECT_ID}/detail`)
    await page.waitForLoadState('networkidle')

    await expect(page.locator('[data-testid="description-presets"]')).toBeVisible()
    await expect(page.locator('[data-testid="description-add-preset"]')).toBeVisible()
  })

  test('description custom presets are independent from outline presets', async ({ page }) => {
    await page.addInitScript(() => {
      const outlineSeed = [{ name: '大纲预设', content: '大纲内容' }]
      const descriptionSeed = [{ name: '描述预设', content: '描述内容' }]
      localStorage.setItem('presetCapsules_outline', JSON.stringify(outlineSeed))
      localStorage.setItem('presetCapsules_description', JSON.stringify(descriptionSeed))
    })
    await page.goto(`/project/${PROJECT_ID}/detail`)
    await page.waitForLoadState('networkidle')

    // Only the description preset may show up on the detail page
    const descriptionPreset = page.locator('[data-testid="description-user-preset-0"]')
    await expect(descriptionPreset).toBeVisible()
    await expect(descriptionPreset).toContainText('描述预设')
    await expect(page.locator('text=大纲预设')).not.toBeVisible()
  })
})

// ── Integration tests ────────────────────────────────────────────────
test.describe('Preset capsules (integration)', () => {
  let projectId: string

  test.beforeEach(async ({ request, page }) => {
    // Create a real project, then reset the preset-related localStorage keys
    const res = await request.post('/api/projects', {
      data: { idea_prompt: 'Preset integration test', creation_type: 'idea' },
    })
    const body = await res.json()
    projectId = body.data.project_id

    await page.goto('/')
    await page.evaluate(() => {
      localStorage.removeItem('presetCapsules_outline')
      localStorage.removeItem('presetCapsules_description')
      localStorage.setItem('outlineReqOpen', 'true')
    })
  })

  test('custom preset click appends to textarea and auto-saves', async ({ page }) => {
    // Seed a preset
    await page.evaluate(() => {
      localStorage.setItem('presetCapsules_outline', JSON.stringify([
        { name: '集成预设', content: '集成测试内容' }
      ]))
    })

    await page.goto(`/project/${projectId}/outline`)
    await page.waitForLoadState('networkidle')

    const textarea = page.locator('[data-testid="outline-requirements-textarea"]')
    await expect(textarea).toBeVisible()

    // Start listening for the auto-save BEFORE the click that triggers it;
    // registering the listener afterwards races with the debounced PUT and can
    // miss the response entirely.
    const savePromise = page.waitForResponse(
      (resp) => resp.url().includes(`/api/projects/${projectId}`) && resp.request().method() === 'PUT'
    )

    // Click user preset
    await page.locator('[data-testid="outline-user-preset-0"]').locator('button').first().click()
    await expect(textarea).toHaveValue('集成测试内容')

    // Wait for debounced auto-save
    await savePromise

    // Reload and verify persisted
    await page.reload()
    await page.waitForLoadState('networkidle')
    const textareaAfter = page.locator('[data-testid="outline-requirements-textarea"]')
    await expect(textareaAfter).toBeVisible()
    await expect(textareaAfter).toHaveValue('集成测试内容')
  })

  test('custom presets persist in localStorage across page navigations', async ({ page }) => {
    await page.goto(`/project/${projectId}/outline`)
    await page.waitForLoadState('networkidle')

    await page.locator('[data-testid="outline-add-preset"]').click()
    await page.locator('[data-testid="outline-preset-name-input"]').fill('集成测试预设')
    await page.locator('[data-testid="outline-preset-content-input"]').fill('集成测试内容')
    await page.locator('[data-testid="outline-preset-confirm"]').click()
    await expect(page.locator('[data-testid="outline-user-preset-0"]')).toBeVisible()

    // Navigate away and back
    await page.goto(`/project/${projectId}/outline`)
    await page.waitForLoadState('networkidle')

    const userPreset = page.locator('[data-testid="outline-user-preset-0"]')
    await expect(userPreset).toBeVisible()
    await expect(userPreset).toContainText('集成测试预设')
  })
})

================================================
FILE: frontend/e2e/preview-text-style-template.spec.ts
================================================
/**
 * E2E tests for text style mode in SlidePreview template modal.
 *
 * Mock test: verify toggle, TextStyleSelector rendering, preset click, apply button.
 * Integration test: verify style is persisted after apply and survives page reload.
 */
import { test, expect } from '@playwright/test'
import { seedProjectWithImages } from './helpers/seed-project'

const BASE_URL = process.env.BASE_URL || 'http://localhost:3000'
// Backend port = frontend port + 2000; the matched text includes the leading ':'
const BACKEND_URL = BASE_URL.replace(/:\d+$/, (m) => `:${Number.parseInt(m.slice(1), 10) + 2000}`)

/** Set up all mocks needed for SlidePreview to render */
async function setupMocks(page: import('@playwright/test').Page) {
  // AccessCodeGuard: bypass
  await page.route('**/api/access-code/check', async (route) => {
    await route.fulfill({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify({ success: true, data: { enabled: false } }),
    })
  })

  // Project data
  await page.route('**/api/projects/*', async (route) => {
    if (route.request().method() === 'GET') {
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: {
            id: 'mock-proj',
            status: 'IMAGES_GENERATED',
            template_style: '',
            pages: [{ id: 'p1', order_index: 0, status: 'COMPLETED', outline_content: { title: 'Slide 1' }, generated_image_path: 'mock.jpg' }],
          },
        }),
      })
    } else {
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({ success:
true }) })
    }
  })

  // No user templates
  await page.route('**/api/user-templates', async (route) => {
    const emptyTemplates = { success: true, data: { templates: [] } }
    await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(emptyTemplates) })
  })

  // Empty settings
  await page.route('**/api/settings', async (route) => {
    const emptySettings = { success: true, data: {} }
    await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(emptySettings) })
  })

  // Image versions
  await page.route('**/image-versions', async (route) => {
    const noVersions = { success: true, data: { versions: [] } }
    await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(noVersions) })
  })

  // Image files
  await page.route('**/files/**', async (route) => {
    await route.fulfill({ status: 200, contentType: 'image/jpeg', body: Buffer.from([]) })
  })
}

test.describe('Preview text style template - Mock tests', () => {
  // Suppress the first-visit help modal so it cannot cover the preview UI.
  test.beforeEach(async ({ page }) => {
    await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true'))
  })

  test('toggle switches between image template and text style mode', async ({ page }) => {
    await setupMocks(page)
    await page.goto(`${BASE_URL}/project/mock-proj/preview`)

    // Open template modal
    await page.getByText(/更换模板|Change Template/).click()

    const toggleLabel = page.getByText(/使用文字描述风格|Use text description for style/)
    const presetHeading = page.getByText(/快速选择预设风格|Quick select preset styles/)

    // Initially should show toggle label but NOT TextStyleSelector content
    await expect(toggleLabel).toBeVisible()
    await expect(presetHeading).not.toBeVisible()

    // Toggle to text style mode (click label text — the actual input is sr-only/off-screen)
    await toggleLabel.click()

    // Now TextStyleSelector should be visible
    await expect(presetHeading).toBeVisible()

    // Apply button should appear
    await expect(page.getByText(/应用风格|Apply Style/)).toBeVisible()
  })

  test('clicking preset style fills textarea', async ({ page }) => {
    await setupMocks(page)
    await page.goto(`${BASE_URL}/project/mock-proj/preview`)
    await
page.getByText(/更换模板|Change Template/).click()

    // Toggle to text style mode
    await page.getByText(/使用文字描述风格|Use text description for style/).click()

    // Click first preset style button ("简约商务" / "Business Simple")
    await page.getByText(/简约商务|Business Simple/).click()

    // Textarea should now contain the preset description
    await expect(page.locator('textarea')).not.toHaveValue('')
  })

  test('closing modal without apply discards preset change', async ({ page }) => {
    await setupMocks(page)
    await page.goto(`${BASE_URL}/project/mock-proj/preview`)
    await page.getByText(/更换模板|Change Template/).click()

    // Toggle to text style, click a preset
    await page.getByText(/使用文字描述风格|Use text description for style/).click()
    await page.getByText(/简约商务|Business Simple/).click()
    const styleDraft = page.locator('textarea')
    await expect(styleDraft).not.toHaveValue('')

    // Close modal without clicking Apply
    await page.getByText(/关闭|Close/).click()
    await expect(page.getByText(/快速选择预设风格|Quick select preset styles/)).not.toBeVisible()

    // Reopen — toggle is still on, textarea should be empty (draft discarded)
    await page.getByRole('button', { name: /更换模板|Change Template/ }).click()
    await expect(page.locator('textarea')).toHaveValue('')
  })
})

test.describe('Preview text style template - Integration tests', () => {
  let projectId: string

  // One seeded project (with a single image page) is shared by the whole suite.
  test.beforeAll(async () => {
    const { projectId: seededId } = await seedProjectWithImages(BACKEND_URL, 1)
    projectId = seededId
  })

  test.beforeEach(async ({ page }) => {
    await page.addInitScript(() => localStorage.setItem('hasSeenHelpModal', 'true'))
  })

  test('apply text style persists and survives reload', async ({ page }) => {
    await page.goto(`${BASE_URL}/project/${projectId}/preview`)
    await page.waitForLoadState('networkidle')

    // Open template modal
    await page.getByText(/更换模板|Change Template/).click()

    // Toggle to text style mode
    await page.getByText(/使用文字描述风格|Use text description for style/).click()

    // Type a custom style
    const textarea = page.locator('textarea')
    await textarea.fill('E2E test custom style
description')

    // Click apply
    await page.getByText(/应用风格|Apply Style/).click()

    // Modal should close
    await expect(page.getByText(/快速选择预设风格|Quick select preset styles/)).not.toBeVisible()

    // Reload and verify persistence
    await page.reload()
    await page.waitForLoadState('networkidle')

    // Reopen template modal and toggle to text style to verify saved value
    await page.getByText(/更换模板|Change Template/).click()
    await page.getByText(/使用文字描述风格|Use text description for style/).click()
    await expect(page.locator('textarea')).toHaveValue('E2E test custom style description')
  })
})

================================================
FILE: frontend/e2e/renovation-aspect-ratio.spec.ts
================================================
/**
 * PPT Renovation Aspect Ratio - Integration E2E Test
 *
 * Verifies that PPT renovation projects preserve the original PDF's
 * aspect ratio instead of always defaulting to 16:9.
 */
import { test, expect } from '@playwright/test'
import * as fs from 'fs'
import * as path from 'path'
import { fileURLToPath } from 'url'

const BASE = process.env.BASE_URL || 'http://localhost:3000'
// Backend is addressed on localhost at the frontend port + 2000.
const API = `http://localhost:${Number(new URL(BASE).port) + 2000}`
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

test.describe.serial('Renovation aspect ratio', () => {
  test.setTimeout(60_000)

  // Every project created by a test is recorded here and deleted in afterAll.
  const createdProjects: string[] = []

  test.afterAll(async ({ request }) => {
    for (const id of createdProjects) {
      try {
        await request.delete(`${API}/api/projects/${id}`)
      } catch {
        /* best effort */
      }
    }
  })

  test('4:3 PDF gets 4:3 aspect ratio on project', async ({ request }) => {
    const pdfBuffer = fs.readFileSync(path.join(__dirname, 'fixtures', 'test-4-3.pdf'))
    const upload = {
      name: 'test-4-3.pdf',
      mimeType: 'application/pdf',
      buffer: pdfBuffer,
    }
    const res = await request.post(`${API}/api/projects/renovation`, {
      multipart: { file: upload },
    })
    expect(res.ok()).toBeTruthy()

    const body = await res.json()
    const projectId = body.data.project_id
createdProjects.push(projectId)

    // Fetch the project to check its aspect ratio
    const projRes = await request.get(`${API}/api/projects/${projectId}`)
    expect(projRes.ok()).toBeTruthy()
    const projData = await projRes.json()
    expect(projData.data.image_aspect_ratio).toBe('4:3')
  })

  test('16:9 PDF gets 16:9 aspect ratio on project', async ({ request }) => {
    const widePdf = fs.readFileSync(path.join(__dirname, 'fixtures', 'test-16-9.pdf'))
    const uploadRes = await request.post(`${API}/api/projects/renovation`, {
      multipart: {
        file: {
          name: 'test-16-9.pdf',
          mimeType: 'application/pdf',
          buffer: widePdf,
        },
      },
    })
    expect(uploadRes.ok()).toBeTruthy()

    const created = await uploadRes.json()
    const newId = created.data.project_id
    createdProjects.push(newId)

    // Fetch the project to check its aspect ratio
    const detailRes = await request.get(`${API}/api/projects/${newId}`)
    expect(detailRes.ok()).toBeTruthy()
    const detail = await detailRes.json()
    expect(detail.data.image_aspect_ratio).toBe('16:9')
  })

  test('aspect ratio reflected in SlidePreview UI', async ({ page, request }) => {
    // Upload a 4:3 PDF
    const pdfBuffer = fs.readFileSync(path.join(__dirname, 'fixtures', 'test-4-3.pdf'))
    const res = await request.post(`${API}/api/projects/renovation`, {
      multipart: {
        file: {
          name: 'test-4-3.pdf',
          mimeType: 'application/pdf',
          buffer: pdfBuffer,
        },
      },
    })
    expect(res.ok()).toBeTruthy()
    const body = await res.json()
    const projectId = body.data.project_id
    createdProjects.push(projectId)

    // Navigate to SlidePreview
    await page.goto(`/project/${projectId}/preview`)
    await page.waitForLoadState('networkidle')

    // Open project settings
    await page.locator('button').filter({ hasText: /设置|Settings/ }).first().click()

    // The 4:3 button should be the active/selected one (has border-banana-500 class)
    const ratioButton = page.locator('button:has-text("4:3")').first()
    await expect(ratioButton).toBeVisible()
    await
expect(ratioButton).toHaveClass(/border-banana-500/)
  })
})

================================================
FILE: frontend/e2e/settings-api-clarity.spec.ts
================================================
import { test, expect } from '@playwright/test';

// Every test starts on a fully loaded Settings page.
test.beforeEach(async ({ page }) => {
  await page.goto('/settings');
  await page.waitForLoadState('networkidle');
});

test('default API config section shows provider dropdown instead of buttons', async ({ page }) => {
  await expect(page.getByText('默认 API 配置')).toBeVisible();

  // Should have a provider dropdown (select), not buttons
  const providerSelect = page
    .getByTestId('global-api-config-section')
    .locator('select')
    .first();
  await expect(providerSelect).toBeVisible();

  // Dropdown should contain same vendors as per-model
  const optionLabels = await providerSelect.locator('option').allTextContents();
  expect(optionLabels).toContain('Gemini');
  expect(optionLabels).toContain('OpenAI');
  expect(optionLabels).toContain('DeepSeek');
});

test('per-model provider placeholder references default config', async ({ page }) => {
  const defaultOption = page.locator('option', { hasText: '默认配置' }).first();
  await expect(defaultOption).toBeAttached();
});

================================================
FILE: frontend/e2e/settings-api-links.spec.ts
================================================
import { test, expect } from '@playwright/test';

test.describe('Settings page API key labels and links', () => {
  test.beforeEach(async ({ page }) => {
    await page.goto('/settings');
    await page.waitForLoadState('networkidle');
  });

  test('Baidu section title should not contain OCR', async ({ page }) => {
    const headings = page.locator('h2');
    // New title is shown, the legacy "OCR" title is gone.
    await expect(headings.filter({ hasText: /百度配置|Baidu Configuration/ })).toBeVisible();
    await expect(headings.filter({ hasText: /百度 OCR 配置|Baidu OCR Configuration/ })).not.toBeVisible();
  });

  test('Baidu API Key label should not contain OCR', async ({ page }) => {
    const baiduLabel =
page.locator('label').filter({ hasText: /百度 API Key|Baidu API Key/ });
    await expect(baiduLabel).toBeVisible();
    await expect(page.locator('label:has-text("百度 OCR API Key")')).not.toBeVisible();
  });

  test('MinerU Token field has application link', async ({ page }) => {
    const tokenLink = page.locator('a[href="https://mineru.net/apiManage/token"]');
    await expect(tokenLink).toBeVisible();
    await expect(tokenLink).toHaveAttribute('target', '_blank');
  });

  test('Baidu API Key field has application link', async ({ page }) => {
    const consoleLink = page.locator('a[href="https://console.bce.baidu.com/iam/#/iam/apikey/list"]');
    await expect(consoleLink).toBeVisible();
    await expect(consoleLink).toHaveAttribute('target', '_blank');
  });

  test('AIHubMix has apply link', async ({ page }) => {
    const applyLink = page.locator('a[href="https://aihubmix.com/token?aff=17EC"]');
    await expect(applyLink).toBeVisible();
    await expect(applyLink).toHaveAttribute('target', '_blank');
  });
});

================================================
FILE: frontend/e2e/settings-back-to-top.spec.ts
================================================
import { test, expect } from '@playwright/test'

// Mock test: verify UI logic with mocked backend
test.describe('Settings back-to-top button (mock)', () => {
  test('shows button on scroll and scrolls to top on click', async ({ page }) => {
    await page.route('**/api/settings', async (route) => {
      const mockedSettings = {
        success: true,
        data: {
          ai_provider_format: 'gemini',
          image_resolution: '2K',
          max_description_workers: 5,
          max_image_workers: 8,
          output_language: 'zh'
        }
      }
      await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(mockedSettings) })
    })
    await page.goto('/settings')
    await page.waitForLoadState('networkidle')

    // Hidden at the top of the page, shown once the page is scrolled.
    const btn = page.getByTestId('back-to-top-button')
    await expect(btn).not.toBeVisible()
    await page.evaluate(() => window.scrollTo(0, 500))
    await expect(btn).toBeVisible({ timeout: 3000 })

    await btn.click()
    await page.waitForFunction(() => window.scrollY < 50, null, {
timeout: 3000 })
    expect(await page.evaluate(() => window.scrollY)).toBeLessThan(50)
  })
})

// Integration test: verify with real backend settings data
test.describe('Settings back-to-top button (integration)', () => {
  test('works with real backend settings loaded', async ({ page }) => {
    await page.goto('/settings')
    await page.waitForLoadState('networkidle')

    const backToTop = page.getByTestId('back-to-top-button')
    await expect(backToTop).not.toBeVisible()

    // Scrolling down reveals the button.
    await page.evaluate(() => window.scrollTo(0, 500))
    await expect(backToTop).toBeVisible({ timeout: 3000 })

    // Clicking it brings the viewport back near the top.
    await backToTop.click()
    await page.waitForFunction(() => window.scrollY < 50, null, { timeout: 3000 })
    const finalScrollY = await page.evaluate(() => window.scrollY)
    expect(finalScrollY).toBeLessThan(50)
  })
})

================================================
FILE: frontend/e2e/settings-backfill.spec.ts
================================================
/**
 * E2E tests for Settings page env backfill behavior.
 *
 * Mock tests verify the frontend correctly renders backfilled values.
 * Integration tests verify the backend actually backfills None fields from Config.
*/ import { test, expect } from '@playwright/test' const BASE_URL = process.env.BASE_URL || 'http://localhost:3000' test.describe('Settings backfill - Mock tests', () => { test('should display env-backfilled values on first load', async ({ page }) => { // Mock GET /api/settings to return data as if backend backfilled from env await page.route('**/api/settings', async (route) => { if (route.request().method() === 'GET') { await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { id: 1, ai_provider_format: 'gemini', api_base_url: null, api_key_length: 39, image_resolution: '2K', image_aspect_ratio: '16:9', max_description_workers: 5, max_image_workers: 8, text_model: 'gemini-2.5-flash', image_model: 'gemini-2.0-flash-preview-image-generation', mineru_api_base: null, mineru_token_length: 0, image_caption_model: null, output_language: 'zh', enable_text_reasoning: false, text_thinking_budget: 1024, enable_image_reasoning: false, image_thinking_budget: 1024, baidu_api_key_length: 0, text_model_source: null, image_model_source: null, image_caption_model_source: null, lazyllm_api_keys_info: {}, }, }), }) } else { await route.continue() } }) await page.goto(`${BASE_URL}/settings`) await page.waitForLoadState('networkidle') // text_model should be populated from env const textModel = page.locator('input[value="gemini-2.5-flash"]') await expect(textModel).toBeVisible() // image_model should be populated from env const imageModel = page.locator('input[value="gemini-2.0-flash-preview-image-generation"]') await expect(imageModel).toBeVisible() // API key placeholder should show length > 0 (已设置(长度: 39)) const apiKeyInput = page.locator('input[type="password"]').first() const placeholder = await apiKeyInput.getAttribute('placeholder') expect(placeholder).toContain('39') }) test('should show length 0 when api_key is not configured', async ({ page }) => { await page.route('**/api/settings', async (route) => { if 
(route.request().method() === 'GET') {
        // Same shape as a real response, but with no API key and no models configured.
        const unconfigured = {
          success: true,
          data: {
            id: 1,
            ai_provider_format: 'gemini',
            api_base_url: null,
            api_key_length: 0,
            image_resolution: '2K',
            image_aspect_ratio: '16:9',
            max_description_workers: 5,
            max_image_workers: 8,
            text_model: '',
            image_model: '',
            mineru_api_base: null,
            mineru_token_length: 0,
            image_caption_model: null,
            output_language: 'zh',
            enable_text_reasoning: false,
            text_thinking_budget: 1024,
            enable_image_reasoning: false,
            image_thinking_budget: 1024,
            baidu_api_key_length: 0,
            text_model_source: null,
            image_model_source: null,
            image_caption_model_source: null,
            lazyllm_api_keys_info: {},
          },
        }
        await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(unconfigured) })
      } else {
        await route.continue()
      }
    })

    await page.goto(`${BASE_URL}/settings`)
    await page.waitForLoadState('networkidle')

    // API key placeholder should NOT contain a non-zero length.
    // After frontend fix: when length is 0, the default placeholder is shown
    // rather than the "已设置(长度: 0)" ("set, length 0") text.
    const keyPlaceholder = await page.locator('input[type="password"]').first().getAttribute('placeholder')
    expect(keyPlaceholder).not.toContain('已设置')
  })
})

test.describe('Settings backfill - Integration tests', () => {
  test('GET /api/settings should return backfilled env values', async ({ request }) => {
    // Reset then clear text_model — backend converts empty string to NULL in DB
    await request.post(`${BASE_URL}/api/settings/reset`)
    await request.put(`${BASE_URL}/api/settings`, {
      data: { text_model: '' },
    })

    // GET triggers backfill: NULL fields get re-populated from env Config
    const resp = await request.get(`${BASE_URL}/api/settings`)
    expect(resp.ok()).toBeTruthy()
    const data = (await resp.json()).data

    // text_model should be backfilled from env (non-empty if TEXT_MODEL is set)
    expect(data.text_model).not.toBe('')
    expect(data.text_model).not.toBeNull()
    expect(data).toHaveProperty('api_key_length')
    expect(typeof data.api_key_length).toBe('number')
expect(data).toHaveProperty('image_model')
  })

  test('Settings page should load and display values from backend', async ({ page }) => {
    // Reset settings to ensure env values are loaded
    await page.request.post(`${BASE_URL}/api/settings/reset`)
    await page.goto(`${BASE_URL}/settings`)
    await page.waitForLoadState('networkidle')

    // The page should load without errors; the save button being visible is
    // used as the indicator that the settings form rendered.
    await expect(page.getByRole('button', { name: /保存|Save/ })).toBeVisible()

    // Verify the reset button exists
    await expect(page.getByRole('button', { name: /重置|Reset/ })).toBeVisible()
  })
})

================================================
FILE: frontend/e2e/settings-env-fallback.spec.ts
================================================
/**
 * E2E tests for issue #289: _sync_settings_to_config should restore .env
 * defaults instead of popping config keys when DB fields are NULL.
 *
 * Mock test: verifies the settings save UI flow works when API key is not touched.
 * Integration test: verifies backend preserves config state after saving without api_key.
*/ import { test, expect } from '@playwright/test' const BASE_URL = process.env.BASE_URL || 'http://localhost:3000' test.describe('Settings env fallback - Mock tests', () => { test('saving settings without touching API key should succeed', async ({ page }) => { // Mock GET /api/settings — DB has NULL api_key (relies on .env) await page.route('**/api/settings', async (route) => { if (route.request().method() === 'GET') { await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { id: 1, ai_provider_format: 'gemini', api_base_url: null, api_key_length: 39, image_resolution: '2K', image_aspect_ratio: '16:9', max_description_workers: 5, max_image_workers: 8, text_model: 'gemini-2.5-flash', image_model: 'gemini-2.0-flash-preview-image-generation', mineru_api_base: null, mineru_token_length: 0, image_caption_model: null, output_language: 'zh', enable_text_reasoning: false, text_thinking_budget: 1024, enable_image_reasoning: false, image_thinking_budget: 1024, baidu_api_key_length: 0, text_model_source: null, image_model_source: null, image_caption_model_source: null, lazyllm_api_keys_info: {}, text_api_key_length: 0, text_api_base_url: null, image_api_key_length: 0, image_api_base_url: null, image_caption_api_key_length: 0, image_caption_api_base_url: null, }, }), }) } else { await route.continue() } }) // Mock PUT /api/settings — capture payload to verify api_key is NOT sent let putPayload: Record | null = null await page.route('**/api/settings', async (route) => { if (route.request().method() === 'PUT') { putPayload = route.request().postDataJSON() await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ success: true, data: { id: 1, image_resolution: '4K' }, }), }) } else { await route.fallback() } }) await page.goto(`${BASE_URL}/settings`) await page.waitForLoadState('networkidle') // Change image resolution from 2K to 4K (a non-key field) const resolutionSelect = 
page.locator('select').filter({ has: page.locator('option[value="2K"]') })
    await resolutionSelect.selectOption('4K')

    // Click save
    const saveButton = page.getByRole('button', { name: /保存|Save/i })
    await saveButton.click()

    // Verify success toast appears (text is "设置保存成功" or "Settings saved successfully")
    await expect(page.getByText(/设置保存成功|Settings saved successfully/)).toBeVisible({ timeout: 5000 })

    // Verify PUT payload does NOT include api_key (frontend only sends it when user types a new value)
    expect(putPayload).not.toBeNull()
    expect(putPayload).not.toHaveProperty('api_key')
  })
})

test.describe('Settings env fallback - Integration tests', () => {
  test('saving without api_key should not corrupt backend config', async ({ request }) => {
    // 1. Get initial settings state
    const getRes1 = await request.get(`${BASE_URL}/api/settings`)
    expect(getRes1.ok()).toBeTruthy()
    const initial = (await getRes1.json()).data
    const initialKeyLen = initial.api_key_length

    try {
      // 2. Save settings with only image_resolution (no api_key in payload)
      //    This triggers _sync_settings_to_config with settings.api_key = NULL
      const putRes = await request.put(`${BASE_URL}/api/settings`, {
        data: { image_resolution: '4K' },
      })
      expect(putRes.ok()).toBeTruthy()

      // 3. Save again with a different field to trigger _sync_settings_to_config a second time
      //    Before the fix, the second save would find config keys already popped
      const putRes2 = await request.put(`${BASE_URL}/api/settings`, {
        data: { image_resolution: '2K' },
      })
      expect(putRes2.ok()).toBeTruthy()

      // 4. Verify settings are still consistent — api_key_length should be unchanged
      //    (to_dict backfills from Config, so this confirms no crash; the real fix
      //    is that app.config keys are preserved for services like _create_file_parser)
      const getRes2 = await request.get(`${BASE_URL}/api/settings`)
      expect(getRes2.ok()).toBeTruthy()
      const after = (await getRes2.json()).data
      expect(after.api_key_length).toBe(initialKeyLen)
    } finally {
      // Restore original resolution even when an assertion above fails, so a
      // failing run does not leave the backend with a modified setting.
      await request.put(`${BASE_URL}/api/settings`, {
        data: { image_resolution: initial.image_resolution },
      })
    }
  })
})

================================================
FILE: frontend/e2e/settings-per-model-provider-integration.spec.ts
================================================
/**
 * Integration E2E test for per-model provider configuration.
 * Hits the REAL backend — verifies save persistence, reload, and reset.
 */
import { test, expect, Page } from '@playwright/test'

/** Helper: get the nth model config group (0=text, 1=image, 2=caption) */
function getModelGroup(page: Page, index: number) {
  return page.locator('.space-y-4 > div').filter({ has: page.locator('select') }).nth(index)
}

// Clean up after all tests: reset settings to defaults
test.afterAll(async ({ browser }) => {
  const page = await browser.newPage()
  try {
    await page.goto('/settings')
    await page.getByRole('button', { name: /重置/ }).click()
    // Wait for the actual reset request instead of sleeping for a fixed time;
    // the waiter is registered before the confirming click so it cannot be missed.
    const resetDone = page.waitForResponse((resp) => resp.url().includes('/api/settings/reset'))
    await page.getByRole('button', { name: /确定重置/ }).click()
    await resetDone
  } finally {
    // Always release the page, even if the reset flow failed.
    await page.close()
  }
})

test.describe('Settings: Per-model provider integration (real backend)', () => {
  // Tests depend on state written by the previous one, so they must run in order.
  test.describe.configure({ mode: 'serial' })
  test.setTimeout(30_000)

  test('save per-model provider config persists to backend', async ({ page }) => {
    await page.goto('/settings')
    const textGroup = getModelGroup(page, 0)
    const textSelect = textGroup.locator('select')

    // Switch text model provider to OpenAI
    await textSelect.selectOption('openai')

    // Fill API Base URL
    const baseUrlInput = textGroup.locator('input[type="text"]').nth(1)
    await
baseUrlInput.fill('https://integration-test.example.com/v1')

    // Fill API Key
    await textGroup.locator('input[type="password"]').fill('sk-integration-test-key')

    // Click save
    await page.getByRole('button', { name: /保存/ }).click()
    const savedToast = page.locator('text=保存成功').or(page.locator('text=saved'))
    await expect(savedToast).toBeVisible({ timeout: 5000 })
  })

  test('reload page shows persisted per-model config', async ({ page }) => {
    await page.goto('/settings')
    const group = getModelGroup(page, 0)

    // Verify provider selection persisted
    await expect(group.locator('select')).toHaveValue('openai')

    // Verify API Base URL persisted
    await expect(group.locator('input[type="text"]').nth(1)).toHaveValue('https://integration-test.example.com/v1')

    // Verify the API Key placeholder mentions a length (i.e. a key is stored)
    const keyPlaceholder = await group.locator('input[type="password"]').getAttribute('placeholder')
    expect(keyPlaceholder).toMatch(/长度|length/i)
  })

  test('reset clears per-model config from backend', async ({ page }) => {
    await page.goto('/settings')

    // Verify we start with per-model config
    const group = getModelGroup(page, 0)
    const providerSelect = group.locator('select')
    await expect(providerSelect).toHaveValue('openai')

    // Click reset
    await page.getByRole('button', { name: /重置/ }).click()
    await page.getByRole('button', { name: /确定重置/ }).click()

    // Wait for reset to complete
    const resetToast = page.locator('text=已重置').or(page.locator('text=reset'))
    await expect(resetToast).toBeVisible({ timeout: 5000 })

    // Verify provider reverted to env default
    await expect(group.locator('select')).not.toHaveValue('openai')

    // Verify API Base URL field is hidden (lazyllm vendor or empty = no base URL)
    await expect(group.locator('text=API Base URL')).toBeHidden()
  })
})

================================================
FILE: frontend/e2e/settings-per-model-provider.spec.ts
================================================
/**
 * E2E test for
per-model provider configuration in Settings page.
 * Tests: load with saved config, provider switching, save, reload persistence, reset.
 */
import { test, expect, Page } from '@playwright/test'

// Mock settings data with per-model provider config
const mockSettingsWithPerModel = {
  success: true,
  message: 'Success',
  data: {
    id: 1,
    ai_provider_format: 'gemini',
    api_base_url: 'https://aihubmix.com/gemini',
    api_key_length: 51,
    text_model: 'glm-4.5',
    image_model: 'imagen-3.0-generate-001',
    image_caption_model: 'gemini-3-flash-preview',
    image_resolution: '2K',
    image_aspect_ratio: '16:9',
    max_description_workers: 5,
    max_image_workers: 8,
    output_language: 'zh',
    enable_text_reasoning: false,
    text_thinking_budget: 1024,
    enable_image_reasoning: false,
    image_thinking_budget: 1024,
    mineru_api_base: '',
    mineru_token_length: 0,
    baidu_api_key_length: 0,
    // Per-model provider config
    text_model_source: 'openai',
    text_api_key_length: 26,
    text_api_base_url: 'https://test-openai.example.com/v1',
    image_model_source: 'gemini',
    image_api_key_length: 30,
    image_api_base_url: 'https://test-gemini.example.com',
    image_caption_model_source: 'doubao',
    image_caption_api_key_length: 0,
    image_caption_api_base_url: null,
    lazyllm_api_keys_info: {},
  },
}

// Default settings (after reset): same base data, per-model overrides cleared
const mockDefaultSettings = {
  success: true,
  message: 'Success',
  data: {
    ...mockSettingsWithPerModel.data,
    text_model_source: 'deepseek',
    text_api_key_length: 0,
    text_api_base_url: null,
    image_model_source: 'doubao',
    image_api_key_length: 0,
    image_api_base_url: null,
    image_caption_model_source: 'doubao',
    image_caption_api_key_length: 0,
    image_caption_api_base_url: null,
  },
}

/** Helper: get the nth model config group (0=text, 1=image, 2=caption) */
function getModelGroup(page: Page, index: number) {
  // A "model group" is any direct child div of .space-y-4 that contains a <select>.
  const groupsWithSelect = page.locator('.space-y-4 > div').filter({ has: page.locator('select') })
  return groupsWithSelect.nth(index)
}

test.describe('Settings: Per-model provider configuration', () => {
  test.setTimeout(30_000)

  test('loads saved per-model provider
config correctly', async ({ page }) => {
    await page.route('**/api/settings', (route) => {
      return route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify(mockSettingsWithPerModel),
      })
    })
    await page.goto('/settings')

    const selects = page.locator('select')

    // Text model: OpenAI selected → should show API Base URL + API Key fields
    await expect(selects.nth(0)).toHaveValue('openai')
    // nth(0) is the model name input, nth(1) is the base URL
    const textBaseUrl = getModelGroup(page, 0).locator('input[type="text"]').nth(1)
    await expect(textBaseUrl).toHaveValue('https://test-openai.example.com/v1')

    // Image model: Gemini selected → should show API Base URL + API Key fields
    await expect(selects.nth(1)).toHaveValue('gemini')
    const imageBaseUrl = getModelGroup(page, 1).locator('input[type="text"]').nth(1)
    await expect(imageBaseUrl).toHaveValue('https://test-gemini.example.com')

    // Image caption: Doubao (lazyllm vendor) → should show vendor API Key, NOT base URL
    await expect(selects.nth(2)).toHaveValue('doubao')

    // Doubao is lazyllm vendor → no API Base URL field, but has vendor API Key
    const captionGroup = getModelGroup(page, 2)
    await expect(captionGroup.locator('text=API Base URL')).toBeHidden()
    await expect(captionGroup.locator('text=API Key').first()).toBeVisible()
  })

  test('switching provider shows/hides conditional fields', async ({ page }) => {
    await page.route('**/api/settings', route =>
      route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(mockDefaultSettings) })
    )
    await page.goto('/settings')

    const textGroup = getModelGroup(page, 0)
    const textSelect = textGroup.locator('select')

    // Default: deepseek (lazyllm) → vendor API Key shown, no Base URL
    await expect(textSelect).toHaveValue('deepseek')
    await expect(textGroup.locator('text=API Base URL')).toBeHidden()

    // Switch to OpenAI → API Base URL + API Key
// appear
    await textSelect.selectOption('openai')
    await expect(textGroup.locator('text=API Base URL')).toBeVisible()
    await expect(textGroup.locator('input[type="password"]')).toBeVisible()

    // Switch to Gemini → still shows API Base URL + API Key
    await textSelect.selectOption('gemini')
    await expect(textGroup.locator('text=API Base URL')).toBeVisible()

    // Switch to default → no extra fields
    await textSelect.selectOption('')
    await expect(textGroup.locator('text=API Base URL')).toBeHidden()
    await expect(textGroup.locator('input[type="password"]')).toBeHidden()
  })

  test('save sends correct per-model payload', async ({ page }) => {
    // The PUT body is captured here and asserted in the test body. Asserting
    // inside the route handler would, on failure, leave the request unfulfilled
    // and surface as an unrelated "toast not visible" timeout.
    let savedPayload: unknown = null

    await page.route('**/api/settings', async route => {
      const method = route.request().method()
      if (method === 'GET') {
        await route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify(mockDefaultSettings),
        })
      } else if (method === 'PUT') {
        savedPayload = route.request().postDataJSON()
        await route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify({
            success: true,
            data: {
              ...mockDefaultSettings.data,
              text_model_source: 'openai',
              text_api_base_url: 'https://new-openai.example.com',
              text_api_key_length: 15
            },
          }),
        })
      } else {
        // Never leave a request pending: hand other methods to the next handler.
        await route.fallback()
      }
    })
    await page.goto('/settings')

    // Switch text model to OpenAI and fill credentials
    const textGroup = getModelGroup(page, 0)
    await textGroup.locator('select').selectOption('openai')
    await textGroup.locator('input[type="text"]').nth(1).fill('https://new-openai.example.com')
    await textGroup.locator('input[type="password"]').fill('sk-test-key-123')

    // Save
    await page.getByRole('button', { name: /保存/ }).click()

    // Verify success toast
    await expect(page.locator('text=保存成功').or(page.locator('text=saved'))).toBeVisible({ timeout: 5000 })

    // Verify per-model fields in payload
    expect(savedPayload).toMatchObject({
      text_model_source: 'openai',
      text_api_base_url: 'https://new-openai.example.com',
      text_api_key: 'sk-test-key-123',
    })
  })

  test('reload persists saved per-model config', async ({ page })
=> {
    // Flipped between navigations to simulate the backend returning updated data.
    let usePerModel = false
    await page.route('**/api/settings', async route => {
      const data = usePerModel ? mockSettingsWithPerModel : mockDefaultSettings
      // Await the fulfill so a failure is reported instead of becoming a floating rejection.
      await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(data) })
    })

    // First load — default config
    await page.goto('/settings')
    await expect(page.locator('select').nth(0)).toHaveValue('deepseek')

    // Simulate reload with updated data
    usePerModel = true
    await page.goto('/settings')
    await expect(page.locator('select').nth(0)).toHaveValue('openai')
    const textGroup = getModelGroup(page, 0)
    const textBaseUrl = textGroup.locator('input[type="text"]').nth(1)
    await expect(textBaseUrl).toHaveValue('https://test-openai.example.com/v1')
  })

  test('reset clears per-model config', async ({ page }) => {
    // Set by the reset mock; afterwards GET /api/settings serves the defaults.
    let isReset = false
    await page.route('**/api/settings', async route => {
      const data = isReset ? mockDefaultSettings : mockSettingsWithPerModel
      await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(data) })
    })
    await page.route('**/api/settings/reset', async route => {
      isReset = true
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify(mockDefaultSettings),
      })
    })
    await page.goto('/settings')

    // Verify initial state has per-model config
    await expect(page.locator('select').nth(0)).toHaveValue('openai')

    // Click reset
    await page.getByRole('button', { name: /重置/ }).click()
    // Confirm dialog
    await page.getByRole('button', { name: /确定重置/ }).click()

    // After reset: sources revert to env defaults, no API base URL fields
    await expect(page.locator('select').nth(0)).toHaveValue('deepseek')
    const textGroup = getModelGroup(page, 0)
    await expect(textGroup.locator('text=API Base URL')).toBeHidden()
  })
})

================================================
FILE: frontend/e2e/settings-read-only.spec.ts
================================================
import { test, expect } from '@playwright/test';
import { execSync } from 'child_process';
import path from 'path';
import {
fileURLToPath } from 'url';

const BASE = process.env.BASE_URL ?? 'http://localhost:5173';
// The DB location can be overridden with DB_PATH; the default is resolved relative to this spec file.
const DB_PATH = process.env.DB_PATH ?? path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../../backend/instance/database.db');

/**
 * Runs SQL against the backend SQLite file through the `sqlite3` CLI and
 * returns the CLI's trimmed stdout.
 *
 * The SQL text is passed on stdin instead of being spliced into the shell
 * command line. The tests below read a stored value with quote() and later
 * concatenate it back into an UPDATE; with the SQL inside a double-quoted
 * shell argument, any `"`, `$`, backtick or backslash in that value would be
 * interpreted by the shell. `-bail` asks the CLI to stop at the first error.
 *
 * NOTE(review): DB_PATH is still interpolated into the command string, as
 * before; it comes from the environment or a fixed relative path.
 *
 * @param sql - SQL text to execute.
 * @returns Trimmed stdout produced by the CLI.
 * @throws Whatever `execSync` throws when the command fails.
 */
function dbQuery(sql: string): string {
  return execSync(`sqlite3 -bail "${DB_PATH}"`, { input: sql }).toString().trim();
}

// ===== Integration Tests =====

// Every test mutates the row with id=1 of the same settings table, so they run one after another.
test.describe.configure({ mode: 'serial' });

test.describe('Settings .env fallback behavior', () => {
  test('GET /api/settings does not persist .env defaults to DB', async ({ request }) => {
    // quote() yields a SQL literal ('value' or NULL) that can be concatenated back into the restore statement.
    const original = dbQuery('SELECT quote(text_model) FROM settings WHERE id=1;');
    dbQuery('UPDATE settings SET text_model=NULL WHERE id=1;');

    try {
      const res = await request.get(`${BASE}/api/settings`);
      expect(res.ok()).toBeTruthy();
      const data = (await res.json()).data;

      // API returns .env default even though DB is NULL
      expect(data.text_model).toBeTruthy();

      // DB field is still NULL (no write side-effect)
      const dbVal = dbQuery('SELECT quote(text_model) FROM settings WHERE id=1;');
      expect(dbVal).toBe('NULL');
    } finally {
      dbQuery('UPDATE settings SET text_model=' + original + ' WHERE id=1;');
    }
  });

  test('PUT /api/settings persists value to DB', async ({ request }) => {
    const original = dbQuery('SELECT quote(text_model) FROM settings WHERE id=1;');

    try {
      const res = await request.put(`${BASE}/api/settings`, {
        data: { text_model: 'test-model-persist' },
      });
      expect(res.ok()).toBeTruthy();

      // Verify DB has the saved value
      const dbVal = dbQuery('SELECT text_model FROM settings WHERE id=1;');
      expect(dbVal).toBe('test-model-persist');
    } finally {
      dbQuery('UPDATE settings SET text_model=' + original + ' WHERE id=1;');
    }
  });

  test('POST /api/settings/reset clears fields to NULL', async ({ request }) => {
    const origModel = dbQuery('SELECT quote(text_model) FROM settings WHERE id=1;');
    const origRes = dbQuery('SELECT quote(image_resolution) FROM settings WHERE id=1;');
    // Ensure non-NULL values exist before reset
    dbQuery("UPDATE settings SET text_model='before-reset', image_resolution='4K' WHERE id=1;");

    try {
      const res = await request.post(`${BASE}/api/settings/reset`);
      expect(res.ok()).toBeTruthy();

      // Verify DB fields are NULL after reset
      const modelVal = dbQuery('SELECT quote(text_model) FROM settings WHERE id=1;');
      expect(modelVal).toBe('NULL');
      const resVal = dbQuery('SELECT quote(image_resolution) FROM settings WHERE id=1;');
      expect(resVal).toBe('NULL');

      // API still returns .env defaults (not NULL)
      const getRes = await request.get(`${BASE}/api/settings`);
      const data = (await getRes.json()).data;
      expect(data.image_resolution).toBeTruthy();
    } finally {
      dbQuery('UPDATE settings SET text_model=' + origModel + ', image_resolution=' + origRes + ' WHERE id=1;');
    }
  });

  test('NULL fields in DB fall back to .env on every GET', async ({ request }) => {
    const origLang = dbQuery('SELECT quote(output_language) FROM settings WHERE id=1;');
    const origFormat = dbQuery('SELECT quote(ai_provider_format) FROM settings WHERE id=1;');
    dbQuery('UPDATE settings SET output_language=NULL, ai_provider_format=NULL WHERE id=1;');

    try {
      const res = await request.get(`${BASE}/api/settings`);
      expect(res.ok()).toBeTruthy();
      const data = (await res.json()).data;

      // These should return .env defaults, not NULL
      expect(data.output_language).toBeTruthy();
      expect(data.ai_provider_format).toBeTruthy();
    } finally {
      dbQuery('UPDATE settings SET output_language=' + origLang + ', ai_provider_format=' + origFormat + ' WHERE id=1;');
    }
  });
});

================================================
FILE: frontend/e2e/settings-reset-fallback.spec.ts
================================================
/**
 * E2E tests for settings reset fallback behavior.
 *
 * Verifies that after saving custom model/format values then resetting,
 * both the API response (via to_dict) AND the internal app.config
 * correctly fall back to .env defaults.
*
 * This covers the regression where _sync_settings_to_config skipped
 * restoring text_model, image_model, and ai_provider_format to .env
 * defaults when DB fields were NULL after reset.
 */
import { test, expect } from '@playwright/test'

const BASE = process.env.BASE_URL ?? 'http://localhost:5173'

// The tests share server-side settings state, so they must not interleave.
test.describe.configure({ mode: 'serial' })

test.describe('Settings reset fallback - Integration tests', () => {
  // Capture .env defaults from initial state
  let envDefaults: {
    text_model: string
    image_model: string
    ai_provider_format: string
    output_language: string
  }

  test.beforeAll(async ({ request }) => {
    // Reset first to ensure clean state, then read .env defaults
    const resetRes = await request.post(`${BASE}/api/settings/reset`)
    expect(resetRes.ok()).toBeTruthy()

    const configRes = await request.get(`${BASE}/api/settings/active-config`)
    expect(configRes.ok()).toBeTruthy()
    envDefaults = (await configRes.json()).data
    expect(envDefaults.text_model).toBeTruthy()
    expect(envDefaults.image_model).toBeTruthy()
    expect(envDefaults.ai_provider_format).toBeTruthy()
  })

  test('reset after custom save restores app.config to .env defaults', async ({ request }) => {
    // 1. Save custom values
    const putRes = await request.put(`${BASE}/api/settings`, {
      data: {
        text_model: 'custom-test-model',
        image_model: 'custom-image-model',
        ai_provider_format: 'openai',
        output_language: 'en',
      },
    })
    expect(putRes.ok()).toBeTruthy()

    // 2. Verify app.config picked up the custom values
    const configAfterSave = await request.get(`${BASE}/api/settings/active-config`)
    expect(configAfterSave.ok()).toBeTruthy()
    const savedConfig = (await configAfterSave.json()).data
    expect(savedConfig.text_model).toBe('custom-test-model')
    expect(savedConfig.image_model).toBe('custom-image-model')
    expect(savedConfig.ai_provider_format).toBe('openai')
    expect(savedConfig.output_language).toBe('en')

    // 3. Reset settings
    const resetRes = await request.post(`${BASE}/api/settings/reset`)
    expect(resetRes.ok()).toBeTruthy()

    // 4. Verify app.config has .env defaults (not stale custom values)
    const configAfterReset = await request.get(`${BASE}/api/settings/active-config`)
    expect(configAfterReset.ok()).toBeTruthy()
    const resetConfig = (await configAfterReset.json()).data
    expect(resetConfig.text_model).toBe(envDefaults.text_model)
    expect(resetConfig.image_model).toBe(envDefaults.image_model)
    expect(resetConfig.ai_provider_format).toBe(envDefaults.ai_provider_format)
    expect(resetConfig.output_language).toBe(envDefaults.output_language)
  })

  test('save after reset still uses .env defaults in app.config', async ({ request }) => {
    // This tests the double-save scenario: reset → save unrelated field →
    // verify model fields in app.config are still .env defaults (not missing)

    // 1. Save custom text_model
    await request.put(`${BASE}/api/settings`, {
      data: { text_model: 'will-be-reset' },
    })

    // 2. Reset
    const resetRes = await request.post(`${BASE}/api/settings/reset`)
    expect(resetRes.ok()).toBeTruthy()

    // 3. Save an unrelated field (triggers _sync_settings_to_config with NULL text_model)
    const putRes = await request.put(`${BASE}/api/settings`, {
      data: { image_resolution: '4K' },
    })

    try {
      expect(putRes.ok()).toBeTruthy()

      // 4. app.config should still have .env defaults
      const configRes = await request.get(`${BASE}/api/settings/active-config`)
      expect(configRes.ok()).toBeTruthy()
      const config = (await configRes.json()).data
      expect(config.text_model).toBe(envDefaults.text_model)
      expect(config.image_model).toBe(envDefaults.image_model)
      expect(config.ai_provider_format).toBe(envDefaults.ai_provider_format)
    } finally {
      // Cleanup. Kept in `finally` so a failed assertion above cannot leave
      // the '4K' override behind for the next test in this serial suite.
      await request.put(`${BASE}/api/settings`, {
        data: { image_resolution: null },
      })
    }
  })

  test('API response and app.config agree after reset', async ({ request }) => {
    // Save custom values then reset — both to_dict() and app.config should return .env defaults
    await request.put(`${BASE}/api/settings`, {
      data: {
        text_model: 'mismatch-test-model',
        image_model: 'mismatch-test-image',
      },
    })
    await request.post(`${BASE}/api/settings/reset`)

    // Get both API response and active config
    const [settingsRes, configRes] = await Promise.all([
      request.get(`${BASE}/api/settings`),
      request.get(`${BASE}/api/settings/active-config`),
    ])
    expect(settingsRes.ok()).toBeTruthy()
    expect(configRes.ok()).toBeTruthy()

    const apiData = (await settingsRes.json()).data
    const configData = (await configRes.json()).data

    // API response and app.config must agree
    expect(apiData.text_model).toBe(configData.text_model)
    expect(apiData.image_model).toBe(configData.image_model)
    expect(apiData.ai_provider_format).toBe(configData.ai_provider_format)
    expect(apiData.output_language).toBe(configData.output_language)
  })
})

================================================
FILE: frontend/e2e/settings-test-vendor-format.spec.ts
================================================
import { test, expect } from '@playwright/test';

test.beforeEach(async ({ page }) => {
  // Mock settings API to return deepseek as default provider
  await page.route('**/api/settings', async (route) => {
    if (route.request().method() === 'GET') {
      await route.fulfill({
        json: {
          data: {
            ai_provider_format: 'lazyllm',
            lazyllm_api_keys_info: { deepseek: 10 },
api_base_url: '',
            api_key_length: 0,
            text_model: '',
            image_model: '',
            image_caption_model: '',
            text_model_source: '',
            image_model_source: '',
            image_caption_model_source: '',
            image_resolution: '2K',
            max_description_workers: 5,
            max_image_workers: 8,
            output_language: 'zh',
            mineru_api_base: '',
            mineru_token_length: 0,
            enable_text_reasoning: false,
            text_thinking_budget: 1024,
            enable_image_reasoning: false,
            image_thinking_budget: 1024,
            baidu_api_key_length: 0,
            text_api_key_length: 0,
            text_api_base_url: '',
            image_api_key_length: 0,
            image_api_base_url: '',
            image_caption_api_key_length: 0,
            image_caption_api_base_url: '',
          }
        }
      });
    } else {
      await route.continue();
    }
  });

  await page.goto('/settings');
  await page.waitForLoadState('networkidle');
});

// Both tests below click the second "Start Test" button and inspect the JSON
// body posted to the mocked text-model test endpoint. The payload is read from
// the request returned by page.waitForRequest(): click() can resolve before a
// route handler has run, so a variable assigned inside the handler is not
// guaranteed to be set on the line after the click.
test('service test sends lazyllm format instead of raw vendor name', async ({ page }) => {
  const section = page.getByTestId('global-api-config-section');
  const providerSelect = section.locator('select').first();
  await expect(providerSelect).toHaveValue('deepseek');

  // Answer the test request so the page never reaches a real backend.
  await page.route('**/api/settings/tests/text-model', async (route) => {
    await route.fulfill({ json: { data: { task_id: 'mock-task-123' } } });
  });

  const textModelTestBtn = page.locator('button', { hasText: /开始测试|Start Test/ }).nth(1);
  // Start waiting before the click so the request cannot be missed.
  const testRequest = page.waitForRequest('**/api/settings/tests/text-model');
  await textModelTestBtn.click();
  const capturedPayload = (await testRequest).postDataJSON();

  expect(capturedPayload).toBeTruthy();
  expect(capturedPayload.ai_provider_format).toBe('lazyllm');
});

test('service test sends empty model source to clear saved per-model override', async ({ page }) => {
  await page.route('**/api/settings/tests/text-model', async (route) => {
    await route.fulfill({ json: { data: { task_id: 'mock-task-123' } } });
  });

  const textModelTestBtn = page.locator('button', { hasText: /开始测试|Start Test/ }).nth(1);
  const testRequest = page.waitForRequest('**/api/settings/tests/text-model');
  await textModelTestBtn.click();
  const capturedPayload = (await testRequest).postDataJSON();

  expect(capturedPayload).toBeTruthy();
  // Per-model sources should always be sent (even empty) so backend clears saved overrides
  expect(capturedPayload).toHaveProperty('text_model_source', '');
  expect(capturedPayload).toHaveProperty('image_model_source', '');
  expect(capturedPayload).toHaveProperty('image_caption_model_source', '');
});

================================================
FILE: frontend/e2e/smart-merge.spec.ts
================================================
/**
 * Position-based Page Merge - Mock E2E Tests
 *
 * Verifies that regenerating/refining outline preserves descriptions and images
 * by page position, and that trailing pages are deleted when the new outline is shorter.
 */
import { test, expect } from '@playwright/test'

const BASE = process.env.BASE_URL || 'http://localhost:3000'
const PROJECT_ID = 'mock-merge-proj'

const INITIAL_PAGES = [
  {
    page_id: 'page-0',
    order_index: 0,
    part: null,
    outline_content: { title: 'Introduction', points: ['overview'] },
    description_content: { text: 'Intro description' },
    generated_image_url: '/files/mock/pages/img-0.jpg',
    status: 'IMAGE_GENERATED',
  },
  {
    page_id: 'page-1',
    order_index: 1,
    part: null,
    outline_content: { title: 'Details', points: ['detail1'] },
    description_content: { text: 'Details description' },
    generated_image_url: '/files/mock/pages/img-1.jpg',
    status: 'IMAGE_GENERATED',
  },
  {
    page_id: 'page-2',
    order_index: 2,
    part: null,
    outline_content: { title: 'Conclusion', points: ['summary'] },
    description_content: { text: 'Conclusion description' },
    generated_image_url: '/files/mock/pages/img-2.jpg',
    status: 'IMAGE_GENERATED',
  },
]

// After refine with fewer pages: positions 0,1 preserved, position 2 deleted
const REFINED_FEWER_PAGES = [
  {
    page_id: 'page-0',
    order_index: 0,
    part: null,
    outline_content: { title: 'New Intro Title', points: ['updated'] },
    description_content: { text: 'Intro description' },
    generated_image_url: '/files/mock/pages/img-0.jpg',
    status: 'IMAGE_GENERATED',
  },
  {
    page_id: 'page-1',
    order_index: 1,
    part: null,
    outline_content: { title: 'New Details Title', points:
['updated'] },
    description_content: { text: 'Details description' },
    generated_image_url: '/files/mock/pages/img-1.jpg',
    status: 'IMAGE_GENERATED',
  },
]

// After refine with more pages: positions 0,1,2 preserved, position 3 new
const REFINED_MORE_PAGES = [
  {
    page_id: 'page-0',
    order_index: 0,
    part: null,
    outline_content: { title: 'Intro Refined', points: ['new'] },
    description_content: { text: 'Intro description' },
    generated_image_url: '/files/mock/pages/img-0.jpg',
    status: 'IMAGE_GENERATED',
  },
  {
    page_id: 'page-1',
    order_index: 1,
    part: null,
    outline_content: { title: 'Details Refined', points: ['new'] },
    description_content: { text: 'Details description' },
    generated_image_url: '/files/mock/pages/img-1.jpg',
    status: 'IMAGE_GENERATED',
  },
  {
    page_id: 'page-2',
    order_index: 2,
    part: null,
    outline_content: { title: 'Conclusion Refined', points: ['new'] },
    description_content: { text: 'Conclusion description' },
    generated_image_url: '/files/mock/pages/img-2.jpg',
    status: 'IMAGE_GENERATED',
  },
  {
    page_id: 'page-3',
    order_index: 3,
    part: null,
    outline_content: { title: 'New Extra Page', points: ['extra'] },
    description_content: null,
    generated_image_url: null,
    status: 'DRAFT',
  },
]

/**
 * Registers the route mocks shared by every test in this file.
 *
 * - GET on the mock project URL is answered with a project whose `pages` is
 *   read from `pagesRef.current` when the request arrives, so a test can swap
 *   the list later and the next fetch sees it. Other methods are passed
 *   through with `route.continue()`.
 * - Anything under `/files/mock/pages/` is answered with a small inline PNG,
 *   so the image URLs in the fixtures resolve.
 *
 * @param page - Playwright page to attach the routes to.
 * @param pagesRef - Mutable holder for the page list served by the project mock.
 * @returns Promise that settles once both routes are registered.
 */
function setupMocks(page: import('@playwright/test').Page, pagesRef: { current: typeof INITIAL_PAGES }) {
  return Promise.all([
    page.route(`**/api/projects/${PROJECT_ID}`, async (route) => {
      if (route.request().method() === 'GET') {
        await route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify({
            success: true,
            data: {
              project_id: PROJECT_ID,
              creation_type: 'idea',
              idea_prompt: 'test presentation',
              status: 'OUTLINE_GENERATED',
              pages: pagesRef.current,
            },
          }),
        })
      } else {
        await route.continue()
      }
    }),
    page.route('**/files/mock/pages/**', async (route) => {
      const pixel = Buffer.from(
        'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==',
        'base64'
      )
      await route.fulfill({ status: 200, contentType: 'image/png', body: pixel })
    }),
  ])
}

// NOTE(review): each test below wraps its main assertions in
// `if (await <locator>.count() > 0)`. When the refine input or regenerate
// button is not found, the test passes without checking anything. Confirm
// whether that leniency is intended.
test.describe('Position-based Page Merge (Mocked)', () => {
  test.setTimeout(30_000)

  test('refine with fewer pages: trailing pages removed, earlier pages preserved', async ({ page }) => {
    const pagesRef = { current: [...INITIAL_PAGES] }
    await setupMocks(page, pagesRef)

    // The refine mock also swaps the list served by the project mock.
    await page.route(`**/api/projects/${PROJECT_ID}/refine/outline`, async (route) => {
      pagesRef.current = REFINED_FEWER_PAGES
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: { pages: REFINED_FEWER_PAGES, message: '大纲修改成功' },
        }),
      })
    })

    await page.goto(`${BASE}/project/${PROJECT_ID}/outline`)
    await expect(page.getByText('Introduction')).toBeVisible({ timeout: 5000 })
    await expect(page.getByText('Details')).toBeVisible()
    await expect(page.getByText('Conclusion')).toBeVisible()

    // Refine to reduce pages
    const refineInput = page.locator('input[placeholder*="增加"], textarea[placeholder*="增加"], input[placeholder*="Add"], textarea[placeholder*="Add"]')
    if (await refineInput.count() > 0) {
      await refineInput.first().fill('删除最后一页')

      // Register the response wait before submitting so it cannot be missed.
      const refinePromise = page.waitForResponse(
        (r) => r.url().includes('/refine/outline') && r.status() === 200
      )
      await refineInput.first().press('Control+Enter')
      await refinePromise

      // After: 2 pages, Conclusion gone
      await expect(page.getByText('New Intro Title')).toBeVisible({ timeout: 5000 })
      await expect(page.getByText('New Details Title')).toBeVisible()
      await expect(page.getByText('Conclusion')).not.toBeVisible()
    }
  })

  test('refine with more pages: all old pages preserved, new page added as DRAFT', async ({ page }) => {
    const pagesRef = { current: [...INITIAL_PAGES] }
    await setupMocks(page, pagesRef)

    await page.route(`**/api/projects/${PROJECT_ID}/refine/outline`, async (route) => {
      pagesRef.current = REFINED_MORE_PAGES
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: { pages: REFINED_MORE_PAGES, message: '大纲修改成功' },
        }),
      })
    })

    await page.goto(`${BASE}/project/${PROJECT_ID}/outline`)
    await expect(page.getByText('Introduction')).toBeVisible({ timeout: 5000 })

    const refineInput = page.locator('input[placeholder*="增加"], textarea[placeholder*="增加"], input[placeholder*="Add"], textarea[placeholder*="Add"]')
    if (await refineInput.count() > 0) {
      await refineInput.first().fill('增加一页额外内容')

      const refinePromise = page.waitForResponse(
        (r) => r.url().includes('/refine/outline') && r.status() === 200
      )
      await refineInput.first().press('Control+Enter')
      await refinePromise

      // All 4 pages visible
      await expect(page.getByText('Intro Refined')).toBeVisible({ timeout: 5000 })
      await expect(page.getByText('Details Refined')).toBeVisible()
      await expect(page.getByText('Conclusion Refined')).toBeVisible()
      await expect(page.getByText('New Extra Page')).toBeVisible()
    }
  })

  test('regenerate shows warning dialog mentioning page deletion', async ({ page }) => {
    await setupMocks(page, { current: INITIAL_PAGES })

    await page.goto(`${BASE}/project/${PROJECT_ID}/outline`)
    await expect(page.getByText('Introduction')).toBeVisible({ timeout: 5000 })

    // Click regenerate button
    const regenButton = page.getByRole('button', { name: /重新生成|Regenerate/i })
    if (await regenButton.count() > 0) {
      await regenButton.click()

      // Warning dialog should mention page deletion
      const dialog = page.locator('[role="dialog"], .modal, [class*="dialog"]')
      await expect(dialog).toBeVisible({ timeout: 3000 })

      // Check that warning mentions deletion of pages
      const dialogText = await dialog.textContent()
      expect(dialogText).toMatch(/删除|removed|remove/i)
    }
  })
})

================================================
FILE: frontend/e2e/streaming-descriptions.spec.ts
================================================
import { test, expect } from '@playwright/test';

const BASE_URL = process.env.BASE_URL || 'http://localhost:3240';

/**
 * Helper: create a project with outline pages via API and navigate to detail editor
 */
async function createProjectWithOutline(page:
import('@playwright/test').Page, ideaPrompt: string) {
  // Create project
  const resp = await page.request.post(`${BASE_URL}/api/projects`, {
    data: {
      creation_type: 'idea',
      idea_prompt: ideaPrompt,
    },
  });
  const body = await resp.json();
  const projectId = body.data?.project_id;
  expect(projectId).toBeTruthy();

  // Create some pages with outlines
  const pageTitles = ['Introduction', 'Main Content', 'Conclusion'];
  for (let i = 0; i < pageTitles.length; i++) {
    await page.request.post(`${BASE_URL}/api/projects/${projectId}/pages`, {
      data: {
        order_index: i,
        outline_content: { title: pageTitles[i], points: [`Point ${i + 1}A`, `Point ${i + 1}B`] },
        status: 'DRAFT',
      },
    });
  }

  // Update project status
  await page.request.put(`${BASE_URL}/api/projects/${projectId}`, {
    data: { status: 'OUTLINE_GENERATED' },
  });

  // Open the detail editor for the new project and wait for network activity to settle.
  await page.goto(`${BASE_URL}/project/${projectId}/detail`);
  await page.waitForLoadState('networkidle');
  return projectId;
}

// ===== Mock Tests =====
// Projects and pages are created through the API; only the generation
// endpoints (and task polling) are mocked with page.route().

test.describe('Streaming Descriptions - Mock Tests', () => {
  test('should render descriptions incrementally via SSE', async ({ page }) => {
    const projectId = await createProjectWithOutline(page, 'Test streaming descriptions');

    // Get page IDs
    const projectResp = await page.request.get(`${BASE_URL}/api/projects/${projectId}`);
    const projectData = await projectResp.json();
    const pages = projectData.data?.pages || [];
    expect(pages.length).toBe(3);

    // Mock SSE streaming endpoint
    let mockCalled = false;
    await page.route(`**/api/projects/*/generate/descriptions/stream`, async (route) => {
      mockCalled = true;

      // One `description` event per page, followed by a single `done` event
      // carrying the full page list. The whole stream is delivered as one body.
      const sseEvents = pages.map((p: any, i: number) => {
        const descEvent = `event: description\ndata: ${JSON.stringify({
          page_index: i,
          page_id: p.page_id,
          text: `页面标题:Page ${i + 1}\n\n页面文字:\n- Content for page ${i + 1}`,
          extra_fields: i === 0 ? { '排版布局': '居中布局,大标题' } : { '排版布局': '左文右图' },
        })}\n\n`;
        return descEvent;
      });

      const doneEvent = `event: done\ndata: ${JSON.stringify({
        total: pages.length,
        pages: pages.map((p: any, i: number) => ({
          ...p,
          status: 'DESCRIPTION_GENERATED',
          description_content: {
            text: `页面标题:Page ${i + 1}\n\n页面文字:\n- Content for page ${i + 1}`,
            extra_fields: i === 0 ? { '排版布局': '居中布局,大标题' } : { '排版布局': '左文右图' },
          },
        })),
      })}\n\n`;

      const body = sseEvents.join('') + doneEvent;

      await route.fulfill({
        status: 200,
        headers: {
          'Content-Type': 'text/event-stream',
          'Cache-Control': 'no-cache',
          'Connection': 'keep-alive',
        },
        body,
      });
    });

    // Also mock the settings to return streaming mode (cached in sessionStorage)
    await page.evaluate(() => {
      sessionStorage.setItem('banana-settings', JSON.stringify({
        description_generation_mode: 'streaming',
      }));
    });

    // Click the generate descriptions button
    const generateBtn = page.locator('button').filter({ hasText: /生成描述|Generate/ });
    await generateBtn.first().click();

    // Wait for descriptions to appear
    await expect(page.locator('text=Content for page 1')).toBeVisible({ timeout: 10000 });
    await expect(page.locator('text=Content for page 2')).toBeVisible({ timeout: 10000 });
    await expect(page.locator('text=Content for page 3')).toBeVisible({ timeout: 10000 });

    expect(mockCalled).toBe(true);
  });

  test('should display extra fields when present', async ({ page }) => {
    const projectId = await createProjectWithOutline(page, 'Test extra fields display');

    // Get page IDs
    const projectResp = await page.request.get(`${BASE_URL}/api/projects/${projectId}`);
    const projectData = await projectResp.json();
    const pages = projectData.data?.pages || [];

    // Update a page with description_content that includes extra_fields
    await page.request.put(
      `${BASE_URL}/api/projects/${projectId}/pages/${pages[0].page_id}/description`,
      {
        data: {
          description_content: {
            text: '页面标题:Test Page\n\n页面文字:\n- Test content',
            extra_fields: { '排版布局': '居中布局,大标题+副标题' },
          },
        },
      }
    );

    // Navigate to detail editor
    await page.goto(`${BASE_URL}/project/${projectId}/detail`);
    await page.waitForLoadState('networkidle');

    // Check extra field is displayed
    await expect(page.locator('text=排版布局')).toBeVisible({ timeout: 5000 });
    await expect(page.locator('text=居中布局,大标题+副标题')).toBeVisible({ timeout: 5000 });
  });

  test('should display extra fields from old layout_suggestion format (backward compat)', async ({ page }) => {
    const projectId = await createProjectWithOutline(page, 'Test backward compat');

    const projectResp = await page.request.get(`${BASE_URL}/api/projects/${projectId}`);
    const projectData = await projectResp.json();
    const pages = projectData.data?.pages || [];

    // Old format with layout_suggestion
    await page.request.put(
      `${BASE_URL}/api/projects/${projectId}/pages/${pages[0].page_id}/description`,
      {
        data: {
          description_content: {
            text: '测试页面内容',
            layout_suggestion: '左右分栏布局',
          },
        },
      }
    );

    await page.goto(`${BASE_URL}/project/${projectId}/detail`);
    await page.waitForLoadState('networkidle');

    // Old layout_suggestion should be mapped to "排版建议" field (legacy name)
    await expect(page.locator('text=排版建议')).toBeVisible({ timeout: 5000 });
    await expect(page.locator('text=左右分栏布局')).toBeVisible({ timeout: 5000 });
  });

  test('should fall back to parallel mode when setting is parallel', async ({ page }) => {
    const projectId = await createProjectWithOutline(page, 'Test parallel mode');

    // Get page IDs
    const projectResp = await page.request.get(`${BASE_URL}/api/projects/${projectId}`);
    const projectData = await projectResp.json();
    const pages = projectData.data?.pages || [];

    // Set parallel mode in sessionStorage
    await page.evaluate(() => {
      sessionStorage.setItem('banana-settings', JSON.stringify({
        description_generation_mode: 'parallel',
      }));
    });

    // Mock the parallel endpoint (not streaming)
    let parallelCalled = false;
    await page.route(`**/api/projects/*/generate/descriptions`, async (route) => {
      // Pass through any request whose URL contains '/stream'; every other
      // request on this route, whatever its method, counts as the parallel call.
      if (route.request().url().includes('/stream')) {
        return route.continue();
      }
      parallelCalled = true;
      await route.fulfill({
        status: 202,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: { task_id: 'mock-task-123', status: 'GENERATING_DESCRIPTIONS', total_pages: pages.length },
        }),
      });
    });

    // Mock task polling
    await page.route(`**/api/tasks/mock-task-123`, async (route) => {
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: { status: 'COMPLETED', progress: { total: pages.length, completed: pages.length } },
        }),
      });
    });

    // Click generate
    const generateBtn = page.locator('button').filter({ hasText: /生成描述|Generate/ });
    await generateBtn.first().click();

    // Wait a bit for the mode dispatch
    await page.waitForTimeout(2000);

    expect(parallelCalled).toBe(true);
  });
});

// ===== Integration Tests =====

test.describe('Streaming Descriptions - Integration Tests', () => {
  test('DetailEditor settings panel should show generation mode and extra fields', async ({ page }) => {
    const projectId = await createProjectWithOutline(page, 'Test settings panel');

    await page.goto(`${BASE_URL}/project/${projectId}/detail`);
    await page.waitForLoadState('networkidle');

    // Find the Settings2 button by its title attribute
    const gearBtn = page.locator('button[title="描述设置"], button[title="Description Settings"]');
    await expect(gearBtn).toBeVisible({ timeout: 5000 });
    await gearBtn.click();

    // Check generation mode buttons
    await expect(page.locator('text=流式').or(page.locator('text=Streaming'))).toBeVisible({ timeout: 3000 });
    await expect(page.locator('text=并行').or(page.locator('text=Parallel'))).toBeVisible({ timeout: 3000 });

    // Check detail level buttons
    await expect(page.locator('text=精简').or(page.locator('text=Concise'))).toBeVisible();
    await expect(page.locator('text=默认').or(page.locator('text=Default'))).toBeVisible();
    await expect(page.getByRole('button', { name: /详细|Detailed/ })).toBeVisible();

    // Check extra
// fields section
    await expect(page.locator('text=额外字段').or(page.locator('text=Extra Fields'))).toBeVisible();
    // The extra field "排版布局" should be listed
    await expect(page.locator('text=排版布局')).toBeVisible();
  });

  test('should persist generation mode via settings API', async ({ page }) => {
    const projectId = await createProjectWithOutline(page, 'Test mode persist');

    await page.goto(`${BASE_URL}/project/${projectId}/detail`);
    await page.waitForLoadState('networkidle');

    // Open settings panel
    const gearBtn = page.locator('button[title="描述设置"], button[title="Description Settings"]');
    await gearBtn.click();

    // Click parallel button
    const parallelBtn = page.locator('button').filter({ hasText: /并行|Parallel/ });
    await parallelBtn.first().click();

    // Wait for debounced save
    await page.waitForTimeout(1500);

    // Verify via API
    const settingsResp = await page.request.get(`${BASE_URL}/api/settings`);
    const settingsData = await settingsResp.json();
    expect(settingsData.data?.description_generation_mode).toBe('parallel');

    // Reset back to streaming
    // NOTE(review): this is skipped when the assertion above fails, which
    // would leave 'parallel' saved in the settings.
    await page.request.put(`${BASE_URL}/api/settings`, {
      data: { description_generation_mode: 'streaming' },
    });
  });

  test('should add and remove extra fields', async ({ page }) => {
    const projectId = await createProjectWithOutline(page, 'Test extra fields config');

    await page.goto(`${BASE_URL}/project/${projectId}/detail`);
    await page.waitForLoadState('networkidle');

    // Open settings panel
    const gearBtn = page.locator('button[title="描述设置"], button[title="Description Settings"]');
    await gearBtn.click();

    // Add a new field via input
    const fieldInput = page.locator('input[placeholder="添加字段"], input[placeholder="Add Field"]');
    await fieldInput.fill('配图建议');
    await fieldInput.press('Enter');

    // New field should appear as an active pill button
    const newPill = page.locator('button').filter({ hasText: '配图建议' });
    await expect(newPill).toBeVisible({ timeout: 3000 });

    // Wait for debounced save
    await page.waitForTimeout(1500);

    // Verify via API — both fields should be active
    const settingsResp = await page.request.get(`${BASE_URL}/api/settings`);
    const settingsData = await settingsResp.json();
    expect(settingsData.data?.description_extra_fields).toContain('配图建议');
    expect(settingsData.data?.description_extra_fields).toContain('排版布局');

    // Toggle off 配图建议 by clicking the pill
    await newPill.click();
    await page.waitForTimeout(1500);

    // Verify it's removed from active fields but still visible in pool
    const settingsResp2 = await page.request.get(`${BASE_URL}/api/settings`);
    const settingsData2 = await settingsResp2.json();
    expect(settingsData2.data?.description_extra_fields).not.toContain('配图建议');
    await expect(newPill).toBeVisible(); // Still in pool, just inactive

    // Clean up: reset extra fields
    await page.request.put(`${BASE_URL}/api/settings`, {
      data: { description_extra_fields: ['视觉元素', '视觉焦点', '排版布局', '演讲者备注'] },
    });
    // Clean up localStorage pool
    await page.evaluate(() => localStorage.removeItem('banana-available-extra-fields'));
  });

  test('edit dialog should preserve extra fields on save', async ({ page }) => {
    const projectId = await createProjectWithOutline(page, 'Test edit extra fields');

    const projectResp = await page.request.get(`${BASE_URL}/api/projects/${projectId}`);
    const projectData = await projectResp.json();
    const pages = projectData.data?.pages || [];

    // Set a page with extra_fields
    await page.request.put(
      `${BASE_URL}/api/projects/${projectId}/pages/${pages[0].page_id}/description`,
      {
        data: {
          description_content: {
            text: '测试内容',
            extra_fields: { '排版布局': '居中布局' },
          },
        },
      }
    );

    await page.goto(`${BASE_URL}/project/${projectId}/detail`);
    await page.waitForLoadState('networkidle');

    // Click edit on first card
    const editBtn = page.locator('button').filter({ hasText: /编辑|Edit/ }).first();
    await editBtn.click();

    // Modal should be visible with extra field input
    await expect(page.locator('label').filter({ hasText: '排版布局' })).toBeVisible({ timeout: 5000 });
    const fieldTextarea = page.locator('textarea').filter({
hasText: '居中布局' }); await expect(fieldTextarea).toBeVisible(); // Edit the extra field value await fieldTextarea.fill('左右分栏'); // Save const saveBtn = page.locator('button').filter({ hasText: /保存|Save/ }); await saveBtn.click(); // Verify the card shows updated value (use paragraph to avoid matching textarea) await expect(page.getByRole('paragraph').filter({ hasText: '左右分栏' })).toBeVisible({ timeout: 5000 }); }); test('single page regeneration should still work', async ({ page }) => { const projectId = await createProjectWithOutline(page, 'Test single page regen'); await page.goto(`${BASE_URL}/project/${projectId}/detail`); await page.waitForLoadState('networkidle'); // Click regenerate on the first page card const regenBtn = page.locator('button').filter({ hasText: /重新生成|Regenerate/ }); await expect(regenBtn.first()).toBeVisible({ timeout: 5000 }); }); }); ================================================ FILE: frontend/e2e/streaming-outline.spec.ts ================================================ import { test, expect } from '@playwright/test'; const BASE_URL = process.env.BASE_URL || 'http://localhost:3240'; /** * Helper: create a project via API and navigate to outline editor */ async function createProjectAndNavigate(page: import('@playwright/test').Page, ideaPrompt: string) { const resp = await page.request.post(`${BASE_URL}/api/projects`, { data: { creation_type: 'idea', idea_prompt: ideaPrompt, }, }); const body = await resp.json(); const projectId = body.data?.project_id; expect(projectId).toBeTruthy(); await page.goto(`${BASE_URL}/project/${projectId}/outline`); await page.waitForLoadState('networkidle'); return projectId; } // ===== Mock Tests ===== test.describe('Streaming Outline - Mock Tests', () => { test('should render cards incrementally as SSE pages arrive', async ({ page }) => { const projectId = await createProjectAndNavigate(page, 'Test streaming outline'); // Mock the SSE streaming endpoint let requestReceived = false; await 
page.route(`**/api/projects/*/generate/outline/stream`, async (route) => {
      requestReceived = true;

      // Simulate SSE response with 3 pages arriving sequentially
      const pages = [
        { index: 0, title: 'Introduction', points: ['Welcome', 'Overview'], part: null },
        { index: 1, title: 'Main Content', points: ['Topic A', 'Topic B'], part: 'Part 1' },
        { index: 2, title: 'Conclusion', points: ['Summary', 'Q&A'], part: 'Part 1' },
      ];

      // One "page" SSE event per mocked page
      let sseBody = '';
      for (const p of pages) {
        sseBody += `event: page\ndata: ${JSON.stringify(p)}\n\n`;
      }

      // Done event with fake persisted pages (include real IDs)
      const donePages = pages.map((p, i) => ({
        id: `real-page-${i}`,
        order_index: i,
        outline_content: { title: p.title, points: p.points },
        part: p.part,
        status: 'DRAFT',
      }));
      // Derive the total from the fixture so the two cannot drift apart
      sseBody += `event: done\ndata: ${JSON.stringify({ total: pages.length, pages: donePages })}\n\n`;

      await route.fulfill({
        status: 200,
        headers: {
          'Content-Type': 'text/event-stream',
          'Cache-Control': 'no-cache',
        },
        body: sseBody,
      });
    });

    // Click the generate button
    const generateBtn = page.getByRole('button', { name: /自动生成|Auto Generate/i });
    await generateBtn.click();

    // Wait for cards to appear (one title per mocked page)
    await expect(page.getByText('Introduction')).toBeVisible({ timeout: 10000 });
    await expect(page.getByText('Main Content')).toBeVisible();
    await expect(page.getByText('Conclusion')).toBeVisible();

    // Verify the SSE endpoint was called
    expect(requestReceived).toBe(true);

    // Verify card bodies rendered too: bullet points from the mocked pages must be visible
    await expect(page.getByText('Topic A')).toBeVisible();
    await expect(page.getByText('Summary')).toBeVisible();
  });

  test('should show error message on SSE error event', async ({ page }) => {
    // The project id is not needed here: the stream route below is matched with a wildcard
    await createProjectAndNavigate(page, 'Test error handling');

    // Reply to the stream request with a single SSE "error" event
    await page.route(`**/api/projects/*/generate/outline/stream`, async (route) => {
      const sseBody = `event: error\ndata: ${JSON.stringify({
message: 'AI service unavailable' })}\n\n`;
      await route.fulfill({
        status: 200,
        headers: { 'Content-Type': 'text/event-stream' },
        body: sseBody,
      });
    });

    const generateBtn = page.getByRole('button', { name: /自动生成|Auto Generate/i });
    await generateBtn.click();

    // The error should be displayed somewhere in the UI
    // Wait a moment for the error to propagate
    await page.waitForTimeout(1000);

    // The store sets error state, which may show as a toast or error message
    // Just verify no cards appeared
    await expect(page.getByText('Introduction')).not.toBeVisible();
  });

  test('should disable generate button during streaming and re-enable on completion', async ({ page }) => {
    // The project id is not needed here: the stream route below is matched with a wildcard
    await createProjectAndNavigate(page, 'Test button state');

    await page.route(`**/api/projects/*/generate/outline/stream`, async (route) => {
      const pageEvent = `event: page\ndata: ${JSON.stringify({ index: 0, title: 'Page 1', points: ['Point'] })}\n\n`;
      const doneEvent = `event: done\ndata: ${JSON.stringify({ total: 1, pages: [{id: 'p1', order_index: 0, outline_content: {title: 'Page 1', points: ['Point']}}] })}\n\n`;

      // Hold the response briefly: an instantly fulfilled stream can finish before the
      // test observes the intermediate "Generating" state, making the assertion below racy.
      await new Promise((resolve) => setTimeout(resolve, 300));

      await route.fulfill({
        status: 200,
        headers: { 'Content-Type': 'text/event-stream' },
        body: pageEvent + doneEvent,
      });
    });

    const generateBtn = page.getByRole('button', { name: /自动生成|Auto Generate/i });
    await generateBtn.click();

    // Assert button shows disabled "Generating..."
// state
    await expect(page.getByRole('button', { name: /生成中|Generating/i })).toBeDisabled();

    // Wait for page to render
    await expect(page.getByText('Page 1')).toBeVisible();

    // Assert button re-enables with "Regenerate" text
    await expect(page.getByRole('button', { name: /重新生成|Regenerate/i })).toBeEnabled();
  });
});

// ===== Integration Tests =====

test.describe('Streaming Outline - Integration Tests', () => {
  // Skip in CI — requires real AI API keys
  test.skip(!!process.env.CI, 'Requires real AI backend');

  test('should stream outline from real backend and persist pages', async ({ page }) => {
    // Create project (the id itself is not needed; the helper already navigates to the outline editor)
    await createProjectAndNavigate(page, 'A 3-page presentation about cats');

    // Click generate
    const generateBtn = page.getByRole('button', { name: /自动生成|Auto Generate/i });
    await generateBtn.click();

    // Wait for at least one card to appear (streaming in progress)
    // The first card should appear within 30 seconds
    const cardTitles = page.locator('h4');
    await expect(cardTitles.first()).toBeVisible({ timeout: 30000 });

    // Wait for streaming to complete - "Regenerate" button appears when done
    await expect(page.getByRole('button', { name: /重新生成|Regenerate/i })).toBeVisible({ timeout: 60000 });

    // Verify multiple cards were generated (streaming has finished, so the count is stable)
    const count = await cardTitles.count();
    expect(count).toBeGreaterThanOrEqual(2);

    // Reload page and verify pages persisted
    await page.reload();
    await page.waitForLoadState('networkidle');

    // Use the auto-retrying assertion: a one-shot count() right after reload can run
    // before the cards are rendered and report a spurious mismatch.
    await expect(page.locator('h4')).toHaveCount(count);
  });
});

================================================
FILE: frontend/e2e/ui-full-flow-mocked.spec.ts
================================================
/**
 * UI-driven E2E test with Mocked Backend
 *
 * This test simulates the complete user operation flow but mocks all backend API calls.
 * This allows fast testing (1-2 minutes) without waiting for real AI generation.
*
 * Use this for:
 * - Quick UI regression testing
 * - CI/CD pipeline (fast feedback)
 * - Development iteration
 *
 * For real E2E testing with actual AI, use ui-full-flow.spec.ts
 */

import { test, expect } from '@playwright/test'
import * as fs from 'fs'
import * as path from 'path'

test.describe('UI-driven E2E test (Mocked Backend)', () => {
  test.setTimeout(2 * 60 * 1000) // 2 minutes max

  // Walks the whole create -> outline -> descriptions -> images -> export flow through the UI
  // while every backend endpoint touched by the flow is answered by a route mock.
  test('User Full Flow: Create and export PPT with mocked API', async ({ page }) => {
    console.log('\n========================================')
    console.log('🌐 Starting UI-driven E2E test (Mocked Backend)')
    console.log('========================================\n')

    // Mock API responses
    // Only project creation (POST) is mocked; other methods fall through to the real backend
    await page.route('**/api/projects', async (route) => {
      if (route.request().method() === 'POST') {
        await route.fulfill({
          status: 201,
          contentType: 'application/json',
          body: JSON.stringify({
            success: true,
            data: {
              project_id: 'mock-project-123',
              status: 'DRAFT'
            }
          })
        })
      } else {
        await route.continue()
      }
    })

    // Mock outline generation
    // NOTE(review): streaming-outline.spec.ts mocks ".../generate/outline/stream"; this pattern
    // does not match that URL, so this mock may no longer be hit — confirm against the frontend.
    await page.route('**/api/projects/*/generate/outline', async (route) => {
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: {
            task_id: 'mock-outline-task'
          }
        })
      })
    })

    // Mock project status (outline generated)
    await page.route('**/api/projects/mock-project-123', async (route) => {
      await route.fulfill({
        status: 200,
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: {
            project_id: 'mock-project-123',
            status: 'OUTLINE_GENERATED',
            outline_content: {
              pages: [
                { title: '什么是AI', order_index: 0 },
                { title: 'AI的应用', order_index: 1 },
                { title: 'AI的未来', order_index: 2 }
              ]
            }
          }
        })
      })
    })

    // Mock description generation
    await page.route('**/api/projects/*/generate/descriptions', async (route) => {
      await route.fulfill({
        status: 202, // 202 Accepted for async operations
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: {
            task_id: 'mock-desc-task'
          }
        })
      })
    })

    // Mock image generation
    await page.route('**/api/projects/*/generate/images', async (route) => {
      await route.fulfill({
        status: 202, // 202 Accepted for async operations
        contentType: 'application/json',
        body: JSON.stringify({
          success: true,
          data: {
            task_id: 'mock-image-task'
          }
        })
      })
    })

    // Mock PPT export
    await page.route('**/api/projects/*/export/pptx**', async (route) => {
      // Serve the fixture PPTX when it exists next to this spec
      const mockPptxPath = path.join(__dirname, 'fixtures', 'mock-presentation.pptx')
      if (fs.existsSync(mockPptxPath)) {
        const buffer = fs.readFileSync(mockPptxPath)
        await route.fulfill({
          status: 200,
          contentType: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
          body: buffer
        })
      } else {
        // If mock file doesn't exist, return a simple response
        await route.fulfill({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify({
            success: true,
            data: {
              download_url: '/files/mock-project-123/exports/mock-presentation.pptx'
            }
          })
        })
      }
    })

    // ====================================
    // Step 1: Visit homepage
    // ====================================
    console.log('📱 Step 1: Opening homepage...')
    // Prevent HelpModal from appearing (it opens with a 500ms delay on first visit) so it
    // cannot overlay the form and block the clicks below; mirrors ui-full-flow.spec.ts.
    await page.addInitScript(() => {
      localStorage.setItem('hasSeenHelpModal', 'true')
    })
    await page.goto('http://localhost:3000')
    await expect(page).toHaveTitle(/蕉幻|Banana/i)
    console.log('✓ Homepage loaded successfully\n')

    // ====================================
    // Step 2: Ensure "一句话生成" tab is selected (it's selected by default)
    // ====================================
    console.log('🖱️ Step 2: Ensuring "一句话生成" tab is selected...')
    // The "一句话生成" tab is selected by default, but we can click it to ensure it's active
    await page.click('button:has-text("一句话生成")').catch(() => {
      // If click fails, the tab might already be selected, which is fine
    })

    await page.waitForSelector('textarea, input[type="text"]', { timeout: 10000 })
    console.log('✓ Create form displayed\n')

    // ====================================
    // Step 3: Enter idea and click "Next"
    // ====================================
    console.log('✍️ Step 3: Entering idea content...')
    // NOTE(review): ui-full-flow.spec.ts also matches '[role="textbox"]' because the idea input
    // is described there as a contentEditable div — confirm this selector still finds it.
    const ideaInput = page.locator('textarea, input[type="text"]').first()
    await ideaInput.fill('创建一份关于人工智能基础的简短PPT,包含3页:什么是AI、AI的应用、AI的未来')

    console.log('🚀 Clicking "Next" button...')
    await page.click('button:has-text("下一步")')

    // Wait for navigation (mocked response should be fast)
    await page.waitForTimeout(1000)
    console.log('✓ Clicked "Next" button\n')

    // ====================================
    // Step 4: Verify outline editor page loaded
    // ====================================
    console.log('📋 Step 4: Verifying outline editor page...')
    await page.waitForSelector('button:has-text("自动生成大纲"), button:has-text("重新生成大纲")', { timeout: 10000 })
    console.log('✓ Outline editor page loaded\n')

    // ====================================
    // Step 5: Click generate outline (mocked)
    // ====================================
    console.log('📋 Step 5: Clicking batch generate outline button (mocked)...')
    const generateOutlineBtn = page.locator('button:has-text("自动生成大纲"), button:has-text("重新生成大纲")')
    await generateOutlineBtn.first().click()

    // Wait for mocked response (should be instant, but UI might need time to update)
    await page.waitForTimeout(2000)
    console.log('✓ Mocked outline generation triggered\n')

    // ====================================
    // Step 6: Verify UI shows outline (mocked data)
    // ====================================
    console.log('✅ Step 6: Verifying UI shows outline items...')
    // The UI should show the mocked outline data
    await expect(page.locator('.outline-card, [data-testid="outline-item"], .outline-section').first())
      .toBeVisible({ timeout: 10000 })
    console.log('✓ Outline items visible in UI\n')

    // ====================================
    // Step 7: Navigate to description editor
    // ====================================
    console.log('➡️ Step 7: Clicking "Next" to go to description editor...')
    const nextBtn = page.locator('button:has-text("下一步")')
    if (await nextBtn.count() > 0) {
      await nextBtn.first().click()
      await page.waitForTimeout(1000)
      console.log('✓ Navigated to description editor\n')
    }

    // ====================================
    // Step 8: Test description generation UI (mocked)
    // ====================================
    console.log('✍️ Step 8: Testing description generation UI (mocked)...')
    await page.waitForSelector('button:has-text("批量生成描述")', { timeout: 10000 })

    const generateDescBtn = page.locator('button:has-text("批量生成描述")')
    await generateDescBtn.first().click()
    await page.waitForTimeout(2000) // Mock response should be fast
    console.log('✓ Mocked description generation triggered\n')

    // ====================================
    // Step 9: Navigate to image generation
    // ====================================
    console.log('➡️ Step 9: Navigating to image generation page...')
    const nextBtn2 = page.locator('button:has-text("下一步")')
    if (await nextBtn2.count() > 0) {
      await nextBtn2.first().click()
      await page.waitForTimeout(1000)
      console.log('✓ Navigated to image generation page\n')
    }

    // ====================================
    // Step 10: Test image generation UI (mocked)
    // ====================================
    console.log('🎨 Step 10: Testing image generation UI (mocked)...')
    await page.waitForSelector('button:has-text("批量生成图片")', { timeout: 10000 })

    const generateImageBtn = page.locator('button:has-text("批量生成图片")')
    if (await generateImageBtn.count() > 0) {
      await generateImageBtn.first().click()
      await page.waitForTimeout(2000)
      console.log('✓ Mocked image generation triggered\n')
    }

    // ====================================
    // Step 11: Test export UI
    // ====================================
    console.log('📦 Step 11: Testing export UI...')
    const exportBtn = page.locator('button:has-text("导出"), button:has-text("下载"), button:has-text("完成")')

    if (await exportBtn.count() > 0) {
      // Start listening before the click; a missing download event is tolerated (resolves to null)
      const downloadPromise = page.waitForEvent('download', { timeout: 10000 }).catch(() => null)
      await exportBtn.first().click()

      const download = await downloadPromise
      if (download) {
        const downloadPath = path.join('test-results', 'e2e-mocked-test-output.pptx')
        await download.saveAs(downloadPath)
        console.log(`✓ Mock PPT file downloaded: ${downloadPath}\n`)
      } else {
        console.log('⚠️ Download event not triggered (may be handled differently in UI)\n')
      }
    }

    // ====================================
    // Final verification
    // ====================================
    console.log('========================================')
    console.log('✅ Mocked E2E test completed!')
    console.log('========================================\n')

    // Take final screenshot
    await page.screenshot({ path: 'test-results/e2e-mocked-final-state.png', fullPage: true })
  })
})

================================================
FILE: frontend/e2e/ui-full-flow.spec.ts
================================================
/**
 * UI-driven end-to-end test: From user interface operations to final PPT export
 *
 * This test simulates the complete user operation flow in the browser:
 * 1. Enter idea in frontend
 * 2. Click "下一步" (Next) button
 * 3. Click batch generate outline button on outline editor page
 * 4. Wait for outline generation (visible in UI)
 * 5. Click "下一步" (Next) to go to description editor page
 * 6. Click batch generate descriptions button
 * 7. Wait for descriptions to generate (visible in UI)
 * 8. Test retry single card functionality
 * 9. Click "生成图片" (Generate Images) to go to image generation page
 * 10. Click batch generate images button
 * 11. Wait for images to generate (visible in UI)
 * 12. Export PPT
 * 13.
Verify downloaded file * * Note: * - This test requires real AI API keys * - Takes 10-15 minutes to complete * - Depends on frontend UI stability * - Recommended to run only before release or in Nightly Build */ import { test, expect } from '@playwright/test' import * as fs from 'fs' import * as path from 'path' test.describe('UI-driven E2E test: From user interface to PPT export', () => { // Increase timeout to 25 minutes (image generation may need retries on API disconnects) test.setTimeout(25 * 60 * 1000) test('User Full Flow: Create and export PPT in browser', async ({ page }) => { console.log('\n========================================') console.log('🌐 Starting UI-driven E2E test (via frontend interface)') console.log('========================================\n') // ==================================== // Step 1: Visit homepage // ==================================== console.log('📱 Step 1: Opening homepage...') // Prevent HelpModal from appearing (it opens with a 500ms delay on first visit) await page.addInitScript(() => { localStorage.setItem('hasSeenHelpModal', 'true') }) await page.goto('http://localhost:3000') // Verify page loaded await expect(page).toHaveTitle(/蕉幻|Banana/i) console.log('✓ Homepage loaded successfully\n') // ==================================== // Step 2: Ensure "一句话生成" tab is selected (it's selected by default) // ==================================== console.log('🖱️ Step 2: Ensuring "一句话生成" tab is selected...') // The "一句话生成" tab is selected by default, but we can click it to ensure it's active await page.click('button:has-text("一句话生成")').catch(() => { // If click fails, the tab might already be selected, which is fine }) // Wait for form to appear (MarkdownTextarea uses contentEditable div with role="textbox") await page.waitForSelector('[role="textbox"], textarea, input[type="text"]', { timeout: 10000 }) console.log('✓ Create form displayed\n') // ==================================== // Step 3: Enter idea and click "Next" // 
==================================== console.log('✍️ Step 3: Entering idea content...') const ideaInput = page.locator('[role="textbox"], textarea, input[type="text"]').first() await ideaInput.click() await ideaInput.pressSequentially('创建一份关于人工智能基础的简短PPT,包含3页:什么是AI、AI的应用、AI的未来') console.log('🚀 Clicking "Next" button...') await page.click('button:has-text("下一步")') // Wait for navigation to outline editor page await page.waitForURL(/\/project\/.*\/outline/, { timeout: 10000 }) console.log('✓ Clicked "Next" button and navigated to outline editor page\n') // ==================================== // Step 4: Click batch generate outline button on outline editor page // ==================================== console.log('⏳ Step 4: Waiting for outline editor page to load...') await page.waitForSelector('button:has-text("自动生成大纲"), button:has-text("重新生成大纲")', { timeout: 10000 }) console.log('📋 Step 4: Clicking batch generate outline button...') const generateOutlineBtn = page.locator('button:has-text("自动生成大纲"), button:has-text("重新生成大纲")') await generateOutlineBtn.first().click() console.log('✓ Clicked batch generate outline button\n') // ==================================== // Step 5: Wait for outline generation to complete (smart wait) // ==================================== console.log('⏳ Step 5: Waiting for outline generation (may take 3-5 minutes)...') // Outline generation uses SSE streaming: the button shows "生成中..." and // pages appear incrementally. Wait for the first card, then for streaming // to finish (button text reverts from "生成中..."). 
const streamingBtn = page.locator('button:has-text("生成中...")') await streamingBtn.waitFor({ state: 'visible', timeout: 10000 }).catch(() => { console.log(' Streaming button state not detected, generation may have completed quickly') }) // Wait for at least one outline card (pages stream in one by one) await expect(page.locator('text=/第 \\d+ 页/').first()).toBeVisible({ timeout: 300000 }) console.log(' First outline card appeared') // Wait for streaming to finish (button reverts from "生成中...") await expect(streamingBtn).toBeHidden({ timeout: 300000 }) // Verify outline content const outlineItems = page.locator('text=/第 \\d+ 页/') const outlineCount = await outlineItems.count() expect(outlineCount).toBeGreaterThan(0) console.log(`✓ Outline generated successfully, total ${outlineCount} pages\n`) // Take screenshot of current state await page.screenshot({ path: 'test-results/e2e-outline-generated.png' }) // ==================================== // Step 6: Click "Next" to go to description editor page // ==================================== console.log('➡️ Step 6: Clicking "Next" to go to description editor page...') const nextBtn = page.locator('button:has-text("下一步")') if (await nextBtn.count() > 0) { await nextBtn.first().click() // Wait for navigation to detail editor page await page.waitForURL(/\/project\/.*\/detail/, { timeout: 10000 }) console.log('✓ Clicked "Next" button and navigated to description editor page\n') } // ==================================== // Step 7: Click batch generate descriptions button // ==================================== console.log('✍️ Step 7: Clicking batch generate descriptions button...') // Wait for description editor page to load await page.waitForSelector('button:has-text("批量生成描述")', { timeout: 10000 }) const generateDescBtn = page.locator('button:has-text("批量生成描述")') await generateDescBtn.first().click() console.log('✓ Clicked batch generate descriptions button\n') // ==================================== // Step 8: Wait for 
descriptions to generate (smart wait) // ==================================== console.log('⏳ Step 8: Waiting for descriptions to generate (may take 2-5 minutes)...') // Smart wait: The "生成图片" button is disabled until ALL pages have description_content. // Wait for it to become enabled as the definitive signal that all descriptions are done. const generateImagesBtnForWait = page.locator('button:has-text("生成图片")').first() await expect(async () => { await expect(generateImagesBtnForWait).toBeEnabled() }).toPass({ timeout: 300000, intervals: [3000, 5000, 10000] }) console.log('✓ All descriptions generated (生成图片 button enabled)\n') await page.screenshot({ path: 'test-results/e2e-descriptions-generated.png' }) // ==================================== // Step 9: Test retry single card functionality // ==================================== console.log('🔄 Step 9: Testing retry single card functionality...') // Find the first description card with retry button const retryButtons = page.locator('button:has-text("重新生成")') const retryCount = await retryButtons.count() if (retryCount > 0) { // Click the first retry button await retryButtons.first().click() console.log('✓ Clicked retry button on first card') // Handle confirmation dialog if it appears (appears when page already has description) try { const confirmDialog = page.locator('div[role="dialog"]:has-text("确认重新生成")') await confirmDialog.waitFor({ state: 'visible', timeout: 2000 }) console.log(' Confirmation dialog appeared, clicking confirm...') // Click the confirm button in the dialog const confirmButton = page.locator('button:has-text("确定"), button:has-text("确认")').last() await confirmButton.click() // Wait for dialog to be completely hidden await confirmDialog.waitFor({ state: 'hidden', timeout: 5000 }) // Also wait for the modal backdrop to disappear const modalBackdrop = page.locator('.fixed.inset-0.bg-black\\/50') await modalBackdrop.waitFor({ state: 'hidden', timeout: 3000 }).catch(() => { console.log(' Modal 
backdrop already gone or not found') }) // Extra wait to ensure CSS transitions complete await page.waitForTimeout(300) console.log(' Confirmed regeneration and dialog closed') } catch (e) { // Dialog didn't appear or already closed, continue console.log(' No confirmation dialog, continuing...') } // Wait for the card to show generating state await page.waitForSelector('button:has-text("生成中...")', { timeout: 5000 }).catch(() => { // If "生成中..." doesn't appear, check for other loading indicators console.log(' Waiting for generation state...') }) // Wait for regeneration to complete - ensure no cards are still generating // (can't just check for any "重新生成" button as other cards already have one) await expect(async () => { const generatingButtons = await page.locator('button:has-text("生成中...")').count() expect(generatingButtons).toBe(0) }).toPass({ timeout: 120000, intervals: [2000, 5000, 10000] }) console.log('✓ Single card retry completed successfully\n') await page.screenshot({ path: 'test-results/e2e-single-card-retry.png' }) } else { console.log('⚠️ No retry buttons found, skipping single card retry test\n') } // ==================================== // Step 10: Click "生成图片" to go to image generation page // ==================================== console.log('➡️ Step 10: Clicking "生成图片" to go to image generation page...') // Ensure no modal backdrop is blocking the UI // This is important after the single card retry which may have shown a confirmation dialog const modalBackdrop = page.locator('.fixed.inset-0').filter({ hasText: '' }).first() const backdropCount = await page.locator('.fixed.inset-0').filter({ hasText: '' }).count() if (backdropCount > 0) { const isBackdropVisible = await modalBackdrop.isVisible().catch(() => false) if (isBackdropVisible) { console.log(' Modal backdrop detected, attempting to close modal...') // Try pressing Escape to close any open modal await page.keyboard.press('Escape') await page.waitForTimeout(300) // Try clicking close button 
if exists const closeButton = page.locator('button:has-text("取消"), button[aria-label="Close"]').first() if (await closeButton.isVisible().catch(() => false)) { await closeButton.click().catch(() => {}) } // Wait for backdrop to disappear await page.waitForTimeout(500) // Final check - if backdrop still visible, wait longer const stillVisible = await modalBackdrop.isVisible().catch(() => false) if (stillVisible) { console.log(' Backdrop still visible, waiting up to 3 seconds...') await modalBackdrop.waitFor({ state: 'hidden', timeout: 3000 }).catch(() => { console.log(' Warning: Backdrop may still be present') }) } console.log(' Modal cleared') } } else { console.log(' No modal backdrop detected') } // Extra safety wait to ensure all animations complete await page.waitForTimeout(1500) const generateImagesNavBtn = page.locator('button:has-text("生成图片")').first() // Wait for button to be enabled (it's disabled until all descriptions are generated) await generateImagesNavBtn.waitFor({ state: 'visible', timeout: 10000 }) // Allow enough time for the single card retry from Step 9 to complete await expect(generateImagesNavBtn).toBeEnabled({ timeout: 30000 }) // Ensure button is in viewport await generateImagesNavBtn.scrollIntoViewIfNeeded() // Log current URL before clicking const urlBeforeClick = page.url() console.log(` Current URL before click: ${urlBeforeClick}`) // Try normal click first let clickSucceeded = false try { await generateImagesNavBtn.click({ timeout: 2000 }) console.log(' Button clicked successfully (normal click)') clickSucceeded = true } catch (e) { console.log(' Normal click blocked by overlay') } // Check if navigation started await page.waitForTimeout(200) const urlAfterFirstAttempt = page.url() if (!clickSucceeded || urlAfterFirstAttempt === urlBeforeClick) { console.log(' Navigation did not start, using JavaScript to trigger navigation...') // Extract project ID from current URL const match = urlBeforeClick.match(/\/project\/([^/]+)\//) if (match) 
{ const projectId = match[1] const targetUrl = `http://localhost:3000/project/${projectId}/preview` console.log(` Navigating to: ${targetUrl}`) await page.goto(targetUrl, { waitUntil: 'domcontentloaded' }) } else { throw new Error('Could not extract project ID from URL') } } // Wait for navigation to complete console.log(' Waiting for preview page to load...') await page.waitForURL(/\/project\/.*\/preview/, { timeout: 10000 }) console.log('✓ Successfully navigated to preview page\n') // ==================================== // Step 11: Select template (required before generating images) // ==================================== console.log('🎨 Step 11: Selecting template...') // Click "更换模板" button to open template selection modal // The button might be hidden on small screens, so try multiple selectors const changeTemplateBtn = page.locator('button:has-text("更换模板"), button[title="更换模板"]').first() await changeTemplateBtn.waitFor({ state: 'visible', timeout: 10000 }) await changeTemplateBtn.scrollIntoViewIfNeeded() await changeTemplateBtn.click() console.log('✓ Clicked "更换模板" button, opening template selection modal...') // Wait for template modal to open (check for modal title and preset templates section) await page.waitForSelector('text="更换模板"', { timeout: 5000 }) await page.waitForSelector('text="预设模板"', { timeout: 5000 }) await page.waitForTimeout(500) // Wait for modal animation // Select the first preset template let templateSelected = false // Click the first preset template card in the grid (if name click didn't work) if (!templateSelected) { try { // Find the preset templates section and click the first template card // The preset templates are in a grid with class containing "aspect-[4/3]" const presetSection = page.locator('h4:has-text("预设模板")').locator('..') const firstTemplateCard = presetSection.locator('div[class*="aspect-[4/3]"]').first() await firstTemplateCard.waitFor({ state: 'visible', timeout: 3000 }) await firstTemplateCard.click() 
templateSelected = true console.log('✓ Selected first preset template by clicking first card') } catch (e) { console.log(' Warning: Could not select template by card, trying alternative...') } } if (!templateSelected) { throw new Error('Failed to select preset template') } // Wait for template selection to complete dynamically // The handleTemplateSelect function will: // 1. Show "正在上传模板..." (isUploadingTemplate = true) // 2. Upload template and sync project // 3. Close modal (setIsTemplateModalOpen(false)) // 4. Show success toast "模板更换成功" console.log(' Waiting for template upload to complete...') // Wait for "正在上传模板..." to appear (indicates upload started) const uploadingText = page.locator('text="正在上传模板..."') const uploadStarted = await uploadingText.isVisible({ timeout: 3000 }).catch(() => false) if (uploadStarted) { console.log(' Template upload started, waiting for completion...') } // Wait for modal to close (most reliable indicator that selection is complete) // Modal component returns null when isOpen=false, so the modal DOM disappears // We check for the modal's unique content that only exists when modal is open await expect(async () => { // Check if modal backdrop or modal content is still visible // The modal has a backdrop with class "fixed inset-0 bg-black/50" // and the modal content has title "更换模板" in a specific structure const modalBackdrop = page.locator('.fixed.inset-0.bg-black\\/50').first() const modalContent = page.locator('h2:has-text("更换模板")').first() const isBackdropVisible = await modalBackdrop.isVisible().catch(() => false) const isContentVisible = await modalContent.isVisible().catch(() => false) if (isBackdropVisible || isContentVisible) { throw new Error('Template selection modal still open') } return true }).toPass({ timeout: 30000, // Wait up to 30 seconds for upload and modal close intervals: [1000, 2000, 3000] // Check every 1-3 seconds }) console.log('✓ Template upload completed and modal closed') // Optionally wait for success 
toast (non-blocking, just for verification) try { await page.waitForSelector('text="模板更换成功"', { timeout: 3000 }) console.log('✓ Success toast appeared') } catch (e) { // Toast might have disappeared quickly, that's okay } console.log('✓ Template selected successfully\n') // ==================================== // Step 12: Click batch generate images button // ==================================== console.log('🎨 Step 12: Clicking batch generate images button...') // Wait for image generation page to load (button text includes page count like "批量生成图片 (3)") const generateImageBtn = page.locator('button').filter({ hasText: '批量生成图片' }) await generateImageBtn.waitFor({ state: 'visible', timeout: 10000 }) if (await generateImageBtn.count() > 0) { await generateImageBtn.first().click() console.log('✓ Clicked batch generate images button\n') // Wait for images to generate (should complete within 5 minutes) console.log('⏳ Step 13: Waiting for images to generate (should complete within 5 minutes)...') // Get expected page count from the button text (e.g., "批量生成图片 (3)") let pageCount = 3 // default try { const buttonText = await generateImageBtn.first().textContent() const match = buttonText?.match(/\((\d+)\)/) if (match) { pageCount = parseInt(match[1], 10) } } catch (e) { // Fallback: try to count page thumbnails or cards const thumbnails = page.locator('[data-page-index], .page-thumbnail, .slide-thumbnail') const thumbnailCount = await thumbnails.count() if (thumbnailCount > 0) { pageCount = thumbnailCount } } console.log(` Expected ${pageCount} pages to generate images`) // Wait strategy: Image generation is NON-BLOCKING (no global loading overlay). // The frontend uses pageGeneratingTasks to track per-page generation status. // StatusBadge shows "生成中" (orange badge with animate-pulse) during generation. // We wait for export button to be enabled (hasAllImages = all pages have generated_image_path). // Use 15 minutes timeout (900000ms) to cover retries on API disconnects. 
const startTime = Date.now() const maxWaitTime = 900000 // 15 minutes total // Helper: Precise selector for "生成中" StatusBadge (orange background) // StatusBadge structure: 生成中 // We use CSS class selector which is more reliable than text matching const generatingBadgeSelector = 'span.bg-orange-100.text-orange-600' // Helper: Selector for failed status badges (red background) const failedBadgeSelector = 'span.bg-red-100.text-red-600' // Helper: Selector for completed status badges (green background) const _completedBadgeSelector = 'span.bg-green-100.text-green-600' // Helper: Image selector for generated slide images // Generated images are stored at: /files/{project_id}/pages/{page_id}_v{version}.png // Template images are at: /files/{project_id}/template/template.png (excluded) // We match images in /pages/ directory OR with "Slide" in alt text const slideImageSelector = 'img[src*="/pages/"], img[alt*="Slide"]:not([alt="Template"])' // Step 13a: Wait for generation to START, then COMPLETE console.log(' Step 13a: Waiting for image generation task to complete...') // First, wait a bit for the API call to start and status to change await page.waitForTimeout(2000) // Check if generation has started (look for "生成中" badges OR skeleton loaders) let generationStarted = false for (let i = 0; i < 10; i++) { // Try for up to 20 seconds const generatingBadges = page.locator(generatingBadgeSelector) const skeletons = page.locator('.animate-shimmer') // Skeleton uses animate-shimmer const generatingCount = await generatingBadges.count() const skeletonCount = await skeletons.count() if (generatingCount > 0 || skeletonCount > 0) { generationStarted = true console.log(` ✓ Generation started (${generatingCount} generating badges, ${skeletonCount} skeletons)`) break } // Also check if images are already generated (fast path - previous run cached) const images = page.locator(slideImageSelector) const imageCount = await images.count() if (imageCount >= pageCount) { console.log(` ✓ 
Images already generated (${imageCount}/${pageCount})`) generationStarted = true break } await page.waitForTimeout(2000) } if (!generationStarted) { console.log(' ⚠ Could not detect generation start, continuing anyway...') } // Now wait for generation to complete (no more "生成中" badges) await expect(async () => { // Check for "生成中" StatusBadge const generatingBadges = page.locator(generatingBadgeSelector) const generatingCount = await generatingBadges.count() // Also check for failed status - if all pages failed, we should fail early const failedBadges = page.locator(failedBadgeSelector) const failedCount = await failedBadges.count() const elapsed = Math.floor((Date.now() - startTime) / 1000) // Log progress every 30 seconds if (elapsed % 30 === 0 && elapsed > 0) { console.log(` [${elapsed}s] Still generating... (${generatingCount} in progress, ${failedCount} failed)`) } // If all pages failed, fail early if (failedCount >= pageCount && generatingCount === 0) { throw new Error(`All ${pageCount} pages failed to generate images`) } if (generatingCount > 0) { throw new Error(`Image generation still in progress (${elapsed}s elapsed, ${generatingCount} pages generating)`) } return true }).toPass({ timeout: maxWaitTime, intervals: [3000, 5000, 5000] // Check every 3-5 seconds }) console.log(' ✓ Image generation task completed, waiting for UI to update...') await page.waitForTimeout(3000) // Give UI time to sync state after task completion // Step 13b: Wait for export button to be enabled (all images synced to UI) // This verifies hasAllImages = true (all pages have generated_image_path) console.log(' Step 13b: Waiting for export button to be enabled...') await expect(async () => { // Try to trigger a refresh by clicking refresh button if available (helps sync state) const refreshBtn = page.locator('button:has-text("刷新")').first() if (await refreshBtn.isVisible().catch(() => false)) { await refreshBtn.click().catch(() => {}) // Non-blocking refresh await 
page.waitForTimeout(1000) // Wait for refresh to complete } const exportBtnCheck = page.locator('button:has-text("导出")') const isEnabled = await exportBtnCheck.isEnabled().catch(() => false) // Use precise selector for slide images (in aspect-video containers) const images = page.locator(slideImageSelector) const imageCount = await images.count() // Also check for failed pages const failedBadges = page.locator(failedBadgeSelector) const failedCount = await failedBadges.count() const elapsed = Math.floor((Date.now() - startTime) / 1000) // Log progress every 10 seconds if (elapsed % 10 === 0 && elapsed > 0) { console.log(` [${elapsed}s] Export enabled: ${isEnabled}, Images: ${imageCount}/${pageCount}, Failed: ${failedCount}`) } // If some pages failed but we have enough images, that's also acceptable for partial export // However, for full test we want all images if (failedCount > 0 && imageCount + failedCount >= pageCount) { console.log(` ⚠ ${failedCount} pages failed, ${imageCount} succeeded`) } if (!isEnabled) { throw new Error(`Export button not yet enabled (${elapsed}s elapsed, ${imageCount}/${pageCount} images, ${failedCount} failed)`) } if (imageCount < pageCount) { throw new Error(`Only ${imageCount}/${pageCount} images found (${elapsed}s elapsed, ${failedCount} failed)`) } console.log(` [${elapsed}s] ✓ Export button enabled and ${imageCount} images found`) return true }).toPass({ timeout: 120000, // 2 minutes for state sync (after task completion) intervals: [2000, 3000, 5000] // Check every 2-5 seconds }) // Final verification: export button should be enabled const exportBtnCheck = page.locator('button:has-text("导出")') await expect(exportBtnCheck).toBeEnabled({ timeout: 5000 }) console.log('✓ All images generated\n') await page.screenshot({ path: 'test-results/e2e-images-generated.png' }) } else { throw new Error('Batch generate images button not found') } // ==================================== // Step 14: Export PPT // 
==================================== console.log('📦 Step 14: Exporting PPT file...') // Setup download handler const downloadPromise = page.waitForEvent('download', { timeout: 60000 }) // Step 1: Wait for export button to be enabled (it's disabled until all images are generated) const exportBtn = page.locator('button:has-text("导出")') await exportBtn.waitFor({ state: 'visible', timeout: 10000 }) await expect(exportBtn).toBeEnabled({ timeout: 5000 }) await exportBtn.first().click() console.log('✓ Clicked export button, opening menu...') // Wait for dropdown menu to appear await page.waitForTimeout(500) // Step 2: Click "导出为 PPTX" in the dropdown menu const exportPptxBtn = page.locator('button:has-text("导出为 PPTX")') await exportPptxBtn.waitFor({ state: 'visible', timeout: 5000 }) await exportPptxBtn.click() console.log('✓ Clicked "导出为 PPTX" button\n') // Wait for download to complete console.log('⏳ Waiting for PPT file download...') const download = await downloadPromise // Save file const downloadPath = path.join('test-results', 'e2e-test-output.pptx') await download.saveAs(downloadPath) // Verify file exists and is not empty const fileExists = fs.existsSync(downloadPath) expect(fileExists).toBeTruthy() const fileStats = fs.statSync(downloadPath) expect(fileStats.size).toBeGreaterThan(1000) // At least 1KB console.log(`✓ PPT file downloaded successfully!`) console.log(` Path: ${downloadPath}`) console.log(` Size: ${(fileStats.size / 1024).toFixed(2)} KB\n`) // Validate PPTX file content using python-pptx console.log('🔍 Validating PPTX file content...') const { execSync } = await import('child_process') const { fileURLToPath } = await import('url') try { // Get current directory (ES module compatible) const currentDir = path.dirname(fileURLToPath(import.meta.url)) const validateScript = path.join(currentDir, 'validate_pptx.py') const result = execSync( `python3 "${validateScript}" "${downloadPath}" 3 "人工智能" "AI"`, { encoding: 'utf-8', stdio: 'pipe' } ) console.log(`✓ 
${result.trim()}\n`)
    } catch (error: any) {
      console.warn(`⚠️ PPTX validation warning: ${error.stdout || error.message}`)
      console.log(' (Continuing test, but PPTX content validation had issues)\n')
    }

    // ====================================
    // Final verification
    // ====================================
    console.log('========================================')
    console.log('✅ Full E2E test completed!')
    console.log('========================================\n')

    // Final screenshot
    await page.screenshot({ path: 'test-results/e2e-final-state.png', fullPage: true })
  })
})

/**
 * Quick smoke test of the creation flow.
 *
 * It types an idea, submits it, and then accepts any of three outcomes as
 * evidence that the request was sent: navigation to the outline page, a
 * visible loading indicator, or (as a last resort) simply having waited.
 * It never waits for AI generation to finish.
 */
test.describe('UI E2E - Simplified (skip long waits)', () => {
  test.setTimeout(5 * 60 * 1000) // 5 minutes

  test('User flow verification: Only verify UI interactions, do not wait for AI generation', async ({ page }) => {
    console.log('\n🏃 Quick E2E test (verify UI flow, do not wait for generation)\n')

    // Visit homepage (prevent HelpModal from appearing)
    await page.addInitScript(() => {
      localStorage.setItem('hasSeenHelpModal', 'true')
    })
    await page.goto('http://localhost:3000')
    console.log('✓ Homepage loaded')

    // Ensure "一句话生成" tab is selected (it's selected by default)
    await page.click('button:has-text("一句话生成")').catch(() => {
      // If click fails, the tab might already be selected, which is fine
    })
    console.log('✓ Entered create page')

    // Wait for textarea to be visible (MarkdownTextarea uses contentEditable div with role="textbox")
    await page.waitForSelector('[role="textbox"], textarea', { timeout: 10000 })

    // Enter content
    const ideaInput = page.locator('[role="textbox"], textarea').first()
    await ideaInput.click()
    await ideaInput.pressSequentially('E2E test project')
    console.log('✓ Entered content')

    // Click generate
    await page.click('button:has-text("下一步")')
    console.log('✓ Submitted generation request')

    // Verify loading state appears or navigation happens (indicates request was sent)
    // For quick test, we can accept either loading state OR successful navigation
    try {
      // Option 1: Wait for navigation to outline page (most reliable)
      await page.waitForURL(/\/project\/.*\/outline/, { timeout: 10000 })
      console.log('✓ Navigation to outline page detected')
    } catch {
      // Option 2: Check for loading indicators
      try {
        await page.waitForSelector(
          '.animate-spin, button[disabled], div:has-text("加载"), div:has-text("生成中")',
          { timeout: 5000 }
        )
        console.log('✓ Loading state detected')
      } catch {
        // Option 3: Just wait a bit and assume request was sent
        // This is acceptable for a quick test that doesn't wait for completion
        await page.waitForTimeout(1000)
        console.log('✓ Request submitted (assuming success)')
      }
    }

    console.log('\n✅ UI flow verification passed!\n')
  })
})

================================================
FILE: frontend/e2e/upload-folder-path.spec.ts
================================================
/**
 * E2E test for UPLOAD_FOLDER path resolution fix (#287).
 *
 * Bug: ai_service.py used os.environ.get('UPLOAD_FOLDER', '') which always
 * returned '' because UPLOAD_FOLDER lives in Flask app.config, not env vars.
 * Fix: use get_config().UPLOAD_FOLDER instead.
 *
 * Test strategy:
 * 1. Upload a material image to a project
 * 2. Set page description referencing the material via /files/ path
 * 3. Trigger image generation (will fail at AI provider level — that's fine)
 * 4. Verify backend logs show the file was FOUND, not "Local file not found"
 */
import { test, expect } from '@playwright/test'
import * as fs from 'fs'
import * as path from 'path'

// Resolve the frontend directory whether the runner was started from the
// repository root or from inside `frontend/`.
const FRONTEND_DIR = process.cwd().endsWith('frontend') ? process.cwd() : path.join(process.cwd(), 'frontend')
const FIXTURES = path.join(FRONTEND_DIR, 'e2e', 'fixtures')
// Backend log file inspected by the test. If it does not exist, the log
// assertions below see an empty string and pass without checking anything.
const BACKEND_LOG = '/tmp/fix-upload-backend.log'

test.describe('UPLOAD_FOLDER path resolution (#287)', () => {
  test('material image referenced in description is resolved correctly during image generation', async ({
    request,
  }) => {
    // 1. Create a project
    const createResp = await request.post('/api/projects', {
      data: {
        creation_type: 'idea',
        idea_prompt: 'upload folder path test',
        template_style: 'default',
      },
    })
    if (!createResp.ok()) {
      test.skip(true, 'Backend unavailable')
      return
    }
    const projectId = (await createResp.json()).data?.project_id
    expect(projectId).toBeTruthy()

    // 2. Create a page
    const pageResp = await request.post(`/api/projects/${projectId}/pages`, {
      data: { order_index: 0, outline_content: { title: 'Test Slide' } },
    })
    expect(pageResp.ok()).toBe(true)
    const pageId = (await pageResp.json()).data?.page_id
    expect(pageId).toBeTruthy()

    // 3. Upload a material image
    const fixturePath = path.join(FIXTURES, 'slide_1.jpg')
    if (!fs.existsSync(fixturePath)) {
      test.skip(true, 'Fixture image not found')
      return
    }
    const fileBuffer = fs.readFileSync(fixturePath)
    const uploadResp = await request.post(
      `/api/projects/${projectId}/materials/upload`,
      { multipart: { file: { name: 'test-material.jpg', mimeType: 'image/jpeg', buffer: fileBuffer } } },
    )
    expect(uploadResp.ok()).toBe(true)
    const materialData = (await uploadResp.json()).data
    const materialPath: string = materialData?.relative_path || materialData?.file_path || ''
    expect(materialPath).toBeTruthy()

    // Build the /files/ URL that would appear in a description
    const filesUrl = materialPath.startsWith('/files/') ? materialPath : `/files/${materialPath}`

    // 4. Verify the material file is accessible via /files/ endpoint
    const fileResp = await request.get(filesUrl)
    expect(fileResp.ok()).toBe(true)

    // 5. Set page description with material reference
    const descResp = await request.put(
      `/api/projects/${projectId}/pages/${pageId}/description`,
      {
        data: {
          description_content: {
            title: 'Test Slide',
            text: `Use this reference image: ![material](${filesUrl})`,
            text_content: [`Use this reference image: ![material](${filesUrl})`],
            layout_suggestion: 'full-image',
          },
        },
      },
    )
    expect(descResp.ok()).toBe(true)

    // 6. Mark the log position before triggering generation
    const logBefore = fs.existsSync(BACKEND_LOG)
      ? fs.readFileSync(BACKEND_LOG, 'utf8').length
      : 0

    // 7. Trigger image generation (will fail at AI provider level — expected)
    const genResp = await request.post(
      `/api/projects/${projectId}/generate/images`,
      { data: { max_workers: 1 } },
    )
    expect(genResp.ok()).toBe(true)
    const taskId = (await genResp.json()).data?.task_id
    expect(taskId).toBeTruthy()

    // 8. Poll task until done (expect FAILED due to no AI provider)
    let taskStatus = 'PROCESSING'
    for (let i = 0; i < 30; i++) {
      await new Promise((r) => setTimeout(r, 1000))
      const taskResp = await request.get(
        `/api/projects/${projectId}/tasks/${taskId}`,
      )
      if (!taskResp.ok()) continue
      const task = (await taskResp.json()).data
      taskStatus = task?.status
      if (taskStatus === 'COMPLETED' || taskStatus === 'FAILED') break
    }

    // 9. Read new log lines and verify path resolution
    const logAfter = fs.existsSync(BACKEND_LOG)
      ? fs.readFileSync(BACKEND_LOG, 'utf8')
      : ''
    const newLogs = logAfter.slice(logBefore)
    const newLogLines = newLogs.split('\n')

    // The fix ensures the material file IS found — no "Local file not found" for our material
    const materialFilename = path.basename(materialPath)
    const fileNotFoundForMaterial = newLogLines.filter(
      (line) =>
        line.includes('Local file not found') && line.includes(materialFilename),
    )
    expect(
      fileNotFoundForMaterial,
      `Material file should be found by ai_service, but got "Local file not found" in logs`,
    ).toHaveLength(0)

    // Positive check: if the material filename appears in logs, a "Loaded" line
    // must exist for it. The previous form OR-ed this with
    // `!fileNotFoundForMaterial.length`, which is always true once the
    // assertion above has passed, so the check could never fail.
    if (newLogs.includes(materialFilename)) {
      const materialLoadedLine = newLogLines.some(
        (line) =>
          line.includes('Loaded image from local path') && line.includes(materialFilename),
      )
      expect(
        materialLoadedLine,
        `Material ${materialFilename} should be loaded, not missing`,
      ).toBe(true)
    }
  })
})

================================================
FILE:
frontend/e2e/ux-polish-i18n.spec.ts
================================================
import { test, expect, Page } from '@playwright/test';
import { seedProjectWithImages } from './helpers/seed-project';

const BASE = process.env.BASE_URL || 'http://localhost:3000';

/**
 * Mock test: Verify disabled button tooltips and i18n strings
 * via page.route() without hitting a real backend.
 *
 * Route handlers are async and await `route.fulfill()` / `route.continue()`
 * so that a rejection (for example when the page closes mid-request) is
 * reported through the handler instead of becoming an unhandled rejection.
 */
test.describe('UX Polish – disabled button tooltips (mock)', () => {
  test('export button shows tooltip when images are missing', async ({ page }) => {
    // Set English locale to verify i18n tooltip content
    await page.addInitScript(() => {
      localStorage.setItem('banana-slides-language', 'en');
    });

    // Mock project with pages that have NO generated images
    const mockProject = {
      data: {
        id: 'proj-1',
        project_id: 'proj-1',
        creation_type: 'idea',
        idea_prompt: 'Test',
        pages: [
          { id: 'p1', order_index: 0, outline_content: { title: 'Page 1', points: [] }, description_content: { text: 'desc' } },
          { id: 'p2', order_index: 1, outline_content: { title: 'Page 2', points: [] }, description_content: { text: 'desc' } },
        ],
      },
    };

    await page.route('**/api/projects/proj-1', async (route) => {
      await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(mockProject) });
    });

    await page.goto(`${BASE}/project/proj-1/preview`);
    // Best effort: the assertion below is conditional, so a missing page title is tolerated.
    await page.waitForSelector('text=Page 1', { timeout: 5000 }).catch(() => {});

    // The export button should be disabled and have a title attribute explaining why
    const exportBtn = page.locator('button:has-text("PPTX")').first();
    if (await exportBtn.count() > 0) {
      const title = await exportBtn.getAttribute('title');
      // Should have the English tooltip explaining why export is disabled
      expect(title).toContain('no images yet');
    }
  });

  test('next button shows tooltip when descriptions are missing in detail editor', async ({ page }) => {
    // Set English locale to verify i18n tooltip content
    await page.addInitScript(() => {
      localStorage.setItem('banana-slides-language', 'en');
    });

    const mockProject = {
      data: {
        id: 'proj-2',
        project_id: 'proj-2',
        creation_type: 'idea',
        idea_prompt: 'Test',
        pages: [
          { id: 'p1', order_index: 0, outline_content: { title: 'Page 1', points: [] } },
          { id: 'p2', order_index: 1, outline_content: { title: 'Page 2', points: [] }, description_content: { text: 'has desc' } },
        ],
      },
    };

    await page.route('**/api/projects/proj-2', async (route) => {
      await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(mockProject) });
    });

    await page.goto(`${BASE}/project/proj-2/detail`);
    await page.waitForSelector('text=Page 1', { timeout: 5000 }).catch(() => {});

    // Target the "Generate Images" / next-step button specifically (not the AI refine submit)
    const nextBtn = page.locator('button[title*="descriptions"]').first();
    if (await nextBtn.count() > 0) {
      const title = await nextBtn.getAttribute('title');
      expect(title).toContain('missing descriptions');
    }
  });
});

test.describe('UX Polish – i18n strings (mock)', () => {
  test('project status text uses i18n (not hardcoded Chinese)', async ({ page }) => {
    // Set English locale
    await page.addInitScript(() => {
      localStorage.setItem('banana-slides-language', 'en');
    });

    const mockProjects = {
      data: {
        projects: [
          {
            id: 'proj-en-1',
            project_id: 'proj-en-1',
            creation_type: 'idea',
            idea_prompt: 'English test',
            pages: [
              { id: 'p1', page_id: 'p1', order_index: 0, outline_content: { title: 'Slide 1', points: [] }, description_content: { text: 'desc' }, generated_image_url: '/img.png' },
            ],
            created_at: '2026-01-01T00:00:00Z',
            updated_at: '2026-01-01T00:00:00Z',
          },
        ],
        total: 1,
      },
    };

    // Only GET requests are mocked; everything else goes to the network.
    await page.route('**/api/projects**', async (route) => {
      if (route.request().method() === 'GET') {
        await route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify(mockProjects) });
      } else {
        await route.continue();
      }
    });

    await page.goto(`${BASE}/history`);
    await expect(page.locator('text=Slide 1')).toBeVisible({ timeout: 5000 });

    // The status badge should show English text, not Chinese
    const pageContent = await page.textContent('body');
    // In English mode, status should be "Completed" not "已完成"
    expect(pageContent).toContain('Completed');
    expect(pageContent).not.toContain('已完成');
  });

  test('settings page error messages use i18n', async ({ page }) => {
    // Set English locale
    await page.addInitScript(() => {
      localStorage.setItem('banana-slides-language', 'en');
    });

    // Mock settings endpoint to fail
    await page.route('**/api/settings', async (route) => {
      if (route.request().method() === 'GET') {
        await route.fulfill({ status: 500, contentType: 'application/json', body: JSON.stringify({ error: { message: 'Server error' } }) });
      } else {
        await route.continue();
      }
    });

    await page.goto(`${BASE}/settings`);
    // Wait for the settings page to render (heading appears even on error)
    await expect(page.locator('h1, h2').first()).toBeVisible({ timeout: 5000 });

    // The error toast should show English text
    const toastText = await page.textContent('body');
    // Should NOT contain hardcoded Chinese error like "加载设置失败"
    expect(toastText).not.toContain('加载设置失败');
  });
});

/**
 * Integration test: Verify i18n works with real backend
 */
test.describe('UX Polish – integration', () => {
  test('settings page loads without hardcoded Chinese in English mode', async ({ page }) => {
    await page.addInitScript(() => {
      localStorage.setItem('banana-slides-language', 'en');
    });

    await page.goto(`${BASE}/settings`);

    // Check that the page title is in English. The heading was just asserted
    // visible, so no extra existence guard is needed before reading its text.
    const heading = page.locator('h1, h2').first();
    await expect(heading).toBeVisible({ timeout: 5000 });
    const text = await heading.textContent();
    expect(text).toContain('Settings');

    // Check that action buttons are in English
    const saveBtn = page.locator('button:has-text("Save")').first();
    if (await saveBtn.count() > 0) {
      expect(await saveBtn.textContent()).toContain('Save');
    }
  });
});

================================================
FILE: frontend/e2e/visual-regression.spec.ts
================================================
/**
 * Visual Regression Tests
 *
 * Tests critical UI components for visual regressions using screenshot comparison.
 *
 * Note: First run will create baseline screenshots. Subsequent runs will compare against baselines.
 *
 * To update baselines: npx playwright test visual-regression.spec.ts --update-snapshots
 */

import { test, expect, type Page } from '@playwright/test'

/**
 * Navigates to `url` and waits for the network to settle.
 *
 * If navigation itself fails, the current test is skipped. Screenshot
 * assertions are deliberately kept OUTSIDE this guard: previously they sat
 * inside the same try/catch, so a screenshot mismatch was caught and turned
 * into a skip instead of a failure.
 *
 * @param page - Playwright page to navigate.
 * @param url - Absolute URL of the page under test.
 */
async function gotoOrSkip(page: Page, url: string): Promise<void> {
  try {
    await page.goto(url)
    await page.waitForLoadState('networkidle')
  } catch {
    test.skip(true, `Page not reachable: ${url}`)
  }
}

test.describe('Visual Regression Tests', () => {
  test.beforeEach(async ({ page }) => {
    // Navigate to the app
    await page.goto('http://localhost:3000')
  })

  test('Homepage visual regression', async ({ page }) => {
    // Wait for page to fully load
    await page.waitForLoadState('networkidle')

    // Take screenshot of homepage
    await expect(page).toHaveScreenshot('homepage.png', {
      fullPage: true,
      maxDiffPixels: 100, // Allow small differences
    })
  })

  test('SlidePreview component visual regression', async ({ page }) => {
    // This test requires a project to exist
    // Note: the route may need to be adjusted, and a test project created first
    await gotoOrSkip(page, 'http://localhost:3000/project/test-project-id/preview')

    // Take screenshot of SlidePreview component
    const slidePreview = page.locator('.slide-preview, [data-testid="slide-preview"]').first()
    if (await slidePreview.count() > 0) {
      await expect(slidePreview).toHaveScreenshot('slide-preview.png', {
        maxDiffPixels: 200,
      })
    } else {
      // If component not found, take full page screenshot
      await expect(page).toHaveScreenshot('slide-preview-page.png', {
        fullPage: true,
        maxDiffPixels: 200,
      })
    }
  })

  test('Outline Editor visual regression', async ({ page }) => {
    // Navigate to outline editor
    await gotoOrSkip(page, 'http://localhost:3000/project/test-project-id/outline')

    // Take screenshot of outline editor
    await expect(page).toHaveScreenshot('outline-editor.png', {
      fullPage: true,
      maxDiffPixels: 200,
    })
  })

  test('Description Editor visual regression', async ({ page }) => {
    // Navigate to description editor
    await gotoOrSkip(page, 'http://localhost:3000/project/test-project-id/detail')

    // Take screenshot of description editor
    await expect(page).toHaveScreenshot('description-editor.png', {
      fullPage: true,
      maxDiffPixels: 200,
    })
  })

  test('Loading states visual regression', async ({ page }) => {
    // Test loading spinner/state
    await page.goto('http://localhost:3000')

    // Trigger a loading state (e.g., click create button)
    // Ensure "一句话生成" tab is selected (it's selected by default)
    const createButton = page.locator('button:has-text("一句话生成")')
    if (await createButton.count() > 0) {
      await createButton.click().catch(() => {
        // If click fails, the tab might already be selected, which is fine
      })

      // Wait for loading state to appear
      const loadingIndicator = page.locator('.loading, .spinner, [data-loading="true"]')
      if (await loadingIndicator.count() > 0) {
        await expect(loadingIndicator.first()).toHaveScreenshot('loading-state.png', {
          maxDiffPixels: 50,
        })
      }
    }
  })
})

================================================
FILE: frontend/index.html
================================================
蕉幻 | AI 原生 PPT 生成器
================================================ FILE: frontend/nginx.conf ================================================ server { listen 80; server_name localhost; root /usr/share/nginx/html; index index.html; # 允许上传大文件(解决413错误) client_max_body_size 50M; # Gzip 压缩 gzip on; gzip_vary on; gzip_min_length 1024; gzip_types text/plain text/css text/xml text/javascript application/x-javascript application/xml+rss application/json; # 前端路由支持(SPA) location / { try_files $uri $uri/ /index.html; } # API 代理到后端 - 使用 ^~ 确保优先匹配 location ^~ /api { proxy_pass http://backend:5000; proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection 'upgrade'; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; proxy_cache_bypass $http_upgrade; proxy_read_timeout 300s; proxy_connect_timeout 300s; } # 文件服务代理 - 使用 ^~ 确保优先匹配,阻止后续正则匹配 location ^~ /files { proxy_pass http://backend:5000; proxy_http_version 1.1; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; proxy_read_timeout 300s; proxy_connect_timeout 300s; # 不缓存动态文件 add_header Cache-Control "no-cache, no-store, must-revalidate"; } # 健康检查端点 location /health { proxy_pass http://backend:5000/health; proxy_http_version 1.1; proxy_set_header Host $host; } # 静态资源缓存 - 只匹配前端静态资源,不匹配 /files 和 /api location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ { expires 1y; add_header Cache-Control "public, immutable"; } } ================================================ FILE: frontend/package.json ================================================ { "name": "banana-slides-frontend", "private": true, "version": "0.1.0", "type": "module", "scripts": { "dev": "vite", "build": "vite build", "build:check": "tsc && vite build", "lint": "eslint . 
--ext ts,tsx --report-unused-disable-directives --max-warnings 20", "lint:strict": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", "preview": "vite preview", "test": "vitest", "test:run": "vitest run", "test:coverage": "vitest run --coverage", "test:ui": "vitest --ui", "test:e2e": "playwright test", "test:e2e:ui": "playwright test --ui", "test:e2e:headed": "playwright test --headed" }, "dependencies": { "@dnd-kit/core": "^6.1.0", "@dnd-kit/sortable": "^8.0.0", "@dnd-kit/utilities": "^3.2.2", "axios": "^1.6.2", "clsx": "^2.0.0", "i18next": "^25.8.0", "i18next-browser-languagedetector": "^8.2.0", "katex": "^0.16.28", "lucide-react": "^0.294.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-i18next": "^16.5.4", "react-markdown": "^9.0.1", "react-router-dom": "^6.20.0", "rehype-katex": "^7.0.1", "rehype-raw": "^7.0.0", "rehype-sanitize": "^6.0.0", "remark-breaks": "^4.0.0", "remark-gfm": "^4.0.0", "remark-math": "^6.0.0", "tailwind-merge": "^2.1.0", "zustand": "^4.4.7" }, "devDependencies": { "@playwright/test": "^1.40.1", "@testing-library/jest-dom": "^6.1.5", "@testing-library/react": "^14.1.2", "@testing-library/user-event": "^14.5.1", "@types/node": "^25.0.1", "@types/react": "^18.2.43", "@types/react-dom": "^18.2.17", "@typescript-eslint/eslint-plugin": "^6.14.0", "@typescript-eslint/parser": "^6.14.0", "@vitejs/plugin-react": "^4.2.1", "@vitest/coverage-v8": "^1.1.0", "@vitest/ui": "^1.1.0", "autoprefixer": "^10.4.16", "eslint": "^8.55.0", "eslint-plugin-react-hooks": "^4.6.0", "eslint-plugin-react-refresh": "^0.4.5", "jsdom": "^23.0.1", "postcss": "^8.4.32", "tailwindcss": "^3.3.6", "typescript": "^5.2.2", "vite": "^5.0.8", "vitest": "^1.1.0" } } ================================================ FILE: frontend/playwright.config.ts ================================================ import { defineConfig, devices } from '@playwright/test' /** * Playwright E2E测试配置 - 前端 UI 测试 * * @see https://playwright.dev/docs/test-configuration */ 
export default defineConfig({ // 测试目录 testDir: './e2e', // 测试文件匹配模式 testMatch: '**/*.spec.ts', // 并行运行测试 fullyParallel: true, // Fail the CI run if a test.only / describe.only was left in the source forbidOnly: !!process.env.CI, // 失败不重试 retries: 0, // 并行worker数量 workers: process.env.CI ? 1 : undefined, // 测试报告 reporter: [ ['html', { outputFolder: 'playwright-report' }], ['list'], ...(process.env.CI ? [['github'] as const] : []), ], // 全局设置 use: { // 基础URL baseURL: process.env.BASE_URL || 'http://localhost:3000', // 截图设置 screenshot: 'only-on-failure', // 视频设置 video: 'retain-on-failure', // 追踪设置 trace: 'retain-on-failure', // 浏览器语言设置(E2E测试使用中文,匹配选择器) locale: 'zh-CN', // 超时设置 actionTimeout: 15000, navigationTimeout: 30000, }, // 全局超时 timeout: 60000, // 预期超时 expect: { timeout: 10000, }, // 项目配置(多浏览器测试) projects: [ { name: 'chromium', use: { ...devices['Desktop Chrome'] }, }, ], // 本地开发时启动服务 webServer: process.env.CI ? undefined : { command: 'cd .. && docker compose up -d && sleep 10', url: 'http://localhost:3000', reuseExistingServer: !process.env.CI, timeout: 120000, }, }) ================================================ FILE: frontend/postcss.config.js ================================================ export default { plugins: { tailwindcss: {}, autoprefixer: {}, }, } ================================================ FILE: frontend/src/App.tsx ================================================ import { useEffect } from 'react'; import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom'; import { Home } from './pages/Home'; import { Landing } from './pages/Landing'; import { History } from './pages/History'; import { OutlineEditor } from './pages/OutlineEditor'; import { DetailEditor } from './pages/DetailEditor'; import { SlidePreview } from './pages/SlidePreview'; import { SettingsPage } from './pages/Settings'; import { useProjectStore } from './store/useProjectStore'; import { useToast, AccessCodeGuard } from './components/shared'; function App() { const { currentProject, syncProject, error, setError } = 
useProjectStore(); const { show, ToastContainer } = useToast(); // 恢复项目状态 useEffect(() => { const savedProjectId = localStorage.getItem('currentProjectId'); if (savedProjectId && !currentProject) { syncProject(); } }, [currentProject, syncProject]); // 显示全局错误 useEffect(() => { if (error) { show({ message: error, type: 'error' }); setError(null); } }, [error, setError, show]); return ( } /> } /> } /> } /> } /> } /> } /> } /> ); } export default App; ================================================ FILE: frontend/src/api/client.ts ================================================ import axios from 'axios'; // 开发环境:通过 Vite proxy 转发 // 生产环境:通过 nginx proxy 转发 const API_BASE_URL = ''; // 创建 axios 实例 export const apiClient = axios.create({ baseURL: API_BASE_URL, timeout: 300000, // 5分钟超时(AI生成可能很慢) }); // 请求拦截器 apiClient.interceptors.request.use( (config) => { // Attach access code header for backend enforcement const accessCode = localStorage.getItem('banana-access-code'); if (accessCode && config.headers) { config.headers['X-Access-Code'] = accessCode; } // 如果请求体是 FormData,删除 Content-Type 让浏览器自动设置 // 浏览器会自动添加正确的 Content-Type 和 boundary if (config.data instanceof FormData) { // 不设置 Content-Type,让浏览器自动处理 if (config.headers) { delete config.headers['Content-Type']; } } else if (config.headers && !config.headers['Content-Type']) { // 对于非 FormData 请求,默认设置为 JSON config.headers['Content-Type'] = 'application/json'; } return config; }, (error) => { return Promise.reject(error); } ); // 响应拦截器 apiClient.interceptors.response.use( (response) => { return response; }, (error) => { // 统一错误处理 if (error.response) { // 服务器返回错误状态码 console.error('API Error:', error.response.data); } else if (error.request) { // 请求已发送但没有收到响应 console.error('Network Error:', error.request); } else { // 其他错误 console.error('Error:', error.message); } return Promise.reject(error); } );

/**
 * Builds the URL used to load an image through the dev/nginx proxy.
 *
 * @param path Image path or URL. Empty/undefined yields `''`; absolute
 *   `http://` / `https://` URLs are returned unchanged.
 * @param timestamp Optional cache-busting version. An ISO-like string is
 *   converted to epoch milliseconds; a number is used as-is. Falsy values
 *   (undefined, `''`, `0`) add no parameter.
 * @returns A root-relative URL, with a `v=<version>` query parameter when a
 *   timestamp was supplied.
 */
export const getImageUrl = (path?: string, timestamp?: string | number): string => {
  if (!path) return '';

  // Already a complete URL: hand it back untouched.
  if (path.startsWith('http://') || path.startsWith('https://')) {
    return path;
  }

  // Relative path, always rooted at "/" so the proxy can forward it.
  let url = path.startsWith('/') ? path : '/' + path;

  // Append the version parameter only when a timestamp was provided.
  if (timestamp) {
    const parsed = typeof timestamp === 'string' ? new Date(timestamp).getTime() : timestamp;
    // A string the Date parser rejects would otherwise become the constant
    // "NaN" and stop busting the cache; fall back to the raw (escaped) string
    // so distinct timestamps still produce distinct URLs.
    const version = Number.isNaN(parsed) ? encodeURIComponent(String(timestamp)) : parsed;
    // Respect a query string that is already part of the path.
    const separator = url.includes('?') ? '&' : '?';
    url += `${separator}v=${version}`;
  }

  return url;
};

export default apiClient; ================================================ FILE: frontend/src/api/endpoints.ts ================================================ import { apiClient } from './client'; import type { Project, Task, ApiResponse, CreateProjectRequest, Page } from '@/types'; import type { Settings } from '../types/index'; // ===== 访问口令 API ===== export const checkAccessCode = async (): Promise> => { const response = await apiClient.get>('/api/access-code/check'); return response.data; }; export const verifyAccessCode = async (code: string): Promise> => { const response = await apiClient.post>('/api/access-code/verify', { code }); return response.data; }; // ===== 项目相关 API ===== /** * 创建项目 */ export const createProject = async (data: CreateProjectRequest): Promise> => { // 根据输入类型确定 creation_type let creation_type = 'idea'; if (data.description_text) { creation_type = 'descriptions'; } else if (data.outline_text) { creation_type = 'outline'; } const response = await apiClient.post>('/api/projects', { creation_type, idea_prompt: data.idea_prompt, outline_text: data.outline_text, description_text: data.description_text, template_style: data.template_style, image_aspect_ratio: data.image_aspect_ratio, }); return response.data; }; /** * 上传模板图片 */ export const uploadTemplate = async ( projectId: string, templateImage: File ): Promise> => { const formData = new FormData(); formData.append('template_image', templateImage); const response = await apiClient.post>( `/api/projects/${projectId}/template`, formData ); return response.data; }; /** * 获取项目列表(历史项目) */ export const listProjects = async (limit?: number, offset?:
number): Promise> => { const params = new URLSearchParams(); if (limit !== undefined) params.append('limit', limit.toString()); if (offset !== undefined) params.append('offset', offset.toString()); const queryString = params.toString(); const url = `/api/projects${queryString ? `?${queryString}` : ''}`; const response = await apiClient.get>(url); return response.data; }; /** * 获取项目详情 */ export const getProject = async (projectId: string): Promise> => { const response = await apiClient.get>(`/api/projects/${projectId}`); return response.data; }; /** * 删除项目 */ export const deleteProject = async (projectId: string): Promise => { const response = await apiClient.delete(`/api/projects/${projectId}`); return response.data; }; /** * 更新项目 */ export const updateProject = async ( projectId: string, data: Partial ): Promise> => { const response = await apiClient.put>(`/api/projects/${projectId}`, data); return response.data; }; /** * 更新页面顺序 */ export const updatePagesOrder = async ( projectId: string, pageIds: string[] ): Promise> => { const response = await apiClient.put>( `/api/projects/${projectId}`, { pages_order: pageIds } ); return response.data; }; // ===== 大纲生成 ===== /** * 生成大纲 * @param projectId 项目ID * @param language 输出语言(可选,默认从 sessionStorage 获取) */ export const generateOutline = async (projectId: string, language?: OutputLanguage): Promise => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.post( `/api/projects/${projectId}/generate/outline`, { language: lang } ); return response.data; }; /** * 流式生成大纲(SSE) * 返回 ReadableStream,每个 page 事件包含一个页面对象 */ export interface OutlineStreamPage { index: number; title: string; points: string[]; part?: string; } export interface OutlineStreamCallbacks { onPage: (page: OutlineStreamPage) => void; onDone: (data: { total: number; pages: Page[] }) => void; onError: (message: string) => void; }

/**
 * Streams outline generation for a project over Server-Sent Events.
 *
 * POSTs to `/api/projects/{projectId}/generate/outline/stream` with `fetch`
 * (the stored access code, if any, is sent as `X-Access-Code`) and parses the
 * response body as an SSE stream. Nothing is returned; results are delivered
 * through `callbacks`:
 * - `page`  events  -> `callbacks.onPage`
 * - `done`  events  -> `callbacks.onDone`
 * - `error` events  -> `callbacks.onError(parsed.message)`
 * - a non-OK response or a missing body -> `callbacks.onError('HTTP <status>')`
 *
 * Events whose payload is not valid JSON are skipped. Exceptions thrown while
 * handling an event (including inside a callback) are swallowed, as before.
 * Failures of `fetch` or of reading the stream reject the returned promise.
 *
 * @param projectId Project whose outline is generated.
 * @param callbacks Receivers for streamed pages, completion and errors.
 * @param language Output language; when omitted it is loaded through
 *   `getStoredOutputLanguage()`.
 * @param lockPageCount Forwarded to the backend as `lock_page_count`.
 */
export const generateOutlineStream = async (
  projectId: string,
  callbacks: OutlineStreamCallbacks,
  language?: OutputLanguage,
  lockPageCount?: boolean,
): Promise<void> => {
  const lang = language || await getStoredOutputLanguage();
  const accessCode = localStorage.getItem('banana-access-code');
  const response = await fetch(`/api/projects/${projectId}/generate/outline/stream`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      ...(accessCode ? { 'X-Access-Code': accessCode } : {}),
    },
    body: JSON.stringify({ language: lang, lock_page_count: lockPageCount }),
  });

  if (!response.ok || !response.body) {
    callbacks.onError(`HTTP ${response.status}`);
    return;
  }

  // Returns the value of `name:` on this line, or undefined if the line is a
  // different field. SSE allows the single space after the colon to be absent.
  const readField = (line: string, name: string): string | undefined => {
    if (!line.startsWith(`${name}:`)) return undefined;
    const value = line.slice(name.length + 1);
    return value.startsWith(' ') ? value.slice(1) : value;
  };

  // Parses one complete SSE event (the text between blank lines) and routes it.
  const dispatchEvent = (rawEvent: string): void => {
    let eventType = '';
    const dataLines: string[] = [];
    for (const line of rawEvent.split('\n')) {
      const eventField = readField(line, 'event');
      if (eventField !== undefined) {
        eventType = eventField;
        continue;
      }
      const dataField = readField(line, 'data');
      if (dataField !== undefined) dataLines.push(dataField);
    }
    // Per the SSE format, consecutive data lines form one payload joined by "\n".
    const eventData = dataLines.join('\n');
    if (!eventType || !eventData) return;
    try {
      const parsed = JSON.parse(eventData);
      if (eventType === 'page') callbacks.onPage(parsed);
      else if (eventType === 'done') callbacks.onDone(parsed);
      else if (eventType === 'error') callbacks.onError(parsed.message);
    } catch {
      // Skip malformed events
    }
  };

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  for (;;) {
    const { done, value } = await reader.read();
    // An event not yet terminated by a blank line at end of stream is dropped.
    if (done) break;
    // Normalise CRLF so events from servers/proxies using "\r\n" are recognised.
    // A "\r" split from its "\n" by a chunk boundary stays in the buffer and is
    // normalised once the next chunk arrives.
    buffer = (buffer + decoder.decode(value, { stream: true })).replace(/\r\n/g, '\n');
    const events = buffer.split('\n\n');
    buffer = events.pop() ?? '';
    events.forEach(dispatchEvent);
  }
};

// ===== 描述生成 ===== /** * 从描述文本生成大纲和页面描述(一次性完成) * @param projectId 项目ID * @param descriptionText 描述文本(可选) * @param language 输出语言(可选,默认从 sessionStorage 获取) */ export const generateFromDescription = async (projectId: string, descriptionText?: string, language?: OutputLanguage): Promise => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.post( `/api/projects/${projectId}/generate/from-description`, { ...(descriptionText ?
{ description_text: descriptionText } : {}), language: lang } ); return response.data; }; /** * 批量生成描述(并行模式) * @param projectId 项目ID * @param language 输出语言(可选,默认从 sessionStorage 获取) */ export const generateDescriptions = async (projectId: string, language?: OutputLanguage, detailLevel?: string): Promise => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.post( `/api/projects/${projectId}/generate/descriptions`, { language: lang, detail_level: detailLevel || 'default' } ); return response.data; }; /** * 流式生成描述(SSE) */ export interface DescriptionStreamEvent { page_index: number; page_id: string; text: string; extra_fields?: Record; } export interface DescriptionStreamCallbacks { onDescription: (data: DescriptionStreamEvent) => void; onDone: (data: { total: number; pages: Page[] }) => void; onError: (message: string) => void; }

/**
 * Streams page-description generation for a project over Server-Sent Events.
 *
 * POSTs to `/api/projects/{projectId}/generate/descriptions/stream` with
 * `fetch` (the stored access code, if any, is sent as `X-Access-Code`) and
 * parses the response body as an SSE stream. Nothing is returned; results are
 * delivered through `callbacks`:
 * - `description` events -> `callbacks.onDescription`
 * - `done`        events -> `callbacks.onDone`
 * - `error`       events -> `callbacks.onError(parsed.message)`
 * - a non-OK response or a missing body -> `callbacks.onError('HTTP <status>')`
 *
 * Events whose payload is not valid JSON are skipped. Exceptions thrown while
 * handling an event (including inside a callback) are swallowed, as before.
 * Failures of `fetch` or of reading the stream reject the returned promise.
 *
 * @param projectId Project whose page descriptions are generated.
 * @param callbacks Receivers for streamed descriptions, completion and errors.
 * @param language Output language; when omitted it is loaded through
 *   `getStoredOutputLanguage()`.
 * @param detailLevel Sent as `detail_level`; defaults to `'default'`.
 */
export const generateDescriptionsStream = async (
  projectId: string,
  callbacks: DescriptionStreamCallbacks,
  language?: OutputLanguage,
  detailLevel?: string,
): Promise<void> => {
  const lang = language || await getStoredOutputLanguage();
  const accessCode = localStorage.getItem('banana-access-code');
  const response = await fetch(`/api/projects/${projectId}/generate/descriptions/stream`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      ...(accessCode ? { 'X-Access-Code': accessCode } : {}),
    },
    body: JSON.stringify({ language: lang, detail_level: detailLevel || 'default' }),
  });

  if (!response.ok || !response.body) {
    callbacks.onError(`HTTP ${response.status}`);
    return;
  }

  // Returns the value of `name:` on this line, or undefined if the line is a
  // different field. SSE allows the single space after the colon to be absent.
  const readField = (line: string, name: string): string | undefined => {
    if (!line.startsWith(`${name}:`)) return undefined;
    const value = line.slice(name.length + 1);
    return value.startsWith(' ') ? value.slice(1) : value;
  };

  // Parses one complete SSE event (the text between blank lines) and routes it.
  const dispatchEvent = (rawEvent: string): void => {
    let eventType = '';
    const dataLines: string[] = [];
    for (const line of rawEvent.split('\n')) {
      const eventField = readField(line, 'event');
      if (eventField !== undefined) {
        eventType = eventField;
        continue;
      }
      const dataField = readField(line, 'data');
      if (dataField !== undefined) dataLines.push(dataField);
    }
    // Per the SSE format, consecutive data lines form one payload joined by "\n".
    const eventData = dataLines.join('\n');
    if (!eventType || !eventData) return;
    try {
      const parsed = JSON.parse(eventData);
      if (eventType === 'description') callbacks.onDescription(parsed);
      else if (eventType === 'done') callbacks.onDone(parsed);
      else if (eventType === 'error') callbacks.onError(parsed.message);
    } catch {
      // Skip malformed events
    }
  };

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  for (;;) {
    const { done, value } = await reader.read();
    // An event not yet terminated by a blank line at end of stream is dropped.
    if (done) break;
    // Normalise CRLF so events from servers/proxies using "\r\n" are recognised.
    // A "\r" split from its "\n" by a chunk boundary stays in the buffer and is
    // normalised once the next chunk arrives.
    buffer = (buffer + decoder.decode(value, { stream: true })).replace(/\r\n/g, '\n');
    const events = buffer.split('\n\n');
    buffer = events.pop() ?? '';
    events.forEach(dispatchEvent);
  }
};

/** * 生成单页描述 */ export const generatePageDescription = async ( projectId: string, pageId: string, forceRegenerate: boolean = false, language?: OutputLanguage, detailLevel?: string ): Promise => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.post( `/api/projects/${projectId}/pages/${pageId}/generate/description`, { force_regenerate: forceRegenerate, language: lang, detail_level: detailLevel || 'default' } ); return response.data; }; /** * 重新生成 PPT 翻新项目的单页(重新解析原 PDF 并提取内容) */ export const regenerateRenovationPage = async ( projectId: string, pageId: string, keepLayout: boolean = false, language?: OutputLanguage ): Promise => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.post( `/api/projects/${projectId}/pages/${pageId}/regenerate-renovation`, { keep_layout:
keepLayout, language: lang } ); return response.data; }; /** * 根据用户要求修改大纲 * @param projectId 项目ID * @param userRequirement 用户要求 * @param previousRequirements 历史要求(可选) * @param language 输出语言(可选,默认从 sessionStorage 获取) */ export const refineOutline = async ( projectId: string, userRequirement: string, previousRequirements?: string[], language?: OutputLanguage ): Promise> => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.post>( `/api/projects/${projectId}/refine/outline`, { user_requirement: userRequirement, previous_requirements: previousRequirements || [], language: lang } ); return response.data; }; /** * 根据用户要求修改页面描述 * @param projectId 项目ID * @param userRequirement 用户要求 * @param previousRequirements 历史要求(可选) * @param language 输出语言(可选,默认从 sessionStorage 获取) */ export const refineDescriptions = async ( projectId: string, userRequirement: string, previousRequirements?: string[], language?: OutputLanguage ): Promise> => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.post>( `/api/projects/${projectId}/refine/descriptions`, { user_requirement: userRequirement, previous_requirements: previousRequirements || [], language: lang } ); return response.data; }; // ===== 图片生成 ===== /** * 批量生成图片 * @param projectId 项目ID * @param language 输出语言(可选,默认从 sessionStorage 获取) * @param pageIds 可选的页面ID列表,如果不提供则生成所有页面 */ export const generateImages = async (projectId: string, language?: OutputLanguage, pageIds?: string[]): Promise => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.post( `/api/projects/${projectId}/generate/images`, { language: lang, page_ids: pageIds } ); return response.data; }; /** * 生成单页图片 */ export const generatePageImage = async ( projectId: string, pageId: string, forceRegenerate: boolean = false, language?: OutputLanguage ): Promise => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.post( 
`/api/projects/${projectId}/pages/${pageId}/generate/image`, { force_regenerate: forceRegenerate, language: lang } ); return response.data; }; /** * 编辑图片(自然语言修改) */ export const editPageImage = async ( projectId: string, pageId: string, editPrompt: string, contextImages?: { useTemplate?: boolean; descImageUrls?: string[]; uploadedFiles?: File[]; } ): Promise => { // 如果有上传的文件,使用 multipart/form-data if (contextImages?.uploadedFiles && contextImages.uploadedFiles.length > 0) { const formData = new FormData(); formData.append('edit_instruction', editPrompt); formData.append('use_template', String(contextImages.useTemplate || false)); if (contextImages.descImageUrls && contextImages.descImageUrls.length > 0) { formData.append('desc_image_urls', JSON.stringify(contextImages.descImageUrls)); } // 添加上传的文件 contextImages.uploadedFiles.forEach((file) => { formData.append('context_images', file); }); const response = await apiClient.post( `/api/projects/${projectId}/pages/${pageId}/edit/image`, formData ); return response.data; } else { // 使用 JSON const response = await apiClient.post( `/api/projects/${projectId}/pages/${pageId}/edit/image`, { edit_instruction: editPrompt, context_images: { use_template: contextImages?.useTemplate || false, desc_image_urls: contextImages?.descImageUrls || [], }, } ); return response.data; } }; /** * 获取页面图片历史版本 */ export const getPageImageVersions = async ( projectId: string, pageId: string ): Promise> => { const response = await apiClient.get>( `/api/projects/${projectId}/pages/${pageId}/image-versions` ); return response.data; }; /** * 设置当前使用的图片版本 */ export const setCurrentImageVersion = async ( projectId: string, pageId: string, versionId: string ): Promise => { const response = await apiClient.post( `/api/projects/${projectId}/pages/${pageId}/image-versions/${versionId}/set-current` ); return response.data; }; // ===== 页面操作 ===== /** * 更新页面 */ export const updatePage = async ( projectId: string, pageId: string, data: Partial ): Promise> => 
{ const response = await apiClient.put>( `/api/projects/${projectId}/pages/${pageId}`, data ); return response.data; }; /** * 更新页面描述 */ export const updatePageDescription = async ( projectId: string, pageId: string, descriptionContent: any, language?: OutputLanguage ): Promise> => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.put>( `/api/projects/${projectId}/pages/${pageId}/description`, { description_content: descriptionContent, language: lang } ); return response.data; }; /** * 更新页面大纲 */ export const updatePageOutline = async ( projectId: string, pageId: string, outlineContent: any, language?: OutputLanguage ): Promise> => { const lang = language || await getStoredOutputLanguage(); const response = await apiClient.put>( `/api/projects/${projectId}/pages/${pageId}/outline`, { outline_content: outlineContent, language: lang } ); return response.data; }; /** * 删除页面 */ export const deletePage = async (projectId: string, pageId: string): Promise => { const response = await apiClient.delete( `/api/projects/${projectId}/pages/${pageId}` ); return response.data; }; /** * 添加页面 */ export const addPage = async (projectId: string, data: Partial): Promise> => { const response = await apiClient.post>( `/api/projects/${projectId}/pages`, data ); return response.data; }; // ===== 任务查询 ===== /** * 查询任务状态 */ export const getTaskStatus = async (projectId: string, taskId: string): Promise> => { const response = await apiClient.get>(`/api/projects/${projectId}/tasks/${taskId}`); return response.data; }; // ===== 导出 ===== /** * Helper function to build query string with page_ids */ const buildPageIdsQuery = (pageIds?: string[]): string => { if (!pageIds || pageIds.length === 0) return ''; const params = new URLSearchParams(); params.set('page_ids', pageIds.join(',')); return `?${params.toString()}`; }; /** * 导出为PPTX * @param projectId 项目ID * @param pageIds 可选的页面ID列表,如果不提供则导出所有页面 */ export const exportPPTX = async ( projectId: string, 
pageIds?: string[] ): Promise> => { const url = `/api/projects/${projectId}/export/pptx${buildPageIdsQuery(pageIds)}`; const response = await apiClient.get< ApiResponse<{ download_url: string; download_url_absolute?: string }> >(url); return response.data; }; /** * 导出为PDF * @param projectId 项目ID * @param pageIds 可选的页面ID列表,如果不提供则导出所有页面 */ export const exportPDF = async ( projectId: string, pageIds?: string[] ): Promise> => { const url = `/api/projects/${projectId}/export/pdf${buildPageIdsQuery(pageIds)}`; const response = await apiClient.get< ApiResponse<{ download_url: string; download_url_absolute?: string }> >(url); return response.data; }; /** * 导出为图片(单张直接下载,多张打包ZIP) */ export const exportImages = async ( projectId: string, pageIds?: string[] ): Promise> => { const url = `/api/projects/${projectId}/export/images${buildPageIdsQuery(pageIds)}`; const response = await apiClient.get< ApiResponse<{ download_url: string; download_url_absolute?: string }> >(url); return response.data; }; /** * 导出为可编辑PPTX(异步任务) * @param projectId 项目ID * @param filename 可选的文件名 * @param pageIds 可选的页面ID列表,如果不提供则导出所有页面 */ export const exportEditablePPTX = async ( projectId: string, filename?: string, pageIds?: string[] ): Promise> => { const response = await apiClient.post< ApiResponse<{ task_id: string }> >(`/api/projects/${projectId}/export/editable-pptx`, { filename, page_ids: pageIds }); return response.data; }; // ===== 素材生成 ===== /** * 生成单张素材图片(不绑定具体页面) * 现在返回异步任务ID,需要通过getTaskStatus轮询获取结果 */ export const generateMaterialImage = async ( projectId: string, prompt: string, refImage?: File | null, extraImages?: File[], aspectRatio?: string ): Promise> => { const formData = new FormData(); formData.append('prompt', prompt); if (aspectRatio) { formData.append('aspect_ratio', aspectRatio); } if (refImage) { formData.append('ref_image', refImage); } if (extraImages && extraImages.length > 0) { extraImages.forEach((file) => { formData.append('extra_images', file); }); } const response = await 
apiClient.post>( `/api/projects/${projectId}/materials/generate`, formData ); return response.data; }; /** * 素材信息接口 */ export interface Material { id: string; project_id?: string | null; filename: string; url: string; relative_path: string; created_at: string; // 可选的附加信息:用于展示友好名称 prompt?: string; original_filename?: string; source_filename?: string; name?: string; } /** * 获取素材列表 * @param projectId 项目ID,可选 * - If provided and not 'all' or 'none': Get materials for specific project via /api/projects/{projectId}/materials * - If 'all': Get all materials via /api/materials?project_id=all * - If 'none': Get global materials (not bound to any project) via /api/materials?project_id=none * - If not provided: Get all materials via /api/materials */ export const listMaterials = async ( projectId?: string ): Promise> => { let url: string; if (!projectId || projectId === 'all') { // Get all materials using global endpoint url = '/api/materials?project_id=all'; } else if (projectId === 'none') { // Get global materials (not bound to any project) url = '/api/materials?project_id=none'; } else { // Get materials for specific project url = `/api/projects/${projectId}/materials`; } const response = await apiClient.get>(url); return response.data; }; /** * 上传素材图片 * @param file 图片文件 * @param projectId 可选的项目ID * - If provided: Upload material bound to the project * - If not provided or 'none': Upload as global material (not bound to any project) */ export const uploadMaterial = async ( file: File, projectId?: string | null, generateCaption?: boolean ): Promise> => { const formData = new FormData(); formData.append('file', file); let url: string; if (!projectId || projectId === 'none') { // Use global upload endpoint for materials not bound to any project url = '/api/materials/upload'; } else { // Use project-specific upload endpoint url = `/api/projects/${projectId}/materials/upload`; } if (generateCaption) { url += (url.includes('?') ? 
'&' : '?') + 'generate_caption=true'; } const response = await apiClient.post>(url, formData); return response.data; }; /** * 删除素材 */ export const deleteMaterial = async (materialId: string): Promise> => { const response = await apiClient.delete>(`/api/materials/${materialId}`); return response.data; }; /** * Download selected materials bundled as a zip archive. */ export const downloadMaterialsZip = async ( materialIds: string[] ): Promise> => { const { data: blob } = await apiClient.post( '/api/materials/download', { material_ids: materialIds }, { responseType: 'blob' }, ); const href = URL.createObjectURL(blob); const link = Object.assign(document.createElement('a'), { href, download: 'materials.zip', }); document.body.appendChild(link); link.click(); document.body.removeChild(link); URL.revokeObjectURL(href); return { success: true, data: { download_url: '' } }; }; /** * 关联素材到项目(通过URL) * @param projectId 项目ID * @param materialUrls 素材URL列表 */ export const associateMaterialsToProject = async ( projectId: string, materialUrls: string[] ): Promise> => { const response = await apiClient.post>( '/api/materials/associate', { project_id: projectId, material_urls: materialUrls } ); return response.data; }; // ===== 用户模板 ===== export interface UserTemplate { template_id: string; name?: string; template_image_url: string; thumb_url?: string; // Thumbnail URL for faster loading created_at?: string; updated_at?: string; } /** * 上传用户模板 */ export const uploadUserTemplate = async ( templateImage: File, name?: string ): Promise> => { const formData = new FormData(); formData.append('template_image', templateImage); if (name) { formData.append('name', name); } const response = await apiClient.post>( '/api/user-templates', formData ); return response.data; }; /** * 获取用户模板列表 */ export const listUserTemplates = async (): Promise> => { const response = await apiClient.get>( '/api/user-templates' ); return response.data; }; /** * 删除用户模板 */ export const deleteUserTemplate = async 
(templateId: string): Promise => { const response = await apiClient.delete(`/api/user-templates/${templateId}`); return response.data; }; // ===== 参考文件相关 API ===== export interface ReferenceFile { id: string; project_id: string | null; filename: string; file_size: number; file_type: string; parse_status: 'pending' | 'parsing' | 'completed' | 'failed'; markdown_content: string | null; error_message: string | null; image_caption_failed_count?: number; // Optional, calculated dynamically created_at: string; updated_at: string; } /** * 上传参考文件 * @param file 文件 * @param projectId 可选的项目ID(如果不提供或为'none',则为全局文件) */ export const uploadReferenceFile = async ( file: File, projectId?: string | null ): Promise> => { const formData = new FormData(); formData.append('file', file); if (projectId && projectId !== 'none') { formData.append('project_id', projectId); } const response = await apiClient.post>( '/api/reference-files/upload', formData ); return response.data; }; /** * 获取参考文件信息 * @param fileId 文件ID */ export const getReferenceFile = async (fileId: string): Promise> => { const response = await apiClient.get>( `/api/reference-files/${fileId}` ); return response.data; }; /** * 列出项目的参考文件 * @param projectId 项目ID('global' 或 'none' 表示列出全局文件) */ export const listProjectReferenceFiles = async ( projectId: string ): Promise> => { const response = await apiClient.get>( `/api/reference-files/project/${projectId}` ); return response.data; }; /** * 删除参考文件 * @param fileId 文件ID */ export const deleteReferenceFile = async (fileId: string): Promise> => { const response = await apiClient.delete>( `/api/reference-files/${fileId}` ); return response.data; }; /** * 触发文件解析 * @param fileId 文件ID */ export const triggerFileParse = async (fileId: string): Promise> => { const response = await apiClient.post>( `/api/reference-files/${fileId}/parse` ); return response.data; }; /** * 将参考文件关联到项目 * @param fileId 文件ID * @param projectId 项目ID */ export const associateFileToProject = async ( fileId: string, 
projectId: string ): Promise> => { const response = await apiClient.post>( `/api/reference-files/${fileId}/associate`, { project_id: projectId } ); return response.data; }; /** * 从项目中移除参考文件(不删除文件本身) * @param fileId 文件ID */ export const dissociateFileFromProject = async ( fileId: string ): Promise> => { const response = await apiClient.post>( `/api/reference-files/${fileId}/dissociate` ); return response.data; }; // ===== 输出语言设置 ===== export type OutputLanguage = 'zh' | 'ja' | 'en' | 'auto'; export interface OutputLanguageOption { value: OutputLanguage; label: string; } export const OUTPUT_LANGUAGE_OPTIONS: OutputLanguageOption[] = [ { value: 'zh', label: '中文' }, { value: 'ja', label: '日本語' }, { value: 'en', label: 'English' }, { value: 'auto', label: '自动' }, ]; /** * 获取默认输出语言设置(从服务器环境变量读取) * * 注意:这只返回服务器配置的默认语言。 * 实际的语言选择应由前端在 sessionStorage 中管理, * 并在每次生成请求时通过 language 参数传递。 */ export const getDefaultOutputLanguage = async (): Promise> => { const response = await apiClient.get>( '/api/output-language' ); return response.data; }; /** * 从后端 Settings 获取用户的输出语言偏好 * 如果获取失败,返回默认值 'zh' */ export const getStoredOutputLanguage = async (): Promise => { try { const response = await apiClient.get>('/api/output-language'); return response.data.data?.language || 'zh'; } catch (error) { console.warn('Failed to load output language from settings, using default', error); return 'zh'; } }; /** * 获取系统设置 */ export const getSettings = async (): Promise> => { const response = await apiClient.get>('/api/settings'); return response.data; }; /** * 更新系统设置 */ export const updateSettings = async ( data: Partial> & { api_key?: string; mineru_token?: string; baidu_api_key?: string; text_api_key?: string; image_api_key?: string; image_caption_api_key?: string; lazyllm_api_keys?: Record; } ): Promise> => { const response = await apiClient.put>('/api/settings', data); return response.data; }; /** * 重置系统设置 */ export const resetSettings = async (): Promise> => { const response = await 
apiClient.post>('/api/settings/reset'); return response.data; }; /** * 验证 API key 是否可用 */ export const verifyApiKey = async (): Promise> => { const response = await apiClient.post>('/api/settings/verify'); return response.data; }; /** * 可选的测试设置类型 */ export interface TestSettingsOverride { api_key?: string; api_base_url?: string; text_model?: string; image_model?: string; image_caption_model?: string; image_caption_model_source?: string; mineru_api_base?: string; mineru_token?: string; baidu_api_key?: string; ai_provider_format?: string; image_resolution?: string; enable_text_reasoning?: boolean; text_thinking_budget?: number; enable_image_reasoning?: boolean; image_thinking_budget?: number; } /** * 测试百度 OCR 服务(异步) * @param settings 可选的设置覆盖(未保存的设置) * @returns 返回任务ID,需要通过 getTestStatus 轮询结果 */ export const testBaiduOcr = async (settings?: TestSettingsOverride): Promise> => { const response = await apiClient.post>('/api/settings/tests/baidu-ocr', settings || {}); return response.data; }; /** * 测试文本生成模型(异步) * @param settings 可选的设置覆盖(未保存的设置) * @returns 返回任务ID,需要通过 getTestStatus 轮询结果 */ export const testTextModel = async (settings?: TestSettingsOverride): Promise> => { const response = await apiClient.post>('/api/settings/tests/text-model', settings || {}); return response.data; }; /** * 测试图片识别模型(异步) * @param settings 可选的设置覆盖(未保存的设置) * @returns 返回任务ID,需要通过 getTestStatus 轮询结果 */ export const testCaptionModel = async (settings?: TestSettingsOverride): Promise> => { const response = await apiClient.post>('/api/settings/tests/caption-model', settings || {}); return response.data; }; /** * 测试百度图像修复(异步) * @param settings 可选的设置覆盖(未保存的设置) * @returns 返回任务ID,需要通过 getTestStatus 轮询结果 */ export const testBaiduInpaint = async (settings?: TestSettingsOverride): Promise> => { const response = await apiClient.post>('/api/settings/tests/baidu-inpaint', settings || {}); return response.data; }; /** * 测试图像生成模型(异步) * @param settings 可选的设置覆盖(未保存的设置) * @returns 返回任务ID,需要通过 getTestStatus 轮询结果 
*/ export const testImageModel = async (settings?: TestSettingsOverride): Promise> => { const response = await apiClient.post>('/api/settings/tests/image-model', settings || {}); return response.data; }; /** * 测试 MinerU PDF 解析(异步) * @param settings 可选的设置覆盖(未保存的设置) * @returns 返回任务ID,需要通过 getTestStatus 轮询结果 */ export const testMineruPdf = async (settings?: TestSettingsOverride): Promise> => { const response = await apiClient.post>('/api/settings/tests/mineru-pdf', settings || {}); return response.data; }; /** * 查询测试任务状态 * @param taskId 任务ID * @returns 任务状态信息 */ export const getTestStatus = async (taskId: string): Promise> => { const response = await apiClient.get>(`/api/settings/tests/${taskId}/status`); return response.data; }; // ===== PPT 翻新相关 API ===== /** * 创建 PPT 翻新项目 * 上传 PDF/PPTX 文件,后端异步解析内容并填充大纲+描述 */ export const createPptRenovationProject = async ( file: File, options?: { keepLayout?: boolean; templateStyle?: string; language?: string; } ): Promise> => { const formData = new FormData(); formData.append('file', file); if (options?.keepLayout) { formData.append('keep_layout', 'true'); } if (options?.templateStyle) { formData.append('template_style', options.templateStyle); } if (options?.language) { formData.append('language', options.language); } const response = await apiClient.post>( '/api/projects/renovation', formData ); return response.data; }; /** * 从图片提取风格描述(通用,不绑定项目) */ export const extractStyleFromImage = async ( imageFile: File ): Promise> => { const formData = new FormData(); formData.append('image', imageFile); const response = await apiClient.post>( '/api/extract-style', formData ); return response.data; }; ================================================ FILE: frontend/src/components/history/ProjectCard.tsx ================================================ import React, { useState, useEffect } from 'react'; import { Clock, FileText, ChevronRight, Trash2 } from 'lucide-react'; import { useT } from '@/hooks/useT'; import { Card } from 
'@/components/shared'; import { getProjectTitle, getFirstPageImage, formatDate, getStatusText, getStatusColor } from '@/utils/projectUtils'; import type { Project } from '@/types'; // ProjectCard 组件自包含翻译 const projectCardI18n = { zh: { projectCard: { pages: "{{count}} 页", page: "第 {{num}} 页" } }, en: { projectCard: { pages: "{{count}} pages", page: "Page {{num}}" } } }; export interface ProjectCardProps { project: Project; isSelected: boolean; isEditing: boolean; editingTitle: string; onSelect: (project: Project) => void; onToggleSelect: (projectId: string) => void; onDelete: (e: React.MouseEvent, project: Project) => void; onStartEdit: (e: React.MouseEvent, project: Project) => void; onTitleChange: (title: string) => void; onTitleKeyDown: (e: React.KeyboardEvent, projectId: string) => void; onSaveEdit: (projectId: string) => void; isBatchMode: boolean; } export const ProjectCard: React.FC = ({ project, isSelected, isEditing, editingTitle, onSelect, onToggleSelect, onDelete, onStartEdit, onTitleChange, onTitleKeyDown, onSaveEdit, isBatchMode, }) => { const t = useT(projectCardI18n); // 检测屏幕尺寸,只在非手机端加载图片(必须在早期返回之前声明hooks) const [shouldLoadImage, setShouldLoadImage] = useState(false); useEffect(() => { const checkScreenSize = () => { // sm breakpoint is 640px setShouldLoadImage(window.innerWidth >= 640); }; checkScreenSize(); window.addEventListener('resize', checkScreenSize); return () => window.removeEventListener('resize', checkScreenSize); }, []); const projectId = project.id || project.project_id; if (!projectId) return null; const title = getProjectTitle(project); const pageCount = project.pages?.length || 0; const statusText = getStatusText(project); const statusColor = getStatusColor(project); const firstPageImage = shouldLoadImage ? getFirstPageImage(project) : null; return ( onSelect(project)} >
{/* 复选框 */}
e.stopPropagation()}> onToggleSelect(projectId)} className="w-4 h-4 text-banana-600 border-gray-300 dark:border-border-primary rounded focus:ring-banana-500 cursor-pointer" />
{/* 中间:项目信息 */}
{isEditing ? ( onTitleChange(e.target.value)} onKeyDown={(e) => onTitleKeyDown(e, projectId)} onBlur={() => onSaveEdit(projectId)} autoFocus className="text-base md:text-lg font-semibold text-gray-900 dark:text-foreground-primary px-2 py-1 border border-banana-500 rounded focus:outline-none focus:ring-2 focus:ring-banana-500 flex-1 min-w-0" onClick={(e) => e.stopPropagation()} /> ) : (

onStartEdit(e, project)} title={isBatchMode ? undefined : t('common.edit')} > {title}

)} {statusText}
{t('projectCard.pages', { count: pageCount })} {formatDate(project.updated_at || project.created_at)}
{/* 右侧:图片预览 */}
{firstPageImage ? ( {t('projectCard.page', ) : (
)}
{/* 右侧:操作按钮 */}
); }; ================================================ FILE: frontend/src/components/outline/OutlineCard.tsx ================================================ import React, { useState, useEffect, useRef, useCallback } from 'react'; import { GripVertical, Edit2, Trash2, Check, X } from 'lucide-react'; import { useT } from '@/hooks/useT'; import { useImagePaste } from '@/hooks/useImagePaste'; import { Card, useConfirm, Markdown, ShimmerOverlay } from '@/components/shared'; import { MarkdownTextarea, type MarkdownTextareaRef } from '@/components/shared/MarkdownTextarea'; import type { Page } from '@/types'; // OutlineCard 组件自包含翻译 const outlineCardI18n = { zh: { outlineCard: { page: "第 {{num}} 页", chapter: "章节", titleLabel: "标题", keyPointsPlaceholder: "要点(每行一个,支持粘贴图片)", confirmDeletePage: "确定要删除这一页吗?", confirmDeleteTitle: "确认删除", uploadingImage: "正在上传图片...", coverPage: "封面", coverPageTooltip: "第一页为封面页,通常包含标题和副标题" } }, en: { outlineCard: { page: "Page {{num}}", chapter: "Chapter", titleLabel: "Title", keyPointsPlaceholder: "Key points (one per line, paste images supported)", confirmDeletePage: "Are you sure you want to delete this page?", confirmDeleteTitle: "Confirm Delete", uploadingImage: "Uploading image...", coverPage: "Cover", coverPageTooltip: "This is the cover page, usually containing the title and subtitle" } } }; interface OutlineCardProps { page: Page; index: number; projectId?: string; showToast: (props: { message: string; type: 'success' | 'error' | 'info' | 'warning' }) => void; onUpdate: (data: Partial) => void; onDelete: () => void; onClick: () => void; isSelected: boolean; dragHandleProps?: React.HTMLAttributes; isAiRefining?: boolean; } export const OutlineCard: React.FC = ({ page, index, projectId, showToast, onUpdate, onDelete, onClick, isSelected, dragHandleProps, isAiRefining = false, }) => { const t = useT(outlineCardI18n); const { confirm, ConfirmDialog } = useConfirm(); const outline = page.outline_content ?? 
{ title: '', points: [] as string[] };

  // Newline-joined key points. Used both to seed the editor and as a
  // value-based effect dependency: when `page.outline_content` is missing the
  // fallback object above is re-created on every render, so depending on the
  // array identity would re-run the sync effect on every render.
  const pointsText = outline.points.join('\n');

  const [isEditing, setIsEditing] = useState(false);
  const [editTitle, setEditTitle] = useState(outline.title);
  const [editPoints, setEditPoints] = useState(pointsText);
  const [editPart, setEditPart] = useState(page.part || '');
  const textareaRef = useRef<MarkdownTextareaRef>(null);

  // Callback to insert at cursor position in the textarea
  const insertAtCursor = useCallback((markdown: string) => {
    textareaRef.current?.insertAtCursor(markdown);
  }, []);

  const { handlePaste, handleFiles, isUploading } = useImagePaste({
    projectId,
    setContent: setEditPoints,
    showToast: showToast,
    insertAtCursor,
  });

  // When the page prop changes, mirror it into the local edit state — but only
  // while not editing, so in-progress edits are never overwritten.
  useEffect(() => {
    if (!isEditing) {
      setEditTitle(outline.title);
      setEditPoints(pointsText);
      setEditPart(page.part || '');
    }
  }, [outline.title, pointsText, page.part, isEditing]);

  /** Commits the edited title, key points and chapter, then leaves edit mode. */
  const handleSave = () => {
    onUpdate({
      outline_content: {
        title: editTitle,
        // One key point per line; blank / whitespace-only lines are dropped.
        points: editPoints.split('\n').filter((p) => p.trim()),
      },
      // An empty chapter name is sent as `undefined`.
      part: editPart.trim() || undefined,
    });
    setIsEditing(false);
  };

  /** Discards local edits by restoring the values from the page prop. */
  const handleCancel = () => {
    setEditTitle(outline.title);
    setEditPoints(pointsText);
    setEditPart(page.part || '');
    setIsEditing(false);
  };

  return (
{/* 拖拽手柄 */}
{/* 内容区 */}
{/* 页码和章节 */}
{t('outlineCard.page', { num: index + 1 })} {index === 0 && !isEditing && ( {t('outlineCard.coverPage')} )} {isEditing ? ( setEditPart(e.target.value)} onClick={(e) => e.stopPropagation()} className="text-xs px-2 py-0.5 w-24 border border-blue-300 dark:border-blue-700 bg-blue-50 dark:bg-blue-900/30 text-blue-700 dark:text-blue-400 rounded focus:outline-none focus:ring-1 focus:ring-blue-500" placeholder={t('outlineCard.chapter')} /> ) : ( page.part && ( {page.part} ) )}
{isEditing ? ( /* 编辑模式 */
e.stopPropagation()}> setEditTitle(e.target.value)} className="w-full px-3 py-2 border border-gray-300 dark:border-border-primary bg-white dark:bg-background-secondary text-gray-900 dark:text-foreground-primary rounded-lg focus:outline-none focus:ring-2 focus:ring-banana-500" placeholder={t('outlineCard.titleLabel')} />
) : ( /* 查看模式 */

{outline.title}

{outline.points.join('\n')}
)}
{/* 操作按钮 */} {!isEditing && (
)}
{ConfirmDialog}
); }; ================================================ FILE: frontend/src/components/preview/DescriptionCard.tsx ================================================ import React, { useState, useRef, useCallback } from 'react'; import { Edit2, FileText, RefreshCw, Tag, Layout, Image, Focus, MessageSquare, ImageOff } from 'lucide-react'; import { useT } from '@/hooks/useT'; import { useImagePaste } from '@/hooks/useImagePaste'; import { Card, ContextualStatusBadge, Button, Modal, Skeleton, Markdown } from '@/components/shared'; import { MarkdownTextarea, type MarkdownTextareaRef } from '@/components/shared/MarkdownTextarea'; import { useDescriptionGeneratingState } from '@/hooks/useGeneratingState'; import type { Page, DescriptionContent } from '@/types'; // DescriptionCard 组件自包含翻译 const descriptionCardI18n = { zh: { descriptionCard: { page: "第 {{num}} 页", regenerate: "重新生成", descriptionTitle: "编辑页面描述", description: "描述", noDescription: "还没有生成描述", uploadingImage: "正在上传图片...", descriptionPlaceholder: "输入页面描述, 可包含页面文字、素材、排版设计等信息,支持粘贴图片", coverPage: "封面", coverPageTooltip: "第一页为封面页,默认保持简洁风格", notInImagePrompt: "不影响图片生成" } }, en: { descriptionCard: { page: "Page {{num}}", regenerate: "Regenerate", descriptionTitle: "Edit Descriptions", description: "Description", noDescription: "No description generated yet", uploadingImage: "Uploading image...", descriptionPlaceholder: "Enter page description, can include page text, materials, layout design, etc., support pasting images", coverPage: "Cover", coverPageTooltip: "This is the cover page, default to keep simple style", notInImagePrompt: "Not used in image generation" } } }; export interface DescriptionCardProps { page: Page; index: number; projectId?: string; extraFieldNames?: string[]; imagePromptFields?: string[]; showToast: (props: { message: string; type: 'success' | 'error' | 'info' | 'warning' }) => void; onUpdate: (data: Partial) => void; onRegenerate: () => void; isAiRefining?: boolean; } // 从 description_content 
提取文本内容(提取到组件外部供 memo 比较器使用)
/**
 * Returns the plain text of a page description.
 *
 * Handles both shapes seen in this file: an object with a `text` field, and an
 * object with a `text_content` string array (joined with newlines). Anything
 * else, including `undefined`, yields an empty string. Kept outside the
 * component so the memo comparator can call it.
 */
const getDescriptionText = (descContent: DescriptionContent | undefined): string => {
  if (!descContent) return '';
  if ('text' in descContent) {
    return descContent.text;
  } else if ('text_content' in descContent && Array.isArray(descContent.text_content)) {
    return descContent.text_content.join('\n');
  }
  return '';
};

/**
 * Returns the extra fields of a page description.
 * Backward compatible with data that only carries `layout_suggestion`, which is
 * exposed under the '排版建议' key.
 */
const getExtraFields = (descContent: DescriptionContent | undefined): Record<string, string> => {
  if (!descContent) return {};
  if (descContent.extra_fields) return descContent.extra_fields;
  // Backward compatibility: old data only has layout_suggestion
  if (descContent.layout_suggestion) return { '排版建议': descContent.layout_suggestion };
  return {};
};

/** Serialized key of the extra fields, used by the memo comparator. */
const getExtraFieldsKey = (descContent: DescriptionContent | undefined): string => {
  const fields = getExtraFields(descContent);
  return JSON.stringify(fields);
};

export const DescriptionCard: React.FC<DescriptionCardProps> = React.memo(({
  page,
  index,
  projectId,
  extraFieldNames = [],
  imagePromptFields,
  showToast,
  onUpdate,
  onRegenerate,
  isAiRefining = false,
}) => {
  const t = useT(descriptionCardI18n);

  const text = getDescriptionText(page.description_content);
  const extraFields = getExtraFields(page.description_content);

  const [isEditing, setIsEditing] = useState(false);
  const [editContent, setEditContent] = useState('');
  const [editExtraFields, setEditExtraFields] = useState<Record<string, string>>({});
  const textareaRef = useRef<MarkdownTextareaRef>(null);
  const extraFieldRefs = useRef<Record<string, MarkdownTextareaRef | null>>({});

  // Active field target for image paste — switched via onFocus
  const activeSetContent = useRef<(updater: (prev: string) => string) => void>(setEditContent);
  // The default target is the main description textarea. The callback must
  // forward the markdown it receives: inserting '' would silently drop an
  // image uploaded before any field has been focused.
  const activeInsertAtCursor = useRef<((markdown: string) => void) | undefined>(
    (md: string) => textareaRef.current?.insertAtCursor(md)
  );

  const { handlePaste, handleFiles, isUploading } = useImagePaste({
    projectId,
    setContent: (updater) => activeSetContent.current(updater),
    showToast: showToast,
    insertAtCursor: (md) => activeInsertAtCursor.current?.(md),
  });

  // Focus handlers to switch paste target
  const focusMainDesc = useCallback(() => {
    activeSetContent.current = setEditContent;
    activeInsertAtCursor.current = (md: string) => textareaRef.current?.insertAtCursor(md);
  }, []);

  const focusExtraField = useCallback((fieldName: string) => {
    activeSetContent.current = (updater) =>
      setEditExtraFields(prev => ({ ...prev, [fieldName]: updater(prev[fieldName] || '') }));
    activeInsertAtCursor.current = (md: string) =>
      extraFieldRefs.current[fieldName]?.insertAtCursor(md);
  }, []);

  // The skeleton is driven by page.status, independent of the GENERATING state
  // used for image generation.
  const generating = useDescriptionGeneratingState(page, isAiRefining);

  /** Opens the edit dialog, seeding it with the latest values from `page`. */
  const handleEdit = () => {
    const currentText = getDescriptionText(page.description_content);
    const currentExtraFields = getExtraFields(page.description_content);
    setEditContent(currentText);
    setEditExtraFields({ ...currentExtraFields });
    // Reset the paste target so a field focused in a previous editing session
    // does not keep receiving pasted images.
    focusMainDesc();
    setIsEditing(true);
  };

  /** Saves `text` plus any non-blank extra fields, then closes the dialog. */
  const handleSave = () => {
    const filteredFields: Record<string, string> = {};
    for (const [key, value] of Object.entries(editExtraFields)) {
      if (value.trim()) {
        filteredFields[key] = value;
      }
    }
    onUpdate({
      description_content: {
        text: editContent,
        // `extra_fields` is omitted entirely when every field is blank.
        ...(Object.keys(filteredFields).length > 0 ? { extra_fields: filteredFields } : {}),
      } as DescriptionContent,
    });
    setIsEditing(false);
  };

  // Merge configured and existing field names: configured order first, then
  // existing fields that are not part of the configuration.
  const allFieldNames = [...new Set([...extraFieldNames, ...Object.keys(extraFields)])];

  return ( <> {/* Title bar */}
{t('descriptionCard.page', { num: index + 1 })} {index === 0 && ( {t('descriptionCard.coverPage')} )} {page.part && ( {page.part} )}
{/* 内容 */}
{generating ? (
{t('common.generating')}
) : text ? (
{text} {allFieldNames.map(name => { const value = extraFields[name]; if (!value) return null; const FIELD_ICONS: Record = { '视觉元素': Image, '视觉焦点': Focus, '排版布局': Layout, '演讲者备注': MessageSquare }; const FieldIcon = FIELD_ICONS[name] || Tag; const notInImagePrompt = imagePromptFields && !imagePromptFields.includes(name); return (
{name} {notInImagePrompt && ( {t('descriptionCard.notInImagePrompt')} )}
{value}
); })}
) : (

{t('descriptionCard.noDescription')}

)}
{/* 操作栏 */}
{/* 编辑对话框 */} setIsEditing(false)} title={t('descriptionCard.descriptionTitle')} size="lg" >
{/* 额外字段编辑 */} {allFieldNames.map(name => ( { extraFieldRefs.current[name] = el; }} label={name} value={editExtraFields[name] || ''} onChange={v => setEditExtraFields(prev => ({ ...prev, [name]: v }))} onPaste={handlePaste} onFiles={handleFiles} onFocus={() => focusExtraField(name)} showUploadButton={false} rows={2} placeholder={name} /> ))}
); }, (prev, next) => prev.index === next.index && prev.isAiRefining === next.isAiRefining && prev.projectId === next.projectId && prev.page.id === next.page.id && prev.page.status === next.page.status && prev.page.part === next.page.part && getDescriptionText(prev.page.description_content) === getDescriptionText(next.page.description_content) && getExtraFieldsKey(prev.page.description_content) === getExtraFieldsKey(next.page.description_content) && JSON.stringify(prev.extraFieldNames) === JSON.stringify(next.extraFieldNames) && JSON.stringify(prev.imagePromptFields) === JSON.stringify(next.imagePromptFields) ); ================================================ FILE: frontend/src/components/preview/SlideCard.tsx ================================================ import React from 'react'; import { Edit2, Trash2 } from 'lucide-react'; import { useT } from '@/hooks/useT'; import { StatusBadge, Skeleton, useConfirm } from '@/components/shared'; import { getImageUrl } from '@/api/client'; import type { Page } from '@/types'; // SlideCard 组件自包含翻译 const slideCardI18n = { zh: { slideCard: { notGenerated: "未生成", confirmDeletePage: "确定要删除这一页吗?", confirmDeleteTitle: "确认删除", coverPage: "封面", coverPageTooltip: "第一页为封面页,通常包含标题和副标题" } }, en: { slideCard: { notGenerated: "Not Generated", confirmDeletePage: "Are you sure you want to delete this page?", confirmDeleteTitle: "Confirm Delete", coverPage: "Cover", coverPageTooltip: "This is the cover page, usually containing the title and subtitle" } } }; interface SlideCardProps { page: Page; index: number; isSelected: boolean; onClick: () => void; onEdit: () => void; onDelete: () => void; isGenerating?: boolean; aspectRatio?: string; } export const SlideCard: React.FC = ({ page, index, isSelected, onClick, onEdit, onDelete, isGenerating = false, aspectRatio = '16:9', }) => { const t = useT(slideCardI18n); const { confirm, ConfirmDialog } = useConfirm(); const imageUrl = page.generated_image_path ? 
getImageUrl(page.generated_image_path, page.updated_at) : ''; const generating = isGenerating || page.status === 'QUEUED' || page.status === 'GENERATING'; return (
{/* 缩略图 */}
{generating ? ( ) : page.generated_image_path ? ( <> {`Slide {/* 悬停操作 */}
) : (
🍌
{t('slideCard.notGenerated')}
)} {/* 状态标签 */}
{/* 标题 */}
{index + 1}. {page.outline_content?.title} {index === 0 && ( {t('slideCard.coverPage')} )}
{ConfirmDialog}
); }; ================================================ FILE: frontend/src/components/shared/AccessCodeGuard.tsx ================================================
import { useState, useEffect, type ReactNode } from 'react';
import { checkAccessCode, verifyAccessCode } from '@/api/endpoints';
import { useT } from '@/hooks/useT';
import { Button } from './Button';
import { Input } from './Input';

const STORAGE_KEY = 'banana-access-code';

const translations = {
  zh: {
    title: '请输入访问口令',
    placeholder: '输入口令',
    submit: '确认',
    error: '口令错误,请重试',
    networkError: '网络错误,请稍后重试',
    connectError: '无法连接到后端服务',
    connectHint: '请检查后端服务是否正常运行',
    retry: '重试',
  },
  en: {
    title: 'Enter Access Code',
    placeholder: 'Enter code',
    submit: 'Submit',
    error: 'Invalid code, please try again',
    networkError: 'Network error, please try later',
    connectError: 'Cannot connect to backend service',
    connectHint: 'Please check if the backend service is running',
    retry: 'Retry',
  },
};

/** Reads `response.status` from a thrown error, if the error carries one. */
const getHttpStatus = (e: unknown): number | undefined =>
  (e as { response?: { status?: number } })?.response?.status;

/**
 * Gates `children` behind the optional access code.
 *
 * Renders nothing while loading, the children once access is granted, a
 * connection-error screen when the backend cannot be reached, and otherwise
 * the access-code prompt.
 */
export function AccessCodeGuard({ children }: { children: ReactNode }) {
  const t = useT(translations);
  const [status, setStatus] = useState<'loading' | 'prompt' | 'pass' | 'connectError'>('loading');
  const [code, setCode] = useState('');
  const [error, setError] = useState('');
  const [verifying, setVerifying] = useState(false);

  /**
   * Decides the initial state: pass when the access code is disabled or the
   * saved code is still valid, prompt otherwise.
   */
  const checkAccess = async () => {
    setStatus('loading');
    try {
      const res = await checkAccessCode();
      if (!res.data.enabled) {
        setStatus('pass');
        return;
      }
      const saved = localStorage.getItem(STORAGE_KEY);
      if (saved) {
        try {
          const v = await verifyAccessCode(saved);
          if (v.data.valid) {
            setStatus('pass');
            return;
          }
        } catch (e: unknown) {
          // A 403 means the saved code was rejected (handleSubmit treats it the
          // same way): fall through to the prompt instead of reporting a
          // connection error. Anything else is handled by the outer catch.
          if (getHttpStatus(e) !== 403) throw e;
        }
        localStorage.removeItem(STORAGE_KEY);
      }
      setStatus('prompt');
    } catch {
      // Drop the saved code so that a retry always reaches the prompt.
      localStorage.removeItem(STORAGE_KEY);
      setStatus('connectError');
    }
  };

  // Run the check once on mount.
  useEffect(() => {
    checkAccess();
  }, []);

  /** Verifies the typed code; on success it is persisted and access is granted. */
  const handleSubmit = async () => {
    if (!code.trim()) return;
    setVerifying(true);
    setError('');
    try {
      const res = await verifyAccessCode(code.trim());
      if (res.data.valid) {
        localStorage.setItem(STORAGE_KEY, code.trim());
        setStatus('pass');
      } else {
        setError(t('error'));
      }
    } catch (e: unknown) {
      // 403 is reported as a wrong code; every other failure as a network error.
      setError(getHttpStatus(e) === 403 ? t('error') : t('networkError'));
    } finally {
      setVerifying(false);
    }
  };

  if (status === 'loading') return null;
  if (status === 'pass') return <>{children}</>;

  if (status === 'connectError') {
    return (

{t('connectError')}

{t('connectHint')}

); } return (

{t('title')}

{ e.preventDefault(); handleSubmit(); }} className="space-y-4"> setCode(e.target.value)} error={error} autoFocus />
); } ================================================ FILE: frontend/src/components/shared/AiRefineInput.tsx ================================================ import React, { useState, memo } from 'react'; import { Sparkles, History, ChevronDown, ChevronUp, Send } from 'lucide-react'; import { useT } from '@/hooks/useT'; // AiRefineInput 组件自包含翻译 const aiRefineI18n = { zh: { aiRefine: { ctrlEnterSubmit: "(Ctrl+Enter 提交)", history: "历史", viewHistory: "查看 {{count}} 条历史修改", previousRequirements: "之前的修改要求:", submitTooltip: "提交 (Ctrl+Enter)" } }, en: { aiRefine: { ctrlEnterSubmit: "(Ctrl+Enter to submit)", history: "History", viewHistory: "View {{count}} previous edits", previousRequirements: "Previous edit requests:", submitTooltip: "Submit (Ctrl+Enter)" } } }; export interface AiRefineInputProps { /** 标题文字 */ title: string; /** 输入框占位文字 */ placeholder: string; /** 提交回调函数,接收当前要求和历史要求,返回 Promise */ onSubmit: (requirement: string, previousRequirements: string[]) => Promise; /** 是否禁用(例如没有内容可修改时) */ disabled?: boolean; /** 自定义类名 */ className?: string; /** 状态变化回调,通知父组件当前是否正在提交 */ onStatusChange?: (isSubmitting: boolean) => void; } const AiRefineInputComponent: React.FC = ({ title, placeholder, onSubmit, disabled = false, className = '', onStatusChange, }) => { const t = useT(aiRefineI18n); const [requirement, setRequirement] = useState(''); const [isSubmitting, setIsSubmitting] = useState(false); const [history, setHistory] = useState([]); const [showHistory, setShowHistory] = useState(false); const handleSubmit = async () => { if (!requirement.trim() || isSubmitting || disabled) return; const currentRequirement = requirement.trim(); setIsSubmitting(true); onStatusChange?.(true); // 通知父组件开始提交 try { await onSubmit(currentRequirement, history); // 成功后将当前要求添加到历史 setHistory(prev => [...prev, currentRequirement]); // 清空输入框 setRequirement(''); } finally { setIsSubmitting(false); onStatusChange?.(false); // 通知父组件提交结束 } }; // 处理 Ctrl+Enter 快捷键 const handleKeyDown = (e: 
React.KeyboardEvent) => { if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) { e.preventDefault(); handleSubmit(); } }; if (disabled) { return null; } // 判断是否为紧凑模式(没有标题时) const isCompactMode = !title; return (
{/* 标题和历史按钮 - 仅非紧凑模式显示 */} {!isCompactMode && (

{title}

{t('aiRefine.ctrlEnterSubmit')}
{history.length > 0 && ( )}
)} {/* 历史记录展示 */} {showHistory && history.length > 0 && (
{t('aiRefine.previousRequirements')}
    {history.map((req, idx) => (
  • {idx + 1}. {req}
  • ))}
)}
{/* 紧凑模式下显示图标和历史按钮 */} {isCompactMode && ( <> {history.length > 0 && ( )} )}
setRequirement(e.target.value)} onKeyDown={handleKeyDown} placeholder={placeholder} className={`w-full px-3 py-1.5 text-sm border ${isCompactMode ? 'border-gray-200 dark:border-border-primary' : 'border-gray-300 dark:border-border-primary'} rounded-lg focus:outline-none focus:ring-2 focus:ring-purple-500 focus:border-transparent transition-all ${ isSubmitting ? 'animate-gradient-x bg-gradient-to-r from-purple-100 via-purple-200 to-purple-100 bg-[length:200%_100%]' : 'bg-white dark:bg-background-secondary' }`} disabled={isSubmitting} /> {isSubmitting && (
)}
{/* 提交按钮 - 移动端始终显示,桌面端鼠标悬停时显示 */}
); }; // 使用 memo 包装组件,避免父组件频繁重渲染时影响输入框 // 只有当 props 真正变化时才重新渲染 export const AiRefineInput = memo(AiRefineInputComponent); ================================================ FILE: frontend/src/components/shared/Button.tsx ================================================ import React from 'react'; import { cn } from '@/utils'; interface ButtonProps extends React.ButtonHTMLAttributes { variant?: 'primary' | 'secondary' | 'ghost'; size?: 'sm' | 'md' | 'lg'; loading?: boolean; icon?: React.ReactNode; } export const Button: React.FC = ({ children, variant = 'primary', size = 'md', loading = false, icon, className, disabled, ...props }) => { const baseStyles = 'inline-flex items-center justify-center font-semibold rounded-lg transition-all duration-200 focus:outline-none focus:ring-2 focus:ring-banana-500 focus:ring-offset-2 dark:focus:ring-offset-gray-900 disabled:opacity-50 disabled:cursor-not-allowed touch-manipulation'; const variants = { primary: 'bg-gradient-to-r from-banana-500 to-banana-600 text-black hover:shadow-yellow hover:-translate-y-0.5 active:translate-y-0 shadow-md', secondary: 'bg-white dark:bg-background-secondary border border-banana-500 text-black dark:text-foreground-primary hover:bg-banana-50 dark:hover:bg-background-hover', ghost: 'bg-transparent text-gray-700 dark:text-foreground-secondary hover:bg-gray-100 dark:hover:bg-background-secondary', }; const sizes = { sm: 'h-8 px-3 text-sm', md: 'h-10 px-6 text-base', lg: 'h-12 px-8 text-lg', }; return ( ); }; ================================================ FILE: frontend/src/components/shared/Card.tsx ================================================ import React from 'react'; import { cn } from '@/utils'; interface CardProps extends React.HTMLAttributes { hoverable?: boolean; } export const Card: React.FC = ({ children, hoverable = false, className, ...props }) => { return (
{children}
); }; ================================================ FILE: frontend/src/components/shared/ConfirmDialog.tsx ================================================ import React, { useState, useCallback } from 'react'; import { AlertTriangle } from 'lucide-react'; import { Modal } from './Modal'; import { Button } from './Button'; interface ConfirmDialogProps { isOpen: boolean; onClose: () => void; onConfirm: (checkboxValue?: boolean) => void; title?: string; message: string; confirmText?: string; cancelText?: string; variant?: 'danger' | 'warning' | 'info'; checkboxLabel?: string; checkboxDefaultChecked?: boolean; } export const ConfirmDialog: React.FC = ({ isOpen, onClose, onConfirm, title = '确认操作', message, confirmText = '确定', cancelText = '取消', variant = 'warning', checkboxLabel, checkboxDefaultChecked = false, }) => { const [checkboxChecked, setCheckboxChecked] = useState(checkboxDefaultChecked); const handleConfirm = () => { onConfirm(checkboxLabel ? checkboxChecked : undefined); onClose(); }; const variantStyles = { danger: 'text-red-600 dark:text-red-400', warning: 'text-yellow-600 dark:text-yellow-400', info: 'text-blue-600 dark:text-blue-400', }; return (

{message}

{checkboxLabel && ( )}
); }; // Hook for easy confirmation dialogs export const useConfirm = () => { const [isOpen, setIsOpen] = useState(false); const [config, setConfig] = useState<{ message: string; title?: string; confirmText?: string; cancelText?: string; variant?: 'danger' | 'warning' | 'info'; checkboxLabel?: string; checkboxDefaultChecked?: boolean; onConfirm: (checkboxValue?: boolean) => void; } | null>(null); const confirm = useCallback( ( message: string, onConfirm: (checkboxValue?: boolean) => void, options?: { title?: string; confirmText?: string; cancelText?: string; variant?: 'danger' | 'warning' | 'info'; checkboxLabel?: string; checkboxDefaultChecked?: boolean; } ) => { setConfig({ message, onConfirm, title: options?.title, confirmText: options?.confirmText, cancelText: options?.cancelText, variant: options?.variant || 'warning', checkboxLabel: options?.checkboxLabel, checkboxDefaultChecked: options?.checkboxDefaultChecked, }); setIsOpen(true); }, [] ); const close = useCallback(() => { setIsOpen(false); setConfig(null); }, []); const handleConfirm = useCallback((checkboxValue?: boolean) => { if (config?.onConfirm) { config.onConfirm(checkboxValue); } close(); }, [config, close]); return { confirm, ConfirmDialog: config ? 
( ) : null, }; }; ================================================ FILE: frontend/src/components/shared/ContextualStatusBadge.tsx ================================================ import React from 'react'; import { cn } from '@/utils'; import type { Page } from '@/types'; import { usePageStatus, type PageStatusContext } from '@/hooks/usePageStatus'; interface ContextualStatusBadgeProps { page: Page; /** 上下文:description(描述页)、image(图片页)、full(完整状态) */ context?: PageStatusContext; /** 是否显示详细描述(悬停提示) */ showDescription?: boolean; } /** * 根据上下文智能显示状态的徽章 * * - 在描述编辑页面:只显示描述相关状态 * - 在图片预览页面:显示图片生成状态 * - 其他场景:显示完整页面状态 */ export const ContextualStatusBadge: React.FC = ({ page, context = 'full', showDescription = true, }) => { const { status, label, description } = usePageStatus(page, context); const statusConfig: Record = { DRAFT: 'bg-gray-100 dark:bg-background-secondary text-gray-600 dark:text-foreground-tertiary', GENERATING_DESCRIPTION: 'bg-orange-100 dark:bg-orange-900/30 text-orange-600 dark:text-orange-400 animate-pulse', DESCRIPTION_GENERATED: 'bg-blue-100 dark:bg-blue-900/30 text-blue-600 dark:text-blue-400', QUEUED: 'bg-yellow-100 dark:bg-yellow-900/30 text-yellow-700 dark:text-yellow-400 animate-pulse', GENERATING: 'bg-orange-100 dark:bg-orange-900/30 text-orange-600 dark:text-orange-400 animate-pulse', COMPLETED: 'bg-green-100 dark:bg-green-900/30 text-green-600 dark:text-green-400', FAILED: 'bg-red-100 dark:bg-red-900/30 text-red-600 dark:text-red-400', }; return ( {label} ); }; ================================================ FILE: frontend/src/components/shared/ExportTasksPanel.tsx ================================================ import React, { useState, useEffect } from 'react'; import { Download, X, Trash2, FileText, Clock, CheckCircle, XCircle, Loader2, AlertTriangle, HelpCircle, Settings } from 'lucide-react'; import { useExportTasksStore, type ExportTask, type ExportTaskType } from '@/store/useExportTasksStore'; import { useT } from '@/hooks/useT'; 
import type { Page } from '@/types'; import { Button } from './Button'; import { cn } from '@/utils'; // Export 组件自包含翻译 const exportI18n = { zh: { export: { tasks: "导出任务", inProgress: "{{count}} 进行中", clearHistory: "清除", exportPptx: "PPTX", exportPdf: "PDF", exportEditablePptx: "可编辑 PPTX", exportImages: "图片", allPages: "全部", pageRange: "第{{start}}-{{end}}页", singlePage: "第{{num}}页", pagesCount: "{{count}}页", warnings: "{{count}} 条警告", clickToView: "点击查看", warningsTitle: "导出警告", warningsCount: "导出警告 ({{count}} 条)", detailInfo: "详细信息", styleExtractionFailed: "样式提取失败 ({{count}} 个)", textRenderFailed: "文本渲染失败 ({{count}} 个)", moreItems: "... 还有 {{count}} 条", exportFailed: "导出失败", preparing: "准备中...", settingsTip: "可在「项目设置 → 导出设置」中调整配置或开启「返回半成品」选项" }, shared: { historyRecords: "历史记录" } }, en: { export: { tasks: "Export Tasks", inProgress: "{{count}} in progress", clearHistory: "Clear", exportPptx: "PPTX", exportPdf: "PDF", exportEditablePptx: "Editable PPTX", exportImages: "Images", allPages: "All", pageRange: "Pages {{start}}-{{end}}", singlePage: "Page {{num}}", pagesCount: "{{count}} pages", warnings: "{{count}} warnings", clickToView: "Click to view", warningsTitle: "Export Warnings", warningsCount: "Export Warnings ({{count}})", detailInfo: "Details", styleExtractionFailed: "Style extraction failed ({{count}})", textRenderFailed: "Text render failed ({{count}})", moreItems: "... 
{{count}} more", exportFailed: "Export Failed", preparing: "Preparing...", settingsTip: "Adjust settings in \"Project Settings → Export Settings\" or enable \"Allow Partial Results\"" }, shared: { historyRecords: "History Records" } } }; const getPageRangeText = (pageIds: string[] | undefined, pages: Page[], t: (key: string, options?: any) => string): string => { if (!pageIds || pageIds.length === 0) { return t('export.allPages'); } const indices: number[] = []; pageIds.forEach(pageId => { const index = pages.findIndex(p => (p.id || p.page_id) === pageId); if (index >= 0) { indices.push(index); } }); if (indices.length === 0) { return t('export.pagesCount', { count: pageIds.length }); } indices.sort((a, b) => a - b); const minIndex = indices[0]; const maxIndex = indices[indices.length - 1]; if (indices.length === maxIndex - minIndex + 1) { if (minIndex === maxIndex) { return t('export.singlePage', { num: minIndex + 1 }); } return t('export.pageRange', { start: minIndex + 1, end: maxIndex + 1 }); } else { return t('export.pagesCount', { count: pageIds.length }); } }; const TaskStatusIcon: React.FC<{ status: ExportTask['status'] }> = ({ status }) => { switch (status) { case 'PENDING': return ; case 'PROCESSING': case 'RUNNING': return ; case 'COMPLETED': return ; case 'FAILED': return ; default: return null; } }; const WarningsModal: React.FC<{ isOpen: boolean; onClose: () => void; warnings: string[]; warningDetails?: any; }> = ({ isOpen, onClose, warnings, warningDetails }) => { const t = useT(exportI18n); if (!isOpen) return null; return (

{t('export.warningsCount', { count: warnings.length })}

{warnings.map((warning, idx) => (
{warning}
))}
{warningDetails && (

{t('export.detailInfo')}

{warningDetails.style_extraction_failed?.length > 0 && (

{t('export.styleExtractionFailed', { count: warningDetails.style_extraction_failed.length })}

{warningDetails.style_extraction_failed.slice(0, 10).map((item: any, idx: number) => (
• {item.element_id}: {item.reason}
))} {warningDetails.style_extraction_failed.length > 10 && (
{t('export.moreItems', { count: warningDetails.style_extraction_failed.length - 10 })}
)}
)} {warningDetails.text_render_failed?.length > 0 && (

{t('export.textRenderFailed', { count: warningDetails.text_render_failed.length })}

{warningDetails.text_render_failed.slice(0, 10).map((item: any, idx: number) => (
• "{item.text}": {item.reason}
))}
)}
)}
); }; const TaskItem: React.FC<{ task: ExportTask; pages: Page[]; onRemove: () => void }> = ({ task, pages, onRemove }) => { const t = useT(exportI18n); const [showWarningsModal, setShowWarningsModal] = useState(false); const taskTypeLabels: Record = { 'pptx': t('export.exportPptx'), 'pdf': t('export.exportPdf'), 'editable-pptx': t('export.exportEditablePptx'), 'images': t('export.exportImages'), }; const formatTime = (isoString: string) => { const date = new Date(isoString); return date.toLocaleTimeString('zh-CN', { hour: '2-digit', minute: '2-digit' }); }; const pageRangeText = getPageRangeText(task.pageIds, pages, t); const getProgressPercent = () => { if (!task.progress) return 0; if (task.progress.percent !== undefined) return task.progress.percent; if (task.progress.total > 0) { return Math.round((task.progress.completed / task.progress.total) * 100); } return 0; }; const progressPercent = getProgressPercent(); const isProcessing = task.status === 'PROCESSING' || task.status === 'RUNNING' || task.status === 'PENDING'; const hasWarnings = task.status === 'COMPLETED' && task.progress?.warnings && task.progress.warnings.length > 0; return (
{taskTypeLabels[task.type]} {pageRangeText} {formatTime(task.createdAt)}
{isProcessing && (
{task.progress ? ( <>
{progressPercent > 0 ? `${progressPercent}%` : t('export.preparing')} {task.progress.current_step && ( {task.progress.current_step} )}
{task.progress.messages && task.progress.messages.length > 0 && (
{task.progress.messages.slice(-2).map((msg, idx) => (
{msg}
))}
)} ) : (
{t('common.pending')}
)}
)} {task.status === 'FAILED' && task.errorMessage && (

{t('export.exportFailed')}

{task.errorMessage}

{task.progress?.help_text && (

{task.progress.help_text}

)}
{t('export.settingsTip')}
)} {hasWarnings && ( <> setShowWarningsModal(false)} warnings={task.progress?.warnings ?? []} warningDetails={task.progress?.warning_details} /> )}
{task.status === 'COMPLETED' && task.downloadUrl && ( )}
); };

interface ExportTasksPanelProps {
  /** When set, only tasks belonging to this project are listed. */
  projectId?: string;
  /** Project pages, forwarded to each task row. */
  pages?: Page[];
  className?: string;
}

/**
 * Collapsible panel listing export tasks from the export-task store, split
 * into active tasks (pending/processing/running) and finished ones
 * (completed/failed). Renders nothing when there are no matching tasks.
 */
export const ExportTasksPanel: React.FC<ExportTasksPanelProps> = ({ projectId, pages = [], className }) => {
  const t = useT(exportI18n);
  const [isExpanded, setIsExpanded] = useState(true);
  const { tasks, removeTask, clearCompleted, restoreActiveTasks } = useExportTasksStore();

  const filteredTasks = projectId ? tasks.filter(task => task.projectId === projectId) : tasks;
  const activeTasks = filteredTasks.filter(
    task => task.status === 'PENDING' || task.status === 'PROCESSING' || task.status === 'RUNNING'
  );
  const completedTasks = filteredTasks.filter(
    task => task.status === 'COMPLETED' || task.status === 'FAILED'
  );

  // Runs once on mount.
  useEffect(() => {
    restoreActiveTasks();
  }, []);

  // Keep the panel open while tasks are active.
  // NOTE(review): because `isExpanded` is a dependency, collapsing the panel
  // while a task is active is undone immediately — confirm this is intended.
  useEffect(() => {
    if (activeTasks.length > 0 && !isExpanded) {
      setIsExpanded(true);
    }
  }, [activeTasks.length, isExpanded]);

  if (filteredTasks.length === 0) {
    return null;
  }

  return (
{isExpanded && (
{activeTasks.length > 0 && (
{activeTasks.map(task => ( removeTask(task.id)} /> ))}
)} {completedTasks.length > 0 && (
{t('shared.historyRecords')}
{completedTasks.map(task => ( removeTask(task.id)} /> ))}
)}
)}
); };

================================================ FILE: frontend/src/components/shared/FilePreviewModal.tsx ================================================
import React, { useState, useEffect, useRef } from 'react';
import { Modal, Markdown, Loading, useToast } from '@/components/shared';
import { useT } from '@/hooks/useT';
import { getReferenceFile, type ReferenceFile } from '@/api/endpoints';

// Translations bundled with the FilePreviewModal component.
const filePreviewI18n = {
  zh: {
    filePreview: { title: "文件预览", loading: "加载文件内容中...", notParsed: "文件尚未解析完成,无法预览", loadFailed: "加载文件内容失败" }
  },
  en: {
    filePreview: { title: "File Preview", loading: "Loading file content...", notParsed: "File not yet parsed, cannot preview", loadFailed: "Failed to load file content" }
  }
};

interface FilePreviewModalProps {
  /** Id of the reference file to preview; `null` clears the preview state. */
  fileId: string | null;
  /** Called when the modal should close (also invoked when the file is not parsed yet). */
  onClose: () => void;
}

/**
 * Modal that loads a reference file by id and shows its parsed markdown.
 *
 * If the file has not finished parsing, an info toast is shown and the modal
 * closes itself. Load failures are reported through an error toast.
 */
export const FilePreviewModal: React.FC<FilePreviewModalProps> = ({
  fileId,
  onClose,
}) => {
  const t = useT(filePreviewI18n);
  const [file, setFile] = useState<ReferenceFile | null>(null);
  const [content, setContent] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(false);
  const { show } = useToast();

  // Keep the latest callbacks in refs so the loading effect does not have to
  // depend on them (which would re-run it on every render and loop).
  const onCloseRef = useRef(onClose);
  const showRef = useRef(show);

  useEffect(() => {
    onCloseRef.current = onClose;
    showRef.current = show;
  }, [onClose, show]);

  useEffect(() => {
    if (!fileId) {
      setFile(null);
      setContent(null);
      setIsLoading(false);
      return;
    }

    // Set by the cleanup below: a response for a superseded fileId must not
    // overwrite newer state, raise toasts, or clear the loading flag.
    let cancelled = false;

    const loadFile = async () => {
      setIsLoading(true);
      try {
        const response = await getReferenceFile(fileId);
        if (cancelled) return;

        if (response.data?.file) {
          const fileData = response.data.file;
          // Only files whose parsing has completed can be previewed.
          if (fileData.parse_status !== 'completed') {
            showRef.current({
              message: t('filePreview.notParsed'),
              type: 'info',
            });
            onCloseRef.current();
            return;
          }
          setFile(fileData);
          setContent(fileData.markdown_content || t('common.noData'));
        }
      } catch (error: unknown) {
        if (cancelled) return;
        console.error('Load file content failed:', error);
        // Prefer the API error message, then the thrown error's own message.
        const details = error as
          | { response?: { data?: { error?: { message?: string } } }; message?: string }
          | null
          | undefined;
        showRef.current({
          message: details?.response?.data?.error?.message || details?.message || t('filePreview.loadFailed'),
          type: 'error',
        });
        setFile(null);
        setContent(null);
      } finally {
        if (!cancelled) {
          setIsLoading(false);
        }
      }
    };

    void loadFile();

    return () => {
      cancelled = true;
    };
  }, [fileId]); // Intentionally depends on fileId only.

  return ( {isLoading ? (
) : content ? (
{content}
) : (

{t('common.noData')}

)}
); }; ================================================ FILE: frontend/src/components/shared/Footer.tsx ================================================ import React from 'react'; import { Github } from 'lucide-react'; const GITHUB_REPO = 'Anionex/banana-slides'; const GITHUB_URL = `https://github.com/${GITHUB_REPO}`; export const Footer: React.FC = () => { const currentYear = new Date().getFullYear(); return (
{/* Copyright */}
© {currentYear} 蕉幻 Banana Slides
{/* Divider - 仅在大屏显示 */} · {/* GitHub Link */} GitHub
); };

================================================ FILE: frontend/src/components/shared/GithubBadge.tsx ================================================
import React, { useEffect, useState } from 'react';
import { Github, Star, GitFork } from 'lucide-react';

const GITHUB_REPO = 'Anionex/banana-slides';
const GITHUB_URL = `https://github.com/${GITHUB_REPO}`;

interface GithubStats {
  stars: number;
  forks: number;
}

const CACHE_KEY = 'github-stats-cache-v2';
const CACHE_DURATION = 3600 * 1000; // 1 hour

/** Runtime check that a value (from the cache or the API) has the GithubStats shape. */
const isGithubStats = (value: unknown): value is GithubStats => {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as Record<string, unknown>;
  return Number.isFinite(candidate.stars) && Number.isFinite(candidate.forks);
};

/**
 * Badge showing the repository's star and fork counts.
 *
 * Counts are read from a localStorage cache while it is younger than
 * CACHE_DURATION; otherwise they are fetched from the GitHub API and cached.
 * On any failure the counts stay at their initial value of 0.
 */
export const GithubBadge: React.FC = () => {
  const [stats, setStats] = useState<GithubStats>({
    stars: 0,
    forks: 0,
  });

  useEffect(() => {
    const fetchStats = async () => {
      // Check cache. A malformed entry is ignored instead of being pushed
      // into state, where it would make formatCount throw during render.
      try {
        const cached = localStorage.getItem(CACHE_KEY);
        if (cached) {
          const parsed: unknown = JSON.parse(cached);
          const entry = (parsed ?? {}) as { data?: unknown; timestamp?: unknown };
          if (
            typeof entry.timestamp === 'number' &&
            Date.now() - entry.timestamp < CACHE_DURATION &&
            isGithubStats(entry.data)
          ) {
            setStats(entry.data);
            return;
          }
        }
      } catch (e) {
        console.warn('Failed to read github stats cache', e);
      }

      // Fetch from API
      try {
        const res = await fetch(`https://api.github.com/repos/${GITHUB_REPO}`);
        if (!res.ok) throw new Error('Failed to fetch repo info');
        const data: unknown = await res.json();
        const payload = (data ?? {}) as { stargazers_count?: unknown; forks_count?: unknown };
        const newStats = {
          stars: payload.stargazers_count,
          forks: payload.forks_count,
        };
        if (!isGithubStats(newStats)) throw new Error('Unexpected repo info payload');
        setStats(newStats);
        localStorage.setItem(CACHE_KEY, JSON.stringify({
          data: newStats,
          timestamp: Date.now(),
        }));
      } catch (error) {
        console.error('Error fetching GitHub stats:', error);
      }
    };

    void fetchStats();
  }, []);

  /** Abbreviates counts of 1000 and above as e.g. "1.2k" (a trailing ".0" is dropped). */
  const formatCount = (count: number) => {
    if (count >= 1000) {
      return (count / 1000).toFixed(1).replace(/\.0$/, '') + 'k';
    }
    return count.toString();
  };

  return ( {/* Left: GitHub logo */}
{/* 右侧:上下结构 (Stars & Forks) */}
{/* Stars */}
{formatCount(stats.stars)}
{/* Forks */}
{formatCount(stats.forks)}
); };

================================================ FILE: frontend/src/components/shared/GithubRepoCard.tsx ================================================
import React, { useState, useEffect } from 'react';
import { Star, GitFork } from 'lucide-react';

const GITHUB_REPO = 'Anionex/banana-slides';
const GITHUB_URL = `https://github.com/${GITHUB_REPO}`;

interface RepoStats {
  stars: number;
  forks: number;
}

/** Runtime check that a value (from the cache or the API) has the RepoStats shape. */
const isRepoStats = (value: unknown): value is RepoStats => {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as Record<string, unknown>;
  return Number.isFinite(candidate.stars) && Number.isFinite(candidate.forks);
};

/**
 * Card showing the repository's star and fork counts.
 *
 * Counts come from a localStorage cache that is valid for 10 minutes, falling
 * back to the GitHub API. While loading the card shows "..."; when no counts
 * could be obtained it shows "-".
 */
export const GithubRepoCard: React.FC = () => {
  const [stats, setStats] = useState<RepoStats | null>(null);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    const fetchStats = async () => {
      try {
        const now = Date.now();

        // Try the localStorage cache first. It is read in its own try block so
        // that a corrupt entry falls through to the network request instead of
        // aborting the whole load.
        let cachedStats: RepoStats | null = null;
        try {
          const cached = localStorage.getItem('github_repo_stats');
          const cacheTime = localStorage.getItem('github_repo_stats_time');
          // Cache entries are valid for 10 minutes.
          if (cached && cacheTime && now - Number.parseInt(cacheTime, 10) < 10 * 60 * 1000) {
            const parsed: unknown = JSON.parse(cached);
            if (isRepoStats(parsed)) {
              cachedStats = parsed;
            }
          }
        } catch (cacheError) {
          console.warn('Failed to read GitHub stats cache:', cacheError);
        }

        if (cachedStats) {
          setStats(cachedStats);
          return;
        }

        const response = await fetch(`https://api.github.com/repos/${GITHUB_REPO}`);
        if (response.ok) {
          const data: unknown = await response.json();
          const payload = (data ?? {}) as { stargazers_count?: unknown; forks_count?: unknown };
          const newStats = {
            stars: payload.stargazers_count,
            forks: payload.forks_count,
          };
          if (isRepoStats(newStats)) {
            setStats(newStats);
            // Cache the result together with the time it was fetched.
            localStorage.setItem('github_repo_stats', JSON.stringify(newStats));
            localStorage.setItem('github_repo_stats_time', now.toString());
          }
        }
      } catch (error) {
        console.error('Failed to fetch GitHub stats:', error);
      } finally {
        setLoading(false);
      }
    };

    void fetchStats();
  }, []);

  /** Abbreviates counts of 1000 and above as e.g. "1.2k" (a trailing ".0" is dropped). */
  const formatNumber = (num: number): string => {
    if (num >= 1000) {
      return (num / 1000).toFixed(1).replace(/\.0$/, '') + 'k';
    }
    return num.toString();
  };

  return ( {/* GitHub icon */} {/* Divider */}
{/* Star 数量 */}
{loading ? '...' : stats ? formatNumber(stats.stars) : '-'}
{/* Fork 数量 */}
{loading ? '...' : stats ? formatNumber(stats.forks) : '-'}
); }; ================================================ FILE: frontend/src/components/shared/HelpModal.tsx ================================================ import React, { useState } from 'react'; import { Sparkles, FileText, Palette, MessageSquare, Download, ChevronLeft, ChevronRight, ExternalLink, Settings, Check } from 'lucide-react'; import { useNavigate } from 'react-router-dom'; import { Modal } from './Modal'; import { Button } from './Button'; import { useT } from '@/hooks/useT'; import { useTranslation } from 'react-i18next'; // --------------------------------------------------------------------------- // i18n // --------------------------------------------------------------------------- const i18nDict = { zh: { guide: { brand: '蕉幻 · Banana Slides', setup: '快速开始', setupSub: '完成基础配置,开启 AI 创作之旅', features: '功能介绍', featuresSub: '探索如何使用 AI 快速创建精美 PPT', gallery: '结果案例', gallerySub: '以下是使用蕉幻生成的 PPT 案例展示', galleryMore: '查看更多使用案例', hi: '欢迎使用蕉幻!', hiSub: '在开始前,让我们先完成基础配置', s1: '配置 API Key', s1d: '前往设置页面,配置项目需要使用的API服务,包括:', s1i: ['您的 AI 服务提供商的 API Base 和 API Key', '配置文本、图像生成模型(banana pro)和图像描述模型', '若需要文件解析功能,请配置 MinerU Token', '若需要可编辑导出功能,请配置MinerU TOKEN 和 Baidu API KEY'], s2: '保存并测试', s2d: '配置完成后,务必点击「保存设置」按钮,然后在页面底部进行服务测试,确保各项服务正常工作。', s3: '开始创作', s3d: '配置成功后,返回首页即可开始使用 AI 生成精美的 PPT!', s4: '*问题反馈', s4d: '若使用过程中遇到问题,可在github issue提出', issueLink: '前往Github issue', settingsBtn: '前往设置页面', hint: '提示', hintBody: '如果您还没有 API Key,可以前往对应服务商官网注册获取。配置完成后,建议先进行服务测试,避免后续使用出现问题。', prev: '上一页', next: '下一页', cases: { softwareDev: '软件开发最佳实践', deepseek: 'DeepSeek-V3.2技术展示', prefabFood: '预制菜智能产线装备研发和产业化', moneyHistory: '钱的演变:从贝壳到纸币的旅程' }, feat: { paths: { t: '灵活多样的创作路径', d: '支持想法、大纲、页面描述三种起步方式,满足不同创作习惯。', items: ['一句话生成:输入一个主题,AI 自动生成结构清晰的大纲和逐页内容描述', '自然语言编辑:支持以 Vibe 形式口头修改大纲或描述,AI 实时响应调整', '大纲/描述模式:既可一键批量生成,也可手动调整细节'] }, parse: { t: '强大的素材解析能力', d: '上传多种格式文件,自动解析内容,为生成提供丰富素材。', items: ['多格式支持:上传 PDF/Docx/MD/Txt 等文件,后台自动解析内容', '智能提取:自动识别文本中的关键点、图片链接和图表信息', '风格参考:支持上传参考图片或模板,定制 PPT 
风格'] }, vibe: { t: '「Vibe」式自然语言修改', d: '不再受限于复杂的菜单按钮,直接通过自然语言下达修改指令。', items: ['局部重绘:对不满意的区域进行口头式修改(如「把这个图换成饼图」)', '整页优化:基于 nano banana pro🍌 生成高清、风格统一的页面'] }, export: { t: '开箱即用的格式导出', d: '一键导出标准格式,直接演示无需调整。', items: ['多格式支持:一键导出标准 PPTX 或 PDF 文件', '完美适配:默认 16:9 比例,排版无需二次调整'] }, }, }, }, en: { guide: { brand: 'Banana Slides', setup: 'Quick Start', setupSub: 'Complete basic configuration and start your AI creation journey', features: 'Features', featuresSub: 'Explore how to use AI to quickly create beautiful PPT', gallery: 'Showcases', gallerySub: 'Here are PPT examples generated with Banana Slides', galleryMore: 'View more examples', hi: 'Welcome to Banana Slides!', hiSub: "Let's complete the basic configuration before you start", s1: 'Configure API Key', s1d: 'Go to settings page to configure the API services needed for the project, including:', s1i: ["Your AI service provider's API Base and API Key", 'Configure text, image generation model (banana pro) and image caption model', 'If you need file parsing, configure MinerU Token', 'If you need editable export, configure MinerU TOKEN and Baidu API KEY'], s2: 'Save and Test', s2d: 'After configuration, be sure to click "Save Settings" button, then test services at the bottom of the page to ensure everything works properly.', s3: 'Start Creating', s3d: 'After successful configuration, return to home page to start using AI to generate beautiful PPT!', s4: '*Feedback', s4d: 'If you encounter issues while using, please raise them on GitHub issues', issueLink: 'Go to GitHub Issues', settingsBtn: 'Go to Settings', hint: 'Tip', hintBody: "If you don't have an API Key yet, you can register on the corresponding service provider's website. 
After configuration, it's recommended to test services first to avoid issues later.", prev: 'Previous', next: 'Next', cases: { softwareDev: 'Software Development Best Practices', deepseek: 'DeepSeek-V3.2 Technical Showcase', prefabFood: 'Prefab Food Intelligent Production Line R&D', moneyHistory: 'The Evolution of Money: From Shells to Paper' }, feat: { paths: { t: 'Flexible Creation Paths', d: 'Support idea, outline, and page description as starting points to meet different creative habits.', items: ['One-line generation: Enter a topic, AI automatically generates a clear outline and page-by-page content description', 'Natural language editing: Support Vibe-style verbal modification of outlines or descriptions, AI responds in real-time', 'Outline/Description mode: Either batch generate with one click, or manually adjust details'] }, parse: { t: 'Powerful Material Parsing', d: 'Upload multiple format files, automatically parse content to provide rich materials for generation.', items: ['Multi-format support: Upload PDF/Docx/MD/Txt files, backend automatically parses content', 'Smart extraction: Automatically identify key points, image links and chart information in text', 'Style reference: Support uploading reference images or templates to customize PPT style'] }, vibe: { t: '"Vibe" Style Natural Language Editing', d: 'No longer limited by complex menu buttons, directly issue modification commands through natural language.', items: ['Partial redraw: Make verbal modifications to unsatisfying areas (e.g., "Change this chart to a pie chart")', 'Full page optimization: Generate HD, style-consistent pages based on nano banana pro🍌'] }, export: { t: 'Ready-to-Use Format Export', d: 'One-click export to standard formats, present directly without adjustments.', items: ['Multi-format support: One-click export to standard PPTX or PDF files', 'Perfect fit: Default 16:9 ratio, no secondary layout adjustments needed'] }, }, }, }, }; // 
--------------------------------------------------------------------------- // Static data // --------------------------------------------------------------------------- const SHOWCASES = [ { img: 'https://github.com/user-attachments/assets/d58ce3f7-bcec-451d-a3b9-ca3c16223644', key: 'softwareDev' }, { img: 'https://github.com/user-attachments/assets/c64cd952-2cdf-4a92-8c34-0322cbf3de4e', key: 'deepseek' }, { img: 'https://github.com/user-attachments/assets/383eb011-a167-4343-99eb-e1d0568830c7', key: 'prefabFood' }, { img: 'https://github.com/user-attachments/assets/1a63afc9-ad05-4755-8480-fc4aa64987f1', key: 'moneyHistory' }, ]; const FEATURES: { key: string; icon: React.ReactNode }[] = [ { key: 'paths', icon: }, { key: 'parse', icon: }, { key: 'vibe', icon: }, { key: 'export', icon: }, ]; // --------------------------------------------------------------------------- // Page renderers // --------------------------------------------------------------------------- /** Retrieve an array value from i18nDict by dot-path (useT only handles strings). */ function tList(lang: 'zh' | 'en', path: string): string[] { const dict = i18nDict[lang] as Record; let cur: unknown = dict; for (const seg of path.split('.')) { if (cur && typeof cur === 'object' && seg in (cur as Record)) { cur = (cur as Record)[seg]; } else { return []; } } return Array.isArray(cur) ? cur : []; } type PageRenderer = (ctx: { t: ReturnType; lang: 'zh' | 'en'; navigate: ReturnType; onClose: () => void; showcaseIdx: number; setShowcaseIdx: (i: number) => void; expandedFeat: number | null; setExpandedFeat: (i: number | null) => void; }) => React.ReactNode; const renderSetupPage: PageRenderer = ({ t, lang, navigate, onClose }) => { const steps = [ { num: '1', bg: 'bg-banana-500', content: (

{t('guide.s1')}

{t('guide.s1d')}

    {tList(lang, 'guide.s1i').map((item, i) => (
  • • {item}
  • ))}
), highlight: true }, { num: '2', bg: 'bg-orange-500', content: (

{t('guide.s2')}

{t('guide.s2d')}

) }, { num: , bg: 'bg-green-500', content: (

{t('guide.s3')}

{t('guide.s3d')}

) }, ]; return (
Banana Slides Logo

{t('guide.hi')}

{t('guide.hiSub')}

{steps.map((s, i) => (
{s.num}
{s.content}
))}
4

{t('guide.s4')}

{t('guide.s4d')}

{t('guide.issueLink')}

💡 {t('guide.hint')}:{t('guide.hintBody')}

); }; const renderFeaturesPage: PageRenderer = ({ t, lang, expandedFeat, setExpandedFeat }) => (
{FEATURES.map((f, idx) => (
setExpandedFeat(expandedFeat === idx ? null : idx)} >
{f.icon}

{t(`guide.feat.${f.key}.t`)}

{t(`guide.feat.${f.key}.d`)}

{expandedFeat === idx && (
{tList(lang, `guide.feat.${f.key}.items`).map((line, li) => (
{line}
))}
)}
))}
); const renderGalleryPage: PageRenderer = ({ t, showcaseIdx, setShowcaseIdx }) => { const prev = () => setShowcaseIdx(showcaseIdx === 0 ? SHOWCASES.length - 1 : showcaseIdx - 1); const next = () => setShowcaseIdx(showcaseIdx === SHOWCASES.length - 1 ? 0 : showcaseIdx + 1); return (

{t('guide.gallerySub')}

{t(`guide.cases.${SHOWCASES[showcaseIdx].key}`)}

{t(`guide.cases.${SHOWCASES[showcaseIdx].key}`)}

{SHOWCASES.map((_, i) => (
{SHOWCASES.map((sc, i) => ( ))}
); }; // --------------------------------------------------------------------------- // Pages definition // --------------------------------------------------------------------------- interface PageDef { titleKey: string; subtitleKey: string; render: PageRenderer; } const PAGES: PageDef[] = [ { titleKey: 'guide.setup', subtitleKey: 'guide.setupSub', render: renderSetupPage }, { titleKey: 'guide.features', subtitleKey: 'guide.featuresSub', render: renderFeaturesPage }, { titleKey: 'guide.gallery', subtitleKey: 'guide.gallerySub', render: renderGalleryPage }, ]; // --------------------------------------------------------------------------- // Component // --------------------------------------------------------------------------- interface HelpModalProps { isOpen: boolean; onClose: () => void; } export const HelpModal: React.FC = ({ isOpen, onClose }) => { const t = useT(i18nDict); const { i18n } = useTranslation(); const lang: 'zh' | 'en' = i18n.language?.startsWith('zh') ? 'zh' : 'en'; const navigate = useNavigate(); const [pageIdx, setPageIdx] = useState(0); const [showcaseIdx, setShowcaseIdx] = useState(0); const [expandedFeat, setExpandedFeat] = useState(null); const page = PAGES[pageIdx]; return (
{/* header */}
{t('guide.brand')}

{t(page.titleKey)}

{t(page.subtitleKey)}

{/* dots */}
{PAGES.map((p, i) => (
{/* body */}
{page.render({ t, lang, navigate, onClose, showcaseIdx, setShowcaseIdx, expandedFeat, setExpandedFeat })}
{/* footer */}
{pageIdx > 0 && ( )}
GitHub
{pageIdx < PAGES.length - 1 ? ( ) : ( )}
); };

================================================ FILE: frontend/src/components/shared/ImagePreviewList.tsx ================================================
import React, { useMemo } from 'react';
import { X } from 'lucide-react';
import { useT } from '@/hooks/useT';
import { isUploadingUrl, getUploadingPreviewUrl } from '@/hooks/useImagePaste';

// Translations bundled with the ImagePreviewList component.
const imagePreviewI18n = {
  zh: {
    imagePreview: { title: "图片预览", removeImage: "移除图片", imageLoadFailed: "图片加载失败" }
  },
  en: {
    imagePreview: { title: "Image Preview", removeImage: "Remove Image", imageLoadFailed: "Image load failed" }
  }
};

interface ImagePreviewListProps {
  /** Markdown text to scan for images. */
  content: string;
  /** When provided, called with an image's URL when the user removes it. */
  onRemoveImage?: (imageUrl: string) => void;
  className?: string;
}

/**
 * Extracts the image links from markdown text.
 * Supported forms: `![alt](url)` and `![](url)`; an empty alt is reported as
 * `'image'`.
 *
 * @param text Markdown source.
 * @returns One entry per image, in document order, with the alt text, the URL
 *          and the full matched markdown.
 */
const parseMarkdownImages = (text: string): Array<{ url: string; alt: string; fullMatch: string }> => {
  const imageRegex = /!\[([^\]]*)\]\(([^)]+)\)/g;
  return Array.from(text.matchAll(imageRegex), (match) => ({
    alt: match[1] || 'image',
    url: match[2],
    fullMatch: match[0],
  }));
};

/**
 * Horizontally scrolling preview strip of every markdown image found in the
 * editor content. Renders nothing when the content contains no images.
 */
export const ImagePreviewList: React.FC<ImagePreviewListProps> = ({
  content,
  onRemoveImage,
  className = ''
}) => {
  const t = useT(imagePreviewI18n);

  // Re-parse only when the content changes.
  const images = useMemo(() => parseMarkdownImages(content), [content]);

  // Nothing to preview.
  if (images.length === 0) {
    return null;
  }

  return (
{t('imagePreview.title')} ({images.length})
{/* 横向滚动容器 */}
{images.map((image, index) => { const uploading = isUploadingUrl(image.url); const imgSrc = uploading ? getUploadingPreviewUrl(image.url) : image.url; return (
{/* 图片容器 */}
{image.alt} { const target = e.target as HTMLImageElement; target.style.display = 'none'; const parent = target.parentElement; if (parent && !parent.querySelector('.error-placeholder')) { const placeholder = document.createElement('div'); placeholder.className = 'error-placeholder w-full h-full flex items-center justify-center text-gray-400 text-xs text-center p-2'; placeholder.textContent = t('imagePreview.imageLoadFailed'); parent.appendChild(placeholder); } }} /> {/* 上传中遮罩 */} {uploading && (
)} {/* 删除按钮 */} {onRemoveImage && !uploading && ( )} {/* 悬浮时显示图片描述 */}
{image.alt !== 'image' ? image.alt : (() => {
  // Fall back to the file name from the URL, with a "_<digits>" suffix before
  // the extension removed.
  const fileName = imgSrc.split('/').pop()?.replace(/_\d+\./, '.') || '';
  try {
    return decodeURIComponent(fileName);
  } catch {
    // decodeURIComponent throws URIError on malformed escapes (e.g. a lone
    // "%"); show the raw name rather than failing the render.
    return fileName;
  }
})()}
); })}
); }; export default ImagePreviewList; ================================================ FILE: frontend/src/components/shared/Input.tsx ================================================ import React from 'react'; import { cn } from '@/utils'; interface InputProps extends React.InputHTMLAttributes { label?: string; error?: string; } export const Input: React.FC = ({ label, error, className, ...props }) => { return (
{label && ( )} {error && (

{error}

)}
); };

================================================ FILE: frontend/src/components/shared/Loading.tsx ================================================
import React, { useEffect, useRef } from 'react';
import { ArrowLeft } from 'lucide-react';
import { useTranslation } from 'react-i18next';
import { cn } from '@/utils';

interface ProgressData {
  total: number;
  completed: number;
  percent?: number;
  current_step?: string;
  messages?: string[];
}

interface LoadingProps {
  fullscreen?: boolean;
  message?: string;
  progress?: ProgressData;
  /** Callback when user clicks "Run in Background" button */
  onBackgroundClick?: () => void;
  /** Label for the background button */
  backgroundButtonLabel?: string;
}

/**
 * Loading indicator with an optional progress read-out and message log.
 *
 * Falls back to translated defaults when `message` or
 * `backgroundButtonLabel` are not provided, and scrolls the message log to
 * its end whenever `progress.messages` changes.
 */
export const Loading: React.FC<LoadingProps> = ({
  fullscreen = false,
  message,
  progress,
  onBackgroundClick,
  backgroundButtonLabel,
}) => {
  const { t } = useTranslation();
  // NOTE(review): element type assumed to be a div — the markup that attaches
  // this ref is not visible here; confirm.
  const messagesEndRef = useRef<HTMLDivElement>(null);
  const defaultMessage = message || t('common.loading');
  const defaultBackgroundLabel = backgroundButtonLabel || t('common.runInBackground');

  // Auto-scroll to the newest message.
  useEffect(() => {
    if (messagesEndRef.current) {
      messagesEndRef.current.scrollIntoView({ behavior: 'smooth' });
    }
  }, [progress?.messages]);

  /**
   * Progress as a percentage: an explicit `percent` wins, otherwise it is
   * derived from completed/total; 0 when nothing is known yet.
   */
  const getPercent = () => {
    if (!progress) return 0;
    if (progress.percent !== undefined) return progress.percent;
    if (progress.total > 0) return Math.round((progress.completed / progress.total) * 100);
    return 0;
  };

  const percent = getPercent();
  const hasMessages = progress?.messages && progress.messages.length > 0;

  const content = (
{/* 加载图标 */}
{/* 消息 */}

{defaultMessage}

{/* 进度条 */} {progress && (
{percent}%
)} {/* 滚动消息日志 */} {hasMessages && (
{progress.messages!.map((msg, index) => (
{msg}
))}
)}
); if (fullscreen) { return (
{/* Background button - top left corner */} {onBackgroundClick && ( )} {content}
); } return content; }; // 骨架屏组件 export const Skeleton: React.FC<{ className?: string }> = ({ className }) => { return (
); };

================================================ FILE: frontend/src/components/shared/Markdown.tsx ================================================
import React, { useMemo } from 'react';
import ReactMarkdown from 'react-markdown';
import remarkGfm from 'remark-gfm';
import remarkBreaks from 'remark-breaks';
import remarkMath from 'remark-math';
import rehypeRaw from 'rehype-raw';
import rehypeKatex from 'rehype-katex';
import rehypeSanitize, { defaultSchema } from 'rehype-sanitize';
import 'katex/dist/katex.min.css';

interface MarkdownProps {
  children: string;
  className?: string;
}

/**
 * Preprocess markdown before it is handed to the renderer.
 *
 * - Rewrites LaTeX delimiters that remark-math doesn't support natively:
 *   \[...\] becomes $$...$$ and \(...\) becomes $...$.
 * - Inserts a blank line before a table (header row followed by a delimiter
 *   row) that directly follows other text, because a table is only parsed
 *   when preceded by an empty line.
 *
 * @param content Raw markdown.
 * @returns The rewritten markdown.
 */
function preprocessMarkdown(content: string): string {
  return content
    // Convert \[...\] block math to $$...$$
    .replace(/\\\[([\s\S]*?)\\\]/g, (_, math) => `$$${math}$$`)
    // Convert \(...\) inline math to $...$
    .replace(/\\\(([\s\S]*?)\\\)/g, (_, math) => `$${math}$`)
    // Add the blank line a table needs in front of it.
    .replace(/([^\n])\n(\|[^\n]+\|\s*\n\|[\s:|-]+\|\s*\n)/g, '$1\n\n$2');
}

/**
 * Renders a markdown string, preprocessing math delimiters and tables first.
 */
export const Markdown: React.FC<MarkdownProps> = ({ children, className = '' }) => {
  const processedContent = useMemo(() => preprocessMarkdown(children), [children]);

  // Create sanitize schema that allows KaTeX classes and spans
  const sanitizeSchema = useMemo(() => ({
    ...defaultSchema,
    attributes: {
      ...defaultSchema.attributes,
      span: [...(defaultSchema.attributes?.span || []), 'className', 'style'],
      div: [...(defaultSchema.attributes?.div || []), 'className'],
    },
    tagNames: [...(defaultSchema.tagNames || []), 'math', 'semantics', 'mrow', 'msup', 'mi', 'mn', 'mo'],
  }), []);

  return (

{children}

, ul: ({ children }) =>
    {children}
, ol: ({ children }) =>
    {children}
, li: ({ children }) =>
  • {children}
  • , a: ({ href, children }) => ( {children} ), img: ({ src, alt }) => ( {alt ), h1: ({ children }) =>

    {children}

    , h2: ({ children }) =>

    {children}

    , h3: ({ children }) =>

    {children}

    , code: ({ className, children }) => { const isInline = !className; return isInline ? ( {children} ) : ( {children} ); }, strong: ({ children }) => {children}, em: ({ children }) => {children}, br: () =>
    , table: ({ children }) => (
    {children}
    ), thead: ({ children }) => {children}, tbody: ({ children }) => {children}, tr: ({ children }) => {children}, th: ({ children }) => ( {children} ), td: ({ children }) => ( {children} ), }} > {processedContent}
    ); }; ================================================ FILE: frontend/src/components/shared/MarkdownTextarea.tsx ================================================ import React, { useRef, useEffect, useCallback, useState, useMemo, forwardRef, useImperativeHandle } from 'react'; import { cn } from '@/utils'; import { useT } from '@/hooks/useT'; import { isUploadingUrl, getUploadingPreviewUrl } from '@/hooks/useImagePaste'; const markdownTextareaI18n = { zh: { markdownTextarea: { dropImages: '拖放图片到此处', uploadImage: '上传图片', imageDescription: '图片描述', doubleClickToEdit: '双击编辑描述', uploading: '上传中...', } }, en: { markdownTextarea: { dropImages: 'Drop images here', uploadImage: 'Upload image', imageDescription: 'Image description', doubleClickToEdit: 'Double-click to edit description', uploading: 'Uploading...', } } }; const IMAGE_REGEX = /!\[([^\]]*)\]\(([^)]+)\)/g; const CHIP_SELECTED_CLASS = 'md-chip-selected'; const CHIP_CLASS = 'md-chip'; interface MarkdownTextareaProps { value: string; onChange: (value: string) => void; onPaste?: (e: React.ClipboardEvent) => void; /** Called when files are dropped or selected via upload button */ onFiles?: (files: File[]) => void; onBlur?: () => void; onFocus?: () => void; placeholder?: string; label?: string; error?: string; className?: string; rows?: number; /** Show the inline image upload button. Default: true when onFiles is provided */ showUploadButton?: boolean; /** Extra content rendered on the left side of the toolbar (after built-in buttons) */ toolbarLeft?: React.ReactNode; /** Content rendered on the right side of the toolbar */ toolbarRight?: React.ReactNode; /** Show compact image preview strip. 
Default: true */ showImagePreview?: boolean; }

/** Ref handle for MarkdownTextarea */
export interface MarkdownTextareaRef {
  /** Insert text at the current cursor position */
  insertAtCursor: (text: string) => void;
  /** Focus the editor */
  focus: () => void;
}

/**
 * Escapes the characters that are significant in HTML (`&`, `<`, `>`, `"`)
 * so the text can be interpolated into an `innerHTML` string safely.
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 */
function escapeHtml(text: string) {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}

/** A run of plain text, or one markdown image (`raw` is the full `![alt](url)` source). */
type Segment =
  | { type: 'text'; content: string }
  | { type: 'image'; alt: string; url: string; raw: string };

/**
 * Splits markdown text into alternating text and image segments, in order.
 * An image with empty alt text is reported with alt `'image'`.
 */
function parseSegments(text: string): Segment[] {
  const segments: Segment[] = [];
  let lastIndex = 0;
  // Fresh regex per call: a shared global regex would carry lastIndex between calls.
  const regex = new RegExp(IMAGE_REGEX.source, 'g');
  let match;
  while ((match = regex.exec(text)) !== null) {
    if (match.index > lastIndex) {
      segments.push({ type: 'text', content: text.slice(lastIndex, match.index) });
    }
    segments.push({ type: 'image', alt: match[1] || 'image', url: match[2], raw: match[0] });
    lastIndex = regex.lastIndex;
  }
  if (lastIndex < text.length) {
    segments.push({ type: 'text', content: text.slice(lastIndex) });
  }
  return segments;
}

/**
 * Converts the editor DOM back into markdown text.
 *
 * - Text nodes contribute their text with zero-width spaces removed.
 * - `<br>` becomes a newline.
 * - Elements carrying `data-markdown` contribute that value verbatim.
 * - A `<div>` starts a new line (unless it is the first child) and is then
 *   serialized recursively; any other element is serialized recursively.
 */
function serializeDOM(element: HTMLElement): string {
  let result = '';
  for (const node of Array.from(element.childNodes)) {
    if (node.nodeType === Node.TEXT_NODE) {
      result += (node.textContent || '').replace(/\u200B/g, '');
    } else if (node.nodeType === Node.ELEMENT_NODE) {
      const el = node as HTMLElement;
      if (el.tagName === 'BR') {
        result += '\n';
      } else if (el.dataset.markdown) {
        result += el.dataset.markdown;
      } else if (el.tagName === 'DIV') {
        if (node !== element.firstChild) result += '\n';
        result += serializeDOM(el);
      } else {
        result += serializeDOM(el);
      }
    }
  }
  return result;
}

/**
 * Label to show for an image: its alt text when meaningful, otherwise the
 * URL's file name with a `_<10+ digits>` suffix before the extension removed
 * and percent-escapes decoded. Falls back to the undecoded file name when it
 * is not valid percent-encoding.
 */
function getDisplayName(alt: string, url: string): string {
  if (alt && alt !== 'image') return alt;
  const filename = url.split('/').pop() || 'image';
  try {
    return decodeURIComponent(filename.replace(/_\d{10,}\./, '.'));
  } catch {
    return filename;
  }
}

// NOTE(review): both icon strings are empty here — confirm the intended inline markup.
const IMAGE_ICON = '';
const SPINNER_ICON = '';

function applyChipContent(chip: HTMLElement, seg: { alt: string; url: string;
raw: string }, tooltips?: { edit: string; uploading: string }) {
  const isUploading = isUploadingUrl(seg.url);

  // Store the segment on the element so it can be read back from the DOM.
  chip.dataset.markdown = seg.raw;
  chip.dataset.alt = seg.alt;
  chip.dataset.url = seg.url;

  // Tooltip depends on whether the image is still uploading.
  const tooltip = isUploading ? tooltips?.uploading : tooltips?.edit;
  chip.title = tooltip || '';

  const palette = isUploading
    ? 'bg-amber-50 text-amber-700 border border-amber-200 dark:bg-amber-900/30 dark:text-amber-300 dark:border-amber-700'
    : 'bg-gray-100 text-gray-700 border border-gray-200 hover:bg-gray-200 dark:bg-gray-700 dark:text-gray-200 dark:border-gray-600 dark:hover:bg-gray-600';
  const classNames = [
    CHIP_CLASS,
    'inline-flex items-center gap-1.5 px-2.5 py-1 rounded-md text-xs font-medium',
    'cursor-default select-none align-middle mx-0.5 transition-colors',
    palette,
  ];
  chip.className = classNames.join(' ');

  const icon = isUploading ? SPINNER_ICON : IMAGE_ICON;
  const label = escapeHtml(getDisplayName(seg.alt, seg.url));
  chip.innerHTML = `${icon}${label}`;
}

/**
 * Replaces the container's content with DOM built from the given segments:
 * text becomes text nodes separated by <br> at each newline, and every image
 * becomes a non-editable chip <span>. An empty result gets a single <br>.
 */
function buildDOM(container: HTMLElement, segments: Segment[], tooltips?: { edit: string; uploading: string }) {
  container.innerHTML = '';

  segments.forEach((segment) => {
    if (segment.type === 'image') {
      const chip = document.createElement('span');
      chip.contentEditable = 'false';
      applyChipContent(chip, segment, tooltips);
      container.appendChild(chip);
      return;
    }

    const lines = segment.content.split('\n');
    for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
      // Every line after the first is preceded by a line break.
      if (lineIdx > 0) {
        container.appendChild(document.createElement('br'));
      }
      // Empty lines contribute only their <br>.
      if (lines[lineIdx]) {
        container.appendChild(document.createTextNode(lines[lineIdx]));
      }
    }
  });

  if (!container.hasChildNodes()) {
    container.appendChild(document.createElement('br'));
  }
}

/**
 * Try to patch chips in-place when only image URLs/alt changed.
 * Returns true if successful (cursor preserved), false if full rebuild needed.
*/
function patchChips(container: HTMLElement, oldValue: string, newValue: string, tooltips?: { edit: string; uploading: string }): boolean {
  const oldSegs = parseSegments(oldValue);
  const newSegs = parseSegments(newValue);
  if (oldSegs.length !== newSegs.length) return false;

  // The values must have the same segment structure and identical text;
  // only the image segments may differ.
  for (let i = 0; i < oldSegs.length; i++) {
    const before = oldSegs[i];
    const after = newSegs[i];
    if (before.type !== after.type) return false;
    if (before.type === 'text' && after.type === 'text' && before.content !== after.content) {
      return false;
    }
  }

  // Structure matches — update changed chips in place
  const chips = Array.from(container.querySelectorAll('.' + CHIP_CLASS)) as HTMLElement[];

  // If the DOM does not hold exactly one chip per image segment, chips cannot
  // be paired with segments by position; report failure so the caller rebuilds.
  const imageCount = newSegs.filter((seg) => seg.type === 'image').length;
  if (chips.length !== imageCount) return false;

  let chipIdx = 0;
  for (let i = 0; i < newSegs.length; i++) {
    const before = oldSegs[i];
    const after = newSegs[i];
    if (before.type === 'image' && after.type === 'image') {
      const chip = chips[chipIdx++];
      if (before.raw !== after.raw) {
        applyChipContent(chip, after, tooltips);
      }
    }
  }
  return true;
}

/**
 * Returns the chip (an element carrying `data-markdown`) immediately before a
 * collapsed cursor, or null when there is none or the selection is a range.
 */
function getChipBeforeCursor(): HTMLElement | null {
  const sel = window.getSelection();
  if (!sel || sel.rangeCount === 0 || !sel.isCollapsed) return null;
  const range = sel.getRangeAt(0);

  // Cursor at the very start of a text node: look at the previous sibling.
  if (range.startContainer.nodeType === Node.TEXT_NODE && range.startOffset === 0) {
    const prev = range.startContainer.previousSibling as HTMLElement | null;
    if (prev?.dataset?.markdown) return prev;
  }
  // Cursor addressed by child index inside an element: look at the child before it.
  if (range.startContainer.nodeType === Node.ELEMENT_NODE && range.startOffset > 0) {
    const prev = range.startContainer.childNodes[range.startOffset - 1] as HTMLElement | null;
    if (prev?.dataset?.markdown) return prev;
  }
  return null;
}

/**
 * Returns the chip (an element carrying `data-markdown`) immediately after a
 * collapsed cursor, or null when there is none or the selection is a range.
 */
function getChipAfterCursor(): HTMLElement | null {
  const sel = window.getSelection();
  if (!sel || sel.rangeCount === 0 || !sel.isCollapsed) return null;
  const range = sel.getRangeAt(0);

  // Cursor at the very end of a text node: look at the next sibling.
  if (range.startContainer.nodeType === Node.TEXT_NODE) {
    if (range.startOffset === (range.startContainer.textContent || '').length) {
      const next =
range.startContainer.nextSibling as HTMLElement | null; if (next?.dataset?.markdown) return next; } } if (range.startContainer.nodeType === Node.ELEMENT_NODE) { const next = range.startContainer.childNodes[range.startOffset] as HTMLElement | null; if (next?.dataset?.markdown) return next; } return null; } function clearChipSelection(container: HTMLElement) { container.querySelectorAll('.' + CHIP_SELECTED_CLASS).forEach(el => { el.classList.remove(CHIP_SELECTED_CLASS, 'ring-2', 'ring-red-400', 'bg-red-50', 'dark:bg-red-900/30'); }); } function selectChip(chip: HTMLElement) { chip.classList.add(CHIP_SELECTED_CLASS, 'ring-2', 'ring-red-400', 'bg-red-50', 'dark:bg-red-900/30'); } export const MarkdownTextarea = forwardRef(({ value, onChange, onPaste, onFiles, onBlur, onFocus, placeholder, label, error, className, rows = 4, showUploadButton, toolbarLeft, toolbarRight, showImagePreview = true, }, ref) => { const t = useT(markdownTextareaI18n); const editorRef = useRef(null); const fileInputRef = useRef(null); const lastValueRef = useRef(value); const isInternalRef = useRef(false); const [isDragging, setIsDragging] = useState(false); const [editingChip, setEditingChip] = useState<{ chip: HTMLElement; rect: DOMRect } | null>(null); const [editAlt, setEditAlt] = useState(''); const editInputRef = useRef(null); const dragCountRef = useRef(0); const shouldShowUpload = showUploadButton ?? 
!!onFiles;
  const hasToolbar = shouldShowUpload || toolbarLeft || toolbarRight;

  // Keep chip tooltips in a ref so imperative DOM functions can read the latest i18n
  const chipTooltipsRef = useRef({ edit: '', uploading: '' });
  chipTooltipsRef.current = {
    edit: t('markdownTextarea.doubleClickToEdit'),
    uploading: t('markdownTextarea.uploading'),
  };

  // Initial render: build the editor DOM once from the initial `value`.
  useEffect(() => {
    if (editorRef.current) {
      buildDOM(editorRef.current, parseSegments(value), chipTooltipsRef.current);
      lastValueRef.current = value;
    }
  }, []);

  // Sync from external value changes — incremental patch when possible
  useEffect(() => {
    if (isInternalRef.current) {
      // This render was triggered by our own emitChange(); consume the flag.
      isInternalRef.current = false;
      // Even when skipping internal edits, check for external changes
      // batched in the same render (e.g. upload completion while typing)
      if (editorRef.current && value !== lastValueRef.current) {
        if (!patchChips(editorRef.current, lastValueRef.current, value, chipTooltipsRef.current)) {
          buildDOM(editorRef.current, parseSegments(value), chipTooltipsRef.current);
        }
        lastValueRef.current = value;
      }
      return;
    }
    if (editorRef.current && value !== lastValueRef.current) {
      // Try incremental update first (preserves cursor position)
      const patched = patchChips(editorRef.current, lastValueRef.current, value, chipTooltipsRef.current);
      if (!patched) {
        // Structure changed — full rebuild (e.g. new placeholder inserted)
        buildDOM(editorRef.current, parseSegments(value), chipTooltipsRef.current);
      }
      lastValueRef.current = value;
    }
  }, [value]);

  // Focus edit input when editing chip
  useEffect(() => {
    if (editingChip && editInputRef.current) {
      editInputRef.current.focus();
      editInputRef.current.select();
    }
  }, [editingChip]);

  // Serialize the editor DOM and report it through onChange. The change is
  // flagged as internal and recorded in lastValueRef so the sync effect above
  // does not rebuild the DOM for a value the editor itself produced.
  const emitChange = useCallback(() => {
    if (!editorRef.current) return;
    const markdown = serializeDOM(editorRef.current);
    isInternalRef.current = true;
    lastValueRef.current = markdown;
    onChange(markdown);
  }, [onChange]);

  // Shared function to insert content (text + chips) at cursor position
  const insertContentAtCursor = useCallback((text: string) => {
    if (!editorRef.current) return;
    // Parse the text to find image markdown and insert chips directly
    const segments = parseSegments(text);
    const sel = window.getSelection();
    if (!sel || sel.rangeCount === 0) {
      // Fallback: just insert as text
      document.execCommand('insertText', false, text);
      emitChange();
      return;
    }
    const range = sel.getRangeAt(0);
    // Replace whatever is currently selected.
    range.deleteContents();
    // Insert segments in order
    for (const segment of segments) {
      if (segment.type === 'text') {
        // Insert text nodes, handling newlines
        const lines = segment.content.split('\n');
        lines.forEach((line, i) => {
          if (i > 0) {
            // Every newline between lines becomes a <br>; collapse to the end
            // so the next node is inserted after it.
            range.insertNode(document.createElement('br'));
            range.collapse(false);
          }
          if (line) {
            const textNode = document.createTextNode(line);
            range.insertNode(textNode);
            range.setStartAfter(textNode);
            range.collapse(true);
          }
        });
      } else {
        // Insert chip element directly
        const chip = document.createElement('span');
        chip.contentEditable = 'false';
        applyChipContent(chip, segment, chipTooltipsRef.current);
        range.insertNode(chip);
        range.setStartAfter(chip);
        range.collapse(true);
      }
    }
    // Update selection to after inserted content
    sel.removeAllRanges();
    sel.addRange(range);
    emitChange();
  }, [emitChange]);

  // Expose insertAtCursor method via ref for external use (e.g., useImagePaste)
  useImperativeHandle(ref, () => ({
    insertAtCursor: (text: string) => {
      if (!editorRef.current) return;
      editorRef.current.focus();
      insertContentAtCursor(text);
    },
    focus: () => {
      editorRef.current?.focus();
    },
  }), [insertContentAtCursor]);

  // --- Chip editing ---

  // Open the alt-text popover for a chip. Chips whose URL is still an upload
  // placeholder are not editable. The popover rect is stored relative to the
  // closest `.relative` ancestor of the editor, 4px below the chip.
  const startEditChip = useCallback((chip: HTMLElement) => {
    if (isUploadingUrl(chip.dataset.url || '')) return;
    const rect = chip.getBoundingClientRect();
    const containerRect = editorRef.current?.closest('.relative')?.getBoundingClientRect();
    if (!containerRect) return;
    setEditAlt(chip.dataset.alt || '');
    setEditingChip({
      chip,
      rect: new DOMRect(
        rect.left - containerRect.left,
        rect.bottom - containerRect.top + 4,
        rect.width,
        rect.height,
      ),
    });
  }, []);

  // Write the edited alt text back onto the chip (dataset and visible label),
  // falling back to 'image' when the input is blank, then close the popover
  // and emit the updated markdown.
  const commitChipEdit = useCallback(() => {
    if (!editingChip) return;
    const { chip } = editingChip;
    const url = chip.dataset.url || '';
    const newAlt = editAlt.trim() || 'image';
    const newMarkdown = `![${newAlt}](${url})`;
    chip.dataset.markdown = newMarkdown;
    chip.dataset.alt = newAlt;
    const nameSpan = chip.querySelector('.truncate');
    if (nameSpan) nameSpan.textContent = newAlt;
    setEditingChip(null);
    emitChange();
  }, [editingChip, editAlt, emitChange]);

  // Close the popover without touching the chip.
  const cancelChipEdit = useCallback(() => {
    setEditingChip(null);
  }, []);

  // --- Key handling ---

  // Two-step chip deletion: the first Backspace/Delete next to a chip only
  // highlights it; pressing the key again removes the highlighted chip.
  // Any other key clears the highlight.
  const handleKeyDown = useCallback((e: React.KeyboardEvent) => {
    if (!editorRef.current) return;
    if (e.key === 'Backspace') {
      const selected = editorRef.current.querySelector('.' + CHIP_SELECTED_CLASS) as HTMLElement | null;
      if (selected) {
        e.preventDefault();
        selected.remove();
        emitChange();
        return;
      }
      const chip = getChipBeforeCursor();
      if (chip) {
        e.preventDefault();
        selectChip(chip);
        return;
      }
    } else if (e.key === 'Delete') {
      const selected = editorRef.current.querySelector('.' + CHIP_SELECTED_CLASS) as HTMLElement | null;
      if (selected) {
        e.preventDefault();
        selected.remove();
        emitChange();
        return;
      }
      const chip = getChipAfterCursor();
      if (chip) {
        e.preventDefault();
        selectChip(chip);
        return;
      }
    } else {
      clearChipSelection(editorRef.current);
    }
  }, [emitChange]);

  // Any typed input drops the chip highlight and reports the new content.
  const handleInput = useCallback(() => {
    if (editorRef.current) clearChipSelection(editorRef.current);
    emitChange();
  }, [emitChange]);

  // Give the caller's onPaste first refusal; if it did not call
  // preventDefault(), insert the clipboard's plain text ourselves.
  const handlePaste = useCallback((e: React.ClipboardEvent) => {
    onPaste?.(e);
    if (!e.defaultPrevented) {
      e.preventDefault();
      const text = e.clipboardData.getData('text/plain');
      // Use insertContentAtCursor to properly handle markdown images as chips
      insertContentAtCursor(text);
    }
  }, [onPaste, insertContentAtCursor]);

  // Copy the selection as serialized text: the selected DOM is cloned into a
  // detached div and run through serializeDOM before being put on the clipboard.
  const handleCopy = useCallback((e: React.ClipboardEvent) => {
    const selection = window.getSelection();
    if (!selection || selection.rangeCount === 0) return;
    const range = selection.getRangeAt(0);
    const fragment = range.cloneContents();
    const tempDiv = document.createElement('div');
    tempDiv.appendChild(fragment);
    const markdown = serializeDOM(tempDiv);
    e.preventDefault();
    e.clipboardData.setData('text/plain', markdown);
  }, []);

  // Same as copy, then remove the selected content and report the change.
  const handleCut = useCallback((e: React.ClipboardEvent) => {
    const selection = window.getSelection();
    if (!selection || selection.rangeCount === 0) return;
    const range = selection.getRangeAt(0);
    const fragment = range.cloneContents();
    const tempDiv = document.createElement('div');
    tempDiv.appendChild(fragment);
    const markdown = serializeDOM(tempDiv);
    e.preventDefault();
    e.clipboardData.setData('text/plain', markdown);
    // Delete the selected content after copying
    range.deleteContents();
    emitChange();
  }, [emitChange]);

  // A plain click clears the chip highlight and closes the alt-text popover.
  const handleClick = useCallback(() => {
    if (editorRef.current) clearChipSelection(editorRef.current);
    setEditingChip(null);
  }, []);

  // Double-clicking a chip (or anything inside it) opens the alt-text popover.
  const handleDoubleClick = useCallback((e: React.MouseEvent) => {
    const target = (e.target as HTMLElement).closest('.' + CHIP_CLASS) as HTMLElement | null;
    if (target?.dataset?.markdown) {
      e.preventDefault();
      startEditChip(target);
    }
  }, [startEditChip]);

  // --- Drag and drop ---

  // dragCountRef counts enter events minus leave events, so the overlay is
  // only hidden once the count returns to zero.
  const handleDragEnter = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    dragCountRef.current++;
    if (e.dataTransfer.types.includes('Files')) {
      setIsDragging(true);
    }
  }, []);

  const handleDragLeave = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    dragCountRef.current--;
    if (dragCountRef.current === 0) setIsDragging(false);
  }, []);

  const handleDragOver = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    e.dataTransfer.dropEffect = 'copy';
  }, []);

  // Reset the drag state and hand any dropped files to onFiles.
  const handleDrop = useCallback((e: React.DragEvent) => {
    e.preventDefault();
    dragCountRef.current = 0;
    setIsDragging(false);
    if (onFiles && e.dataTransfer.files.length > 0) {
      onFiles(Array.from(e.dataTransfer.files));
    }
  }, [onFiles]);

  // Forward files picked through the hidden input, then clear the input value.
  const handleFileInput = useCallback((e: React.ChangeEvent) => {
    const files = e.target.files;
    if (files && files.length > 0 && onFiles) {
      onFiles(Array.from(files));
    }
    e.target.value = '';
  }, [onFiles]);

  // Click on toolbar empty area → focus editor
  const handleToolbarMouseDown = useCallback((e: React.MouseEvent) => {
    // Only handle clicks on the toolbar itself, not on buttons inside
    if (e.target === e.currentTarget || !(e.target as HTMLElement).closest('button, a, input, [role="button"]')) {
      e.preventDefault(); // prevent editor blur
      editorRef.current?.focus();
    }
  }, []);

  // --- Image preview ---

  // Image segments of the current value, recomputed only when `value` changes.
  const images = useMemo(() =>
    parseSegments(value).filter((s): s is Extract => s.type === 'image'),
    [value]
  );

  // Remove every markdown image that points at `url` (plus one trailing
  // newline each), collapse runs of 3+ newlines to two, trim, rebuild the
  // editor DOM and report the new value as an internal change.
  const removeImage = useCallback((url: string) => {
    // Escape regex metacharacters so the URL is matched literally.
    const escaped = url.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regex = new RegExp(`!\\[[^\\]]*\\]\\(${escaped}\\)\\n?`, 'g');
    const newValue = value.replace(regex, '').replace(/\n{3,}/g, '\n\n').trim();
    if (editorRef.current) {
      buildDOM(editorRef.current, parseSegments(newValue), chipTooltipsRef.current);
    }
    isInternalRef.current = true;
    lastValueRef.current = newValue;
    onChange(newValue);
  }, [value, onChange]);

  // Minimum editor height: 24 units per requested row (presumably pixels — confirm against the markup).
  const minHeight = rows * 24;
  const isEmpty = !value.trim();

  return (
    {label && ( )} {/* Outer container — owns the border, focus ring, and toolbar */}
    {/* Editor area */}
    {/* Placeholder */} {isEmpty && placeholder && !isDragging && (
    {placeholder}
    )} {/* Drag overlay */} {isDragging && (
    {t('markdownTextarea.dropImages')}
    )} {/* Chip edit popover */} {editingChip && (
    setEditAlt(e.target.value)} onKeyDown={(e) => { if (e.key === 'Enter') { e.preventDefault(); commitChipEdit(); } if (e.key === 'Escape') cancelChipEdit(); }} onBlur={commitChipEdit} className="px-2 py-1 text-xs border-none outline-none bg-transparent w-36 text-gray-900 dark:text-foreground-primary" placeholder={t('markdownTextarea.imageDescription')} />
    )}
    {/* Toolbar */} {hasToolbar && (
    {shouldShowUpload && ( )} {toolbarLeft}
    {toolbarRight && (
    {toolbarRight}
    )}
    )} {/* Compact image preview strip — below toolbar */} {showImagePreview && images.length > 0 && (
    {images.map((img, i) => { const uploading = isUploadingUrl(img.url); const src = uploading ? getUploadingPreviewUrl(img.url) : img.url; return (
    {img.alt} { (e.target as HTMLImageElement).style.display = 'none'; }} /> {uploading && (
    )}
    {!uploading && ( )}
    {img.alt !== 'image' ? img.alt : getDisplayName(img.alt, img.url)}
    ); })}
    )}
    {/* Hidden file input for image upload */} {shouldShowUpload && ( )} {error && (

    {error}

    )}
    ); }); // Add display name for better debugging MarkdownTextarea.displayName = 'MarkdownTextarea'; ================================================ FILE: frontend/src/components/shared/MaterialCenterModal.tsx ================================================ import React, { useReducer, useEffect, useCallback } from 'react'; import { ImageIcon, RefreshCw, Upload, Download, X, FolderOpen, Eye, ArrowUpDown } from 'lucide-react'; import { Button } from './Button'; import { useT } from '@/hooks/useT'; import { useToast } from './Toast'; import { Modal } from './Modal'; import { listMaterials, uploadMaterial, listProjects, deleteMaterial, downloadMaterialsZip, type Material } from '@/api/endpoints'; import type { Project } from '@/types'; import { getImageUrl } from '@/api/client'; // --------------------------------------------------------------------------- // i18n // --------------------------------------------------------------------------- const i18nDict = { zh: { mc: { title: '素材中心', count: '共 {{count}} 个素材', empty: '暂无素材', selected: '已选 {{count}} 个', filterAll: '全部素材', filterNone: '未关联项目', moreProjects: '+ 更多项目…', preview: '预览', remove: '删除', closePreview: '关闭预览', emptyHint: '上传图片或通过素材生成功能创建素材', msg: { loadErr: '加载素材失败', badFormat: '不支持的图片格式', uploaded: '素材上传成功', uploadErr: '上传素材失败', noId: '无法删除:缺少素材ID', deleted: '素材已删除', deleteErr: '删除素材失败', downloaded: '下载成功', downloadErr: '下载失败', zipped: '已打包 {{count}} 个素材', zipErr: '批量下载失败', pickFirst: '请先选择要下载的素材', }, }, }, en: { mc: { title: 'Material Center', count: '{{count}} materials', empty: 'No materials', selected: '{{count}} selected', filterAll: 'All Materials', filterNone: 'Unassociated', moreProjects: '+ More projects…', preview: 'Preview', remove: 'Delete', closePreview: 'Close Preview', emptyHint: 'Upload images or create materials via the generator', msg: { loadErr: 'Failed to load materials', badFormat: 'Unsupported image format', uploaded: 'Material uploaded', uploadErr: 'Failed to upload material', noId: 
'Cannot delete: missing material ID',
        deleted: 'Material deleted',
        deleteErr: 'Failed to delete material',
        downloaded: 'Download complete',
        downloadErr: 'Download failed',
        zipped: 'Packaged {{count}} materials',
        zipErr: 'Batch download failed',
        pickFirst: 'Select materials to download first',
      },
    },
  },
};

// ---------------------------------------------------------------------------
// State
// ---------------------------------------------------------------------------

/** Everything the material center modal keeps in its reducer. */
interface State {
  /** Materials returned by the last successful list request. */
  items: Material[];
  /** Ids of the materials currently ticked in the grid. */
  selected: Set<string>;
  /** Ids of the materials with a delete request in flight. */
  deleting: Set<string>;
  loading: boolean;
  uploading: boolean;
  downloading: boolean;
  /** 'all', 'none', or a project id. */
  filter: string;
  sortBy: 'newest' | 'oldest' | 'name-asc' | 'name-desc';
  projects: Project[];
  /** True once the project list has been loaded at least once. */
  projectsReady: boolean;
  /** Material shown in the full-size preview overlay, if any. */
  preview: { url: string; label: string } | null;
}

type Action =
  | { type: 'SET_ITEMS'; items: Material[] }
  | { type: 'TOGGLE_SELECT'; key: string }
  | { type: 'SELECT_ALL'; keys: string[] }
  | { type: 'CLEAR_SELECTION' }
  | { type: 'SET_LOADING'; on: boolean }
  | { type: 'SET_UPLOADING'; on: boolean }
  | { type: 'SET_DOWNLOADING'; on: boolean }
  | { type: 'SET_FILTER'; value: string }
  | { type: 'SET_SORT'; value: State['sortBy'] }
  | { type: 'SET_PROJECTS'; list: Project[] }
  | { type: 'REMOVE_ITEM'; key: string }
  | { type: 'ADD_DELETING'; id: string }
  | { type: 'REMOVE_DELETING'; id: string }
  | { type: 'SET_PREVIEW'; preview: State['preview'] }
  | { type: 'RESET_EPHEMERAL' };

const initial: State = {
  items: [],
  selected: new Set<string>(),
  deleting: new Set<string>(),
  loading: false,
  uploading: false,
  downloading: false,
  filter: 'all',
  sortBy: 'newest',
  projects: [],
  projectsReady: false,
  preview: null,
};

/**
 * Pure state transition for the material center.
 *
 * Sets are always copied before being changed so React sees a new reference.
 *
 * @param s current state
 * @param a action to apply
 * @returns the next state (the same object for unknown actions)
 */
function reducer(s: State, a: Action): State {
  switch (a.type) {
    case 'SET_ITEMS':
      // A finished list request also ends the loading state.
      return { ...s, items: a.items, loading: false };
    case 'TOGGLE_SELECT': {
      const next = new Set(s.selected);
      if (next.has(a.key)) {
        next.delete(a.key);
      } else {
        next.add(a.key);
      }
      return { ...s, selected: next };
    }
    case 'SELECT_ALL':
      return { ...s, selected: new Set(a.keys) };
    case 'CLEAR_SELECTION':
      return { ...s, selected: new Set<string>() };
    case 'SET_LOADING':
      return { ...s, loading: a.on };
    case 'SET_UPLOADING':
      return { ...s, uploading: a.on };
    case 'SET_DOWNLOADING':
      return { ...s, downloading: a.on };
    case 'SET_FILTER':
      return { ...s, filter: a.value };
    case 'SET_SORT':
      return { ...s, sortBy: a.value };
    case 'SET_PROJECTS':
      return { ...s, projects: a.list, projectsReady: true };
    case 'REMOVE_ITEM': {
      // Drop the material from the list and from the selection.
      const items = s.items.filter((m) => m.id !== a.key);
      const selected = new Set(s.selected);
      selected.delete(a.key);
      return { ...s, items, selected };
    }
    case 'ADD_DELETING': {
      const d = new Set(s.deleting);
      d.add(a.id);
      return { ...s, deleting: d };
    }
    case 'REMOVE_DELETING': {
      const d = new Set(s.deleting);
      d.delete(a.id);
      return { ...s, deleting: d };
    }
    case 'SET_PREVIEW':
      return { ...s, preview: a.preview };
    case 'RESET_EPHEMERAL':
      // Only fields declared on State are reset; the former `showAllProjects`
      // key was not part of State and is no longer written.
      return { ...s, selected: new Set<string>(), preview: null };
    default:
      return s;
  }
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Best human-readable label for a material: the first non-empty of prompt, name, original/source filename, filename, url. */
const displayName = (m: Material) =>
  m.prompt?.trim() ||
  m.name?.trim() ||
  m.original_filename?.trim() ||
  m.source_filename?.trim() ||
  m.filename ||
  m.url;

/** MIME types accepted by the upload handler. */
const ACCEPTED_TYPES = ['image/png', 'image/jpeg', 'image/jpg', 'image/gif', 'image/webp', 'image/bmp', 'image/svg+xml'];

/** Short label for a project in the filter dropdown, truncated to 20 characters plus an ellipsis. */
const projectLabel = (p: Project) => {
  const raw = p.idea_prompt || p.outline_text || `Project ${p.project_id.slice(0, 8)}`;
  return raw.length > 20 ?
`${raw.slice(0, 20)}…` : raw; };

// ---------------------------------------------------------------------------
// Sub-components
// ---------------------------------------------------------------------------

/**
 * Toolbar row of the material center.
 *
 * From the expressions visible here it shows the material count (or the
 * "empty" label when there are no items), the selected count while anything
 * is selected, and extra content that is gated on `state.loading`, on there
 * being items, and on there being a selection.
 * NOTE(review): the element markup is not visible in this view — confirm
 * which controls are wired to onRefresh / onUpload / onDownload / dispatch.
 */
const ToolbarSection: React.FC<{ t: ReturnType; state: State; dispatch: React.Dispatch; onRefresh: () => void; onUpload: (e: React.ChangeEvent) => void; onDownload: () => void; }> = ({ t, state, dispatch, onRefresh, onUpload, onDownload }) => (
    {state.items.length > 0 ? t('mc.count', { count: state.items.length }) : t('mc.empty')} {state.selected.size > 0 && ( {t('mc.selected', { count: state.selected.size })} )} {state.loading && state.items.length > 0 && ( )}
    {/* Project filter dropdown */} {/* Sort cycle button */}
    {state.items.length > 0 && (
    {state.selected.size > 0 && ( <>
    )}
    )}
    );

/**
 * Grid of material tiles.
 *
 * For every material it derives `sel` (its id is in `selected`) and `busy`
 * (its id is in `deleting`); the tile toggles selection through
 * `onToggle(m.id)` and switches its border classes based on `sel`.
 * NOTE(review): how `busy`, `onPreview` and `onDelete` are used is in markup
 * that is not visible in this view.
 */
const MaterialGrid: React.FC<{ items: Material[]; selected: Set; deleting: Set; t: ReturnType; onToggle: (id: string) => void; onPreview: (e: React.MouseEvent, m: Material) => void; onDelete: (e: React.MouseEvent, m: Material) => void; }> = ({ items, selected, deleting, t, onToggle, onPreview, onDelete }) => (
    {items.map((m) => { const sel = selected.has(m.id); const busy = deleting.has(m.id); return (
    onToggle(m.id)} className={`aspect-video rounded-lg border-2 cursor-pointer transition-all relative group ${ sel ? 'border-banana-500 ring-2 ring-banana-200' : 'border-gray-200 dark:border-border-primary hover:border-banana-300' }`} > {displayName(m)} {sel && (
    )}
    {displayName(m)}
    ); })}
    );

/**
 * Full-size preview of a single material, labelled with `label`.
 *
 * An inner handler calls `e.stopPropagation()`.
 * NOTE(review): presumably so that clicking the image does not trigger
 * `onClose` on the backdrop — confirm against the markup.
 */
const PreviewOverlay: React.FC<{ url: string; label: string; t: ReturnType; onClose: () => void }> = ({ url, label, t, onClose, }) => (
    {label} e.stopPropagation()} />
    {label}
    );

// ---------------------------------------------------------------------------
// Main component
// ---------------------------------------------------------------------------

interface MaterialCenterModalProps {
  isOpen: boolean;
  onClose: () => void;
}

/**
 * Picks the most specific message out of a rejected request: the API error
 * body first, then the error's own message, then `fallback`.
 * Safe for any thrown value, including null and non-Error objects.
 */
const resolveErrorMessage = (err: unknown, fallback: string): string => {
  const e = err as
    | { response?: { data?: { error?: { message?: string } } }; message?: string }
    | null
    | undefined;
  return e?.response?.data?.error?.message || e?.message || fallback;
};

/**
 * Modal for browsing, uploading, deleting and downloading materials.
 *
 * All state lives in `reducer`; the handlers below perform the API calls and
 * report the outcome through toasts.
 */
export const MaterialCenterModal: React.FC<MaterialCenterModalProps> = ({ isOpen, onClose }) => {
  const t = useT(i18nDict);
  const { show } = useToast();
  const [s, dispatch] = useReducer(reducer, initial);

  /** Loads the materials for the current filter ('all', 'none' or a project id). */
  const fetchItems = useCallback(async () => {
    dispatch({ type: 'SET_LOADING', on: true });
    try {
      const res = await listMaterials(s.filter);
      dispatch({ type: 'SET_ITEMS', items: res.data?.materials ?? [] });
    } catch (err: unknown) {
      dispatch({ type: 'SET_LOADING', on: false });
      show({ message: resolveErrorMessage(err, t('mc.msg.loadErr')), type: 'error' });
    }
  }, [s.filter, show, t]);

  /** Loads the project list used by the filter dropdown; failures are ignored. */
  const fetchProjects = useCallback(async () => {
    try {
      const res = await listProjects(100, 0);
      if (res.data?.projects) dispatch({ type: 'SET_PROJECTS', list: res.data.projects });
    } catch {
      /* non-critical */
    }
  }, []);

  // Reload whenever the modal opens or the filter changes, and drop the
  // selection/preview belonging to the previous view.
  useEffect(() => {
    if (!isOpen) return;
    if (!s.projectsReady) fetchProjects();
    fetchItems();
    dispatch({ type: 'RESET_EPHEMERAL' });
  }, [isOpen, s.filter]);

  /** Uploads the picked file, attached to the filtered project when one is selected. */
  const handleUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
    const input = e.target;
    const file = input.files?.[0];
    if (!file) return;
    if (!ACCEPTED_TYPES.includes(file.type)) {
      show({ message: t('mc.msg.badFormat'), type: 'error' });
      // Clear the input so picking the same file again fires a change event.
      input.value = '';
      return;
    }
    dispatch({ type: 'SET_UPLOADING', on: true });
    try {
      const pid = s.filter === 'all' || s.filter === 'none' ? null : s.filter;
      await uploadMaterial(file, pid);
      show({ message: t('mc.msg.uploaded'), type: 'success' });
      fetchItems();
    } catch (err: unknown) {
      show({ message: resolveErrorMessage(err, t('mc.msg.uploadErr')), type: 'error' });
    } finally {
      dispatch({ type: 'SET_UPLOADING', on: false });
      input.value = '';
    }
  };

  /** Deletes one material; its id is tracked in `deleting` while the request runs. */
  const handleDelete = async (e: React.MouseEvent, m: Material) => {
    // Keep the click from also toggling the tile's selection.
    e.stopPropagation();
    if (!m.id) {
      show({ message: t('mc.msg.noId'), type: 'error' });
      return;
    }
    dispatch({ type: 'ADD_DELETING', id: m.id });
    try {
      await deleteMaterial(m.id);
      dispatch({ type: 'REMOVE_ITEM', key: m.id });
      show({ message: t('mc.msg.deleted'), type: 'success' });
    } catch (err: unknown) {
      show({ message: resolveErrorMessage(err, t('mc.msg.deleteErr')), type: 'error' });
    } finally {
      dispatch({ type: 'REMOVE_DELETING', id: m.id });
    }
  };

  /**
   * Downloads the selection: a single material is fetched and saved directly,
   * several are requested as a zip.
   */
  const handleDownload = async () => {
    if (s.selected.size === 0) {
      show({ message: t('mc.msg.pickFirst'), type: 'info' });
      return;
    }
    const chosen = s.items.filter((m) => s.selected.has(m.id));
    if (chosen.length === 1) {
      try {
        const response = await fetch(getImageUrl(chosen[0].url));
        // fetch() resolves on HTTP errors too; without this check the error
        // body would be saved to disk as if it were the image.
        if (!response.ok) {
          throw new Error(`Download failed with HTTP ${response.status}`);
        }
        const blob = await response.blob();
        const href = URL.createObjectURL(blob);
        try {
          const link = Object.assign(document.createElement('a'), {
            href,
            download: chosen[0].filename || 'material.png',
          });
          document.body.appendChild(link);
          link.click();
          document.body.removeChild(link);
        } finally {
          // Always release the blob URL, even if triggering the click failed.
          URL.revokeObjectURL(href);
        }
        show({ message: t('mc.msg.downloaded'), type: 'success' });
      } catch (err) {
        console.error('Download failed:', err);
        show({ message: t('mc.msg.downloadErr'), type: 'error' });
      }
      return;
    }
    dispatch({ type: 'SET_DOWNLOADING', on: true });
    try {
      await downloadMaterialsZip(chosen.map((m) => m.id));
      show({ message: t('mc.msg.zipped', { count: chosen.length }), type: 'success' });
    } catch (err: unknown) {
      show({ message: resolveErrorMessage(err, t('mc.msg.zipErr')), type: 'error' });
    } finally {
      dispatch({ type: 'SET_DOWNLOADING', on: false });
    }
  };

  /** Opens the full-size preview for a material without toggling its selection. */
  const handlePreview = (e: React.MouseEvent, m: Material) => {
    e.stopPropagation();
    dispatch({ type: 'SET_PREVIEW', preview: { url: getImageUrl(m.url), label: displayName(m) } });
  };

  // Sort a copy so the reducer's array is never mutated.
  const sortedItems = [...s.items].sort((a, b) => {
    switch (s.sortBy) {
      case 'newest':
        return new Date(b.created_at).getTime() - new Date(a.created_at).getTime();
      case 'oldest':
        return new Date(a.created_at).getTime() - new Date(b.created_at).getTime();
      case 'name-asc':
        return displayName(a).localeCompare(displayName(b));
      case 'name-desc':
        return displayName(b).localeCompare(displayName(a));
      default:
        return 0;
    }
  });

  return ( <>
    {s.loading && s.items.length === 0 ? (
    {t('common.loading')}
    ) : s.items.length === 0 ? (
    {t('mc.empty')}
    {t('mc.emptyHint')}
    ) : ( dispatch({ type: 'TOGGLE_SELECT', key: id })} onPreview={handlePreview} onDelete={handleDelete} /> )}
    {s.preview && ( dispatch({ type: 'SET_PREVIEW', preview: null })} /> )} ); }; ================================================ FILE: frontend/src/components/shared/MaterialGeneratorModal.tsx ================================================ import React, { useState, useEffect, useRef } from 'react'; import { Image as ImageIcon, ImagePlus, Upload, X, FolderOpen, Info } from 'lucide-react'; import { Modal } from './Modal'; import { useT } from '@/hooks/useT'; import { Textarea } from './Textarea'; import { Button } from './Button'; import { useToast } from './Toast'; import { MaterialSelector, materialUrlToFile } from './MaterialSelector'; import { ASPECT_RATIO_OPTIONS } from '@/config/aspectRatio'; import { useProjectStore } from '@/store/useProjectStore'; // MaterialGeneratorModal 组件自包含翻译 const materialGeneratorI18n = { zh: { material: { title: "素材生成", saveToLibraryNote: "生成的素材会保存到素材库", generatedResult: "生成结果", generatedMaterial: "生成的素材", generatedPreview: "生成的素材会展示在这里", promptLabel: "提示词(原样发送给文生图模型)", promptPlaceholder: "例如:蓝紫色渐变背景,带几何图形和科技感线条,用于科技主题标题页...", aspectRatioLabel: "生成比例", referenceImages: "参考图片(可选)", mainReference: "主参考图(可选)", extraReference: "额外参考图(可选,多张)", clickToUpload: "点击上传", selectFromLibrary: "从素材库选择", generateMaterial: "生成素材", messages: { enterPrompt: "请输入提示词", materialAdded: "已添加 {{count}} 个素材", generateSuccess: "素材生成成功,已保存到历史素材库", generateSuccessGlobal: "素材生成成功,已保存到全局素材库", generateComplete: "素材生成完成,但未找到图片地址", generateFailed: "素材生成失败", generateTimeout: "素材生成超时,请稍后查看素材库", pollingFailed: "轮询任务状态失败,请稍后查看素材库", noTaskId: "素材生成失败:未返回任务ID" } } }, en: { material: { title: "Generate Material", saveToLibraryNote: "Generated materials will be saved to the library", generatedResult: "Generated Result", generatedMaterial: "Generated Material", generatedPreview: "Generated materials will be displayed here", promptLabel: "Prompt (sent directly to text-to-image model)", promptPlaceholder: "e.g., Blue-purple gradient background with geometric shapes and 
tech-style lines for a tech-themed title page...", aspectRatioLabel: "Aspect Ratio", referenceImages: "Reference Images (Optional)", mainReference: "Main Reference (Optional)", extraReference: "Extra References (Optional, multiple)", clickToUpload: "Click to upload", selectFromLibrary: "Select from Library", generateMaterial: "Generate Material", messages: { enterPrompt: "Please enter a prompt", materialAdded: "Added {{count}} material(s)", generateSuccess: "Material generated successfully, saved to history library", generateSuccessGlobal: "Material generated successfully, saved to global library", generateComplete: "Material generation complete, but image URL not found", generateFailed: "Failed to generate material", generateTimeout: "Material generation timeout, please check the library later", pollingFailed: "Failed to poll task status, please check the library later", noTaskId: "Material generation failed: No task ID returned" } } } }; import { Skeleton } from './Loading'; import { generateMaterialImage, getTaskStatus } from '@/api/endpoints'; import { getImageUrl } from '@/api/client'; import type { Material } from '@/api/endpoints'; import type { Task } from '@/types'; interface MaterialGeneratorModalProps { projectId?: string | null; isOpen: boolean; onClose: () => void; } export const MaterialGeneratorModal: React.FC = ({ projectId, isOpen, onClose, }) => { const t = useT(materialGeneratorI18n); const { show } = useToast(); const currentProject = useProjectStore((s) => s.currentProject); const [prompt, setPrompt] = useState(''); const [aspectRatio, setAspectRatio] = useState('16:9'); // Reset aspect ratio to project default when modal opens, // so newly opened modals always reflect current project settings. // Verify the store's currentProject matches the projectId prop to avoid // using a stale/wrong project's aspect ratio. 
useEffect(() => { if (isOpen) { const projectAspectRatio = (projectId && currentProject?.id === projectId && currentProject.image_aspect_ratio) || '16:9'; setAspectRatio(projectAspectRatio); } }, [isOpen, projectId, currentProject]); const [refImage, setRefImage] = useState(null); const [extraImages, setExtraImages] = useState([]); const [previewUrl, setPreviewUrl] = useState(null); const [isGenerating, setIsGenerating] = useState(false); const [isCompleted, setIsCompleted] = useState(false); const [isMaterialSelectorOpen, setIsMaterialSelectorOpen] = useState(false); const handleRefImageChange = (e: React.ChangeEvent) => { const file = (e.target.files && e.target.files[0]) || null; if (file) { setRefImage(file); } }; const handleExtraImagesChange = (e: React.ChangeEvent) => { const files = Array.from(e.target.files || []); if (files.length === 0) return; if (!refImage) { const [first, ...rest] = files; setRefImage(first); if (rest.length > 0) { setExtraImages((prev) => [...prev, ...rest]); } } else { setExtraImages((prev) => [...prev, ...files]); } }; const removeExtraImage = (index: number) => { setExtraImages((prev) => prev.filter((_, i) => i !== index)); }; const handleSelectMaterials = async (materials: Material[]) => { try { const files = await Promise.all( materials.map((material) => materialUrlToFile(material)) ); if (files.length === 0) return; if (!refImage) { const [first, ...rest] = files; setRefImage(first); if (rest.length > 0) { setExtraImages((prev) => [...prev, ...rest]); } } else { setExtraImages((prev) => [...prev, ...files]); } show({ message: t('material.messages.materialAdded', { count: files.length }), type: 'success' }); } catch (error: any) { console.error('Failed to load materials:', error); show({ message: t('material.messages.loadMaterialFailed') + ': ' + (error.message || t('common.unknownError')), type: 'error', }); } }; // Manage object URLs to prevent memory leaks const refImageUrl = useRef(null); const extraImageUrls = useRef([]); 
useEffect(() => { // Revoke previous URL if (refImageUrl.current) URL.revokeObjectURL(refImageUrl.current); refImageUrl.current = refImage ? URL.createObjectURL(refImage) : null; }, [refImage]); useEffect(() => { // Revoke all previous URLs extraImageUrls.current.forEach(url => URL.revokeObjectURL(url)); extraImageUrls.current = extraImages.map(file => URL.createObjectURL(file)); }, [extraImages]); useEffect(() => { return () => { if (refImageUrl.current) URL.revokeObjectURL(refImageUrl.current); extraImageUrls.current.forEach(url => URL.revokeObjectURL(url)); }; }, []); const pollingIntervalRef = useRef(null); useEffect(() => { return () => { if (pollingIntervalRef.current) { clearInterval(pollingIntervalRef.current); } }; }, []); const pollMaterialTask = async (taskId: string) => { const targetProjectId = projectId || 'global'; const maxAttempts = 60; let attempts = 0; const poll = async () => { try { attempts++; const response = await getTaskStatus(targetProjectId, taskId); const task: Task = response.data; if (task.status === 'COMPLETED') { const progress = task.progress || {}; const imageUrl = progress.image_url; if (imageUrl) { setPreviewUrl(getImageUrl(imageUrl)); const message = projectId ? 
t('material.messages.generateSuccess') : t('material.messages.generateSuccessGlobal'); show({ message, type: 'success' }); setIsCompleted(true); } else { show({ message: t('material.messages.generateComplete'), type: 'error' }); } setIsGenerating(false); if (pollingIntervalRef.current) { clearInterval(pollingIntervalRef.current); pollingIntervalRef.current = null; } } else if (task.status === 'FAILED') { show({ message: task.error_message || t('material.messages.generateFailed'), type: 'error', }); setIsGenerating(false); if (pollingIntervalRef.current) { clearInterval(pollingIntervalRef.current); pollingIntervalRef.current = null; } } else if (task.status === 'PENDING' || task.status === 'PROCESSING') { if (attempts >= maxAttempts) { show({ message: t('material.messages.generateTimeout'), type: 'warning' }); setIsGenerating(false); if (pollingIntervalRef.current) { clearInterval(pollingIntervalRef.current); pollingIntervalRef.current = null; } } } } catch (error: any) { console.error('Failed to poll task status:', error); if (attempts >= maxAttempts) { show({ message: t('material.messages.pollingFailed'), type: 'error' }); setIsGenerating(false); if (pollingIntervalRef.current) { clearInterval(pollingIntervalRef.current); pollingIntervalRef.current = null; } } } }; poll(); pollingIntervalRef.current = setInterval(poll, 2000); }; const handleGenerate = async () => { if (!prompt.trim()) { show({ message: t('material.messages.enterPrompt'), type: 'error' }); return; } setIsGenerating(true); try { const targetProjectId = projectId || 'none'; const resp = await generateMaterialImage(targetProjectId, prompt.trim(), refImage as File, extraImages, aspectRatio); const taskId = resp.data?.task_id; if (taskId) { await pollMaterialTask(taskId); } else { show({ message: t('material.messages.noTaskId'), type: 'error' }); setIsGenerating(false); } } catch (error: any) { show({ message: error?.response?.data?.error?.message || error.message || 
t('material.messages.generateFailed'), type: 'error', }); setIsGenerating(false); } }; const handleClose = () => { onClose(); }; return ( {/* 顶部提示信息 */}

    {t('material.saveToLibraryNote')}

    {/* 生成结果预览卡片 - 使用现代渐变和光晕效果 */}
    {/* 内部光晕 */}

    {t('material.generatedResult')}

    {isGenerating ? (
    ) : previewUrl ? (
    {t('material.generatedMaterial')}
    ) : (
    {t('material.generatedPreview')}
    )}