[
  {
    "path": ".dockerignore",
    "content": "# Git\n.git\n.gitignore\n\n# Python\n__pycache__\n*.pyc\n*.pyo\n*.pyd\n.Python\n*.egg\n*.egg-info/\ndist/\nbuild/\n.eggs/\n*.so\n\n# Virtual environments\nvenv/\nenv/\nENV/\n.venv/\n\n# IDE\n.vscode/\n.idea/\n*.swp\n*.swo\n*~\n.DS_Store\n\n# Environment files (contain secrets)\n.env\n.mcp_env\nnotion_state.json\n\n# Test and development files\n.pytest_cache/\n.coverage\nhtmlcov/\n.tox/\n.mypy_cache/\n.ruff_cache/\ntests/\ntest_environments/\n\n# Results and logs\nresults/\n*.log\nlogs/\n\n# PostgreSQL data\n.postgres/\n\n# Playwright\nplaywright-report/\ntest-results/\n\n# Documentation images\nasset/\n\n# Temporary files\n*.tmp\ntmp/\ntemp/\n\n# Docker\nDockerfile\ndocker-compose.yml\n.dockerignore\n\n# Node modules (if any locally installed)\nnode_modules/\n\n# Pixi lock file\npixi.lock\n.pixi/\n\n# GitHub state files\ngithub_state/\ngithub_template_repo/\n\n# Backup directories\n.mcpbench_backups/"
  },
  {
    "path": ".editorconfig",
    "content": "root = true\n\n; Always use Unix style new lines with new line ending on every file and trim whitespace\n[*]\nend_of_line = lf\ninsert_final_newline = true\ntrim_trailing_whitespace = true\n\n; Python: PEP8 defines 4 spaces for indentation\n[*.py]\nindent_style = space\nindent_size = 4\n"
  },
  {
    "path": ".gitattributes",
    "content": "# SCM syntax highlighting & preventing 3-way merges\npixi.lock merge=binary linguist-language=YAML linguist-generated=true\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/1_bug_report.yml",
    "content": "name: '🐛 Bug Report'\ndescription: 'Report a bug'\nlabels: ['unconfirm']\ntype: Bug\nbody:\n  - type: textarea\n    attributes:\n      label: '🐛 Bug Description'\n      description: A clear and concise description of the bug, if the above option is `Other`, please also explain in detail.\n    validations:\n      required: true\n  - type: textarea\n    attributes:\n      label: '📷 Reproduction Steps'\n      description: A clear and concise description of how to reproduce the bug.\n  - type: textarea\n    attributes:\n      label: '🚦 Expected Behavior'\n      description: A clear and concise description of what you expected to happen.\n  - type: textarea\n    attributes:\n      label: '📝 Additional Information'\n      description: If your problem needs further explanation, or if the issue you're seeing cannot be reproduced in a gist, please add more information here.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/2_feature_request.yml",
    "content": "name: '🌠 Feature Request'\ndescription: 'Suggest an idea'\ntitle: '[Request] '\ntype: Feature\nbody:\n  - type: textarea\n    attributes:\n      label: '🥰 Feature Description'\n      description: Please add a clear and concise description of the problem you are seeking to solve with this feature request.\n    validations:\n      required: true\n  - type: textarea\n    attributes:\n      label: '🧐 Proposed Solution'\n      description: Describe the solution you'd like in a clear and concise manner.\n    validations:\n      required: true\n  - type: textarea\n    attributes:\n      label: '📝 Additional Information'\n      description: Add any other context about the problem here.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "content": "contact_links:\n  - name: Questions and ideas\n    url: https://github.com/eval-sys/mcpmark/discussions/new/choose\n    about: Please post questions and ideas in discussions.\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "content": "#### Change Type\n\n<!-- For change type, change [ ] to [x]. -->\n\n- [ ] ✨ feat\n- [ ] 🐛 fix\n- [ ] ♻️ refactor\n- [ ] 💄 style\n- [ ] 👷 build\n- [ ] ⚡️ perf\n- [ ] 📝 docs\n- [ ] 🔨 chore\n\n#### Description of Change\n\n<!-- Thank you for your Pull Request. Please provide a description above. -->\n\n#### Additional Information\n\n<!-- Add any other context about the Pull Request here. -->\n"
  },
  {
    "path": ".github/scripts/pr-comment.js",
    "content": "/**\n * Generate or update PR comment with Docker build info\n */\nmodule.exports = async ({ github, context, dockerMetaJson, image, version, dockerhubUrl, platforms }) => {\n  const COMMENT_IDENTIFIER = '<!-- DOCKER-BUILD-COMMENT -->';\n\n  const parseTags = () => {\n    try {\n      if (dockerMetaJson) {\n        const parsed = JSON.parse(dockerMetaJson);\n        if (Array.isArray(parsed.tags) && parsed.tags.length > 0) {\n          return parsed.tags;\n        }\n      }\n    } catch (e) {\n      // ignore parsing error, fallback below\n    }\n    if (image && version) {\n      return [`${image}:${version}`];\n    }\n    return [];\n  };\n\n  const generateCommentBody = () => {\n    const tags = parseTags();\n    const buildTime = new Date().toISOString();\n\n    // Use the first tag as the main version\n    const mainTag = tags.length > 0 ? tags[0] : `${image}:${version}`;\n    const tagVersion = mainTag.includes(':') ? mainTag.split(':')[1] : version;\n\n    return [\n      COMMENT_IDENTIFIER,\n      '',\n      '### 🐳 Docker Build Completed!',\n      `**Version**: \\`${tagVersion || 'N/A'}\\``,\n      `**Build Time**: \\`${buildTime}\\``,\n      '',\n      dockerhubUrl ? 
`🔗 View all tags on Docker Hub: ${dockerhubUrl}` : '',\n      '',\n      '### Pull Image',\n      'Download the Docker image to your local machine:',\n      '',\n      '```bash',\n      `docker pull ${mainTag}`,\n      '```',\n      '',\n      '### Run Eval',\n      'Execute evaluation tasks using the built image:',\n      '',\n      '```bash',\n      `DOCKER_IMAGE_VERSION=${tagVersion} ./run-task.sh --models gpt-4.1-mini --tasks file_context/uppercase`,\n      '```',\n      '',\n      '> [!IMPORTANT]',\n      '> This build is for testing and validation purposes.',\n    ]\n      .filter(Boolean)\n      .join('\\n');\n  };\n\n  const body = generateCommentBody();\n\n  // List comments on the PR\n  const { data: comments } = await github.rest.issues.listComments({\n    issue_number: context.issue.number,\n    owner: context.repo.owner,\n    repo: context.repo.repo,\n  });\n\n  const existing = comments.find((c) => c.body && c.body.includes(COMMENT_IDENTIFIER));\n  if (existing) {\n    await github.rest.issues.updateComment({\n      comment_id: existing.id,\n      owner: context.repo.owner,\n      repo: context.repo.repo,\n      body,\n    });\n    return { updated: true, id: existing.id };\n  }\n\n  const result = await github.rest.issues.createComment({\n    issue_number: context.issue.number,\n    owner: context.repo.owner,\n    repo: context.repo.repo,\n    body,\n  });\n  return { updated: false, id: result.data.id };\n};\n\n\n\n"
  },
  {
    "path": ".github/workflows/publish-docker-image.yml",
    "content": "name: Publish Docker Image\n\non:\n  workflow_dispatch:\n  release:\n    types: [ published ]\n  pull_request:\n    types: [ synchronize, labeled, unlabeled ]\n\npermissions:\n  contents: read\n  pull-requests: write\n\nconcurrency:\n  group: ${{ github.ref }}-${{ github.workflow }}\n  cancel-in-progress: true\n\nenv:\n  REGISTRY_IMAGE: evalsysorg/mcpmark\n  PR_TAG_PREFIX: pr-\n\njobs:\n  build:\n    if: |\n      (github.event_name == 'pull_request' &&\n       contains(github.event.pull_request.labels.*.name, 'Build Docker')) ||\n      github.event_name != 'pull_request'\n\n    strategy:\n      matrix:\n        include:\n          - platform: linux/amd64\n            os: ubuntu-latest\n          - platform: linux/arm64\n            os: ubuntu-24.04-arm\n    runs-on: ${{ matrix.os }}\n    name: Build ${{ matrix.platform }} Image\n    steps:\n      - name: Prepare\n        run: |\n          platform=${{ matrix.platform }}\n          echo \"PLATFORM_PAIR=${platform//\\//-}\" >> $GITHUB_ENV\n\n      - name: Checkout base\n        uses: actions/checkout@v4\n        with:\n          fetch-depth: 0\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@v3\n\n      - name: Generate PR metadata\n        if: github.event_name == 'pull_request'\n        id: pr_meta\n        run: |\n          branch_name=\"${{ github.head_ref }}\"\n          sanitized_branch=$(echo \"${branch_name}\" | sed -E 's/[^a-zA-Z0-9_.-]+/-/g')\n          echo \"pr_tag=${sanitized_branch}-$(git rev-parse --short HEAD)\" >> $GITHUB_OUTPUT\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@v5\n        with:\n          images: ${{ env.REGISTRY_IMAGE }}\n          tags: |\n            type=raw,value=${{ env.PR_TAG_PREFIX }}${{ steps.pr_meta.outputs.pr_tag }},enable=${{ github.event_name == 'pull_request' }}\n            type=semver,pattern={{version}},enable=${{ github.event_name != 'pull_request' }}\n            
type=raw,value=latest,enable=${{ github.event_name != 'pull_request' }}\n\n      - name: Docker login\n        uses: docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_REGISTRY_USER }}\n          password: ${{ secrets.DOCKER_REGISTRY_PASSWORD }}\n\n      - name: Get commit SHA\n        if: github.ref == 'refs/heads/main'\n        id: vars\n        run: echo \"sha_short=$(git rev-parse --short HEAD)\" >> $GITHUB_OUTPUT\n\n      - name: Build and export\n        id: build\n        uses: docker/build-push-action@v6\n        with:\n          platforms: ${{ matrix.platform }}\n          context: .\n          file: ./Dockerfile\n          labels: ${{ steps.meta.outputs.labels }}\n          build-args: |\n            SHA=${{ steps.vars.outputs.sha_short }}\n          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true\n\n      - name: Export digest\n        run: |\n          rm -rf /tmp/digests\n          mkdir -p /tmp/digests\n          digest=\"${{ steps.build.outputs.digest }}\"\n          touch \"/tmp/digests/${digest#sha256:}\"\n\n      - name: Upload artifact\n        uses: actions/upload-artifact@v4\n        with:\n          name: digest-${{ env.PLATFORM_PAIR }}\n          path: /tmp/digests/*\n          if-no-files-found: error\n          retention-days: 1\n\n  merge:\n    name: Merge\n    needs: build\n    runs-on: ubuntu-latest\n    steps:\n      - name: Checkout base\n        uses: actions/checkout@v4\n        with:\n          fetch-depth: 0\n\n      - name: Download digests\n        uses: actions/download-artifact@v5\n        with:\n          path: /tmp/digests\n          pattern: digest-*\n          merge-multiple: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@v3\n\n      - name: Generate PR metadata\n        if: github.event_name == 'pull_request'\n        id: pr_meta\n        run: |\n          branch_name=\"${{ github.head_ref }}\"\n          
sanitized_branch=$(echo \"${branch_name}\" | sed -E 's/[^a-zA-Z0-9_.-]+/-/g')\n          echo \"pr_tag=${sanitized_branch}-$(git rev-parse --short HEAD)\" >> $GITHUB_OUTPUT\n\n      - name: Docker meta\n        id: meta\n        uses: docker/metadata-action@v5\n        with:\n          images: ${{ env.REGISTRY_IMAGE }}\n          tags: |\n            type=raw,value=${{ env.PR_TAG_PREFIX }}${{ steps.pr_meta.outputs.pr_tag }},enable=${{ github.event_name == 'pull_request' }}\n            type=semver,pattern={{version}},enable=${{ github.event_name != 'pull_request' }}\n            type=raw,value=latest,enable=${{ github.event_name != 'pull_request' }}\n\n      - name: Docker login\n        uses: docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKER_REGISTRY_USER }}\n          password: ${{ secrets.DOCKER_REGISTRY_PASSWORD }}\n\n      - name: Create manifest list and push\n        working-directory: /tmp/digests\n        run: |\n          docker buildx imagetools create $(jq -cr '.tags | map(\"-t \" + .) 
| join(\" \")' <<< \"$DOCKER_METADATA_OUTPUT_JSON\") \\\n            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)\n\n      - name: Inspect image\n        run: |\n          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}\n\n      - name: Comment on PR with Docker build info\n        if: github.event_name == 'pull_request'\n        uses: actions/github-script@v7\n        with:\n          github-token: ${{ secrets.GITHUB_TOKEN }}\n          script: |\n            const prComment = require('${{ github.workspace }}/.github/scripts/pr-comment.js');\n            const result = await prComment({\n              github,\n              context,\n              dockerMetaJson: ${{ toJSON(steps.meta.outputs.json) }},\n              image: \"${{ env.REGISTRY_IMAGE }}\",\n              version: \"${{ steps.meta.outputs.version }}\",\n              dockerhubUrl: \"https://hub.docker.com/r/${{ env.REGISTRY_IMAGE }}/tags\",\n              platforms: \"linux/amd64, linux/arm64\",\n            });\n            core.info(`Status: ${result.updated ? 'Updated' : 'Created'}, ID: ${result.id}`);\n\n\n"
  },
  {
    "path": ".gitignore",
    "content": "logs\n.claude\nCLAUDE.md\n.gemini\nresults\nmaterials\nscripts\n!.github/scripts\n.nfs*\n.mcp_env\n.idea\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[codz]\n*$py.class\nlogs\nlogs/*\n.DS_Store\nnotion-sdk-py/\ngithub_state/*\n\n# for playwright cookies\nnotion_state.json\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py.cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\n.pybuilder/\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n#   For a library or package, you might want to ignore these files since the code is\n#   intended to run in multiple environments; otherwise, check them in:\n# .python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# UV\n#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.\n#   This is especially 
recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#uv.lock\n\n# poetry\n#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control\n#poetry.lock\n#poetry.toml\n\n# pdm\n#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.\n#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.\n#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control\n#pdm.lock\n#pdm.toml\n.pdm-python\n.pdm-build/\n\n# pixi\n#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.\n#pixi.lock\n#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one\n#   in the .venv directory. It is recommended not to include this directory in version control.\n.pixi\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.envrc\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pytype static type analyzer\n.pytype/\n\n# Cython debug symbols\ncython_debug/\n\n# PyCharm\n#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can\n#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore\n#  and can be added to the global gitignore or merged into this file.  
For a more nuclear\n#  option (not recommended) you can uncomment the following to ignore the entire idea folder.\n#.idea/\n\n# Abstra\n# Abstra is an AI-powered process automation framework.\n# Ignore directories containing user credentials, local state, and settings.\n# Learn more at https://abstra.io/docs\n.abstra/\n\n# Visual Studio Code\n#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore\n#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore\n#  and can be added to the global gitignore or merged into this file. However, if you prefer,\n#  you could uncomment the following to ignore the entire vscode folder\n# .vscode/\n\n# Ruff stuff:\n.ruff_cache/\n\n# PyPI configuration file\n.pypirc\n\n# Cursor\n#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to\n#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data\n#  refer to https://docs.cursor.com/context/ignore-files\n.cursorignore\n.cursorindexingignore\n\n# Marimo\nmarimo/_static/\nmarimo/_lsp/\n__marimo__/\n\n# pixi environments\n.pixi\n*.egg-info\n\n.postgres\n\n# MCPMark backup directories\n.mcpmark_backups/*\ntest_environments/\npostgres_state\n"
  },
  {
    "path": "CHANGELOG.md",
    "content": "# Changelog\n\nAll notable changes to this project will be documented in this file.\n\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),\nand this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n\n## v1.2.0 - 2025-09-20\n\nThis version includes multiple important feature enhancements, particularly improvements in cost calculation, error handling, and Notion integration. Added per-model cost calculation, comprehensive aggregator functionality, and more robust error recovery mechanisms.\n\n### ✨ Features\n- **Add 1m parameter & improve log** (#198) - Added claude-1m-context option and enhanced logging functionality\n- **Refine Notion parent resolution and duplicate recovery** (#197) - Improved Notion parent page resolution and duplicate content recovery mechanism\n- **Comprehensive aggregator, enable push to new branch** (#185) - Implemented comprehensive aggregator functionality with support for pushing to new branches\n- **Support price cost calculating per model** (#186) - Added per-model price cost calculation functionality\n- **Improve agent end log** (#183) - Enhanced agent end logging\n- **Improve litellm error handling** (#181) - Enhanced LiteLLM error handling mechanism\n\n### ♻️ Refactoring\n- **Use notion child block list to locate page** (#196) - Refactored page location logic to use Notion child block list approach\n\n### 🐛 Bug Fixes\n- **Fix verification in Notion task company_in_a_box/goals_restructure** (#194) - Fixed verification logic for specific Notion tasks\n- **Improve claude error handling** (#195) - Improved error handling for Claude API interactions\n- **Fix trailing slash issue for find_legacy_name** - Resolved trailing slash issues in find_legacy_name path handling\n- **Recover when duplication lands on parent** (#189) - Fixed recovery mechanism when duplicate content affects parent pages\n- **Correctly handle playwright parser** (#184) - Properly handle Playwright parser\n- **Handle timeout error, add timeout error for resuming** (#182) - Handle timeout errors and add timeout error handling for resume operations\n\n### 📝 Documentation\n- **Better readme, notion language guide** (#190) - Improved README documentation and added comprehensive Notion language guide\n\n### 🔨 Maintenance\n- **Update price info** (#188) - Updated pricing information\n- **Update desktop_template/file_arrangement/verify.py** (#187) - Maintenance updates to verification scripts\n"
  },
  {
    "path": "Dockerfile",
    "content": "# MCPMark Docker image with optimized layer caching\n# Stage 1: Builder for Python dependencies only\nFROM python:3.12-slim AS builder\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n    gcc \\\n    g++ \\\n    libpq-dev \\\n    && rm -rf /var/lib/apt/lists/*\n\nWORKDIR /build\n\n# Copy project files needed for pip install\nCOPY pyproject.toml ./\nCOPY src/ ./src/\nCOPY tasks/ ./tasks/\n\n# Install dependencies\nRUN pip install --no-cache-dir --user .\n\n# Stage 2: Final image with all runtime dependencies\nFROM python:3.12-slim\n\n# Layer 1: Core system dependencies (very stable, rarely changes)\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n    ca-certificates \\\n    && rm -rf /var/lib/apt/lists/*\n\n# Layer 2: PostgreSQL runtime and client tools (stable, only changes with postgres version)\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n    libpq5 \\\n    postgresql-client \\\n    && rm -rf /var/lib/apt/lists/*\n\n# Layer 3: Git (stable)\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n    git \\\n    && rm -rf /var/lib/apt/lists/*\n\n# Layer 4: Playwright system dependencies (changes with browser requirements)\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n    libnss3 \\\n    libnspr4 \\\n    libatk1.0-0 \\\n    libatk-bridge2.0-0 \\\n    libcups2 \\\n    libdrm2 \\\n    libxkbcommon0 \\\n    libatspi2.0-0 \\\n    libx11-6 \\\n    libxcomposite1 \\\n    libxdamage1 \\\n    libxfixes3 \\\n    libxrandr2 \\\n    libgbm1 \\\n    libxcb1 \\\n    libpango-1.0-0 \\\n    libcairo2 \\\n    libasound2 \\\n    && rm -rf /var/lib/apt/lists/*\n\n# Layer 5: Download tools and Node.js (changes with Node version)\nRUN apt-get update && \\\n    apt-get install -y --no-install-recommends curl wget unzip && \\\n    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \\\n    apt-get install -y --no-install-recommends nodejs && \\\n    apt-get autoremove -y && \\\n    rm -rf /var/lib/apt/lists/*\n\n# Layer 6: pipx (rarely changes)\nRUN pip install --no-cache-dir pipx && \\\n    pipx ensurepath\n\n# Layer 7: Copy Python packages from builder (changes with dependencies)\nCOPY --from=builder /root/.local /root/.local\n\n# Layer 8: Playwright browsers (changes with browser versions)\nRUN python3 -m playwright install chromium && \\\n    npx -y playwright install chromium\n\n# Layer 9: Install PostgreSQL MCP server (Python, used via `pipx run postgres-mcp`)\nRUN pipx install postgres-mcp\n\n# Set working directory\nWORKDIR /app\n\n# Layer 10: Create directory structure (rarely changes)\nRUN mkdir -p /app/results\n\n# Layer 11: Application code (changes frequently)\nCOPY . .\n\n# Set environment\nENV PATH=\"/root/.local/bin:/root/.local/pipx/venvs/*/bin:${PATH}\"\nENV PYTHONPATH=\"/app\"\nENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright\nENV PIPX_HOME=/root/.local/pipx\nENV PIPX_BIN_DIR=/root/.local/bin\n\n# Default command\nCMD [\"python3\", \"-m\", \"pipeline\", \"--help\"]"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "<div align=\"center\">\n\n# MCPMark: Stress-Testing Comprehensive MCP Use\n\n[![Website](https://img.shields.io/badge/Website-mcpmark.ai-4285F4?style=for-the-badge&logo=google-chrome&logoColor=white)](https://mcpmark.ai)\n[![arXiv](https://img.shields.io/badge/arXiv-2509.24002-b31b1b?style=for-the-badge&logo=arxiv&logoColor=white)](https://arxiv.org/abs/2509.24002)\n[![Discord](https://img.shields.io/badge/Join_our_discord-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HrKkJAxDnA)\n[![Docs](https://img.shields.io/badge/Docs-000000?style=for-the-badge&logo=mdbook&color=105864)](https://mcpmark.ai/docs)\n[![Hugging Face](https://img.shields.io/badge/Trajectory_Logs-FFD21E?style=for-the-badge&logo=huggingface&logoColor=black)](https://huggingface.co/datasets/Jakumetsu/mcpmark-trajectory-log)\n\n</div>\n\nAn evaluation suite for agentic models in real MCP tool environments (Notion / GitHub / Filesystem / Postgres / Playwright).\n\nMCPMark provides a reproducible, extensible benchmark for researchers and engineers: one-command tasks, isolated sandboxes, auto-resume for failures, unified metrics, and aggregated reports.\n\n[![MCPMark](https://github.com/user-attachments/assets/dfc06a41-e387-45e3-bc98-db7097ffa3dc)](https://mcpmark.ai)\n\n## News\n\n- 📌 **21 Jan** — Pinned MCP server versions for reproducible benchmarks: GitHub MCP Server `v0.15.0` (switched to Docker for version control), Notion MCP Server `@1.9.1` (Notion released 2.0 but it has many bugs, not recommended). See [#246](https://github.com/eval-sys/mcpmark/pull/246).\n- 🔥 **13 Dec** — Added auto-compaction support (`--compaction-token`) to summarize long conversations and avoid context overflow during evaluation ([#236](https://github.com/eval-sys/mcpmark/pull/236)).\n- 🏅 **02 Dec** — Evaluated `gemini-3-pro-preview` (thinking: low): **Pass@1 50.6%** ± 2.3% — so close to `gpt-5-high` (51.6%)! 
Also `deepseek-v3.2-thinking` 36.8% and `deepseek-v3.2-chat` 29.7%\n- 🔥 **02 Dec** — Obfuscate GitHub @mentions to prevent notification spam during evaluation ([#229](https://github.com/eval-sys/mcpmark/pull/229))\n- 🏅 **01 Dec** — DeepSeek v3.2 uses MCPMark! Kudos on securing the best open-source model. [X Post](https://x.com/deepseek_ai/status/1995452650557763728) | [Technical Report](https://huggingface.co/deepseek-ai/DeepSeek-V3.2/resolve/main/assets/paper.pdf)\n- 🔥 **17 Nov** — Added 50 easy tasks (10 per MCP server) for smaller open-source models ([#225](https://github.com/eval-sys/mcpmark/pull/225))\n- 🤝 **31 Oct** — Community PR from insforge: better MCP servers achieve better results with fewer tokens! ([#214](https://github.com/eval-sys/mcpmark/pull/214))\n- 🔥 **13 Oct** — Added ReAct agent support. PRs for new agent scaffolds welcome! ([#209](https://github.com/eval-sys/mcpmark/pull/209))\n- 🏅 **10 Sep** — `qwen-3-coder-plus` is the best open-source model! Kudos to Qwen team. [X Post](https://x.com/Alibaba_Qwen/status/1965457023438651532)\n\n---\n\n## What you can do with MCPMark\n\n- **Evaluate real tool usage** across multiple MCP services: `Notion`, `GitHub`, `Filesystem`, `Postgres`, `Playwright`.\n- **Use ready-to-run tasks** covering practical workflows, each with strict automated verification.\n- **Reliable and reproducible**: isolated environments that do not pollute your accounts/data; failed tasks auto-retry and resume.\n- **Unified metrics and aggregation**: single/multi-run (pass@k, avg@k, etc.) with automated results aggregation.\n- **Flexible deployment**: local or Docker; fully validated on macOS and Linux.\n\n---\n\n## Quickstart (5 minutes)\n\n### 1) Clone the repository\n```bash\ngit clone https://github.com/eval-sys/mcpmark.git\ncd mcpmark\n```\n\n### 2) Set environment variables (create `.mcp_env` at repo root)\nOnly set what you need. 
Add service credentials when running tasks for that service.\n\n```env\n# Example: OpenAI\nOPENAI_BASE_URL=\"https://api.openai.com/v1\"\nOPENAI_API_KEY=\"sk-...\"\n\n# Optional: Notion (only for Notion tasks)\nSOURCE_NOTION_API_KEY=\"your-source-notion-api-key\"\nEVAL_NOTION_API_KEY=\"your-eval-notion-api-key\"\nEVAL_PARENT_PAGE_TITLE=\"MCPMark Eval Hub\"\nPLAYWRIGHT_BROWSER=\"chromium\"   # chromium | firefox\nPLAYWRIGHT_HEADLESS=\"True\"\n\n# Optional: GitHub (only for GitHub tasks)\nGITHUB_TOKENS=\"token1,token2\"   # token pooling for rate limits\nGITHUB_EVAL_ORG=\"your-eval-org\"\n\n# Optional: Postgres (only for Postgres tasks)\nPOSTGRES_HOST=\"localhost\"\nPOSTGRES_PORT=\"5432\"\nPOSTGRES_USERNAME=\"postgres\"\nPOSTGRES_PASSWORD=\"password\"\n```\n\nSee `docs/introduction.md` and the service guides below for more details.\n\n### 3) Install and run a minimal example\n\nLocal (Recommended)\n```bash\npip install -e .\n# If you'll use browser-based tasks, install Playwright browsers first\nplaywright install\n```\n\nMCPMark defaults to the built-in orchestration agent (`MCPMarkAgent`). 
To experiment with the ReAct-style agent, pass `--agent react` to `pipeline.py` (other settings stay the same).\n\nDocker\n```bash\n./build-docker.sh\n```\n\nRun a filesystem task (no external accounts required):\n```bash\n# --k 1 runs once for a quick start; --models accepts any model you configured\npython -m pipeline \\\n  --mcp filesystem \\\n  --k 1 \\\n  --models gpt-5 \\\n  --tasks file_property/size_classification\n# Add --task-suite easy to run the lightweight dataset (where available)\n```\n\nResults are saved to `./results/{exp_name}/{model}__{mcp}/run-*/...` for the standard suite and `./results/{exp_name}/{model}__{mcp}-easy/run-*/...` when you run `--task-suite easy` (e.g., `./results/test-run/gpt-5__filesystem/run-1/...` or `./results/test-run/gpt-5__github-easy/run-1/...`).\n\n---\n\n## Run your evaluations\n\n### Task suites (standard vs easy)\n\n- Each MCP service now stores tasks under `tasks/<mcp>/<task_suite>/<category>/<task>/`.\n- `standard` (default) covers the full benchmark (127 tasks today).\n- `easy` hosts 10 lightweight tasks per MCP, ideal for smoke tests and CI (GitHub’s are already available under `tasks/github/easy`).\n- Switch suites with `--task-suite easy` (defaults to `--task-suite standard`).\n\n### Single run (k=1)\n```bash\n# Run ALL tasks for a service\npython -m pipeline --exp-name exp --mcp notion --tasks all --models MODEL --k 1\n\n# Run a task group\npython -m pipeline --exp-name exp --mcp notion --tasks online_resume --models MODEL --k 1\n\n# Run a specific task\npython -m pipeline --exp-name exp --mcp notion --tasks online_resume/daily_itinerary_overview --models MODEL --k 1\n\n# Evaluate multiple models\npython -m pipeline --exp-name exp --mcp notion --tasks all --models MODEL1,MODEL2,MODEL3 --k 1\n```\n\n### Multiple runs (k>1) for pass@k\n```bash\n# Run k=4 to compute stability metrics (requires --exp-name to aggregate final results)\npython -m pipeline --exp-name exp --mcp notion --tasks all --models MODEL\n\n# Aggregate results (pass@1 
/ pass@k / pass^k / avg@k)\npython -m src.aggregators.aggregate_results --exp-name exp\n```\n\n### Run with Docker\n```bash\n# Run all tasks for a service\n./run-task.sh --mcp notion --models MODEL --exp-name exp --tasks all\n\n# Cross-service benchmark\n./run-benchmark.sh --models MODEL --exp-name exp --docker\n```\n\nPlease visit `docs/introduction.md` for choices of *MODEL*.\n\nTip: MCPMark supports **auto-resume**. When re-running, only unfinished tasks will execute. Failures matching our retryable patterns (see [RETRYABLE_PATTERNS](src/errors.py)) are retried automatically. Models may emit different error strings—if you encounter a new resumable error, please open a PR or issue.\n\nTip: MCPMark supports **auto-compaction**; pass `--compaction-token N` to enable automatic context summarization when prompt tokens reach `N` (use `999999999` to disable).\n\n---\n\n## Service setup and authentication\n\n| Service     | Setup summary                                                                                                  | Docs                                  |\n|-------------|-----------------------------------------------------------------------------------------------------------------|---------------------------------------|\n| Notion      | Environment isolation (Source Hub / Eval Hub), integration creation and grants, browser login verification.     | [Guide](docs/mcp/notion.md)           |\n| GitHub      | Multi-account token pooling recommended; import pre-exported repo state if needed.                              | [Guide](docs/mcp/github.md)           |\n| Postgres    | Start via Docker and import sample databases.                                                                   | [Setup](docs/mcp/postgres.md)         |\n| Playwright  | Install browsers before first run; defaults to `chromium`.                                                      | [Setup](docs/mcp/playwright.md)       |\n| Filesystem  | Zero-configuration, run directly.         
                                                                      | [Config](docs/mcp/filesystem.md)      |\n\nYou can also follow [Quickstart](docs/quickstart.md) for the shortest end-to-end path.\n\n### Important Notice: GitHub Repository Privacy\n\n> **Please ensure your evaluation repositories are set to PRIVATE.**\n\nGitHub state templates are now automatically downloaded from our CDN during evaluation — no manual download is required. However, because these templates contain issues and pull requests from real open-source repositories, the recreation process includes `@username` mentions of the original authors.\n\n**We have received feedback from original GitHub authors who were inadvertently notified** when evaluation repositories were created as public. To be a responsible member of the open-source community, we urge all users to:\n\n1. **Always keep evaluation repositories private** during the evaluation process.\n2. **In the latest version**, we have added random suffixes to all `@username` mentions (e.g., `@user` becomes `@user_x7k2`) and implemented a safety check that prevents importing templates to public repositories.\n3. 
**If you are using an older version of MCPMark**, please either:\n   - Pull the latest code immediately, or\n   - Manually ensure all GitHub evaluation repositories are set to private.\n\nThank you for helping us maintain a respectful relationship with the open-source community.\n\n---\n\n## Results and metrics\n\n- Results are organized under `./results/{exp_name}/{model}__{mcp}/run-*/` (JSON + CSV per task).\n- Generate a summary with:\n```bash\n# Basic usage\npython -m src.aggregators.aggregate_results --exp-name exp\n\n# For k-run experiments with single-run models\npython -m src.aggregators.aggregate_results --exp-name exp --k 4 --single-run-models claude-opus-4-1\n```\n- Only models with complete results across all tasks and runs are included in the final summary.\n- Includes multi-run metrics (pass@k, pass^k) for stability comparisons when k > 1.\n\n---\n\n## Model and Tasks\n- **Model support**: MCPMark calls models via LiteLLM — see the LiteLLM docs: [`LiteLLM Doc`](https://docs.litellm.ai/docs/). For Anthropic (Claude) extended thinking mode (enabled via `--reasoning-effort`), we use Anthropic’s native API.\n- See `docs/introduction.md` for details and configuration of supported models in MCPMark.\n- To add a new model, edit `src/model_config.py`. Before adding, check LiteLLM supported models/providers. See [`LiteLLM Doc`](https://docs.litellm.ai/docs/).\n- Task design principles are described in `docs/datasets/task.md`. Each task ships with an automated `verify.py` for objective, reproducible evaluation; see `docs/datasets/task.md` for details.\n\n---\n\n## Contributing\n\nContributions are welcome:\n1. Add a new task under `tasks/<mcp>/<task_suite>/<category_id>/<task_id>/` with `meta.json`, `description.md` and `verify.py`.\n2. Ensure local checks pass and open a PR.\n3. 
See `docs/contributing/make-contribution.md`.\n\n---\n\n## Citation\n\nIf you find our work useful for your research, please consider citing:\n\n```bibtex\n@misc{wu2025mcpmark,\n      title={MCPMark: A Benchmark for Stress-Testing Realistic and Comprehensive MCP Use}, \n      author={Zijian Wu and Xiangyan Liu and Xinyuan Zhang and Lingjun Chen and Fanqing Meng and Lingxiao Du and Yiran Zhao and Fanshi Zhang and Yaoqi Ye and Jiawei Wang and Zirui Wang and Jinjie Ni and Yufan Yang and Arvin Xu and Michael Qizhe Shieh},\n      year={2025},\n      eprint={2509.24002},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL},\n      url={https://arxiv.org/abs/2509.24002}, \n}\n```\n\n## License\n\nThis project is licensed under the Apache License 2.0 — see `LICENSE`.\n"
  },
  {
    "path": "build-docker.sh",
    "content": "#!/bin/bash\n\n# Build Docker image for MCPMark\nset -e\n\n# Color codes for output\nGREEN='\\033[0;32m'\nYELLOW='\\033[1;33m'\nNC='\\033[0m' # No Color\n\necho -e \"${YELLOW}Building MCPMark Docker image locally...${NC}\"\n\n# Build the Docker image with the same tag as Docker Hub for local testing\ndocker build -t evalsysorg/mcpmark:latest . \"$@\"\n\n# Check if build was successful\nif [ $? -eq 0 ]; then\n    echo -e \"${GREEN}✓ Docker image built successfully${NC}\"\n    echo \"  Tag: evalsysorg/mcpmark:latest\"\n\n    # Show image info\n    echo \"\"\n    echo \"Image details:\"\n    docker images evalsysorg/mcpmark:latest --format \"table {{.Repository}}\\t{{.Tag}}\\t{{.Size}}\\t{{.CreatedAt}}\"\n\n    echo \"\"\n    echo \"You can now run tasks using:\"\n    echo \"  ./run-task.sh --mcp notion --models o3 --exp-name test --tasks all\"\nelse\n    echo \"Docker build failed!\"\n    exit 1\nfi\n"
  },
  {
    "path": "cspell.config.yaml",
    "content": "version: \"0.2\"\nignorePaths: []\ndictionaryDefinitions: []\ndictionaries: []\nwords:\n  - datname\n  - domcontentloaded\n  - modelcontextprotocol\n  - pgdumplib\n  - pixi\n  - pypi\n  - topbar\n  - usename\nignoreWords: []\nimport: []\n"
  },
  {
    "path": "docs/contributing/make-contribution.md",
    "content": "# Contributing\n\n1. Fork the repository and create a feature branch.\n\n2. Add new tasks under `tasks/<mcp>/<task_suite>/<category>/<task_id>/` with the files of `meta.json`, `description.md` and `verify.py`. Please refer to [Task Page](../datasets/task.md) for detailed instructions.\n\n3. Ensure all tests pass.\n\n4. Submit a pull request — contributions are welcome!\n"
  },
  {
    "path": "docs/datasets/task.md",
    "content": "# Task\n\nThe tasks in MCPMark follow two major principles\n- The tasks are based on realistic digital environments that are also used by human programmers.\n- The task outcome can be robustly verified by Python scripts.\n\nTherefore, each MCPMark task consists of three files\n- `meta.json`\n- `description.md`\n- `verify.py`\n\nHere, `meta.json` includes the meta information of the task, `description.md` describes the purpose and setting of the task, as well as the instruction to complete the task. `verify.py` checks whether the task is completed successfully.\n\nFor example, you can ask the model agent to create a file with a specific name and write specific content to the file, which belongs to the category of operating the file context. The structure looks like\n\n```\ntasks \n│\n└───filesystem\n   │\n   └───standard          # task_suite (also supports `easy`)\n      │\n      └───file_context   # category_id\n         │\n         └───create_file_write\n            │   meta.json \n            │   description.md\n            │   verify.py\n```\n\nAll tasks live under `tasks/<mcp>/<task_suite>/<category>/<task_id>/`. `filesystem` refers to the MCP service and `task_suite` captures the difficulty slice (`standard` benchmark vs `easy` smoke tests).\n\n`meta.json` includes the meta information about the task, including the following keys\n- task_id: the id of the task.\n- task_name: full name of the task.\n- description: task description.\n- category_id: the id of task category.\n- category_name: the full name of task category.\n- author: the author of the task.\n- difficulty: the task difficulty level.\n- created_at: the timestamp of task creation.\n- tags: a list of tags that describe the task.\n- mcp: a list of MCP services it belongs to.\n- metadata: other meta information.\n\nHere `category_name` describes the shared feature or the environment across different tasks (e.g. the github repository or notion page the task is built on). 
In this running example, `category_name` refers to `file_context`.\n\n`description.md` could include the following information\n\n- Task name\n    - Create and Write File.\n- Task description\n    - Use the filesystem MCP tools to create a new file and write content to it.\n- Task Objectives\n    - Create a new file named `hello_world.txt` in the test directory.\n    - Write the following content to the file: `Hello, World!`\n    - Verify the file was created successfully\n-  Verification Criteria\n    - File `hello_world.txt` exists in the test directory\n    - File contains the expected content structure\n    - File includes \"Hello, World!\" on the first line\n- Tips\n    - Use the `write_file` tool to create and write content to the file\n    - The test directory path will be provided in the task context\n\nThe entire content of `description.md` will be read by the model agent for completing the task. \n\nAccordingly, the `verify.py` contains the following functionalities\n- Check whether the target directory exists. [![Check Target Directory](https://i.postimg.cc/SQfBYvby/task-sample-verify-get-test-dir.png)](https://postimg.cc/4nnLrw3M)\n- Check whether the target directory contains the file with target file name. [![Check Target File Existence](https://i.postimg.cc/Qx0Zwnf6/task-sample-verify-file-existence.png)](https://postimg.cc/7fGRTX87)\n- Check whether the target file contains the desired content `EXPECTED_PATTERNS = [\"Hello, World!\"]`. [![Check Content in Target File](https://i.postimg.cc/JzzMhWyV/task-sample-verify-check-content.png)](https://postimg.cc/w7ZSWZc0)\n\n- If the outcome passes **all the above verification functionalities**, the task would be marked as successfully completed.\n"
  },
  {
    "path": "docs/installation_and_docker_usage.md",
    "content": "# Installation and Docker Task Usage Guideline\n\n## Overview\n\nThe MCPMark setup supports installation through either pip or MCPMark Docker (recommended) after cloning the code repository.\n\n### Pip Installation\n```bash\npip install -e .\n```\n\nThe MCPMark Docker setup provides a simple way to run evaluation tasks in isolated containers. PostgreSQL is automatically handled when needed.\n\n## 1. Quick Start\n\n### 1.1 Docker Image\n\nThe official Docker image is automatically pulled from Docker Hub on first use.\nThe image is hosted at: https://hub.docker.com/r/evalsysorg/mcpmark\n\n**Image Management:**\n- The scripts automatically download the image when it's not found locally\n- To manually update to the latest version:\n  ```bash\n  docker pull evalsysorg/mcpmark:latest\n  ```\n- For local development/testing, you can build your own docker:\n  ```bash\n   # Creates evalsysorg/mcpmark:latest locally\n  ./build-docker.sh\n  ```\n\n## 2. Running MCP Experiments\n\n### 2.1 Running Individual MCP Experiment \n\nThe `run-task.sh` script provides simplified Docker usage:\n\n```bash\n# Run filesystem tasks (filesystem is the default mcp service)\n./run-task.sh --models MODEL_NAME --k K\n\n# Run github/notion/postgres/playwright/playwright_webarena with specific task\n./run-task.sh --mcp MCPSERVICE --models MODEL_NAME --exp-name EXPNAME --tasks TASK --k K\n```\n\nwhere *MODEL_NAME* refers to the model choice from the supported models (see [Introduction Page](./introduction.md) for more information), *EXPNAME* refers to a customized experiment name, *TASK* refers to a specific task or task group (see `tasks/<mcp>/<task_suite>/...` for more information), and *K* refers to the number of independent runs.\n\n\nAdditionally, the `run-benchmark.sh` script evaluates models across all MCP services:\n\n```bash\n# Run all services with Docker (recommended)\n./run-benchmark.sh --models MODEL --exp-name EXPNAME --docker\n\n# Run specific services\n./run-benchmark.sh 
--models MODEL --exp-name EXPNAME --mcps MCPSERVICES --docker\n\n# Run with parallel execution for faster results\n./run-benchmark.sh --models MODEL --exp-name EXPNAME --docker --parallel\n\n# Run locally without Docker\n./run-benchmark.sh --models MODEL --exp-name EXPNAME --mcps MCPSERVICES\n```\n\nHere *MCPSERVICES* refers to group of MCP services, separated by comma (e.g. *filesystem,postgres*)\n\nThe benchmark script:\n- Runs all or selected MCP services automatically\n- Supports progress tracking and timing\n- Generates summary reports and logs\n- Supports parallel service execution\n- Continues running even if some services fail\n- Automatically generates performance dashboards\n\n### Manual Docker Commands\n\n#### For Non-Postgres Services\nSuppose Notion is the service:\n```bash\n# Build the image first\n./build-docker.sh\n\n# Run a task\ndocker run --rm \\\n  -v $(pwd)/results:/app/results \\\n  -v $(pwd)/.mcp_env:/app/.mcp_env:ro \\\n  -v $(pwd)/notion_state.json:/app/notion_state.json:ro \\\n  evalsysorg/mcpmark:latest \\\n  python3 -m pipeline --mcp notion --models MODEL --exp-name EXPNAME --tasks TASK --k K\n```\n\n#### For Postgres Service\n```bash\n# The run-task.sh script handles postgres automatically, but if doing manually:\n\n# Start postgres container\ndocker run -d \\\n  --name mcp-postgres \\\n  --network mcp-network \\\n  -e POSTGRES_DATABASE=postgres \\\n  -e POSTGRES_USER=postgres \\\n  -e POSTGRES_PASSWORD=123456 \\\n  ghcr.io/cloudnative-pg/postgresql:17-bookworm\n\n# Run postgres task\ndocker run --rm \\\n  --network mcp-network \\\n  -e POSTGRES_HOST=mcp-postgres \\\n  -v $(pwd)/results:/app/results \\\n  -v $(pwd)/.mcp_env:/app/.mcp_env:ro \\\n  evalsysorg/mcpmark:latest \\\n  python3 -m pipeline --mcp postgres --models MODEL --exp-name EXPNAME --tasks TASK --k K\n\n# Stop and remove postgres when done\ndocker stop mcp-postgres && docker rm mcp-postgres\n```\n\n## Script Usage\n\n### Benchmark Runner 
(`run-benchmark.sh`)\n\n```\n./run-benchmark.sh --models MODELS --exp-name NAME [OPTIONS]\n\nRequired Options:\n    --models MODELS      Comma-separated list of models to evaluate\n    --exp-name NAME     Experiment name for organizing results\n\nOptional Options:\n    --docker            Run tasks in Docker containers (recommended)\n    --mcps SERVICES Comma-separated list of services to test\n                        Default: filesystem,notion,github,postgres,playwright\n    --parallel          Run services in parallel (experimental)\n    --timeout SECONDS   Timeout per task in seconds (default: 300)\n```\n\n### Individual Task Runner (`run-task.sh`)\n\n```\n./run-task.sh [--mcp SERVICE] [PIPELINE_ARGS]\n\nOptions:\n    --mcp SERVICE    MCP service (notion|github|filesystem|playwright|postgres)\n                        Default: filesystem\n\nEnvironment Variables:\n    DOCKER_MEMORY_LIMIT  Memory limit for container (default: 4g)\n    DOCKER_CPU_LIMIT     CPU limit for container (default: 2)\n    DOCKER_IMAGE_VERSION Docker image tag to use (default: latest)\n\nAll other arguments are passed directly to the pipeline command.\n\nPipeline arguments (see python3 -m pipeline --help):\n  --mcp {notion,github,filesystem,playwright,postgres,playwright_webarena}\n                        MCP service to use (default: filesystem)\n  --models MODELS       Comma-separated list of models to evaluate (e.g., 'o3,k2,gpt-4.1')\n  --tasks TASKS         Tasks to run: \"all\", a category name, or \"category/task_name\"\n  --exp-name EXP_NAME   Experiment name; results are saved under results/<exp-name>/ (default: YYYY-MM-DD-HH-MM-SS)\n  --k K                 Number of evaluation runs for pass@k metrics (default: 1)\n  --timeout TIMEOUT     Timeout in seconds for each task\n  --output-dir OUTPUT_DIR\n                        Directory to save results\n```\n\n## Docker Benefits\n\n1. **Efficiency**: Only starts necessary containers\n2. 
**Isolation**: Each task runs in a fresh container\n3. **Resource Management**: Automatic cleanup of containers and networks\n4. **Smart Dependencies**: PostgreSQL only starts for postgres service\n5. **Parallel Support**: Can run multiple services simultaneously for faster benchmarks\n6. **Comprehensive Testing**: Benchmark script runs all services with one command\n7. **Progress Tracking**: Colored output with timing and status information\n8. **Automatic Reporting**: Generates summary reports and performance dashboards\n\n## Common Troubleshooting\n\n### Permission Issues\n```bash\nchmod +x run-task.sh\n```\n\n### Docker Build Issues\n```bash\n# Force rebuild with no cache\n./run-task.sh --build --mcp MCPSERVICE --models MODEL_NAME --exp-name EXPNAME --tasks TASK\n```\n\n### PostgreSQL Connection Issues\n```bash\n# Check if postgres is running\ndocker ps | grep postgres\n\n# View postgres logs\ndocker logs mcp-postgres-task\n```\n\n### Cleanup Stuck Resources\n```bash\n# Stop all containers\ndocker stop $(docker ps -q)\n\n# Remove task network\ndocker network rm mcp-task-network\n\n# Remove postgres data volume (careful!)\ndocker volume rm mcp-postgres-data\n```\n\n## Environment Variables\n\nCreate `.mcp_env` file with your credentials:\n```env\n# Service credentials\nSOURCE_NOTION_API_KEY=your-key\nEVAL_NOTION_API_KEY=your-key\nGITHUB_TOKEN=your-token\nPOSTGRES_PASSWORD=your-password\n\n# Model API keys\nOPENAI_API_KEY=your-key\nANTHROPIC_API_KEY=your-key\n# ... etc\n```\n\nPlease refer to [Quick Start](./quickstart.md) for setting up API key for specific model.\n\n## Docker Compose Files\n\n- `docker-compose.yml` - Full stack with postgres (for development/testing)\n\n## Notes\n\n- Results are saved under `./results/<exp-name>/`.\n- Each task runs in an ephemeral container.\n- Docker image is shared across all tasks.\n- PostgreSQL data persists in Docker volume.\n"
  },
  {
    "path": "docs/introduction.md",
    "content": "# MCPMark\nMCPMark is a comprehensive evaluation suite for evaluating the agentic ability of frontier models.\n\nMCPMark includes Model Context Protocol (MCP) services in the following environments\n- Notion\n- Github\n- Filesystem\n- Postgres\n- Playwright\n- Playwright-WebArena\n\n### General Procedure\nMCPMark is designed to run agentic tasks in complex environments **safely**. Specifically, it sets up an isolated environment for the experiment, completes the task, and then destroys the environment without affecting existing user profile or information.\n\n### How to Use MCPMark\n1. MCPMark Installation.\n2. Authorize service (for Github and Notion).\n3. Configure the environment variables in `.mcp_env`.\n4. Run MCPMark experiment.\n\nPlease refer to [Quick Start](./quickstart.md) for details regarding how to properly start a sample filesystem experiment, and [Task Page](./datasets/task.md) for task details. Please visit [Installation and Docker Usage](./installation_and_docker_usage.md) for information on the full MCPMark setup.\n\n### Running MCPMark\n\nMCPMark supports the following modes to run experiments (suppose the experiment is named new_exp, the models used are o3 and gpt-4.1, and the environment is notion), with K repeated experiments.\n\n#### MCPMark in Pip Installation\n```bash\n# Evaluate ALL tasks\npython -m pipeline --exp-name new_exp --mcp notion --tasks all --models o3 --k K\n\n# Evaluate a single task group (online_resume)\npython -m pipeline --exp-name new_exp --mcp notion --tasks online_resume --models o3 --k K\n\n# Evaluate one specific task (task_1 in online_resume)\npython -m pipeline --exp-name new_exp --mcp notion --tasks online_resume/task_1 --models o3 --k K\n\n# Evaluate multiple models\npython -m pipeline --exp-name new_exp --mcp notion --tasks all --models o3,gpt-4.1 --k K\n```\n\n#### MCPMark in Docker Installation\n```bash\n# Run all tasks for one service\n./run-task.sh --mcp notion --models o3 --exp-name new_exp 
--tasks all\n\n# Run comprehensive benchmark across all services\n./run-benchmark.sh --models o3,gpt-4.1 --exp-name new_exp --docker\n```\n\n#### Experiment Auto-Resume\nFor re-run experiments, only unfinished tasks will be executed. Tasks that previously failed due to pipeline errors (such as State Duplication Error or MCP Network Error) will also be retried automatically.\n\n### Results\nThe experiment results are written to `./results/` (JSON + CSV).\n\n#### Result Aggregation (for K > 1)\nMCPMark supports aggregated metrics of pass@1, pass@K, pass^K, avg@K.\n```bash\npython -m src.aggregators.aggregate_results --exp-name new_exp\n```\n\n### Model Support\nMCPMark supports the following models with their corresponding providers (model codes in the brackets).\n#### OpenAI\n- GPT-5 (gpt-5)\n- o3 (o3)\n\n#### Anthropic\n- Claude-4.1-Opus (claude-4.1-opus)\n- Claude-4-Sonnet (claude-4-sonnet)\n\n#### Google\n- Gemini-2.5-Pro (gemini-2.5-pro)\n\n#### Grok\n- Grok-4 (grok-4)\n\n#### Deepseek\n- DeepSeek-Chat (deepseek-chat)\n\n#### Alibaba\n- Qwen3-Coder (qwen-3-coder)\n\n#### Kimi\n- Kimi-K2 (k2)\n\n### Want to contribute?\nVisit [Contributing Page](./contributing) to learn how to make a contribution to MCPMark.\n"
  },
  {
    "path": "docs/mcp/filesystem.md",
    "content": "# Filesystem\n\nThis guide walks you through preparing your filesystem environment for MCPMark.\n\n## 1 · Configure Environment Variables\n\nSet the `FILESYSTEM_TEST_ROOT` environment variable in your `.mcp_env` file:\n\n```env\n## Filesystem\nFILESYSTEM_TEST_ROOT=./test_environments\n```\n\n**Recommended**: Use `FILESYSTEM_TEST_ROOT=./test_environments` (relative to project root)\n\n---\n\n## 2 · Automatic Test Environment Download\n\nOur code automatically downloads test folders to your specified `FILESYSTEM_TEST_ROOT` directory when the pipeline starts running.\n\n**Downloaded Structure**:\n\n```\n./test_environments/\n├── desktop/               # Desktop environment \n├── desktop_template/      # Template files for desktop\n├── file_context/          # File content understanding tasks\n├── file_property/         # File metadata and properties related tasks\n├── folder_structure/      # Directory organization tasks\n├── legal_document/        # Legal document processing\n├── papers/                # Academic paper tasks\n├── student_database/      # Database management tasks\n├── threestudio/           # 3D Generation codebase\n└── votenet/               # 3D Object Detection codebase\n```\n\n---\n\n## 3 · Running Filesystem Tasks\n\n**Basic Command**:\n\n```bash\npython -m pipeline --exp-name EXPNAME --mcp filesystem --tasks FILESYSTEMTASK --models MODEL --k K\n```\n\n**Docker Usage (Recommended)**\n\nDocker is recommended to avoid library version conflicts:\n\n```bash\n# Build Docker image\n./build-docker.sh\n\n# Run with Docker\n./run-task.sh --mcp filesystem --models MODEL --exp-name EXPNAME --tasks FILESYSTEMTASK --k K\n```\n\nHere *EXPNAME* refers to customized experiment name, *FILESYSTEMTASK* refers to the github task or task group selected (see [Task Page](../datasets/task.md) for specific task information), *MODEL* refers to the selected model (see [Introduction Page](../introduction.md) for model supported), *K* refers to the time of 
independent experiments.\n\n---\n\n## 4 · Troubleshooting\n\n**Common Issues**:\n\n- **Test Environment Not Found**: Ensure `FILESYSTEM_TEST_ROOT` is set correctly\n- **Prerequisites**: Make sure your terminal has `wget` and `unzip` commands available\n- **Recommended**: Use Docker to prevent library version conflicts\n"
  },
  {
    "path": "docs/mcp/github.md",
    "content": "# GitHub\nThis guide walks you through preparing your GitHub environment for MCPMark and authenticating the CLI tools with support for **token pooling** to mitigate rate limits.\n\n## 1 · Prepare An Evaluation Organization in Github\n\n1. **Create a free GitHub Organization**  \n   - In GitHub, click your avatar → **Your organizations** → **New organization**.  \n   - We recommend a name like `mcpmark-eval-xxx`. (Check if there is a conflict with other organization names.)\n   - This keeps all benchmark repositories isolated from your personal and work code. \n   - [![Create Org](https://i.postimg.cc/CxqJkRnj/github-create-org.png)](https://postimg.cc/k27xdXc4)\n2. **Create Multiple GitHub Accounts (Recommended for Rate Limit Relief)**  \n   To effectively distribute API load and avoid rate limiting, we recommend creating **2-4 additional GitHub accounts**:\n   - Create new GitHub accounts (e.g., `your-name-eval-1`, `your-name-eval-2`, etc.)\n   - **Important**: Add all these accounts as **Owners** to your evaluation organization\n   - This allows the token pooling system to distribute requests across multiple accounts\n\n3. 
**Generate Fine-Grained Personal Access Tokens (PATs) for Each Account**  \n   **Repeat the following process for each GitHub account (including your main account):**\n   - Navigate to *Settings → Developer settings → Personal access tokens → Fine-grained tokens*\n   - Click **Generate new token**, select the evaluation organization you created\n      - [![Create Token](https://i.postimg.cc/Z5SjPT82/github-create-token.png)](https://postimg.cc/Mv9yqJrm)\n   - Give the token a descriptive name (e.g., *MCPMark Eval Token - Account 1*)\n   - Under **Repository permissions** and **Organization permissions**, enable **All permissions** (read and write if applicable)\n      - [![Token Permissions](https://i.postimg.cc/nc81ZHPr/github-token-permissions.png)](https://postimg.cc/14HFrZP1)\n   - Copy the generated token and save it safely — you'll need all tokens for the next step\n\n4. **Configure Token Pooling in `.mcp_env`**  \n   In your project root, edit (or create) the `.mcp_env` file and add your tokens:\n   \n   **For single token (Basic setup):**\n   ```env\n   ## GitHub - Single Token Configuration\n   GITHUB_TOKENS=\"your-single-token-here\"\n   GITHUB_EVAL_ORG=\"your-eval-org-name\"\n   ```\n\n   **For multiple tokens (Recommended for handling rate limits):**\n   ```env\n   ## GitHub - Token Pooling Configuration\n   GITHUB_TOKENS=\"token1,token2,token3,token4\"\n   GITHUB_EVAL_ORG=\"your-eval-org-name\"\n   ```\n\n   **Important Notes:**\n   - Replace `token1,token2,token3,token4` with your actual tokens (comma-separated, no spaces)\n   - **2-4 tokens** is recommended for optimal rate limit distribution\n   - All tokens must have **the same permissions** on the evaluation organization\n   - The system automatically rotates between tokens to distribute API load\n\n---\n\n## 2 · Download the Sample Repository State\n\nWe have pre-exported several popular open-source repositories along with curated Issues and PRs.\n\n1. 
Download the archive from [Google Drive](https://drive.google.com/drive/folders/16bFDjdtqJYzYJlqKcjKBGomo8DwOhWcN?usp=drive_link).  \n2. Extract it so that the directory `./github_state/` appears in the project root:\n\n   ```bash\n   mkdir -p github_state\n   unzip github_state.zip -d ./github_state\n   ```\n\n---\n\n## 3 · Add New Repositories (Optional)\n\nIf you want to benchmark additional repositories:\n\n1. Export the desired repository state:\n   ```bash\n   python -m src.mcp_services.github.repo_exporter --source_repo_url owner/name --max-issues 20 --max-pulls 5\n   ```\n2. Open `src/mcp_services/github/state_manager.py` and add a new entry to `self.initial_state_mapping` pointing to the exported folder.\n\n---\n\n## 4 · GitHub Rate Limits & Token Pooling Benefits\n\n### Understanding Rate Limits\nFine-grained tokens are subject to GitHub API rate limits:\n- **Read operations**: 5,000 requests per hour per token\n- **General write operations**: 80 writes per minute and 500 writes per hour per token\n- **Content creation (Issues, PRs, Comments)**: **500 requests per hour per token** (Secondary Rate Limit)\n\n### How Token Pooling Helps\nWith **token pooling**, MCPMark automatically:\n- **Distributes requests** across multiple tokens to multiply your rate limits\n- **Rotates tokens** for each task execution to balance load\n- **Handles rate limit failures** by trying the next available token\n- **Ensures consistency** between agent execution and verification\n\n### Example: Rate Limit Multiplication\n**Read Operations:**\n- **Single token**: 5,000 requests/hour\n- **4 tokens**: ~20,000 requests/hour total capacity\n\n**Content Creation (Critical for MCPMark):**\n- **Single token**: 500 content creation requests/hour\n- **4 tokens**: ~2,000 content creation requests/hour total capacity\n- **Automatic failover**: If one token hits limits, others continue working\n\nThis dramatically improves evaluation performance, especially for large task batches or frequent 
testing cycles. **The content creation limit is often the bottleneck**, making token pooling essential for efficient evaluations.\n\n### Repository Limits\nMCPMark places a cap on the number of PRs and issues (≤ 50 in total) per repository to ensure reasonable evaluation times and to stay within rate limits.\n\n---\n\n## 5 · Running GitHub Tasks\n\n1. Configure environment variables: make sure `GITHUB_TOKENS` and `GITHUB_EVAL_ORG` are properly set in `.mcp_env`.\n\n2. For a single task or task group, run:\n```bash\npython -m pipeline --exp-name EXPNAME --mcp github --tasks GITHUBTASK --models MODEL --k K\n```\nHere *EXPNAME* is your custom experiment name, *GITHUBTASK* is the GitHub task or task group to run (see [Task Page](../datasets/task.md) for specific task information), *MODEL* is the selected model (see [Introduction Page](../introduction.md) for supported models), and *K* is the number of independent runs."
  },
  {
    "path": "docs/mcp/notion.md",
    "content": "# Notion\n\nThis guide walks you through preparing your Notion environment for MCPMark and authenticating the CLI tools.\n\n> Note: Set your Notion app and workspace interface language to English. We use Playwright for browser automation and our locator logic relies on raw English text in the UI. Non-English interfaces can cause element selection to fail.\n\n## 1 · Set up Notion Environment\n\n1. **Duplicate the MCPMark Source Pages**\n   Copy the template database and pages into your workspace from the public template following this tutorial:\n   [Duplicate MCPMark Source](https://painted-tennis-ebc.notion.site/MCPBench-Source-Hub-23181626b6d7805fb3a7d59c63033819).\n\n2. **Set up the Source and Eval Hub for Environment Isolation**\n   - Prepare **two separate Notion pages**:\n     - **Source Hub**: Stores all the template databases/pages. Managed by `SOURCE_NOTION_API_KEY`.\n     - **Eval Hub**: Only contains the duplicated templates for the current evaluation. Managed by `EVAL_NOTION_API_KEY`.\n   - In Notion, create an **empty page** in your Eval Hub. The page name **must exactly match** the value you set for `EVAL_PARENT_PAGE_TITLE` in your environment variables (e.g., `MCPMark Eval Hub`).\n   - Name your **Source Hub** page to match `SOURCE_PARENT_PAGE_TITLE` (default: `MCPMark Source Hub`). This is where all initial-state templates live; we enumerate this page’s first-level children by exact title.\n   - In Notion's **Connections** settings:\n     - Bind the integration corresponding to `EVAL_NOTION_API_KEY` to the Eval Hub parent page you just created.\n     - Bind the integration corresponding to `SOURCE_NOTION_API_KEY` to your Source Hub (where the templates are stored).\n\n3. **Create Notion Integrations & Grant Access**\n   \n   a. Visit [Notion Integrations](https://www.notion.so/profile/integrations) and create **two internal integrations** (one for Source Hub, one for Eval Hub).\n   \n   b. 
Copy the generated **Internal Integration Tokens** (these will be your `SOURCE_NOTION_API_KEY` and `EVAL_NOTION_API_KEY`).\n   \n   c. Share the **Source Hub** with the Source integration, and the **Eval Hub parent page** with the Eval integration (*Full Access*).\n\n   [![Source Page](https://i.postimg.cc/pVjDswLH/source-page.png)](https://postimg.cc/XXVGJD5H)\n   [![Create Integration](https://i.postimg.cc/vZ091M3W/create-integration.png)](https://postimg.cc/NKrLShhM)\n   [![Notion API Access](https://i.postimg.cc/YCDGrRCR/api-access.png)](https://postimg.cc/CRDLJjDn)\n   [![Grant Access Source](https://i.postimg.cc/2yxyPFt4/grant-access-source.png)](https://postimg.cc/n9Cnm7pz)\n   [![Grant Access Eval](https://i.postimg.cc/1RM91ttc/grant-access-eval.png)](https://postimg.cc/s1QFp35v)\n\n---\n\n## 2 · Authenticate with Notion\n\n```bash\n# First, install Playwright and the browser binaries\nplaywright install\n# Then, run the Notion login helper with your preferred browser\npython -m src.mcp_services.notion.notion_login_helper --browser {firefox|chromium}\n```\n\nThe verification script will tell you which browser is working properly. The pipeline defaults to using **chromium**. Our pipeline has been **fully tested on macOS and Linux**.\n\n## 3. Running Notion Tasks\n\n1. Configure environment variables: make sure the following service credentials are added in `.mcp_env`.\n```env\n## Notion\nSOURCE_NOTION_API_KEY=\"your-source-notion-api-key\"   # For Source Hub (templates)\nEVAL_NOTION_API_KEY=\"your-eval-notion-api-key\"       # For Eval Hub (active evaluation)\nSOURCE_PARENT_PAGE_TITLE=\"MCPMark Source Hub\"        # Source hub page name (exact match)\nEVAL_PARENT_PAGE_TITLE=\"MCPMark Eval Hub\"           # Must match the name of the empty page you created in Eval Hub\nPLAYWRIGHT_BROWSER=\"chromium\" # default to chromium, you can also choose firefox\nPLAYWRIGHT_HEADLESS=\"True\"\n```\n\n2. 
For a single task or task group, run:\n```bash\npython -m pipeline --exp-name EXPNAME --mcp notion --tasks NOTIONTASK --models MODEL --k K\n```\nHere *EXPNAME* is your custom experiment name, *NOTIONTASK* is the Notion task or task group to run (see [Task Page](../datasets/task.md) for specific task information), *MODEL* is the selected model (see [Introduction Page](../introduction.md) for supported models), and *K* is the number of independent runs.\n"
  },
  {
    "path": "docs/mcp/playwright.md",
    "content": "# Playwright\n\nThis guide walks you through setting up WebArena environments for Playwright MCP automated testing, including Shopping, Shopping Admin, and Reddit instances.\n\nSection 1 is designed mainly for completing the Playwright-WebArena tasks.\n\n## 1. Setup WebArena Environment (For Playwright-WebArena Tasks)\n### 1.1 Download Docker Images\n\n[WebArena](https://github.com/web-arena-x/webarena/tree/main/environment_docker) provides Docker images from multiple sources. Choose the fastest one for your network:\n\n### Shopping Environment (Port 7770)\n```bash\n# Option 1: Google Drive (Recommended)\npip install gdown\ngdown 1gxXalk9O0p9eu1YkIJcmZta1nvvyAJpA\n\n# Option 2: Archive.org\nwget https://archive.org/download/webarena-env-shopping-image/shopping_final_0712.tar\n\n# Option 3: CMU Server\nwget http://metis.lti.cs.cmu.edu/webarena-images/shopping_final_0712.tar\n```\n\n### Shopping Admin Environment (Port 7780)\n```bash\n# Option 1: Google Drive (Recommended)\ngdown 1See0ZhJRw0WTTL9y8hFlgaduwPZ_nGfd\n\n# Option 2: Archive.org\nwget https://archive.org/download/webarena-env-shopping-admin-image/shopping_admin_final_0719.tar\n\n# Option 3: CMU Server\nwget http://metis.lti.cs.cmu.edu/webarena-images/shopping_admin_final_0719.tar\n```\n\n### Reddit Environment (Port 9999)\n```bash\n# Option 1: Google Drive (Recommended)\ngdown 17Qpp1iu_mPqzgO_73Z9BnFjHrzmX9DGf\n\n# Option 2: Archive.org\nwget https://archive.org/download/webarena-env-forum-image/postmill-populated-exposed-withimg.tar\n\n# Option 3: CMU Server\nwget http://metis.lti.cs.cmu.edu/webarena-images/postmill-populated-exposed-withimg.tar\n```\n\n### 1.2 Deploy Environments\n\n#### Shopping (E-commerce Site)\n```bash\ndocker load --input shopping_final_0712.tar\n\n# Start container\ndocker run --name shopping -p 7770:80 -d shopping_final_0712\n\n# Wait for service initialization (2-3 minutes)\nsleep 180\n\n# Configure for local access\ndocker exec shopping 
/var/www/magento2/bin/magento setup:store-config:set --base-url=\"http://localhost:7770\"\ndocker exec shopping mysql -u magentouser -pMyPassword magentodb -e \"UPDATE core_config_data SET value='http://localhost:7770/' WHERE path IN ('web/secure/base_url', 'web/unsecure/base_url');\"\ndocker exec shopping /var/www/magento2/bin/magento cache:flush\n```\n\n**Access**: `http://localhost:7770`  \n\n\n#### Shopping Admin (Management Panel)\n```bash\ndocker load --input shopping_admin_final_0719.tar\n\n# Start container\ndocker run --name shopping_admin -p 7780:80 -d shopping_admin_final_0719\n\n# Wait for service initialization\nsleep 120\n\n# Configure for local access\ndocker exec shopping_admin /var/www/magento2/bin/magento setup:store-config:set --base-url=\"http://localhost:7780\"\ndocker exec shopping_admin mysql -u magentouser -pMyPassword magentodb -e \"UPDATE core_config_data SET value='http://localhost:7780/' WHERE path IN ('web/secure/base_url', 'web/unsecure/base_url');\"\ndocker exec shopping_admin php /var/www/magento2/bin/magento config:set admin/security/password_is_forced 0\ndocker exec shopping_admin php /var/www/magento2/bin/magento config:set admin/security/password_lifetime 0\ndocker exec shopping_admin /var/www/magento2/bin/magento cache:flush\n```\n\n**Access**: `http://localhost:7780/admin`  \n**Admin Credentials**: `admin / admin1234`\n\n#### Reddit (Forum)\n```bash\ndocker load --input postmill-populated-exposed-withimg.tar\n\n# Start container\ndocker run --name forum -p 9999:80 -d postmill-populated-exposed-withimg\n\n# Wait for PostgreSQL initialization\nsleep 120\n\n# Verify service status\ndocker logs forum | grep \"database system is ready\"\ncurl -I http://localhost:9999\n```\n\n**Access**: `http://localhost:9999`\n\n### 1.3 External Access Configuration\n\nFor cloud deployments (GCP, AWS, etc.), configure external access:\n\n#### Configure Firewall (GCP Example)\n```bash\n# Shopping environment\ngcloud compute firewall-rules create 
allow-shopping-7770 \\\n  --allow tcp:7770 --source-ranges 0.0.0.0/0\n\n# Shopping Admin\ngcloud compute firewall-rules create allow-shopping-admin-7780 \\\n  --allow tcp:7780 --source-ranges 0.0.0.0/0\n\n# Reddit\ngcloud compute firewall-rules create allow-reddit-9999 \\\n  --allow tcp:9999 --source-ranges 0.0.0.0/0\n```\n\n#### Update Base URLs for External Access\n```bash\n# Get external IP\nEXTERNAL_IP=$(curl -s ifconfig.me)\n\n# Shopping\ndocker exec shopping /var/www/magento2/bin/magento setup:store-config:set --base-url=\"http://${EXTERNAL_IP}:7770\"\ndocker exec shopping mysql -u magentouser -pMyPassword magentodb -e \"UPDATE core_config_data SET value='http://${EXTERNAL_IP}:7770/' WHERE path IN ('web/secure/base_url', 'web/unsecure/base_url');\"\ndocker exec shopping /var/www/magento2/bin/magento cache:flush\n\n# Shopping Admin  \ndocker exec shopping_admin /var/www/magento2/bin/magento setup:store-config:set --base-url=\"http://${EXTERNAL_IP}:7780\"\ndocker exec shopping_admin mysql -u magentouser -pMyPassword magentodb -e \"UPDATE core_config_data SET value='http://${EXTERNAL_IP}:7780/' WHERE path IN ('web/secure/base_url', 'web/unsecure/base_url');\"\ndocker exec shopping_admin /var/www/magento2/bin/magento cache:flush\n```\n\n### 1.4 Alternative Access Methods (Not Verified)\n\n#### Cloudflared Tunnel (Free & Persistent)\n```bash\n# Install cloudflared\nwget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64\nsudo mv cloudflared-linux-amd64 /usr/local/bin/cloudflared\nsudo chmod +x /usr/local/bin/cloudflared\n\n# Create tunnels\ncloudflared tunnel --url http://localhost:7770  # Shopping\ncloudflared tunnel --url http://localhost:7780  # Admin\ncloudflared tunnel --url http://localhost:9999  # Reddit\n```\n\n#### ngrok (Quick Sharing)\n```bash\n# Install ngrok\nwget https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz\ntar xvzf ngrok-v3-stable-linux-amd64.tgz\nsudo mv ngrok /usr/local/bin\n\n# 
Create tunnel (choose port)\nngrok http 7770  # For Shopping\n```\n\n## 2. Running Playwright Tasks\n\n1. Configure environment variables: make sure the following service credentials are added in `.mcp_env`.\n```env\nPLAYWRIGHT_BROWSER=\"chromium\" # default to chromium, you can also choose firefox\nPLAYWRIGHT_HEADLESS=\"True\"\n```\n\n2. For a single task or task group, run:\n```bash\npython -m pipeline --exp-name EXPNAME --mcp MCP --tasks PLAYWRIGHTTASK --models MODEL --k K\n```\nHere *EXPNAME* is your custom experiment name, *MCP* is either playwright or playwright_webarena depending on the task, *PLAYWRIGHTTASK* is the task or task group to run (see [Task Page](../datasets/task.md) for specific task information), *MODEL* is the selected model (see [Introduction Page](../introduction.md) for supported models), and *K* is the number of independent runs.\n\n## 3. Troubleshooting\n\n### Container Issues\n```bash\n# Check status\ndocker ps -a | grep -E \"shopping|forum\"\n\n# View logs\ndocker logs [container_name] --tail 50\n\n# Restart container\ndocker restart [container_name]\n```\n\n### Access Problems\n- **First load is slow** (1-2 minutes for Magento) - this is normal\n- **Ensure ports are available**: `netstat -tlnp | grep -E \"7770|7780|9999\"`\n- **Clear cache after URL changes**: Required for Magento environments\n\n### Reset Environment\n```bash\n# Stop and remove container\ndocker stop [container_name]\ndocker rm [container_name]\n\n# Re-deploy (follow the steps in Section 1.2, Deploy Environments)\n```\n\n## 4. Important Notes\n\n- **Service startup time**: Allow 2-3 minutes for Magento, 1-2 minutes for Reddit\n- **Memory requirements**: Ensure Docker has at least 4GB RAM allocated per container\n- **URL configuration**: Must reconfigure base URLs after container restart for external access\n- **Port assignments**: \n  - 7770: Shopping\n  - 7780: Shopping Admin  \n  - 9999: Reddit"
  },
  {
    "path": "docs/mcp/postgres.md",
    "content": "# PostgreSQL\n\nThis guide walks you through preparing your PostgreSQL environment for MCPMark evaluation.\n\n## 1. Setup PostgreSQL Environment\n\n### 1.1 Start PostgreSQL with Docker\n\n1. **Run PostgreSQL Container**\n   Start a PostgreSQL instance using Docker:\n   ```bash\n   docker run -d \\\n     --name mcpmark-postgres \\\n     -e POSTGRES_PASSWORD=password \\\n     -e POSTGRES_USER=postgres \\\n     -p 5432:5432 \\\n     pgvector/pgvector:0.8.0-pg17-bookworm\n   ```\n\n2. **Verify Container is Running**\n   ```bash\n   docker ps | grep mcpmark-postgres\n   ```\n\n---\n\n### 1.2 Import Sample Databases\n\n1. **Download Database Backups**\n   Download the backup files and place them in `./postgres_state/` directory:\n   ```bash\n   mkdir -p ./postgres_state\n   cd ./postgres_state\n   \n   # Download all database backups\n   wget https://storage.mcpmark.ai/postgres/employees.backup\n   wget https://storage.mcpmark.ai/postgres/chinook.backup\n   wget https://storage.mcpmark.ai/postgres/dvdrental.backup\n   wget https://storage.mcpmark.ai/postgres/sports.backup\n   wget https://storage.mcpmark.ai/postgres/lego.backup\n   \n   cd ..\n   ```\n\n2. 
**Create Databases and Restore from Backups**\n   > Make sure your Postgres client version matches the server's version (e.g., pg17).\n\n   ```bash\n   # Set the password environment variable\n   export PGPASSWORD=password\n   \n   # Create and restore each database\n   createdb -h localhost -U postgres employees\n   pg_restore -h localhost -U postgres -d employees -v ./postgres_state/employees.backup\n   \n   createdb -h localhost -U postgres chinook\n   pg_restore -h localhost -U postgres -d chinook -v ./postgres_state/chinook.backup\n   \n   createdb -h localhost -U postgres dvdrental\n   pg_restore -h localhost -U postgres -d dvdrental -v ./postgres_state/dvdrental.backup\n   \n   createdb -h localhost -U postgres sports\n   pg_restore -h localhost -U postgres -d sports -v ./postgres_state/sports.backup\n   \n   createdb -h localhost -U postgres lego\n   pg_restore -h localhost -U postgres -d lego -v ./postgres_state/lego.backup\n   ```\n\n3. **Verify Databases are Imported**\n   ```bash\n   # List all databases\n   PGPASSWORD=password psql -h localhost -U postgres -c \"\\l\"\n   ```\n\n---\n\n## 2. Configure Environment Variables\n\nConfigure environment variables: make sure the following enservice credentials are added in `.mcp_env`:\n```env\n## PostgreSQL Configuration\nPOSTGRES_HOST=\"localhost\"\nPOSTGRES_PORT=\"5432\"\nPOSTGRES_USERNAME=\"postgres\"\nPOSTGRES_PASSWORD=\"password\"\n```\n\n\n## 3. Verify Connection\n\nVerify the PostgreSQL setup is working correctly:\n\n```bash\n# Test connection using psql\nPGPASSWORD=password psql -h localhost -U postgres -c \"SELECT version();\"\n```\n\n\n## 4. 
Common Operations\n\n### Stop PostgreSQL Container\n```bash\ndocker stop mcpmark-postgres\n```\n\n### Start PostgreSQL Container\n```bash\ndocker start mcpmark-postgres\n```\n\n### Remove PostgreSQL Container (Clean Setup)\n```bash\ndocker stop mcpmark-postgres\ndocker rm mcpmark-postgres\n```\n\n### Access PostgreSQL Shell\n```bash\nPGPASSWORD=password psql -h localhost -U postgres\n```\n\n## 5. Running Postgres Experiment\n\nFor a single task or task group, run:\n```bash\npython -m pipeline --exp-name EXPNAME --mcp postgres --tasks POSTGRESTASK --models MODEL --k K\n```\nHere *EXPNAME* is your custom experiment name, *POSTGRESTASK* is the Postgres task or task group to run (see `tasks/` for specific task information), *MODEL* is the selected model (see [Introduction Page](../introduction.md) for supported models), and *K* is the number of independent runs.\n\n\n## 6. Troubleshooting\n\n### Port Already in Use\nIf port 5432 is already in use, you can map the container to a different host port:\n```bash\ndocker run -d \\\n  --name mcpmark-postgres \\\n  -e POSTGRES_PASSWORD=password \\\n  -e POSTGRES_USER=postgres \\\n  -p 5433:5432 \\\n  pgvector/pgvector:0.8.0-pg17-bookworm\n```\nRemember to update `POSTGRES_PORT=\"5433\"` in your `.mcp_env` file.\n\n### Connection Refused\nEnsure the Docker container is running and the port mapping is correct:\n```bash\ndocker ps\ndocker logs mcpmark-postgres\n```\n"
  },
  {
    "path": "docs/quickstart.md",
    "content": "# Quick Start\nTo quickly experience MCPMark, we recommend firstly preparing the environment, and then execute the Postgres tasks.\n\n### 1. Clone MCPMark\n```bash\ngit clone https://github.com/eval-sys/mcpmark.git\n\ncd mcpmark\n```\n\n### 2. Setup Environment Variables\nTo setup the model access in environment variable, edit the `.mcp_env` file in `mcpmark/`.\n\n```env\n# Model Providers (set only those you need)\n## Google Gemini\nGEMINI_BASE_URL=\"https://your-gemini-base-url.com/v1\"\nGEMINI_API_KEY=\"your-gemini-api-key\"\n\n## DeepSeek\nDEEPSEEK_BASE_URL=\"https://your-deepseek-base-url.com/v1\"\nDEEPSEEK_API_KEY=\"your-deepseek-api-key\"\n\n## OpenAI\nOPENAI_BASE_URL=\"https://your-openai-base-url.com/v1\"\nOPENAI_API_KEY=\"your-openai-api-key\"\n\n## Anthropic\nANTHROPIC_BASE_URL=\"https://your-anthropic-base-url.com/v1\"\nANTHROPIC_API_KEY=\"your-anthropic-api-key\"\n\n## Moonshot\nMOONSHOT_BASE_URL=\"https://your-moonshot-base-url.com/v1\"\nMOONSHOT_API_KEY=\"your-moonshot-api-key\"\n\n## xAI\nXAI_BASE_URL=\"https://your-xai-base-url.com/v1\"\nXAI_API_KEY=\"your-xai-api-key\"\n```\n\n### 3. Run Quick Example in MCPMark\nSuppose you are running the employee query task with gemini-2.5-flash, and name your experiment as test-run-1, you can use the following command to test the `size_classification` task in `file_property`, which categorizes files by their sizes.\n\n```bash\npython -m pipeline \n--exp-name test-run-1\n--mcp filesystem\n--tasks file_property/size_classification\n--models gemini-2.5-flash\n```\n\nHere is the expected output (the verification may encounter failure due to model choices). 
\n[![Sample Experiment Output](https://i.postimg.cc/4NRDYRS2/task-sample-file-property-size-classification.png)](https://postimg.cc/Yj8nPZkQ)\n\nThe results are saved under `results/{exp_name}/{mcp}_{model}/{tasks}`. If `exp-name` is not specified, the default name is the timestamp of the experiment (but specifying `exp-name` is useful for resuming experiments).\n\nFor other MCP services, please refer to the [Installation and Docker Usage Page](./installation_and_docker_usage.md) for detailed instructions. \n\n\n"
  },
  {
    "path": "pipeline.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nMCPMark Unified Evaluation Pipeline\n===================================\n\nThis script provides an automated evaluation pipeline for testing Large Language Models (LLMs)\non various Multi-Step Cognitive Processes (MCP) services like Notion, GitHub, and PostgreSQL.\n\"\"\"\n\nimport argparse\nimport sys\nfrom datetime import datetime\nfrom pathlib import Path\nfrom dotenv import load_dotenv\n\nfrom src.logger import get_logger\nfrom src.evaluator import MCPEvaluator\nfrom src.agents import AGENT_REGISTRY\nfrom src.factory import MCPServiceFactory\nfrom src.model_config import ModelConfig\n\n\n# Suppress httpcore/anyio cleanup exceptions that don't affect functionality.\n# These \"Exception ignored\" messages are caused by MCP library's streamablehttp_client\n# timing issues during cleanup, but don't impact actual task execution.\ndef _suppress_cleanup_exceptions(unraisable):\n    \"\"\"Suppress known cleanup exceptions from httpcore/anyio.\"\"\"\n    msg = str(unraisable.exc_value)\n    if any(\n        pattern in msg\n        for pattern in [\n            \"async generator ignored GeneratorExit\",\n            \"cancel scope in a different task\",\n            \"no running event loop\",\n        ]\n    ):\n        return  # Silently ignore\n    # Use default handler for other exceptions\n    sys.__unraisablehook__(unraisable)\n\n\nsys.unraisablehook = _suppress_cleanup_exceptions\n\n# Initialize logger\nlogger = get_logger(__name__)\n\n\ndef main():\n    \"\"\"Main entry point for the evaluation pipeline.\"\"\"\n    parser = argparse.ArgumentParser(description=\"MCPMark Unified Evaluation Pipeline.\")\n\n    supported_mcp_services = MCPServiceFactory.get_supported_mcp_services()\n    supported_models = ModelConfig.get_supported_models()\n\n    # Main configuration\n    parser.add_argument(\n        \"--mcp\",\n        default=\"filesystem\",\n        choices=supported_mcp_services,\n        help=\"MCP service to use 
(default: filesystem)\",\n    )\n    parser.add_argument(\n        \"--models\",\n        required=True,\n        help=\"Comma-separated list of models to evaluate (e.g., 'o3,k2,gpt-4.1')\",\n    )\n\n    parser.add_argument(\n        \"--agent\",\n        default=\"mcpmark\",\n        choices=sorted(AGENT_REGISTRY.keys()),\n        help=\"Agent implementation to use (default: mcpmark)\",\n    )\n    parser.add_argument(\n        \"--tasks\",\n        default=\"all\",\n        help='Tasks to run: (1). \"all\"; (2). \"category\"; or (3). \"category/task\".',\n    )\n    parser.add_argument(\n        \"--task-suite\",\n        default=\"standard\",\n        choices=[\"standard\", \"easy\"],\n        help=\"Task suite to run (default: standard). Use 'easy' to run the lightweight dataset.\",\n    )\n    parser.add_argument(\n        \"--exp-name\",\n        default=None,\n        help=\"Experiment name; results are saved under results/<exp-name>/ (default: YYYY-MM-DD-HH-MM-SS)\",\n    )\n    parser.add_argument(\n        \"--k\",\n        type=int,\n        default=4,\n        help=\"Number of evaluation runs (default: 1)\",\n    )\n\n    # Execution configuration\n    parser.add_argument(\n        \"--timeout\",\n        type=int,\n        default=3600,\n        help=\"Timeout in seconds for agent execution\",\n    )\n    parser.add_argument(\n        \"--compaction-token\",\n        type=int,\n        default=999_999_999,\n        help=(\n            \"Auto-compact conversation when prompt tokens (from API usage) reach this limit. 
\"\n            \"Use 999999999 to disable compaction.\"\n        ),\n    )\n    parser.add_argument(\n        \"--reasoning-effort\",\n        default=\"default\",\n        choices=[\"default\", \"minimal\", \"low\", \"medium\", \"high\"],\n        help=\"Reasoning effort level for supported models (default: None)\",\n    )\n\n    # Output configuration\n    parser.add_argument(\n        \"--output-dir\",\n        type=Path,\n        default=Path(\"./results\"),\n        help=\"Directory to save results\",\n    )\n\n    # Load arguments and environment variables\n    args = parser.parse_args()\n    load_dotenv(dotenv_path=\".mcp_env\", override=False)\n\n    # Validate k parameter and exp-name requirement\n    if args.k > 1 and args.exp_name is None:\n        parser.error(\"--exp-name is required when k > 1\")\n\n    # Generate default exp-name if not provided\n    if args.exp_name is None:\n        args.exp_name = datetime.now().strftime(\"%Y-%m-%d-%H-%M-%S\")\n\n    # Parse models (no validation - allow unsupported models)\n    model_list = [m.strip() for m in args.models.split(\",\") if m.strip()]\n    if not model_list:\n        parser.error(\"No valid models provided\")\n\n    # Log warning for unsupported models but don't error\n    unsupported_models = [m for m in model_list if m not in supported_models]\n    if unsupported_models:\n        logger.warning(\n            f\"Using unsupported models: {', '.join(unsupported_models)}. 
Will use OPENAI_BASE_URL and OPENAI_API_KEY from environment.\"\n        )\n\n    logger.info(\"MCPMark Evaluation\")\n    logger.info(\n        f\"Experiment: {args.exp_name} | {len(model_list)} Model(s): {', '.join(model_list)}\"\n    )\n    logger.info(f\"Task suite: {args.task_suite}\")\n    if args.k > 1:\n        logger.info(f\"Running {args.k} evaluation runs for pass@k metrics\")\n\n    # Run k evaluation runs\n    for run_idx in range(1, args.k + 1):\n        if args.k > 1:\n            logger.info(f\"\\n{'=' * 80}\")\n            logger.info(f\"Starting Run {run_idx}/{args.k}\")\n            logger.info(f\"{'=' * 80}\\n\")\n\n            # For k-runs, results/{exp}/{mcp}__{model}/run-N\n            run_exp_name = f\"run-{run_idx}\"\n            run_output_dir = args.output_dir / args.exp_name\n        else:\n            # For single run, still use run-1 under service_model\n            run_exp_name = \"run-1\"\n            run_output_dir = args.output_dir / args.exp_name\n\n        # Run evaluation for each model\n        for i, model in enumerate(model_list, 1):\n            logger.info(f\"\\n{'=' * 60}\")\n            if args.k > 1:\n                logger.info(\n                    f\"Run {run_idx}/{args.k} | Model {i}/{len(model_list)}: {model}\"\n                )\n            else:\n                logger.info(f\"Starting evaluation {i}/{len(model_list)}: {model}\")\n            logger.info(f\"{'=' * 60}\\n\")\n\n            # Initialize and run the evaluation pipeline for this model\n            pipeline = MCPEvaluator(\n                mcp_service=args.mcp,\n                model=model,\n                timeout=args.timeout,\n                exp_name=run_exp_name,\n                output_dir=run_output_dir,\n                reasoning_effort=args.reasoning_effort,\n                agent_name=args.agent,\n                task_suite=args.task_suite,\n                compaction_token=args.compaction_token,\n            )\n\n            
pipeline.run_evaluation(args.tasks)\n            logger.info(f\"📁 Results: {pipeline.base_experiment_dir}\")\n\n    logger.info(f\"\\n{'=' * 60}\")\n    if args.k > 1:\n        logger.info(f\"✓ All {args.k} runs completed for {len(model_list)} model(s)\")\n        logger.info(\n            f\"Run `python -m src.aggregators.aggregate_results --exp-name {args.exp_name}` to compute all metrics\"\n        )\n    else:\n        logger.info(f\"✓ All evaluations completed for {len(model_list)} model(s)\")\n    logger.info(f\"{'=' * 60}\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[project]\nauthors = []\nname = \"MCPMark\"\nrequires-python = \">= 3.11\"\nversion = \"0.0.1\"\ndependencies = [\n  \"notion-client==2.4.0\",\n  \"playwright>=1.43.0\",\n  \"seaborn>=0.12.0\",\n  \"matplotlib>=3.7.0\",\n  \"numpy>=1.23.0\",\n  \"openai-agents>=0.2.3,<0.3\",\n  \"openai>=1.96.1\",\n  \"python-dotenv>=1.1.1,<2\",\n  \"ruff>=0.12.4,<0.13\",\n  \"psycopg2-binary>=2.9.10,<3\",\n  \"pyyaml>=6.0.2,<7\",\n  \"nest-asyncio>=1.6.0,<2\",\n  \"pixi\",\n  \"pipx>=1.7.1,<2\",\n  \"pgdumplib>=3.1.0,<4\",\n  \"litellm==1.80.0\"\n]\n\n[build-system]\nbuild-backend = \"hatchling.build\"\nrequires = [\"hatchling\"]\n\n[tool.pixi.workspace]\nchannels = [\"conda-forge\"]\nplatforms = [\n  \"osx-arm64\",\n  \"linux-aarch64\",\n  \"linux-64\",\n  \"win-64\",\n  \"osx-64\",\n]\n\n[tool.pixi.tasks]\nfmt = \"ruff\"\n\n[tool.ruff.format]\nindent-style = \"space\"\nline-ending = \"auto\"\n\n[tool.hatch.build.targets.wheel]\npackages = [\"src\", \"tasks\"]\n"
  },
  {
    "path": "run-benchmark.sh",
    "content": "#!/bin/bash\n\n# MCPMark Full Benchmark Runner\n# Runs all tasks across all MCP services for comprehensive model evaluation\n\nset -e\n\n# Default values\nMODELS=\"\"\nEXP_NAME=\"\"\nUSE_DOCKER=false\nSERVICES=\"filesystem,notion,github,postgres,playwright\"\nPARALLEL=false\nTIMEOUT=3600\nK=4\n\n# Color codes for output\nRED='\\033[0;31m'\nGREEN='\\033[0;32m'\nYELLOW='\\033[1;33m'\nBLUE='\\033[0;34m'\nNC='\\033[0m' # No Color\n\n# Function to print colored output\nprint_status() {\n    echo -e \"${BLUE}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $1\"\n}\n\nprint_success() {\n    echo -e \"${GREEN}✓${NC} $1\"\n}\n\nprint_warning() {\n    echo -e \"${YELLOW}⚠${NC} $1\"\n}\n\nprint_error() {\n    echo -e \"${RED}✗${NC} $1\"\n}\n\n# Parse arguments\nwhile [[ $# -gt 0 ]]; do\n    case $1 in\n        --models)\n            MODELS=\"$2\"\n            shift 2\n            ;;\n        --exp-name)\n            EXP_NAME=\"$2\"\n            shift 2\n            ;;\n        --docker)\n            USE_DOCKER=true\n            shift\n            ;;\n        --mcps)\n            SERVICES=\"$2\"\n            shift 2\n            ;;\n        --parallel)\n            PARALLEL=true\n            shift\n            ;;\n        --timeout)\n            TIMEOUT=\"$2\"\n            shift 2\n            ;;\n        --k)\n            K=\"$2\"\n            shift 2\n            ;;\n        --help)\n            cat << EOF\nUsage: $0 --models MODELS --exp-name NAME [OPTIONS]\n\nRun comprehensive benchmark across all MCP services.\n\nRequired Options:\n    --models MODELS      Comma-separated list of models to evaluate\n                        (e.g., \"o3,gpt-4.1,claude-4-sonnet\")\n    --exp-name NAME     Experiment name for organizing results\n\nOptional Options:\n    --docker            Run tasks in Docker containers (recommended)\n    --mcps SERVICES     Comma-separated list of services to test\n                        Default: filesystem,notion,github,postgres,playwright\n    
--parallel          Run services in parallel (experimental)\n    --timeout SECONDS   Timeout per task in seconds (default: 3600)\n    --k RUNS            Repeat runs per service for pass@k (default: 4)\n\nExamples:\n    # Run all services with Docker\n    $0 --models o3,gpt-4.1 --exp-name benchmark-1 --docker\n\n    # Run specific services locally\n    $0 --models o3 --exp-name test-1 --mcps filesystem,postgres\n\n    # Run with parallel execution\n    $0 --models claude-4 --exp-name parallel-test --docker --parallel\n\nEOF\n            exit 0\n            ;;\n        *)\n            print_error \"Unknown option: $1\"\n            echo \"Use --help for usage information\"\n            exit 1\n            ;;\n    esac\ndone\n\n# Validate required arguments\nif [ -z \"$MODELS\" ]; then\n    print_error \"Error: --models is required\"\n    exit 1\nfi\n\nif [ -z \"$EXP_NAME\" ]; then\n    print_error \"Error: --exp-name is required\"\n    exit 1\nfi\n\n# Check prerequisites\nif [ \"$USE_DOCKER\" = true ]; then\n    if ! command -v docker &> /dev/null; then\n        print_error \"Docker is not installed\"\n        exit 1\n    fi\n\n    # Always use Docker Hub image\n    DOCKER_IMAGE=\"evalsysorg/mcpmark:latest\"\n\n    # Check if Docker image exists locally, pull only if not found\n    if ! docker image inspect \"$DOCKER_IMAGE\" >/dev/null 2>&1; then\n        print_status \"Docker image not found locally, pulling from Docker Hub...\"\n        docker pull \"$DOCKER_IMAGE\" || {\n            print_error \"Failed to pull Docker image from Docker Hub\"\n            exit 1\n        }\n    else\n        print_status \"Using local Docker image: $DOCKER_IMAGE\"\n    fi\nelse\n    # Check Python installation\n    if ! command -v python3 &> /dev/null; then\n        print_error \"Python 3 is not installed\"\n        exit 1\n    fi\n\n    # Check if dependencies are installed\n    if ! 
python3 -c \"import src.evaluator\" 2>/dev/null; then\n        print_warning \"Python dependencies not installed\"\n        echo \"Installing dependencies...\"\n        pip install -e . || {\n            print_error \"Failed to install dependencies\"\n            exit 1\n        }\n    fi\nfi\n\n# Check .mcp_env file\nif [ ! -f .mcp_env ]; then\n    print_warning \".mcp_env file not found. Some tasks may fail without API credentials.\"\n    echo \"Create one from .mcp_env.example: cp .mcp_env.example .mcp_env\"\nfi\n\n# Convert comma-separated services to array\nIFS=',' read -ra SERVICE_ARRAY <<< \"$SERVICES\"\n\n# Summary\necho \"\"\nprint_status \"MCPMark Benchmark Configuration\"\necho \"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\"\necho \"Models:      $MODELS\"\necho \"Experiment:  $EXP_NAME\"\necho \"Services:    ${SERVICE_ARRAY[*]}\"\necho \"Docker:      $USE_DOCKER\"\necho \"Parallel:    $PARALLEL\"\necho \"Timeout:     ${TIMEOUT}s per task\"\necho \"K-Runs:      $K\"\necho \"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\"\necho \"\"\n\n# Create results directory\nRESULTS_DIR=\"./results/${EXP_NAME}\"\nmkdir -p \"$RESULTS_DIR\"\n\n# Log file for this run with timestamp and models\nTIMESTAMP=$(date '+%Y%m%d_%H%M%S')\nLOG_FILE=\"${RESULTS_DIR}/benchmark_${TIMESTAMP}.log\"\necho \"Benchmark started at $(date '+%Y-%m-%d %H:%M:%S')\" > \"$LOG_FILE\"\necho \"Models: $MODELS\" >> \"$LOG_FILE\"\necho \"Services: ${SERVICE_ARRAY[*]}\" >> \"$LOG_FILE\"\necho \"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\" >> \"$LOG_FILE\"\n\n# Function to run a single service\nrun_service() {\n    local service=$1\n    local start_time=$(date +%s)\n    local start_time_formatted=$(date '+%Y-%m-%d %H:%M:%S')\n\n    print_status \"[$start_time_formatted] Starting $service tasks...\"\n\n    if [ \"$USE_DOCKER\" = true ]; then\n        # Run with Docker\n        ./run-task.sh --mcp \"$service\" \\\n            --models \"$MODELS\" \\\n            --exp-name \"$EXP_NAME\" \\\n            --tasks all \\\n     
       --timeout \"$TIMEOUT\" \\\n            --k \"$K\" 2>&1 | tee -a \"$LOG_FILE\"\n    else\n        # Run locally\n        python3 -m pipeline \\\n            --mcp \"$service\" \\\n            --models \"$MODELS\" \\\n            --exp-name \"$EXP_NAME\" \\\n            --tasks all \\\n            --timeout \"$TIMEOUT\" \\\n            --k \"$K\" 2>&1 | tee -a \"$LOG_FILE\"\n    fi\n\n    local exit_code=$?\n    local end_time=$(date +%s)\n    local duration=$((end_time - start_time))\n\n    if [ $exit_code -eq 0 ]; then\n        print_success \"$service completed in ${duration}s\"\n        echo \"[$(date '+%Y-%m-%d %H:%M:%S')] $service: SUCCESS (${duration}s)\" >> \"${RESULTS_DIR}/summary.txt\"\n    else\n        print_error \"$service failed with exit code $exit_code\"\n        echo \"[$(date '+%Y-%m-%d %H:%M:%S')] $service: FAILED (exit code $exit_code)\" >> \"${RESULTS_DIR}/summary.txt\"\n    fi\n\n    return $exit_code\n}\n\n# Track overall results\nTOTAL_SERVICES=${#SERVICE_ARRAY[@]}\nCOMPLETED_SERVICES=0\nFAILED_SERVICES=0\n\n# Main execution\nBENCHMARK_START=$(date +%s)\n\nif [ \"$PARALLEL\" = true ]; then\n    print_status \"Running services in parallel...\"\n\n    # Run all services in background\n    for service in \"${SERVICE_ARRAY[@]}\"; do\n        (\n            run_service \"$service\"\n        ) &\n        pids+=($!)\n    done\n\n    # Wait for all background jobs and collect exit codes\n    for pid in \"${pids[@]}\"; do\n        if wait $pid; then\n            ((COMPLETED_SERVICES++))\n        else\n            ((FAILED_SERVICES++))\n        fi\n    done\nelse\n    print_status \"Running services sequentially...\"\n\n    for service in \"${SERVICE_ARRAY[@]}\"; do\n        if run_service \"$service\"; then\n            ((COMPLETED_SERVICES++))\n        else\n            ((FAILED_SERVICES++))\n            print_warning \"Continuing despite failure in $service\"\n        fi\n    done\nfi\n\nBENCHMARK_END=$(date 
+%s)\nTOTAL_DURATION=$((BENCHMARK_END - BENCHMARK_START))\n\n# Generate final summary\necho \"\"\necho \"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\"\nprint_status \"Benchmark Summary\"\necho \"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\"\necho \"Completed at:      $(date '+%Y-%m-%d %H:%M:%S')\"\necho \"Total Services:    $TOTAL_SERVICES\"\necho \"Completed:         $COMPLETED_SERVICES\"\necho \"Failed:            $FAILED_SERVICES\"\necho \"Total Duration:    ${TOTAL_DURATION}s ($(($TOTAL_DURATION / 60))m $(($TOTAL_DURATION % 60))s)\"\necho \"Results saved to:  $RESULTS_DIR\"\necho \"Log file:          $LOG_FILE\"\necho \"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\"\n\n\n# Final status\nif [ $FAILED_SERVICES -eq 0 ]; then\n    print_success \"Benchmark completed successfully!\"\n    exit 0\nelse\n    print_warning \"Benchmark completed with $FAILED_SERVICES failed service(s)\"\n    exit 1\nfi\n"
  },
  {
    "path": "run-task.sh",
    "content": "#!/bin/bash\n\n# MCPMark Task Runner\n# Enable strict error handling\nset -euo pipefail\n\n# Default values\nSERVICE=\"filesystem\"\nNETWORK_NAME=\"mcp-network\"\nPOSTGRES_CONTAINER=\"mcp-postgres\"\n\n# Resource limits (can be overridden by environment variables)\nDOCKER_MEMORY_LIMIT=\"${DOCKER_MEMORY_LIMIT:-4g}\"\nDOCKER_CPU_LIMIT=\"${DOCKER_CPU_LIMIT:-2}\"\n\n# Cleanup function\ncleanup() {\n    if [ \"${SERVICE:-}\" = \"postgres\" ]; then\n        if docker ps --format '{{.Names}}' | grep -q \"^${POSTGRES_CONTAINER}$\"; then\n            echo \"Cleaning up PostgreSQL container...\"\n            docker stop \"$POSTGRES_CONTAINER\" >/dev/null 2>&1 || true\n            docker rm \"$POSTGRES_CONTAINER\" >/dev/null 2>&1 || true\n        fi\n    fi\n}\n\n# Set up cleanup on exit\ntrap cleanup EXIT\n\n# Parse arguments\nwhile [[ $# -gt 0 ]]; do\n    case $1 in\n        --mcp) SERVICE=\"$2\"; shift 2 ;;\n        --help)\n            cat << EOF\nUsage: $0 [--mcp SERVICE] [PIPELINE_ARGS]\n\nRun MCPMark tasks in Docker containers.\n\nOptions:\n    --mcp SERVICE    MCP service (notion|github|filesystem|playwright|postgres)\n                        Default: filesystem\n\nEnvironment Variables:\n    DOCKER_MEMORY_LIMIT  Memory limit for container (default: 4g)\n    DOCKER_CPU_LIMIT     CPU limit for container (default: 2)\n    DOCKER_IMAGE_VERSION Docker image tag to use (default: latest)\n\nAll other arguments are passed directly to the pipeline.\n\nExamples:\n    $0 --mcp notion --models o3 --exp-name test-1 --tasks all\n    $0 --mcp postgres --models gpt-4 --exp-name pg-test --tasks basic_queries\nEOF\n            exit 0\n            ;;\n        *) break ;;  # Stop parsing, rest goes to pipeline\n    esac\ndone\n\n# Docker image tag can be overridden by environment variable\nDOCKER_IMAGE_REPO=\"evalsysorg/mcpmark\"\nDOCKER_IMAGE_VERSION=\"${DOCKER_IMAGE_VERSION:-latest}\"\nDOCKER_IMAGE=\"${DOCKER_IMAGE_REPO}:${DOCKER_IMAGE_VERSION}\"\n\n# Check if Docker 
image exists locally, pull only if not found\nif ! docker image inspect \"$DOCKER_IMAGE\" >/dev/null 2>&1; then\n    echo \"Docker image not found locally, pulling from Docker Hub...\"\n    docker pull \"$DOCKER_IMAGE\" || {\n        echo \"Error: Failed to pull Docker image from Docker Hub\"\n        echo \"Please check your internet connection or Docker Hub access\"\n        exit 1\n    }\nelse\n    echo \"Using local Docker image: $DOCKER_IMAGE\"\nfi\n\n# Check if .mcp_env exists (warn but don't fail)\nif [ ! -f .mcp_env ]; then\n    echo \"Warning: .mcp_env file not found. Some tasks may fail without API credentials.\"\nfi\n\n# Create network if doesn't exist\nif ! docker network ls --format '{{.Name}}' | grep -q \"^${NETWORK_NAME}$\"; then\n    echo \"Creating Docker network: $NETWORK_NAME\"\n    docker network create \"$NETWORK_NAME\" || {\n        echo \"Error: Failed to create Docker network\"\n        exit 1\n    }\nfi\n\n# Service-specific configurations\nif [ \"$SERVICE\" = \"postgres\" ]; then\n    # For postgres service, ensure PostgreSQL container is running\n    if ! 
docker ps --format '{{.Names}}' | grep -q \"^${POSTGRES_CONTAINER}$\"; then\n        echo \"Starting PostgreSQL container...\"\n        docker run -d \\\n            --name \"$POSTGRES_CONTAINER\" \\\n            --network \"$NETWORK_NAME\" \\\n            -e POSTGRES_DATABASE=postgres \\\n            -e POSTGRES_USER=postgres \\\n            -e POSTGRES_PASSWORD=\"${POSTGRES_PASSWORD:-password}\" \\\n            pgvector/pgvector:0.8.0-pg17-bookworm\n\n        echo \"Waiting for PostgreSQL to be ready...\"\n        for i in {1..10}; do\n            if docker exec \"$POSTGRES_CONTAINER\" pg_isready -U postgres >/dev/null 2>&1; then\n                echo \"PostgreSQL is ready!\"\n                break\n            fi\n            sleep 1\n        done\n    else\n        echo \"PostgreSQL container already running\"\n    fi\n\n    # Run task with network connection to postgres\n    docker run --rm \\\n        --memory=\"$DOCKER_MEMORY_LIMIT\" \\\n        --cpus=\"$DOCKER_CPU_LIMIT\" \\\n        --network \"$NETWORK_NAME\" \\\n        -e POSTGRES_HOST=\"$POSTGRES_CONTAINER\" \\\n        -e POSTGRES_PORT=5432 \\\n        -e POSTGRES_USERNAME=postgres \\\n        -e POSTGRES_PASSWORD=\"${POSTGRES_PASSWORD:-password}\" \\\n        -e POSTGRES_DATABASE=postgres \\\n        -v \"$(pwd)/results:/app/results\" \\\n        -v \"$(pwd)/postgres_state:/app/postgres_state\" \\\n        $([ -f .mcp_env ] && echo \"-v $(pwd)/.mcp_env:/app/.mcp_env:ro\") \\\n        \"$DOCKER_IMAGE\" \\\n        python3 -m pipeline --mcp \"$SERVICE\" --k 1 \"$@\"\nelif [ \"$SERVICE\" = \"filesystem\" ]; then\n    # For filesystem service, mount test_environments\n    docker run --rm \\\n        --memory=\"$DOCKER_MEMORY_LIMIT\" \\\n        --cpus=\"$DOCKER_CPU_LIMIT\" \\\n        -v \"$(pwd)/results:/app/results\" \\\n        -v \"$(pwd)/test_environments:/app/test_environments\" \\\n        $([ -f .mcp_env ] && echo \"-v $(pwd)/.mcp_env:/app/.mcp_env:ro\") \\\n        \"$DOCKER_IMAGE\" \\\n        
python3 -m pipeline --mcp \"$SERVICE\" --k 1 \"$@\"\nelif [ \"$SERVICE\" = \"insforge\" ]; then\n    # For Insforge service, use host network to access Insforge backend on host\n    docker run --rm \\\n        --memory=\"$DOCKER_MEMORY_LIMIT\" \\\n        --cpus=\"$DOCKER_CPU_LIMIT\" \\\n        --add-host=host.docker.internal:host-gateway \\\n        -v \"$(pwd)/results:/app/results\" \\\n        $([ -f .mcp_env ] && echo \"-v $(pwd)/.mcp_env:/app/.mcp_env:ro\") \\\n        \"$DOCKER_IMAGE\" \\\n        python3 -m pipeline --mcp \"$SERVICE\" --k 1 \"$@\"\nelse\n    # For other services (notion, github, playwright, etc.)\n    docker run --rm \\\n        --memory=\"$DOCKER_MEMORY_LIMIT\" \\\n        --cpus=\"$DOCKER_CPU_LIMIT\" \\\n        -v \"$(pwd)/results:/app/results\" \\\n        -v \"$(pwd)/test_environments:/app/test_environments\" \\\n        $([ -f .mcp_env ] && echo \"-v $(pwd)/.mcp_env:/app/.mcp_env:ro\") \\\n        $([ -f notion_state.json ] && echo \"-v $(pwd)/notion_state.json:/app/notion_state.json\") \\\n        \"$DOCKER_IMAGE\" \\\n        python3 -m pipeline --mcp \"$SERVICE\" --k 1 \"$@\"\nfi\n\necho \"Task completed!\"\n"
  },
  {
    "path": "src/agents/__init__.py",
    "content": "\"\"\"\nMCPMark Agent Module\n====================\n\nProvides agent implementations and registry for MCPMark.\n\"\"\"\n\nfrom .base_agent import BaseMCPAgent\nfrom .mcpmark_agent import MCPMarkAgent\nfrom .react_agent import ReActAgent\n\nAGENT_REGISTRY = {\n    \"mcpmark\": MCPMarkAgent,\n    \"react\": ReActAgent,\n}\n\n__all__ = [\"BaseMCPAgent\", \"MCPMarkAgent\", \"ReActAgent\", \"AGENT_REGISTRY\"]\n\n"
  },
  {
    "path": "src/agents/base_agent.py",
    "content": "\"\"\"Shared base agent functionality for MCPMark agents.\"\"\"\n\nfrom __future__ import annotations\n\nimport asyncio\nimport copy\nimport json\nimport uuid\nfrom abc import ABC, abstractmethod\nfrom typing import Any, Dict, List, Optional, Callable\n\nfrom src.logger import get_logger\nfrom .mcp import MCPStdioServer, MCPHttpServer\nfrom .utils import TokenUsageTracker\n\nlogger = get_logger(__name__)\n\n\nclass BaseMCPAgent(ABC):\n    \"\"\"Base class with shared functionality for MCPMark agents.\"\"\"\n\n    STDIO_SERVICES = [\n        \"notion\",\n        \"filesystem\",\n        \"playwright\",\n        \"playwright_webarena\",\n        \"postgres\",\n        \"insforge\",\n        \"github\",\n    ]\n    HTTP_SERVICES = [\"supabase\"]\n    DEFAULT_TIMEOUT = 600\n    COMPACTION_DISABLED_TOKEN = 999_999_999\n\n    CLAUDE_THINKING_BUDGETS = {\n        \"low\": 1024,\n        \"medium\": 2048,\n        \"high\": 4096,\n    }\n\n    def __init__(\n        self,\n        litellm_input_model_name: str,\n        api_key: str,\n        base_url: str,\n        mcp_service: str,\n        timeout: int = DEFAULT_TIMEOUT,\n        service_config: Optional[Dict[str, Any]] = None,\n        service_config_provider: Optional[Callable[[], Dict[str, Any]]] = None,\n        reasoning_effort: Optional[str] = \"default\",\n        compaction_token: int = COMPACTION_DISABLED_TOKEN,\n    ):\n        self.litellm_input_model_name = litellm_input_model_name\n        self.api_key = api_key\n        self.base_url = base_url\n        self.mcp_service = mcp_service\n        self.timeout = timeout\n        self.service_config = service_config or {}\n        self._service_config_provider = service_config_provider\n        self.reasoning_effort = reasoning_effort or \"default\"\n        self.compaction_token = int(compaction_token)\n\n        self.is_claude = self._is_anthropic_model(litellm_input_model_name)\n        self.use_claude_thinking = self.is_claude and 
self.reasoning_effort != \"default\"\n\n        self.usage_tracker = TokenUsageTracker()\n        self.litellm_run_model_name = None\n\n        self._partial_messages: List[Dict[str, Any]] = []\n        self._partial_token_usage: Dict[str, int] = {}\n        self._partial_turn_count: int = 0\n\n        logger.debug(\n            \"Initialized %s for service '%s' with model '%s'\",\n            self.__class__.__name__,\n            self.mcp_service,\n            self.litellm_input_model_name,\n        )\n\n        # Warn if Gemini 3 model uses unsupported reasoning_effort value\n        if self._is_gemini_3_model() and self.reasoning_effort not in [\n            \"default\",\n            \"low\",\n            \"high\",\n        ]:\n            logger.warning(\n                \"Gemini 3 models only support reasoning_effort 'low' or 'high', \"\n                \"got '%s'. LiteLLM may map this to the nearest supported value.\",\n                self.reasoning_effort,\n            )\n\n    def __repr__(self) -> str:  # pragma: no cover - debug helper\n        return (\n            f\"{self.__class__.__name__}(service='{self.mcp_service}', \"\n            f\"model='{self.litellm_input_model_name}')\"\n        )\n\n    @abstractmethod\n    async def execute(\n        self,\n        instruction: str,\n        tool_call_log_file: Optional[str] = None,\n    ) -> Dict[str, Any]:\n        \"\"\"Execute the agent logic and return execution metadata.\"\"\"\n\n    def execute_sync(\n        self,\n        instruction: str,\n        tool_call_log_file: Optional[str] = None,\n    ) -> Dict[str, Any]:\n        \"\"\"Synchronous wrapper for async execution.\"\"\"\n        return asyncio.run(self.execute(instruction, tool_call_log_file))\n\n    def get_usage_stats(self) -> Dict[str, Any]:\n        \"\"\"Return aggregated usage statistics.\"\"\"\n        return self.usage_tracker.get_stats()\n\n    def reset_usage_stats(self):\n        \"\"\"Clear usage statistics.\"\"\"\n        
self.usage_tracker.reset()\n\n    # ------------------------------------------------------------------\n    # Shared helpers\n    # ------------------------------------------------------------------\n\n    def _is_anthropic_model(self, model_name: str) -> bool:\n        return \"claude\" in model_name.lower()\n\n    def _get_claude_thinking_budget(self) -> Optional[int]:\n        if not self.use_claude_thinking:\n            return None\n        return self.CLAUDE_THINKING_BUDGETS.get(self.reasoning_effort, 2048)\n\n    def _refresh_service_config(self):\n        if not self._service_config_provider:\n            return\n        try:\n            latest_cfg = self._service_config_provider() or {}\n            self.service_config.update(latest_cfg)\n        except Exception as exc:  # pragma: no cover - best effort refresh\n            logger.warning(\"Failed to refresh service config: %s\", exc)\n\n    def _reset_progress(self):\n        self._partial_messages = []\n        self._partial_token_usage = {}\n        self._partial_turn_count = 0\n\n    def _update_progress(\n        self,\n        messages: List[Dict[str, Any]],\n        token_usage: Dict[str, Any],\n        turn_count: int,\n    ):\n        try:\n            self._partial_messages = copy.deepcopy(messages)\n            self._partial_token_usage = dict(token_usage or {})\n            self._partial_turn_count = int(turn_count or 0)\n        except Exception:  # pragma: no cover - defensive copy\n            pass\n\n    # ------------------------------------------------------------------\n    # MCP server management\n    # ------------------------------------------------------------------\n\n    async def _create_mcp_server(self) -> Any:\n        if self.mcp_service in self.STDIO_SERVICES:\n            return self._create_stdio_server()\n        if self.mcp_service in self.HTTP_SERVICES:\n            return self._create_http_server()\n        raise ValueError(f\"Unsupported MCP service: 
{self.mcp_service}\")\n\n    def _create_stdio_server(self) -> MCPStdioServer:\n        if self.mcp_service == \"notion\":\n            notion_key = self.service_config.get(\"notion_key\")\n            if not notion_key:\n                raise ValueError(\"Notion API key required\")\n            return MCPStdioServer(\n                command=\"npx\",\n                args=[\"-y\", \"@notionhq/notion-mcp-server\"],\n                env={\n                    \"OPENAPI_MCP_HEADERS\": (\n                        '{\"Authorization\": \"Bearer ' + notion_key + '\", '\n                        '\"Notion-Version\": \"2022-06-28\"}'\n                    )\n                },\n            )\n\n        if self.mcp_service == \"filesystem\":\n            test_directory = self.service_config.get(\"test_directory\")\n            if not test_directory:\n                raise ValueError(\"Test directory required for filesystem service\")\n            return MCPStdioServer(\n                command=\"npx\",\n                args=[\n                    \"-y\",\n                    \"@modelcontextprotocol/server-filesystem\",\n                    str(test_directory),\n                ],\n            )\n\n        if self.mcp_service in (\"playwright\", \"playwright_webarena\"):\n            browser = self.service_config.get(\"browser\", \"chromium\")\n            headless = self.service_config.get(\"headless\", True)\n            viewport_width = self.service_config.get(\"viewport_width\", 1280)\n            viewport_height = self.service_config.get(\"viewport_height\", 720)\n\n            args = [\"-y\", \"@playwright/mcp@latest\"]\n            if headless:\n                args.append(\"--headless\")\n            args.extend(\n                [\n                    \"--isolated\",\n                    \"--no-sandbox\",\n                    \"--browser\",\n                    browser,\n                    \"--viewport-size\",\n                    
f\"{viewport_width},{viewport_height}\",\n                ]\n            )\n            return MCPStdioServer(command=\"npx\", args=args)\n\n        if self.mcp_service == \"postgres\":\n            host = self.service_config.get(\"host\", \"localhost\")\n            port = self.service_config.get(\"port\", 5432)\n            username = self.service_config.get(\"username\")\n            password = self.service_config.get(\"password\")\n            database = self.service_config.get(\n                \"current_database\"\n            ) or self.service_config.get(\"database\")\n            if not all([username, password, database]):\n                raise ValueError(\"PostgreSQL requires username, password, and database\")\n            database_url = (\n                f\"postgresql://{username}:{password}@{host}:{port}/{database}\"\n            )\n            return MCPStdioServer(\n                command=\"pipx\",\n                args=[\"run\", \"postgres-mcp\", \"--access-mode=unrestricted\"],\n                env={\"DATABASE_URI\": database_url},\n            )\n\n        if self.mcp_service == \"insforge\":\n            api_key = self.service_config.get(\"api_key\")\n            backend_url = self.service_config.get(\"backend_url\")\n            if not all([api_key, backend_url]):\n                raise ValueError(\"Insforge requires api_key and backend_url\")\n            return MCPStdioServer(\n                command=\"npx\",\n                args=[\"-y\", \"@insforge/mcp@dev\"],\n                env={\n                    \"INSFORGE_API_KEY\": api_key,\n                    \"INSFORGE_BACKEND_URL\": backend_url,\n                },\n            )\n\n        raise ValueError(f\"Unsupported stdio service: {self.mcp_service}\")\n\n    def _create_http_server(self) -> MCPHttpServer:\n        if self.mcp_service == \"github\":\n            github_token = self.service_config.get(\"github_token\")\n            if not github_token:\n                raise 
ValueError(\"GitHub token required\")\n            return MCPHttpServer(\n                url=\"https://api.githubcopilot.com/mcp/\",\n                headers={\n                    \"Authorization\": f\"Bearer {github_token}\",\n                    \"User-Agent\": \"MCPMark/1.0\",\n                },\n            )\n        raise ValueError(f\"Unsupported HTTP service: {self.mcp_service}\")\n\n    # ------------------------------------------------------------------\n    # Message/Tool formatting helpers\n    # ------------------------------------------------------------------\n\n    def _compaction_enabled(self) -> bool:\n        return 0 < self.compaction_token < self.COMPACTION_DISABLED_TOKEN\n\n    def _count_prompt_tokens_litellm(self, messages: List[Dict[str, Any]]) -> int:\n        try:\n            from litellm import token_counter\n\n            return int(\n                token_counter(model=self.litellm_input_model_name, messages=messages)\n                or 0\n            )\n        except Exception:  # pragma: no cover - best effort\n            return 0\n\n    def _convert_to_sdk_format(\n        self, messages: List[Dict[str, Any]]\n    ) -> List[Dict[str, Any]]:\n        sdk_format: List[Dict[str, Any]] = []\n        function_call_map: Dict[str, str] = {}\n\n        for msg in messages:\n            role = msg.get(\"role\")\n\n            if role == \"user\":\n                user_content = msg.get(\"content\", \"\")\n                if isinstance(user_content, list):\n                    tool_results = [\n                        item\n                        for item in user_content\n                        if isinstance(item, dict) and item.get(\"type\") == \"tool_result\"\n                    ]\n                    if tool_results:\n                        for tr in tool_results:\n                            content_items = tr.get(\"content\", [])\n                            text_content = \"\"\n                            for ci in 
content_items:\n                                if isinstance(ci, dict) and ci.get(\"type\") == \"text\":\n                                    text_content = ci.get(\"text\", \"\")\n                                    break\n                            sdk_format.append(\n                                {\n                                    \"call_id\": tr.get(\"tool_use_id\", \"\"),\n                                    \"output\": json.dumps(\n                                        {\n                                            \"type\": \"text\",\n                                            \"text\": text_content,\n                                            \"annotations\": None,\n                                            \"meta\": None,\n                                        }\n                                    ),\n                                    \"type\": \"function_call_output\",\n                                }\n                            )\n                    else:\n                        text_parts = []\n                        for item in user_content:\n                            if isinstance(item, dict) and item.get(\"type\") == \"text\":\n                                text_parts.append(item.get(\"text\", \"\"))\n                        sdk_format.append(\n                            {\"content\": \"\\n\".join(text_parts), \"role\": \"user\"}\n                        )\n                else:\n                    sdk_format.append({\"content\": user_content, \"role\": \"user\"})\n\n            elif role == \"assistant\":\n                tool_calls = msg.get(\"tool_calls\", [])\n                function_call = msg.get(\"function_call\")\n                content = msg.get(\"content\")\n\n                if isinstance(content, list):\n                    text_parts = []\n                    claude_tool_uses = []\n                    for block in content:\n                        if isinstance(block, dict):\n                            
if block.get(\"type\") == \"text\":\n                                text_parts.append(block.get(\"text\", \"\"))\n                            elif block.get(\"type\") == \"thinking\":\n                                thinking_text = block.get(\"thinking\", \"\")\n                                if thinking_text:\n                                    text_parts.append(\n                                        f\"<think>\\n{thinking_text}\\n</think>\"\n                                    )\n                            elif block.get(\"type\") == \"tool_use\":\n                                claude_tool_uses.append(block)\n                    content = \"\\n\".join(text_parts)\n                    if claude_tool_uses and not tool_calls:\n                        tool_calls = []\n                        for tu in claude_tool_uses:\n                            tool_calls.append(\n                                {\n                                    \"id\": tu.get(\"id\"),\n                                    \"function\": {\n                                        \"name\": tu.get(\"name\"),\n                                        \"arguments\": json.dumps(tu.get(\"input\", {})),\n                                    },\n                                }\n                            )\n\n                if content:\n                    sdk_format.append(\n                        {\n                            \"id\": \"__fake_id__\",\n                            \"content\": [\n                                {\n                                    \"annotations\": [],\n                                    \"text\": content,\n                                    \"type\": \"output_text\",\n                                }\n                            ],\n                            \"role\": \"assistant\",\n                            \"status\": \"completed\",\n                            \"type\": \"message\",\n                        }\n                    )\n\n       
         if tool_calls:\n                    for tool_call in tool_calls:\n                        call_id = tool_call.get(\"id\", f\"call_{uuid.uuid4().hex}\")\n                        func_name = tool_call.get(\"function\", {}).get(\"name\", \"\")\n                        sdk_format.append(\n                            {\n                                \"arguments\": tool_call.get(\"function\", {}).get(\n                                    \"arguments\", \"{}\"\n                                ),\n                                \"call_id\": call_id,\n                                \"name\": func_name,\n                                \"type\": \"function_call\",\n                                \"id\": \"__fake_id__\",\n                            }\n                        )\n\n                if function_call:\n                    func_name = function_call.get(\"name\", \"\")\n                    call_id = f\"call_{uuid.uuid4().hex}\"\n                    function_call_map[func_name] = call_id\n                    sdk_format.append(\n                        {\n                            \"arguments\": function_call.get(\"arguments\", \"{}\"),\n                            \"call_id\": call_id,\n                            \"name\": func_name,\n                            \"type\": \"function_call\",\n                            \"id\": \"__fake_id__\",\n                        }\n                    )\n\n            elif role == \"tool\":\n                sdk_format.append(\n                    {\n                        \"call_id\": msg.get(\"tool_call_id\", \"\"),\n                        \"output\": json.dumps(\n                            {\n                                \"type\": \"text\",\n                                \"text\": msg.get(\"content\", \"\"),\n                                \"annotations\": None,\n                                \"meta\": None,\n                            }\n                        ),\n                        
\"type\": \"function_call_output\",\n                    }\n                )\n\n            elif role == \"function\":\n                func_name = msg.get(\"name\", \"\")\n                call_id = function_call_map.get(func_name, f\"call_{uuid.uuid4().hex}\")\n                sdk_format.append(\n                    {\n                        \"call_id\": call_id,\n                        \"output\": json.dumps(\n                            {\n                                \"type\": \"text\",\n                                \"text\": msg.get(\"content\", \"\"),\n                                \"annotations\": None,\n                                \"meta\": None,\n                            }\n                        ),\n                        \"type\": \"function_call_output\",\n                    }\n                )\n\n        return sdk_format\n\n    def _convert_to_anthropic_format(\n        self, tools: List[Dict[str, Any]]\n    ) -> List[Dict[str, Any]]:\n        anthropic_tools = []\n        for tool in tools:\n            anthropic_tool = {\n                \"name\": tool.get(\"name\"),\n                \"description\": tool.get(\"description\", \"\"),\n                \"input_schema\": tool.get(\n                    \"inputSchema\",\n                    {\"type\": \"object\", \"properties\": {}, \"required\": []},\n                ),\n            }\n            anthropic_tools.append(anthropic_tool)\n        return anthropic_tools\n\n    def _is_gemini_model(self) -> bool:\n        model_lower = self.litellm_input_model_name.lower()\n        return \"gemini\" in model_lower or \"bison\" in model_lower\n\n    def _is_gemini_3_model(self) -> bool:\n        \"\"\"Check if this is a Gemini 3 series model.\"\"\"\n        model_lower = self.litellm_input_model_name.lower()\n        return \"gemini-3\" in model_lower or \"gemini/gemini-3\" in model_lower\n\n    def _simplify_schema_for_gemini(\n        self, schema: Optional[Dict[str, Any]]\n    ) -> 
Dict[str, Any]:\n        if not isinstance(schema, dict):\n            return schema or {}\n\n        simplified: Dict[str, Any] = {}\n        for key, value in schema.items():\n            if key == \"type\" and isinstance(value, list):\n                simplified[key] = value[0] if value else \"string\"\n            elif key == \"items\" and isinstance(value, dict):\n                simplified[key] = self._simplify_schema_for_gemini(value)\n            elif key == \"properties\" and isinstance(value, dict):\n                simplified[key] = {\n                    prop_key: self._simplify_schema_for_gemini(prop_val)\n                    for prop_key, prop_val in value.items()\n                }\n            elif isinstance(value, dict):\n                simplified[key] = self._simplify_schema_for_gemini(value)\n            elif isinstance(value, list) and key not in (\"required\", \"enum\"):\n                simplified[key] = [\n                    self._simplify_schema_for_gemini(item)\n                    if isinstance(item, dict)\n                    else item\n                    for item in value\n                ]\n            else:\n                simplified[key] = value\n        return simplified\n\n    def _convert_to_openai_format(\n        self, tools: List[Dict[str, Any]]\n    ) -> List[Dict[str, Any]]:\n        functions = []\n        is_gemini = self._is_gemini_model()\n\n        if is_gemini:\n            logger.debug(\n                \"Detected Gemini model '%s' – simplifying tool schemas\",\n                self.litellm_input_model_name,\n            )\n\n        for tool in tools:\n            input_schema = tool.get(\n                \"inputSchema\", {\"type\": \"object\", \"properties\": {}, \"required\": []}\n            )\n            if is_gemini:\n                simplified = self._simplify_schema_for_gemini(input_schema)\n                if simplified != input_schema:\n                    input_schema = simplified\n                    
logger.debug(\"Simplified schema for tool '%s'\", tool.get(\"name\"))\n\n            functions.append(\n                {\n                    \"name\": tool.get(\"name\"),\n                    \"description\": tool.get(\"description\", \"\"),\n                    \"parameters\": input_schema,\n                }\n            )\n\n        if is_gemini:\n            logger.info(\"Converted %d tools for Gemini compatibility\", len(functions))\n\n        return functions\n"
  },
  {
    "path": "src/agents/mcp/__init__.py",
    "content": "\"\"\"\nMCP (Model Context Protocol) Components\n========================================\n\nMinimal MCP server implementations for MCPMark.\n\"\"\"\n\nfrom .stdio_server import MCPStdioServer\nfrom .http_server import MCPHttpServer\n\n__all__ = [\"MCPStdioServer\", \"MCPHttpServer\"]"
  },
  {
    "path": "src/agents/mcp/http_server.py",
    "content": "\"\"\"\nMinimal MCP HTTP Server Implementation  \n=======================================\n\nProvides HTTP-based MCP server communication for services like GitHub.\n\"\"\"\n\nimport asyncio\nfrom contextlib import AsyncExitStack\nfrom typing import Any, Dict, List, Optional\n\nfrom mcp import ClientSession\nfrom mcp.client.streamable_http import streamablehttp_client\n\nclass MCPHttpServer:\n    \"\"\"\n    HTTP-based MCP client using the official MCP Python SDK\n    (Streamable HTTP transport).\n    \"\"\"\n\n    def __init__(\n        self,\n        url: str,\n        headers: Optional[Dict[str, str]] = None,\n        timeout: int = 30,\n    ):\n        self.url = url.rstrip(\"/\")\n        self.headers = headers or {}\n        self.timeout = timeout\n\n        self._stack: Optional[AsyncExitStack] = None\n        self.session: Optional[ClientSession] = None\n        self._tools_cache: Optional[List[Dict[str, Any]]] = None\n\n    async def __aenter__(self):\n        await self.start()\n        return self\n\n    async def __aexit__(self, exc_type, exc, tb):\n        await self.stop()\n\n    async def start(self):\n        \"\"\"Open Streamable HTTP transport and initialize MCP session.\"\"\"\n        self._stack = AsyncExitStack()\n\n        read_stream, write_stream, _ = await self._stack.enter_async_context(\n            streamablehttp_client(self.url, headers=self.headers)\n        )\n\n        self.session = await self._stack.enter_async_context(ClientSession(read_stream, write_stream))\n        await asyncio.wait_for(self.session.initialize(), timeout=self.timeout)\n\n    async def stop(self):\n        \"\"\"Close the session/transport cleanly.\"\"\"\n        if self._stack:\n            await self._stack.aclose()\n        self._stack = None\n        self.session = None\n        self._tools_cache = None\n\n    async def list_tools(self) -> List[Dict[str, Any]]:\n        \"\"\"Return tool definitions (cached).\"\"\"\n        if 
self._tools_cache is not None:\n            return self._tools_cache\n        if not self.session:\n            raise RuntimeError(\"MCP HTTP client not started\")\n\n        resp = await asyncio.wait_for(self.session.list_tools(), timeout=self.timeout)\n        self._tools_cache = [t.model_dump() for t in resp.tools]\n        return self._tools_cache\n\n    async def call_tool(self, name: str, arguments: Dict[str, Any]) -> Any:\n        \"\"\"Invoke a remote tool and return the structured result.\"\"\"\n        if not self.session:\n            raise RuntimeError(\"MCP HTTP client not started\")\n\n        result = await asyncio.wait_for(self.session.call_tool(name, arguments), timeout=self.timeout)\n        return result.model_dump()\n"
  },
  {
    "path": "src/agents/mcp/stdio_server.py",
    "content": "\"\"\"\nMinimal MCP Stdio Server Implementation\n========================================\n\nProvides stdio-based MCP server communication for services like\nNotion, Filesystem, Playwright, and Postgres.\n\"\"\"\n\nimport asyncio\nimport os\nfrom contextlib import AsyncExitStack\nfrom typing import Any, Dict, List, Optional\n\nfrom mcp import ClientSession, StdioServerParameters\nfrom mcp.client.stdio import stdio_client\n\nclass MCPStdioServer:\n    \"\"\"Lightweight wrapper around the official MCP Python SDK.\"\"\"\n\n    def __init__(self, command: str, args: List[str], env: Optional[Dict[str, str]] = None, timeout: int = 120):\n        self.params = StdioServerParameters(command=command, args=args, env={**os.environ, **(env or {})})\n        self.timeout = timeout\n        self._stack: Optional[AsyncExitStack] = None\n        self._streams = None\n        self.session: Optional[ClientSession] = None\n\n    async def __aenter__(self):\n        self._stack = AsyncExitStack()\n        read, write = await self._stack.enter_async_context(stdio_client(self.params))\n        self.session = await self._stack.enter_async_context(ClientSession(read, write))\n        await asyncio.wait_for(self.session.initialize(), timeout=self.timeout)\n        return self\n\n    async def __aexit__(self, exc_type, exc, tb):\n        if self._stack:\n            await self._stack.aclose()\n        self._stack = None\n        self.session = None\n\n    async def list_tools(self) -> List[Dict[str, Any]]:\n        resp = await asyncio.wait_for(self.session.list_tools(), timeout=self.timeout)\n        return [t.model_dump() for t in resp.tools]\n\n    async def call_tool(self, name: str, arguments: Dict[str, Any]) -> Any:\n        result = await asyncio.wait_for(self.session.call_tool(name, arguments), timeout=self.timeout)\n        return result.model_dump()  # 同上，转成 dict\n"
  },
  {
    "path": "src/agents/mcpmark_agent.py",
    "content": "\"\"\"\nMCPMark Agent Implementation\n============================\n\nUnified agent using LiteLLM for all model interactions with minimal MCP support.\n\"\"\"\n\nimport asyncio\nimport json\nimport time\nfrom typing import Any, Dict, List, Optional, Callable\nfrom pydantic import AnyUrl\n\nimport httpx\nimport litellm\nimport nest_asyncio\n\nfrom src.logger import get_logger\nfrom .base_agent import BaseMCPAgent\nfrom .mcp import MCPStdioServer, MCPHttpServer\n\n# Apply nested asyncio support\nnest_asyncio.apply()\n\n# Configure LiteLLM\nlitellm.suppress_debug_info = True\n\nlogger = get_logger(__name__)\n\n\n# To fix the \"Object of type AnyUrl is not JSON serializable\" error in the find_file_contents function.\nclass CustomJSONEncoder(json.JSONEncoder):\n    def default(self, obj):\n        if isinstance(obj, AnyUrl):\n            return str(obj)\n        return super().default(obj)\n\n\nclass MCPMarkAgent(BaseMCPAgent):\n    \"\"\"\n    Unified agent for LLM and MCP server management using LiteLLM.\n\n    - Anthropic models: Native MCP support via extra_body\n    - Other models: Manual MCP server management with function calling\n    \"\"\"\n\n    MAX_TURNS = 100\n    SYSTEM_PROMPT = (\n        \"You are a helpful agent that uses tools iteratively to complete the user's task, \"\n        'and when finished, provides the final answer or simply states \"Task completed\" without further tool calls.'\n    )\n    COMPACTION_PROMPT = (\n        \"You are performing a CONTEXT CHECKPOINT COMPACTION.\\n\"\n        \"Summarize the conversation so far for another model to continue.\\n\\n\"\n        \"Include:\\n\"\n        \"- Current progress and key decisions made\\n\"\n        \"- Important context, constraints, or user preferences\\n\"\n        \"- What remains to be done (clear next steps)\\n\"\n        \"- Any critical data, examples, or references needed to continue\\n\\n\"\n        \"Be concise and structured. 
Do NOT call tools.\"\n    )\n    DEFAULT_TIMEOUT = BaseMCPAgent.DEFAULT_TIMEOUT\n\n    def __init__(\n        self,\n        litellm_input_model_name: str,\n        api_key: str,\n        base_url: str,\n        mcp_service: str,\n        timeout: int = DEFAULT_TIMEOUT,\n        service_config: Optional[Dict[str, Any]] = None,\n        service_config_provider: Optional[Callable[[], Dict[str, Any]]] = None,\n        reasoning_effort: Optional[str] = \"default\",\n        compaction_token: int = BaseMCPAgent.COMPACTION_DISABLED_TOKEN,\n    ):\n        super().__init__(\n            litellm_input_model_name=litellm_input_model_name,\n            api_key=api_key,\n            base_url=base_url,\n            mcp_service=mcp_service,\n            timeout=timeout,\n            service_config=service_config,\n            service_config_provider=service_config_provider,\n            reasoning_effort=reasoning_effort,\n            compaction_token=compaction_token,\n        )\n        logger.debug(\n            \"Initialized MCPMarkAgent for '%s' with model '%s' (Claude: %s, Thinking: %s, Reasoning: %s)\",\n            mcp_service,\n            litellm_input_model_name,\n            self.is_claude,\n            self.use_claude_thinking,\n            reasoning_effort,\n        )\n\n    # ==================== Public Interface Methods ====================\n\n    async def execute(\n        self, instruction: str, tool_call_log_file: Optional[str] = None\n    ) -> Dict[str, Any]:\n        \"\"\"\n        Execute instruction with the agent.\n\n        Args:\n            instruction: The instruction/prompt to execute\n            tool_call_log_file: Optional path to log tool calls\n\n        Returns:\n            Dictionary containing execution results\n        \"\"\"\n        start_time = time.time()\n\n        try:\n            # Reset partial progress for this run\n            self._reset_progress()\n            # Refresh service configuration\n            
self._refresh_service_config()\n\n            # Execute with timeout control\n            async def _execute_with_strategy():\n                if self.use_claude_thinking:\n                    # Claude with thinking -> native Anthropic API with tools\n                    return await self._execute_claude_native_with_tools(\n                        instruction, tool_call_log_file\n                    )\n                else:\n                    # All other cases -> LiteLLM with tools\n                    return await self._execute_litellm_with_tools(\n                        instruction, tool_call_log_file\n                    )\n\n            # Apply timeout to the entire execution\n            result = await asyncio.wait_for(\n                _execute_with_strategy(), timeout=self.timeout\n            )\n\n            execution_time = time.time() - start_time\n\n            # Update usage statistics\n            self.usage_tracker.update(\n                success=result[\"success\"],\n                token_usage=result.get(\"token_usage\", {}),\n                turn_count=result.get(\"turn_count\", 0),\n                execution_time=execution_time,\n            )\n\n            result[\"execution_time\"] = execution_time\n            return result\n\n        except Exception as e:\n            execution_time = time.time() - start_time\n            if isinstance(e, asyncio.TimeoutError):\n                error_msg = f\"Execution timed out after {self.timeout} seconds\"\n                logger.error(error_msg)\n            else:\n                error_msg = f\"Agent execution failed: {e}\"\n                logger.error(error_msg, exc_info=True)\n\n            self.usage_tracker.update(\n                success=False,\n                token_usage=self._partial_token_usage or {},\n                turn_count=self._partial_turn_count or 0,\n                execution_time=execution_time,\n            )\n\n            if self._partial_messages:\n                if not 
self.is_claude:\n                    final_msg = self._convert_to_sdk_format(self._partial_messages)\n                else:\n                    final_msg = self._partial_messages\n            else:\n                final_msg = []\n\n            return {\n                \"success\": False,\n                \"output\": final_msg,\n                \"token_usage\": self._partial_token_usage or {},\n                \"turn_count\": self._partial_turn_count or 0,\n                \"execution_time\": execution_time,\n                \"error\": error_msg,\n                \"litellm_run_model_name\": self.litellm_run_model_name,\n            }\n\n    def execute_sync(\n        self, instruction: str, tool_call_log_file: Optional[str] = None\n    ) -> Dict[str, Any]:\n        \"\"\"\n        Synchronous wrapper for execute method.\n        \"\"\"\n        return asyncio.run(self.execute(instruction, tool_call_log_file))\n\n    def get_usage_stats(self) -> Dict[str, Any]:\n        \"\"\"Get usage statistics.\"\"\"\n        return self.usage_tracker.get_stats()\n\n    def reset_usage_stats(self):\n        \"\"\"Reset usage statistics.\"\"\"\n        self.usage_tracker.reset()\n\n    # ==================== Claude Native API Execution Path ====================\n\n    async def _execute_claude_native_with_tools(\n        self, instruction: str, tool_call_log_file: Optional[str] = None\n    ) -> Dict[str, Any]:\n        \"\"\"\n        Execute Claude with thinking using native Anthropic API.\n        Creates MCP server, gets tools, and executes with thinking.\n        \"\"\"\n        logger.debug(\"Using Claude native API with thinking\")\n\n        thinking_budget = self._get_claude_thinking_budget()\n\n        # Create and start MCP server\n        mcp_server = await self._create_mcp_server()\n\n        async with mcp_server:\n            # Get available tools\n            tools = await mcp_server.list_tools()\n\n            # Convert MCP tools to Anthropic format\n            
anthropic_tools = self._convert_to_anthropic_format(tools)\n\n            # Execute with function calling loop\n            return await self._execute_anthropic_native_tool_loop(\n                instruction,\n                anthropic_tools,\n                mcp_server,\n                thinking_budget,\n                tool_call_log_file,\n            )\n\n    async def _call_claude_native_api(\n        self,\n        messages: List[Dict],\n        thinking_budget: int,\n        tools: Optional[List[Dict]] = None,\n        mcp_servers: Optional[List[Dict]] = None,\n        system: Optional[str] = None,\n    ) -> Dict[str, Any]:\n        \"\"\"\n        Call Claude's native API directly using httpx.\n\n        Args:\n            messages: Conversation messages\n            thinking_budget: Token budget for thinking\n            tools: Tool definitions for function calling\n            mcp_servers: MCP server configurations\n            system: System prompt\n\n        Returns:\n            API response as dictionary\n        \"\"\"\n        # Get API base and headers\n        import os\n\n        api_base = os.getenv(\"ANTHROPIC_API_BASE\", \"https://api.anthropic.com\")\n        headers = {\n            \"x-api-key\": self.api_key,\n            \"anthropic-version\": \"2023-06-01\",\n            \"content-type\": \"application/json\",\n            \"anthropic-beta\": \"context-1m-2025-08-07\",  # by default\n        }\n\n        # Build payload\n        max_tokens = max(thinking_budget + 4096, 4096)\n        payload = {\n            \"model\": self.litellm_input_model_name.replace(\"anthropic/\", \"\"),\n            \"max_tokens\": max_tokens,\n            \"messages\": messages,\n        }\n\n        # Add thinking configuration\n        if thinking_budget:\n            payload[\"thinking\"] = {\"type\": \"enabled\", \"budget_tokens\": thinking_budget}\n\n        # Add tools if provided\n        if tools:\n            payload[\"tools\"] = tools\n            
payload[\"tool_choice\"] = {\"type\": \"auto\"}\n\n        # Add MCP servers if provided\n        if mcp_servers:\n            headers[\"anthropic-beta\"] = \"mcp-client-2025-04-04\"\n            payload[\"mcp_servers\"] = mcp_servers\n\n        # Add system prompt if provided\n        if system:\n            payload[\"system\"] = system\n\n        # Make the API call\n        async with httpx.AsyncClient() as client:\n            try:\n                response = await client.post(\n                    f\"{api_base}/v1/messages\",\n                    headers=headers,\n                    json=payload,\n                    timeout=self.timeout,\n                )\n                response.raise_for_status()\n                return response.json(), None\n            except httpx.HTTPStatusError as e:\n                return None, e.response.text\n            except Exception as e:\n                return None, e\n\n    async def _count_claude_input_tokens(\n        self,\n        messages: List[Dict[str, Any]],\n        tools: Optional[List[Dict]] = None,\n        system: Optional[str] = None,\n    ) -> int:\n        import os\n\n        api_base = os.getenv(\"ANTHROPIC_API_BASE\", \"https://api.anthropic.com\")\n        headers = {\n            \"x-api-key\": self.api_key,\n            \"anthropic-version\": \"2023-06-01\",\n            \"content-type\": \"application/json\",\n        }\n        payload: Dict[str, Any] = {\n            \"model\": self.litellm_input_model_name.replace(\"anthropic/\", \"\"),\n            \"messages\": messages,\n        }\n        if tools:\n            payload[\"tools\"] = tools\n        if system:\n            payload[\"system\"] = system\n\n        async with httpx.AsyncClient() as client:\n            response = await client.post(\n                f\"{api_base}/v1/messages/count_tokens\",\n                headers=headers,\n                json=payload,\n                timeout=self.timeout,\n            )\n            
response.raise_for_status()\n            data = response.json() or {}\n            return int(data.get(\"input_tokens\", 0) or 0)\n\n    def _extract_litellm_text(self, response: Any) -> str:\n        try:\n            choices = getattr(response, \"choices\", None) or []\n            if not choices:\n                return \"\"\n            msg = getattr(choices[0], \"message\", None)\n            if msg is not None:\n                return str(getattr(msg, \"content\", \"\") or \"\")\n            return str(getattr(choices[0], \"text\", \"\") or \"\")\n        except Exception:  # pragma: no cover - best effort\n            return \"\"\n\n    def _extract_anthropic_text(self, response_json: Dict[str, Any]) -> str:\n        pieces: List[str] = []\n        for block in response_json.get(\"content\", []) or []:\n            if isinstance(block, dict) and block.get(\"type\") == \"text\":\n                text = block.get(\"text\")\n                if text:\n                    pieces.append(str(text))\n        return \"\\n\".join(pieces).strip()\n\n    def _merge_usage(self, total_tokens: Dict[str, int], usage: Dict[str, Any]) -> None:\n        try:\n            input_tokens = int(usage.get(\"input_tokens\", 0) or 0)\n            output_tokens = int(usage.get(\"output_tokens\", 0) or 0)\n            total_tokens_count = int(\n                usage.get(\"total_tokens\", 0) or (input_tokens + output_tokens)\n            )\n            total_tokens[\"input_tokens\"] += input_tokens\n            total_tokens[\"output_tokens\"] += output_tokens\n            total_tokens[\"total_tokens\"] += total_tokens_count\n        except Exception:  # pragma: no cover - best effort\n            return\n\n    async def _maybe_compact_litellm_messages(\n        self,\n        messages: List[Dict[str, Any]],\n        total_tokens: Dict[str, int],\n        tool_call_log_file: Optional[str],\n        current_prompt_tokens: int,\n    ) -> List[Dict[str, Any]]:\n        if not 
self._compaction_enabled():\n            return messages\n        if current_prompt_tokens < self.compaction_token:\n            return messages\n\n        logger.info(\n            f\"| [compaction] Triggered at prompt tokens: {current_prompt_tokens:,}\"\n        )\n        if tool_call_log_file:\n            try:\n                with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as f:\n                    f.write(\n                        f\"| [compaction] Triggered at prompt tokens: {current_prompt_tokens:,}\\n\"\n                    )\n            except Exception:\n                pass\n\n        compact_messages = [\n            {\"role\": \"system\", \"content\": self.COMPACTION_PROMPT},\n            {\"role\": \"user\", \"content\": json.dumps(messages, ensure_ascii=False)},\n        ]\n        completion_kwargs = {\n            \"model\": self.litellm_input_model_name,\n            \"messages\": compact_messages,\n            \"api_key\": self.api_key,\n        }\n        if self.base_url:\n            completion_kwargs[\"base_url\"] = self.base_url\n        response = await litellm.acompletion(**completion_kwargs)\n\n        usage = getattr(response, \"usage\", None)\n        if usage:\n            input_tokens = (\n                getattr(usage, \"prompt_tokens\", None)\n                or getattr(usage, \"input_tokens\", None)\n                or 0\n            )\n            output_tokens = (\n                getattr(usage, \"completion_tokens\", None)\n                or getattr(usage, \"output_tokens\", None)\n                or 0\n            )\n            total_tokens_count = getattr(usage, \"total_tokens\", None)\n            if total_tokens_count is None:\n                total_tokens_count = input_tokens + output_tokens\n            total_tokens[\"input_tokens\"] += int(input_tokens or 0)\n            total_tokens[\"output_tokens\"] += int(output_tokens or 0)\n            total_tokens[\"total_tokens\"] += int(total_tokens_count or 0)\n\n 
       summary = self._extract_litellm_text(response).strip() or \"(no summary)\"\n        system_msg = (\n            messages[0]\n            if messages\n            else {\"role\": \"system\", \"content\": self.SYSTEM_PROMPT}\n        )\n        first_user = (\n            messages[1] if len(messages) > 1 else {\"role\": \"user\", \"content\": \"\"}\n        )\n        return [\n            system_msg,\n            first_user,\n            {\n                \"role\": \"user\",\n                \"content\": f\"Context summary (auto-compacted due to token limit):\\n{summary}\",\n            },\n        ]\n\n    async def _maybe_compact_anthropic_messages(\n        self,\n        messages: List[Dict[str, Any]],\n        total_tokens: Dict[str, int],\n        thinking_budget: int,\n        tool_call_log_file: Optional[str],\n        current_input_tokens: int,\n    ) -> List[Dict[str, Any]]:\n        if not self._compaction_enabled():\n            return messages\n        if current_input_tokens < self.compaction_token:\n            return messages\n\n        logger.info(\n            f\"| [compaction] Triggered at input tokens: {current_input_tokens:,}\"\n        )\n        if tool_call_log_file:\n            try:\n                with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as f:\n                    f.write(\n                        f\"| [compaction] Triggered at input tokens: {current_input_tokens:,}\\n\"\n                    )\n            except Exception:\n                pass\n\n        compact_messages = [\n            {\"role\": \"user\", \"content\": self.COMPACTION_PROMPT},\n            {\"role\": \"user\", \"content\": json.dumps(messages, ensure_ascii=False)},\n        ]\n        response, error_msg = await self._call_claude_native_api(\n            messages=compact_messages,\n            thinking_budget=thinking_budget,\n            tools=None,\n            system=None,\n        )\n        if error_msg or not response:\n            
logger.warning(f\"| [compaction] Failed: {error_msg}\")\n            return messages\n\n        usage = response.get(\"usage\", {}) or {}\n        input_tokens = usage.get(\"input_tokens\", 0) or 0\n        output_tokens = usage.get(\"output_tokens\", 0) or 0\n        total_tokens[\"input_tokens\"] += int(input_tokens)\n        total_tokens[\"output_tokens\"] += int(output_tokens)\n        total_tokens[\"total_tokens\"] += int(input_tokens + output_tokens)\n\n        summary = self._extract_anthropic_text(response) or \"(no summary)\"\n        first_user = messages[0] if messages else {\"role\": \"user\", \"content\": \"\"}\n        return [\n            first_user,\n            {\n                \"role\": \"user\",\n                \"content\": f\"Context summary (auto-compacted due to token limit):\\n{summary}\",\n            },\n        ]\n\n    async def _execute_anthropic_native_tool_loop(\n        self,\n        instruction: str,\n        tools: List[Dict],\n        mcp_server: Any,\n        thinking_budget: int,\n        tool_call_log_file: Optional[str] = None,\n    ) -> Dict[str, Any]:\n        \"\"\"\n        Execute Claude thinking loop with function calling.\n        Handles thinking blocks, tool calls, and message formatting.\n        \"\"\"\n        messages = [{\"role\": \"user\", \"content\": instruction}]\n        total_tokens = {\n            \"input_tokens\": 0,\n            \"output_tokens\": 0,\n            \"total_tokens\": 0,\n            \"reasoning_tokens\": 0,\n        }\n        turn_count = 0\n        max_turns = self.MAX_TURNS\n        hit_turn_limit = False\n        ended_normally = False\n\n        system_text = self.SYSTEM_PROMPT\n        # Record initial state\n        self._update_progress(messages, total_tokens, turn_count)\n\n        for _ in range(max_turns):\n            turn_count += 1\n\n            current_input_tokens = 0\n            if self._compaction_enabled():\n                try:\n                    
current_input_tokens = await self._count_claude_input_tokens(\n                        messages=messages,\n                        tools=tools,\n                        system=system_text,\n                    )\n                except Exception as exc:  # noqa: BLE001\n                    logger.debug(\"Claude token counting failed: %s\", exc)\n\n            messages = await self._maybe_compact_anthropic_messages(\n                messages=messages,\n                total_tokens=total_tokens,\n                thinking_budget=thinking_budget,\n                tool_call_log_file=tool_call_log_file,\n                current_input_tokens=current_input_tokens,\n            )\n            self._update_progress(messages, total_tokens, turn_count)\n\n            # Call Claude native API\n            response, error_msg = await self._call_claude_native_api(\n                messages=messages,\n                thinking_budget=thinking_budget,\n                tools=tools,\n                system=system_text,\n            )\n            if turn_count == 1:\n                self.litellm_run_model_name = response[\"model\"].split(\"/\")[-1]\n\n            if error_msg:\n                break\n\n            # Update token usage\n            if \"usage\" in response:\n                usage = response[\"usage\"]\n                input_tokens = usage.get(\"input_tokens\", 0)\n                output_tokens = usage.get(\"output_tokens\", 0)\n                # Calculate output tokens as total - input for consistency\n                total_tokens_count = output_tokens + input_tokens\n\n                total_tokens[\"input_tokens\"] += input_tokens\n                total_tokens[\"output_tokens\"] += output_tokens\n                total_tokens[\"total_tokens\"] += total_tokens_count\n\n                ## TODO: add reasoning tokens for claude\n\n            # Extract blocks from response\n            blocks = response.get(\"content\", [])\n            tool_uses = [b for b in blocks if 
b.get(\"type\") == \"tool_use\"]\n            thinking_blocks = [b for b in blocks if b.get(\"type\") == \"thinking\"]\n            text_blocks = [b for b in blocks if b.get(\"type\") == \"text\"]\n\n            # Log text output\n            for tb in text_blocks:\n                if tb.get(\"text\") and tool_call_log_file:\n                    with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as f:\n                        f.write(f\"{tb['text']}\\n\")\n                if tb.get(\"text\"):\n                    for line in tb[\"text\"].splitlines():\n                        logger.info(f\"| {line}\")\n\n            # Build assistant message with all blocks\n            assistant_content = []\n\n            # Add thinking blocks\n            for tb in thinking_blocks:\n                assistant_content.append(\n                    {\n                        \"type\": \"thinking\",\n                        \"thinking\": tb.get(\"thinking\", \"\"),\n                        \"signature\": tb.get(\"signature\", \"\"),\n                    }\n                )\n\n            # Add text blocks\n            for tb in text_blocks:\n                if tb.get(\"text\"):\n                    assistant_content.append({\"type\": \"text\", \"text\": tb[\"text\"]})\n\n            # Add tool_use blocks\n            for tu in tool_uses:\n                assistant_content.append(\n                    {\n                        \"type\": \"tool_use\",\n                        \"id\": tu.get(\"id\"),\n                        \"name\": tu.get(\"name\"),\n                        \"input\": tu.get(\"input\", {}),\n                    }\n                )\n\n            messages.append({\"role\": \"assistant\", \"content\": assistant_content})\n\n            # Update partial progress after assistant response\n            self._update_progress(messages, total_tokens, turn_count)\n\n            # If no tool calls, we're done\n            if not tool_uses:\n                
ended_normally = True\n                break\n\n            # Execute tools and add results\n            tool_results = []\n            for tu in tool_uses:\n                name = tu.get(\"name\")\n                inputs = tu.get(\"input\", {})\n\n                # Log tool call\n                args_str = json.dumps(inputs, separators=(\",\", \": \"))\n                display_args = (\n                    args_str[:140] + \"...\" if len(args_str) > 140 else args_str\n                )\n                logger.info(f\"| \\033[1m{name}\\033[0m \\033[2;37m{display_args}\\033[0m\")\n\n                if tool_call_log_file:\n                    with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as f:\n                        f.write(f\"| {name} {args_str}\\n\")\n\n                # Execute tool\n                try:\n                    result = await asyncio.wait_for(\n                        mcp_server.call_tool(name, inputs), timeout=60\n                    )\n                    tool_results.append(\n                        {\n                            \"type\": \"tool_result\",\n                            \"tool_use_id\": tu[\"id\"],\n                            \"content\": [\n                                {\n                                    \"type\": \"text\",\n                                    \"text\": json.dumps(result, cls=CustomJSONEncoder),\n                                }\n                            ],\n                        }\n                    )\n                except Exception as e:\n                    logger.error(f\"Tool call failed: {e}\")\n                    tool_results.append(\n                        {\n                            \"type\": \"tool_result\",\n                            \"tool_use_id\": tu[\"id\"],\n                            \"content\": [{\"type\": \"text\", \"text\": f\"Error: {str(e)}\"}],\n                        }\n                    )\n\n            messages.append({\"role\": \"user\", 
\"content\": tool_results})\n            # Update partial progress after tool results\n            self._update_progress(messages, total_tokens, turn_count)\n\n        # Detect if we exited due to hitting the turn limit\n        if not ended_normally:\n            if turn_count >= max_turns:\n                hit_turn_limit = True\n                logger.warning(\n                    f\"| Max turns ({max_turns}) exceeded; returning failure with partial output.\"\n                )\n                if tool_call_log_file:\n                    try:\n                        with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as f:\n                            f.write(f\"| Max turns ({max_turns}) exceeded\\n\")\n                    except Exception:\n                        pass\n            elif error_msg:\n                logger.warning(f\"| {error_msg}\\n\")\n                if tool_call_log_file:\n                    try:\n                        with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as f:\n                            f.write(f\"| {error_msg}\\n\")\n                    except Exception:\n                        pass\n\n        # Display final token usage\n        if total_tokens[\"total_tokens\"] > 0:\n            log_msg = (\n                f\"|\\n| Token usage: Total: {total_tokens['total_tokens']:,} | \"\n                f\"Input: {total_tokens['input_tokens']:,} | \"\n                f\"Output: {total_tokens['output_tokens']:,}\"\n            )\n            if total_tokens.get(\"reasoning_tokens\", 0) > 0:\n                log_msg += f\" | Reasoning: {total_tokens['reasoning_tokens']:,}\"\n            logger.info(log_msg)\n            logger.info(f\"| Turns: {turn_count}\")\n\n        # Convert messages to SDK format\n        sdk_format_messages = self._convert_to_sdk_format(messages)\n\n        if hit_turn_limit:\n            return {\n                \"success\": False,\n                \"output\": sdk_format_messages,\n                
\"token_usage\": total_tokens,\n                \"turn_count\": turn_count,\n                \"error\": f\"Max turns ({max_turns}) exceeded\",\n                \"litellm_run_model_name\": self.litellm_run_model_name,\n            }\n\n        if error_msg:\n            return {\n                \"success\": False,\n                \"output\": sdk_format_messages,\n                \"token_usage\": total_tokens,\n                \"turn_count\": turn_count,\n                \"error\": error_msg,\n                \"litellm_run_model_name\": self.litellm_run_model_name,\n            }\n\n        return {\n            \"success\": True,\n            \"output\": sdk_format_messages,\n            \"token_usage\": total_tokens,\n            \"turn_count\": turn_count,\n            \"error\": None,\n            \"litellm_run_model_name\": self.litellm_run_model_name,\n        }\n\n    # ==================== LiteLLM Execution Path ====================\n\n    async def _execute_litellm_with_tools(\n        self, instruction: str, tool_call_log_file: Optional[str] = None\n    ) -> Dict[str, Any]:\n        \"\"\"\n        Execute with manual MCP server management.\n        Used for all non-Anthropic models and Anthropic models with STDIO services.\n        \"\"\"\n        logger.debug(\"Using manual MCP execution with function calling loop\")\n\n        # Create and start MCP server\n        mcp_server = await self._create_mcp_server()\n\n        try:\n            async with mcp_server:\n                # Get available tools\n                tools = await mcp_server.list_tools()\n\n                # Convert MCP tools to OpenAI function format\n                functions = self._convert_to_openai_format(tools)\n\n                # Execute with function calling loop\n                return await self._execute_litellm_tool_loop(\n                    instruction, functions, mcp_server, tool_call_log_file\n                )\n\n        except Exception as e:\n            
logger.error(f\"Manual MCP execution failed: {e}\")\n            raise\n\n    async def _execute_litellm_tool_loop(\n        self,\n        instruction: str,\n        functions: List[Dict],\n        mcp_server: Any,\n        tool_call_log_file: Optional[str] = None,\n    ) -> Dict[str, Any]:\n        \"\"\"Execute function calling loop with LiteLLM.\"\"\"\n        messages = [\n            {\"role\": \"system\", \"content\": self.SYSTEM_PROMPT},\n            {\"role\": \"user\", \"content\": instruction},\n        ]\n        total_tokens = {\n            \"input_tokens\": 0,\n            \"output_tokens\": 0,\n            \"total_tokens\": 0,\n            \"reasoning_tokens\": 0,\n        }\n        turn_count = 0\n        max_turns = self.MAX_TURNS  # Limit turns to prevent infinite loops\n        consecutive_failures = 0\n        max_consecutive_failures = 3\n        hit_turn_limit = False\n        ended_normally = False\n\n        # Convert functions to tools format for newer models\n        tools = (\n            [{\"type\": \"function\", \"function\": func} for func in functions]\n            if functions\n            else None\n        )\n\n        if tool_call_log_file and tools:\n            max_name_length = (\n                max(len(tool.get(\"function\", {}).get(\"name\", \"\")) for tool in tools)\n                if tools\n                else 15\n            )\n            with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as f:\n                f.write(\"===== Available Tools =====\\n\")\n                for tool in tools:\n                    function_info = tool.get(\"function\", {})\n                    tool_name = function_info.get(\"name\", \"N/A\")\n                    description = function_info.get(\"description\", \"N/A\")\n                    f.write(\n                        f\"- ToolName: {tool_name:<{max_name_length}} Description: {description}\\n\"\n                    )\n                f.write(\"\\n===== Execution Logs 
=====\\n\")\n\n        # Record initial state\n        self._update_progress(messages, total_tokens, turn_count)\n\n        try:\n            while turn_count < max_turns:\n                current_prompt_tokens = 0\n                if self._compaction_enabled():\n                    current_prompt_tokens = self._count_prompt_tokens_litellm(messages)\n\n                messages = await self._maybe_compact_litellm_messages(\n                    messages=messages,\n                    total_tokens=total_tokens,\n                    tool_call_log_file=tool_call_log_file,\n                    current_prompt_tokens=current_prompt_tokens,\n                )\n                self._update_progress(messages, total_tokens, turn_count)\n\n                # Build completion kwargs\n                completion_kwargs = {\n                    \"model\": self.litellm_input_model_name,\n                    \"messages\": messages,\n                    \"api_key\": self.api_key,\n                }\n\n                # Always use tools format if available - LiteLLM will handle conversion\n                if tools:\n                    completion_kwargs[\"tools\"] = tools\n                    completion_kwargs[\"tool_choice\"] = \"auto\"\n\n                # Add reasoning_effort and base_url if specified\n                if self.reasoning_effort != \"default\":\n                    completion_kwargs[\"reasoning_effort\"] = self.reasoning_effort\n                if self.base_url:\n                    completion_kwargs[\"base_url\"] = self.base_url\n\n                try:\n                    # Call LiteLLM with timeout for individual call\n                    response = await asyncio.wait_for(\n                        litellm.acompletion(**completion_kwargs),\n                        timeout=self.timeout / 2,  # Use half of total timeout\n                    )\n                    consecutive_failures = 0  # Reset failure counter on success\n                except asyncio.TimeoutError:\n 
                   logger.warning(f\"| ✗ LLM call timed out on turn {turn_count + 1}\")\n                    consecutive_failures += 1\n                    if consecutive_failures >= max_consecutive_failures:\n                        raise Exception(\n                            f\"Too many consecutive failures ({consecutive_failures})\"\n                        )\n                    await asyncio.sleep(8**consecutive_failures)  # Exponential backoff\n                    continue\n                except Exception as e:\n                    logger.error(f\"| ✗ LLM call failed on turn {turn_count + 1}: {e}\")\n                    consecutive_failures += 1\n                    if consecutive_failures >= max_consecutive_failures:\n                        raise\n                    if \"ContextWindowExceededError\" in str(e):\n                        # Best-effort fallback: compact and retry once.\n                        messages = await self._maybe_compact_litellm_messages(\n                            messages=messages,\n                            total_tokens=total_tokens,\n                            tool_call_log_file=tool_call_log_file,\n                            current_prompt_tokens=self.compaction_token,\n                        )\n                        self._update_progress(messages, total_tokens, turn_count)\n                        continue\n                    elif \"RateLimitError\" in str(e):\n                        await asyncio.sleep(12**consecutive_failures)\n                    else:\n                        await asyncio.sleep(2**consecutive_failures)\n                    continue\n\n                # Extract actual model name from response (first turn only)\n                if turn_count == 0 and hasattr(response, \"model\") and response.model:\n                    self.litellm_run_model_name = response.model.split(\"/\")[-1]\n\n                # Update token usage including reasoning tokens\n                if hasattr(response, \"usage\") 
and response.usage:\n                    input_tokens = response.usage.prompt_tokens or 0\n                    total_tokens_count = response.usage.total_tokens or 0\n                    # Calculate output tokens as total - input for consistency\n                    output_tokens = (\n                        total_tokens_count - input_tokens\n                        if total_tokens_count > 0\n                        else (response.usage.completion_tokens or 0)\n                    )\n\n                    total_tokens[\"input_tokens\"] += input_tokens\n                    total_tokens[\"output_tokens\"] += output_tokens\n                    total_tokens[\"total_tokens\"] += total_tokens_count\n\n                    # Extract reasoning tokens if available\n                    if hasattr(response.usage, \"completion_tokens_details\"):\n                        details = response.usage.completion_tokens_details\n                        if hasattr(details, \"reasoning_tokens\"):\n                            total_tokens[\"reasoning_tokens\"] += (\n                                details.reasoning_tokens or 0\n                            )\n\n                # Get response message\n                choices = response.choices\n                if len(choices):\n                    message = choices[0].message\n                    # deeply dump the message to ensure we capture all fields\n                    message_dict = (\n                        message.model_dump()\n                        if hasattr(message, \"model_dump\")\n                        else dict(message)\n                    )\n\n                    # Explicitly preserve function_call if present (even if tool_calls exists),\n                    # as it may contain provider-specific metadata (e.g. 
Gemini thought_signature)\n                    if hasattr(message, \"function_call\") and message.function_call:\n                        # Ensure it's in the dict if model_dump missed it or it was excluded\n                        if (\n                            \"function_call\" not in message_dict\n                            or not message_dict[\"function_call\"]\n                        ):\n                            fc = message.function_call\n                            message_dict[\"function_call\"] = (\n                                fc.model_dump() if hasattr(fc, \"model_dump\") else fc\n                            )\n\n                # Log assistant's text content if present\n                if hasattr(message, \"content\") and message.content:\n                    # Display the content with line prefix\n                    for line in message.content.splitlines():\n                        logger.info(f\"| {line}\")\n\n                    # Also log to file if specified\n                    if tool_call_log_file:\n                        with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as f:\n                            f.write(f\"{message.content}\\n\")\n\n                # Check for tool calls (newer format)\n                if hasattr(message, \"tool_calls\") and message.tool_calls:\n                    messages.append(message_dict)\n                    turn_count += 1\n                    # Update progress after assistant with tool calls\n                    self._update_progress(messages, total_tokens, turn_count)\n                    # Process tool calls\n                    for tool_call in message.tool_calls:\n                        func_name = tool_call.function.name\n                        func_args = json.loads(tool_call.function.arguments)\n\n                        try:\n                            result = await asyncio.wait_for(\n                                mcp_server.call_tool(func_name, func_args), timeout=60\n        
                    )\n                            messages.append(\n                                {\n                                    \"role\": \"tool\",\n                                    \"tool_call_id\": tool_call.id,\n                                    \"content\": json.dumps(\n                                        result, cls=CustomJSONEncoder\n                                    ),\n                                }\n                            )\n                        except asyncio.TimeoutError:\n                            error_msg = (\n                                f\"Tool call '{func_name}' timed out after 60 seconds\"\n                            )\n                            logger.error(error_msg)\n                            messages.append(\n                                {\n                                    \"role\": \"tool\",\n                                    \"tool_call_id\": tool_call.id,\n                                    \"content\": f\"Error: {error_msg}\",\n                                }\n                            )\n                        except Exception as e:\n                            logger.error(f\"Tool call failed: {e}\")\n                            messages.append(\n                                {\n                                    \"role\": \"tool\",\n                                    \"tool_call_id\": tool_call.id,\n                                    \"content\": f\"Error: {str(e)}\",\n                                }\n                            )\n\n                        # Format arguments for display (truncate if too long)\n                        args_str = json.dumps(func_args, separators=(\",\", \": \"))\n                        display_arguments = (\n                            args_str[:140] + \"...\" if len(args_str) > 140 else args_str\n                        )\n\n                        # Log with ANSI color codes (bold tool name, dim gray arguments)\n                        
logger.info(\n                            f\"| \\033[1m{func_name}\\033[0m \\033[2;37m{display_arguments}\\033[0m\"\n                        )\n\n                        if tool_call_log_file:\n                            with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as f:\n                                f.write(f\"| {func_name} {args_str}\\n\")\n                    # Update progress after tool results appended\n                    self._update_progress(messages, total_tokens, turn_count)\n                    continue\n                else:\n                    # Log end reason\n                    if not choices:\n                        logger.info(\n                            \"|\\n|\\n| Task ended with no messages generated by the model.\"\n                        )\n                    elif choices[0].finish_reason == \"stop\":\n                        logger.info(\n                            \"|\\n|\\n| Task ended with the finish reason from messages being 'stop'.\"\n                        )\n\n                    # No tool/function call, add message and we're done\n                    messages.append(message_dict)\n                    turn_count += 1\n                    # Update progress before exiting\n                    self._update_progress(messages, total_tokens, turn_count)\n                    ended_normally = True\n                    break\n\n        except Exception as loop_error:\n            # On any error, return partial conversation, token usage, and turn count\n            logger.error(f\"Manual MCP loop failed: {loop_error}\", exc_info=True)\n            sdk_format_messages = self._convert_to_sdk_format(messages)\n            return {\n                \"success\": False,\n                \"output\": sdk_format_messages,\n                \"token_usage\": total_tokens,\n                \"turn_count\": turn_count,\n                \"error\": str(loop_error),\n                \"litellm_run_model_name\": 
self.litellm_run_model_name,\n            }\n\n        # Detect if we exited due to hitting the turn limit\n        if (not ended_normally) and (turn_count >= max_turns):\n            hit_turn_limit = True\n            logger.warning(\n                f\"| Max turns ({max_turns}) exceeded); returning failure with partial output.\"\n            )\n            if tool_call_log_file:\n                try:\n                    with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as f:\n                        f.write(f\"| Max turns ({max_turns}) exceeded\\n\")\n                except Exception:\n                    pass\n\n        # Display final token usage\n        if total_tokens[\"total_tokens\"] > 0:\n            log_msg = (\n                f\"| Token usage: Total: {total_tokens['total_tokens']:,} | \"\n                f\"Input: {total_tokens['input_tokens']:,} | \"\n                f\"Output: {total_tokens['output_tokens']:,}\"\n            )\n            if total_tokens.get(\"reasoning_tokens\", 0) > 0:\n                log_msg += f\" | Reasoning: {total_tokens['reasoning_tokens']:,}\"\n            logger.info(log_msg)\n            logger.info(f\"| Turns: {turn_count}\")\n\n        # Convert messages to SDK format for backward compatibility\n        sdk_format_messages = self._convert_to_sdk_format(messages)\n\n        return {\n            \"success\": not hit_turn_limit,\n            \"output\": sdk_format_messages,\n            \"token_usage\": total_tokens,\n            \"turn_count\": turn_count,\n            \"error\": (f\"Max turns ({max_turns}) exceeded\" if hit_turn_limit else None),\n            \"litellm_run_model_name\": self.litellm_run_model_name,\n        }\n\n    # ==================== MCP Server Management ====================\n\n    async def _create_mcp_server(self) -> Any:\n        \"\"\"Create and return an MCP server instance.\"\"\"\n        if self.mcp_service in self.STDIO_SERVICES:\n            return self._create_stdio_server()\n   
     elif self.mcp_service in self.HTTP_SERVICES:\n            return self._create_http_server()\n        else:\n            raise ValueError(f\"Unsupported MCP service: {self.mcp_service}\")\n\n    def _create_stdio_server(self) -> MCPStdioServer:\n        \"\"\"Create stdio-based MCP server.\"\"\"\n        if self.mcp_service == \"notion\":\n            notion_key = self.service_config.get(\"notion_key\")\n            if not notion_key:\n                raise ValueError(\"Notion API key required\")\n\n            return MCPStdioServer(\n                command=\"npx\",\n                args=[\"-y\", \"@notionhq/notion-mcp-server@1.9.1\"],\n                env={\n                    \"OPENAPI_MCP_HEADERS\": (\n                        '{\"Authorization\": \"Bearer ' + notion_key + '\", '\n                        '\"Notion-Version\": \"2022-06-28\"}'\n                    )\n                },\n            )\n\n        elif self.mcp_service == \"filesystem\":\n            test_directory = self.service_config.get(\"test_directory\")\n            if not test_directory:\n                raise ValueError(\"Test directory required for filesystem service\")\n\n            return MCPStdioServer(\n                command=\"npx\",\n                args=[\n                    \"-y\",\n                    \"@modelcontextprotocol/server-filesystem\",\n                    str(test_directory),\n                ],\n            )\n\n        elif self.mcp_service in [\"playwright\", \"playwright_webarena\"]:\n            browser = self.service_config.get(\"browser\", \"chromium\")\n            headless = self.service_config.get(\"headless\", True)\n            viewport_width = self.service_config.get(\"viewport_width\", 1280)\n            viewport_height = self.service_config.get(\"viewport_height\", 720)\n\n            args = [\"-y\", \"@playwright/mcp@latest\"]\n            if headless:\n                args.append(\"--headless\")\n            args.extend(\n                [\n      
              \"--isolated\",\n                    \"--no-sandbox\",\n                    \"--browser\",\n                    browser,\n                    \"--viewport-size\",\n                    f\"{viewport_width},{viewport_height}\",\n                ]\n            )\n\n            return MCPStdioServer(command=\"npx\", args=args)\n\n        elif self.mcp_service == \"postgres\":\n            host = self.service_config.get(\"host\", \"localhost\")\n            port = self.service_config.get(\"port\", 5432)\n            username = self.service_config.get(\"username\")\n            password = self.service_config.get(\"password\")\n            database = self.service_config.get(\n                \"current_database\"\n            ) or self.service_config.get(\"database\")\n\n            if not all([username, password, database]):\n                raise ValueError(\"PostgreSQL requires username, password, and database\")\n\n            database_url = (\n                f\"postgresql://{username}:{password}@{host}:{port}/{database}\"\n            )\n\n            return MCPStdioServer(\n                command=\"pipx\",\n                args=[\"run\", \"postgres-mcp\", \"--access-mode=unrestricted\"],\n                env={\"DATABASE_URI\": database_url},\n            )\n\n        elif self.mcp_service == \"insforge\":\n            api_key = self.service_config.get(\"api_key\")\n            backend_url = self.service_config.get(\"backend_url\")\n            if not all([api_key, backend_url]):\n                raise ValueError(\"Insforge requires api_key and backend_url\")\n            return MCPStdioServer(\n                command=\"npx\",\n                args=[\"-y\", \"@insforge/mcp@dev\"],\n                env={\n                    \"INSFORGE_API_KEY\": api_key,\n                    \"INSFORGE_BACKEND_URL\": backend_url,\n                },\n            )\n\n        elif self.mcp_service == \"github\":\n            github_token = 
self.service_config.get(\"github_token\")\n            if not github_token:\n                raise ValueError(\"GitHub token required\")\n\n            return MCPStdioServer(\n                command=\"docker\",\n                args=[\n                    \"run\", \"-i\", \"--rm\",\n                    \"-e\", \"GITHUB_PERSONAL_ACCESS_TOKEN\",\n                    \"ghcr.io/github/github-mcp-server:v0.15.0\",\n                ],\n                env={\"GITHUB_PERSONAL_ACCESS_TOKEN\": github_token},\n            )\n\n        else:\n            raise ValueError(f\"Unsupported stdio service: {self.mcp_service}\")\n\n    def _create_http_server(self) -> MCPHttpServer:\n        \"\"\"Create HTTP-based MCP server.\"\"\"\n        if self.mcp_service == \"supabase\":\n            # Use built-in MCP server from Supabase CLI\n            api_url = self.service_config.get(\"api_url\", \"http://localhost:54321\")\n            api_key = self.service_config.get(\"api_key\", \"\")\n\n            if not api_key:\n                raise ValueError(\n                    \"Supabase requires api_key (use secret key from 'supabase status')\"\n                )\n\n            # Supabase CLI exposes MCP at /mcp endpoint\n            mcp_url = f\"{api_url}/mcp\"\n\n            return MCPHttpServer(\n                url=mcp_url,\n                headers={\n                    \"apikey\": api_key,\n                    \"Authorization\": f\"Bearer {api_key}\",\n                },\n            )\n\n        else:\n            raise ValueError(f\"Unsupported HTTP service: {self.mcp_service}\")\n"
  },
  {
    "path": "src/agents/react_agent.py",
    "content": "\"\"\"ReAct agent implementation for the MCPMark pipeline.\"\"\"\n\nfrom __future__ import annotations\n\nimport asyncio\nimport json\nimport time\nfrom typing import Any, Dict, List, Optional, Callable\n\nimport litellm\n\nfrom src.logger import get_logger\nfrom .base_agent import BaseMCPAgent\n\nlogger = get_logger(__name__)\n\n\nclass ReActAgent(BaseMCPAgent):\n    \"\"\"ReAct-style agent that reuses MCPMark infrastructure.\"\"\"\n\n    DEFAULT_SYSTEM_PROMPT = (\n        \"You are a careful ReAct (reasoning and acting) agent. \"\n        \"At each step you must decide whether to call a tool or provide a final response. \"\n        \"Only use the tools that are listed for you. When you finish, respond with either the final answer \"\n        \"or the phrase \\\"Task completed.\\\" if no further detail is required. \"\n        \"Every reply must be valid JSON without code fences.\"\n    )\n    COMPACTION_PROMPT = (\n        \"You are performing a CONTEXT CHECKPOINT COMPACTION.\\n\"\n        \"Summarize the conversation so far for another model to continue.\\n\\n\"\n        \"Include:\\n\"\n        \"- Current progress and key decisions made\\n\"\n        \"- Important context, constraints, or user preferences\\n\"\n        \"- What remains to be done (clear next steps)\\n\"\n        \"- Any critical data, examples, or references needed to continue\\n\\n\"\n        \"Be concise and structured. 
Do NOT call tools.\"\n    )\n\n    def __init__(\n        self,\n        litellm_input_model_name: str,\n        api_key: str,\n        base_url: str,\n        mcp_service: str,\n        timeout: int = BaseMCPAgent.DEFAULT_TIMEOUT,\n        service_config: Optional[Dict[str, Any]] = None,\n        service_config_provider: Optional[Callable[[], Dict[str, Any]]] = None,\n        reasoning_effort: Optional[str] = \"default\",\n        max_iterations: int = 100,\n        system_prompt: Optional[str] = None,\n        compaction_token: int = BaseMCPAgent.COMPACTION_DISABLED_TOKEN,\n    ):\n        super().__init__(\n            litellm_input_model_name=litellm_input_model_name,\n            api_key=api_key,\n            base_url=base_url,\n            mcp_service=mcp_service,\n            timeout=timeout,\n            service_config=service_config,\n            service_config_provider=service_config_provider,\n            reasoning_effort=reasoning_effort,\n            compaction_token=compaction_token,\n        )\n        self.max_iterations = max_iterations\n        self.react_system_prompt = system_prompt or self.DEFAULT_SYSTEM_PROMPT\n\n    async def execute(\n        self,\n        instruction: str,\n        tool_call_log_file: Optional[str] = None,\n    ) -> Dict[str, Any]:\n        start_time = time.time()\n\n        try:\n            self._reset_progress()\n            self._refresh_service_config()\n\n            async def _run_react():\n                return await self._execute_react_loop(instruction, tool_call_log_file)\n\n            result = await asyncio.wait_for(_run_react(), timeout=self.timeout)\n            execution_time = time.time() - start_time\n            self.usage_tracker.update(\n                success=result.get(\"success\", False),\n                token_usage=result.get(\"token_usage\", {}),\n                turn_count=result.get(\"turn_count\", 0),\n                execution_time=execution_time,\n            )\n            
result[\"execution_time\"] = execution_time\n            return result\n        except Exception as exc:  # noqa: BLE001\n            execution_time = time.time() - start_time\n\n            if isinstance(exc, asyncio.TimeoutError):\n                error_msg = f\"Execution timed out after {self.timeout} seconds\"\n                logger.error(error_msg)\n            else:\n                error_msg = f\"ReAct agent execution failed: {exc}\"\n                logger.error(error_msg, exc_info=True)\n\n            self.usage_tracker.update(\n                success=False,\n                token_usage=self._partial_token_usage or {},\n                turn_count=self._partial_turn_count or 0,\n                execution_time=execution_time,\n            )\n\n            if self._partial_messages:\n                final_msg = self._convert_to_sdk_format(self._partial_messages)\n            else:\n                final_msg = []\n\n            return {\n                \"success\": False,\n                \"output\": final_msg,\n                \"token_usage\": self._partial_token_usage or {},\n                \"turn_count\": self._partial_turn_count or 0,\n                \"execution_time\": execution_time,\n                \"error\": error_msg,\n                \"litellm_run_model_name\": self.litellm_run_model_name,\n            }\n\n    async def _execute_react_loop(\n        self,\n        instruction: str,\n        tool_call_log_file: Optional[str],\n    ) -> Dict[str, Any]:\n        system_message = {\"role\": \"system\", \"content\": self.react_system_prompt}\n        total_tokens = {\n            \"input_tokens\": 0,\n            \"output_tokens\": 0,\n            \"total_tokens\": 0,\n            \"reasoning_tokens\": 0,\n        }\n        turn_count = 0\n        success = False\n        final_error: Optional[str] = None\n\n        mcp_server = await self._create_mcp_server()\n        async with mcp_server:\n            tools = await mcp_server.list_tools()\n     
       tool_map = {tool.get(\"name\"): tool for tool in tools}\n            tools_description = self._render_tools_description(tools)\n\n            task_message = {\n                \"role\": \"user\",\n                \"content\": self._build_task_prompt(\n                    instruction=instruction,\n                    tools_description=tools_description,\n                ),\n            }\n            messages: List[Dict[str, Any]] = [system_message, task_message]\n            self._update_progress(messages, total_tokens, turn_count)\n\n            for step in range(1, self.max_iterations + 1):\n                current_prompt_tokens = 0\n                if self._compaction_enabled():\n                    current_prompt_tokens = self._count_prompt_tokens_litellm(messages)\n\n                if self._compaction_enabled() and current_prompt_tokens >= self.compaction_token:\n                    logger.info(\n                        f\"| [compaction] Triggered at prompt tokens: {current_prompt_tokens:,}\"\n                    )\n                    if tool_call_log_file:\n                        try:\n                            with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as log_file:\n                                log_file.write(\n                                    f\"| [compaction] Triggered at prompt tokens: {current_prompt_tokens:,}\\n\"\n                                )\n                        except Exception:  # noqa: BLE001\n                            pass\n\n                    compact_messages = [\n                        {\"role\": \"system\", \"content\": self.COMPACTION_PROMPT},\n                        {\"role\": \"user\", \"content\": json.dumps(messages, ensure_ascii=False)},\n                    ]\n                    compact_kwargs = {\n                        \"model\": self.litellm_input_model_name,\n                        \"messages\": compact_messages,\n                        \"api_key\": self.api_key,\n                    
}\n                    if self.base_url:\n                        compact_kwargs[\"base_url\"] = self.base_url\n\n                    compact_response = await litellm.acompletion(**compact_kwargs)\n                    usage = getattr(compact_response, \"usage\", None)\n                    if usage:\n                        prompt_tokens = (\n                            getattr(usage, \"prompt_tokens\", None)\n                            or getattr(usage, \"input_tokens\", None)\n                            or 0\n                        )\n                        completion_tokens = (\n                            getattr(usage, \"completion_tokens\", None)\n                            or getattr(usage, \"output_tokens\", None)\n                            or 0\n                        )\n                        total_tokens_count = getattr(usage, \"total_tokens\", None)\n                        if total_tokens_count is None:\n                            total_tokens_count = prompt_tokens + completion_tokens\n\n                        total_tokens[\"input_tokens\"] += int(prompt_tokens or 0)\n                        total_tokens[\"output_tokens\"] += int(completion_tokens or 0)\n                        total_tokens[\"total_tokens\"] += int(total_tokens_count or 0)\n\n                    summary = \"\"\n                    try:\n                        summary = compact_response.choices[0].message.content or \"\"\n                    except Exception:  # noqa: BLE001\n                        summary = \"\"\n                    summary = summary.strip() or \"(no summary)\"\n\n                    messages = [\n                        system_message,\n                        task_message,\n                        {\n                            \"role\": \"user\",\n                            \"content\": (\n                                \"Context summary (auto-compacted due to token limit):\\n\"\n                                f\"{summary}\"\n                          
  ),\n                        },\n                    ]\n                    self._update_progress(messages, total_tokens, turn_count)\n\n                completion_kwargs = {\n                    \"model\": self.litellm_input_model_name,\n                    \"messages\": messages,\n                    \"api_key\": self.api_key,\n                }\n                if self.base_url:\n                    completion_kwargs[\"base_url\"] = self.base_url\n                if self.reasoning_effort != \"default\":\n                    completion_kwargs[\"reasoning_effort\"] = self.reasoning_effort\n\n                try:\n                    response = await asyncio.wait_for(\n                        litellm.acompletion(**completion_kwargs),\n                        timeout=self.timeout / 2,\n                    )\n                except asyncio.TimeoutError:\n                    final_error = f\"LLM call timed out on step {step}\"\n                    logger.error(final_error)\n                    break\n                except Exception as exc:  # noqa: BLE001\n                    final_error = f\"LLM call failed on step {step}: {exc}\"\n                    logger.error(final_error)\n                    if \"ContextWindowExceededError\" in str(exc):\n                        continue\n                    break\n\n                if turn_count == 0 and getattr(response, \"model\", None):\n                    self.litellm_run_model_name = response.model.split(\"/\")[-1]\n\n                usage = getattr(response, \"usage\", None)\n                if usage:\n                    prompt_tokens = (\n                        getattr(usage, \"prompt_tokens\", None)\n                        or getattr(usage, \"input_tokens\", None)\n                        or 0\n                    )\n                    completion_tokens = (\n                        getattr(usage, \"completion_tokens\", None)\n                        or getattr(usage, \"output_tokens\", None)\n                    
    or 0\n                    )\n                    total_tokens_count = getattr(usage, \"total_tokens\", None)\n                    if total_tokens_count is None:\n                        total_tokens_count = prompt_tokens + completion_tokens\n\n                    total_tokens[\"input_tokens\"] += prompt_tokens\n                    total_tokens[\"output_tokens\"] += completion_tokens\n                    total_tokens[\"total_tokens\"] += total_tokens_count\n\n                    # Extract reasoning tokens if available\n                    if hasattr(response.usage, 'completion_tokens_details'):\n                        details = response.usage.completion_tokens_details\n                        if hasattr(details, 'reasoning_tokens'):\n                            total_tokens[\"reasoning_tokens\"] += details.reasoning_tokens or 0\n\n                choice = response.choices[0]\n                message_obj = getattr(choice, \"message\", None)\n                if message_obj is None and isinstance(choice, dict):\n                    message_obj = choice.get(\"message\")\n\n                if message_obj is None:\n                    content_raw = getattr(choice, \"text\", \"\")\n                else:\n                    content_raw = message_obj.get(\"content\", \"\")\n\n                assistant_text = self._normalize_content(content_raw)\n                assistant_message = {\"role\": \"assistant\", \"content\": assistant_text}\n                messages.append(assistant_message)\n                turn_count += 1\n                self._update_progress(messages, total_tokens, turn_count)\n\n                parsed = self._parse_react_response(assistant_text)\n                if not parsed or \"thought\" not in parsed:\n                    warning = (\n                        \"The previous response was not valid JSON following the required schema. 
\"\n                        \"Please respond again using the JSON formats provided.\"\n                    )\n                    messages.append({\"role\": \"user\", \"content\": warning})\n                    self._update_progress(messages, total_tokens, turn_count)\n                    final_error = \"Model produced an invalid response format.\"\n                    continue\n\n                thought = parsed.get(\"thought\", \"\")\n                action = parsed.get(\"action\")\n                answer = parsed.get(\"answer\")\n                result = parsed.get(\"result\")\n\n                logger.info(f\"|\\n| \\033[1;3mThought\\033[0m: {str(thought)}\")\n                if tool_call_log_file:\n                    try:\n                        with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as log_file:\n                            log_file.write(f\"| {str(thought)}\\n\")\n                    except Exception:  # noqa: BLE001\n                        pass\n                if action is not None:\n                    func_name = action.get(\"tool\")\n                    arguments = action.get(\"arguments\", {}) or {}\n                    args_str = json.dumps(arguments, separators=(\",\", \": \"))\n                    display_arguments = args_str[:140] + \"...\" if len(args_str) > 140 else args_str\n                    logger.info(f\"| \\033[1;3mAction\\033[0m: \\033[1m{func_name}\\033[0m \\033[2;37m{display_arguments}\\033[0m\")\n\n\n                if answer is not None:\n                    success = True\n                    break\n\n                if action is not None and isinstance(action, dict):\n                    tool_name = action.get(\"tool\")\n                    arguments = action.get(\"arguments\", {}) or {}\n\n                    if tool_name not in tool_map:\n                        observation = (\n                            f\"Invalid tool '{tool_name}'. 
Available tools: \"\n                            f\"{', '.join(tool_map)}\"\n                        )\n                    else:\n                        try:\n                            tool_response = await asyncio.wait_for(\n                                mcp_server.call_tool(tool_name, arguments),\n                                timeout=60,\n                            )\n                            observation = self._tool_result_to_text(tool_response)\n                        except asyncio.TimeoutError:\n                            observation = f\"Tool '{tool_name}' timed out\"\n                        except Exception as tool_exc:  # noqa: BLE001\n                            observation = f\"Tool '{tool_name}' failed: {tool_exc}\"\n\n                        if tool_call_log_file:\n                            try:\n                                with open(tool_call_log_file, \"a\", encoding=\"utf-8\") as log_file:\n                                    log_file.write(f\"| {tool_name} {json.dumps(arguments, ensure_ascii=False)}\\n\")\n                            except Exception:  # noqa: BLE001\n                                pass\n\n                    observation_message = {\n                        \"role\": \"user\",\n                        \"content\": (\n                            f\"Observation:\\n{observation}\\n\"\n                            \"Please continue reasoning and reply using the required JSON format.\"\n                        ),\n                    }\n                    messages.append(observation_message)\n                    self._update_progress(messages, total_tokens, turn_count)\n                    continue\n\n                if result is not None:\n                    observation_message = {\n                        \"role\": \"user\",\n                        \"content\": (\n                            f\"Observation:\\n{result}\\n\"\n                            \"Please continue reasoning and reply using the required 
JSON format.\"\n                        ),\n                    }\n                    messages.append(observation_message)\n                    self._update_progress(messages, total_tokens, turn_count)\n                    continue\n\n                # Unexpected structure: ask model to restate properly\n                messages.append(\n                    {\n                        \"role\": \"user\",\n                        \"content\": (\n                            \"The previous reply did not include an action, result, or answer. \"\n                            \"Please respond again using the JSON formats provided.\"\n                        ),\n                    }\n                )\n                self._update_progress(messages, total_tokens, turn_count)\n\n            if not success and final_error is None:\n                final_error = (\n                    f\"Max iterations ({self.max_iterations}) reached without a final answer.\"\n                )\n\n        if total_tokens[\"total_tokens\"] > 0:\n            log_msg = (\n                f\"|\\n|\\n| Token usage: Total: {total_tokens['total_tokens']:,} | \"\n                f\"Input: {total_tokens['input_tokens']:,} | \"\n                f\"Output: {total_tokens['output_tokens']:,}\"\n            )\n            if total_tokens.get(\"reasoning_tokens\", 0) > 0:\n                log_msg += f\" | Reasoning: {total_tokens['reasoning_tokens']:,}\"\n            logger.info(log_msg)\n            logger.info(f\"| Turns: {turn_count}\")\n\n        sdk_messages = self._convert_to_sdk_format(messages)\n\n        return {\n            \"success\": success,\n            \"output\": sdk_messages,\n            \"token_usage\": total_tokens,\n            \"turn_count\": turn_count,\n            \"error\": None if success else final_error,\n            \"litellm_run_model_name\": self.litellm_run_model_name,\n        }\n\n    def _build_task_prompt(\n        self,\n        instruction: str,\n        
tools_description: str,\n    ) -> str:\n        return (\n            f\"Task:\\n{instruction}\\n\\n\"\n            f\"Available MCP tools:\\n{tools_description}\\n\\n\"\n            \"Respond using the JSON formats below.\\n\\n\"\n            \"If you need to use a tool:\\n\"\n            \"{\\n\"\n            '  \"thought\": \"Reasoning for the next action\",\\n'\n            '  \"action\": {\\n'\n            '    \"tool\": \"tool-name\",\\n'\n            '    \"arguments\": {\\n'\n            '      \"parameter\": value\\n'\n            \"    }\\n\"\n            \"  }\\n\"\n            \"}\\n\\n\"\n            \"If you can provide the final answer:\\n\"\n            \"{\\n\"\n            '  \"thought\": \"Reasoning that justifies the answer\",\\n'\n            '  \"answer\": \"Either the final solution or \\'Task completed.\\' when no more detail is required\"\\n'\n            \"}\\n\\n\"\n            \"Remember: omitting the action object ends the task, so only do this when finished.\"\n        )\n\n    def _render_tools_description(self, tools: List[Dict[str, Any]]) -> str:\n        descriptions = []\n        for tool in tools:\n            name = tool.get(\"name\", \"unknown\")\n            description = tool.get(\"description\", \"No description provided.\")\n            input_schema = tool.get(\"inputSchema\", {}) or {}\n            properties = input_schema.get(\"properties\", {}) or {}\n            required = set(input_schema.get(\"required\", []) or [])\n\n            arg_lines = []\n            for prop_name, prop_details in properties.items():\n                details = json.dumps(prop_details, ensure_ascii=False, indent=2)\n                suffix = \" (required)\" if prop_name in required else \"\"\n                arg_lines.append(f\"- {prop_name}{suffix}: {details}\")\n\n            if arg_lines:\n                arguments_text = \"\\n\".join(arg_lines)\n            else:\n                arguments_text = \"(no arguments)\"\n\n            
descriptions.append(\n                f\"Tool: {name}\\nDescription: {description}\\nArguments:\\n{arguments_text}\"\n            )\n\n        return \"\\n\\n\".join(descriptions) if descriptions else \"(no tools available)\"\n\n    def _normalize_content(self, content: Any) -> str:\n        if isinstance(content, str):\n            return content\n        if isinstance(content, list):\n            parts = []\n            for block in content:\n                if isinstance(block, dict):\n                    if block.get(\"type\") == \"text\":\n                        parts.append(block.get(\"text\", \"\"))\n                    elif \"text\" in block:\n                        parts.append(str(block.get(\"text\")))\n                else:\n                    parts.append(str(block))\n            return \"\\n\".join(part for part in parts if part)\n        return json.dumps(content, ensure_ascii=False)\n\n    def _parse_react_response(self, payload: str) -> Dict[str, Any]:\n        candidate = payload.strip().strip(\"`\").strip()\n        if candidate.lower().startswith(\"json\"):\n            candidate = candidate[4:].lstrip()\n        try:\n            return json.loads(candidate)\n        except json.JSONDecodeError:\n            return {}\n\n    def _tool_result_to_text(self, result: Any) -> str:\n        if result is None:\n            return \"\"\n        if isinstance(result, str):\n            return result\n        try:\n            return json.dumps(result, ensure_ascii=False)\n        except TypeError:\n            return str(result)\n"
  },
  {
    "path": "src/agents/utils/__init__.py",
    "content": "\"\"\"\nUtility functions for MCPMark Agent\n====================================\n\"\"\"\n\nfrom .token_usage import TokenUsageTracker\n\n__all__ = [\"TokenUsageTracker\"]"
  },
  {
    "path": "src/agents/utils/token_usage.py",
    "content": "\"\"\"\nToken Usage Tracking Utilities\n===============================\n\"\"\"\n\nfrom typing import Dict, Any\n\n\nclass TokenUsageTracker:\n    \"\"\"Track token usage across agent executions.\"\"\"\n    \n    def __init__(self):\n        \"\"\"Initialize token usage tracker.\"\"\"\n        self.reset()\n    \n    def reset(self):\n        \"\"\"Reset all usage statistics.\"\"\"\n        self._stats = {\n            \"total_input_tokens\": 0,\n            \"total_output_tokens\": 0,\n            \"total_tokens\": 0,\n            \"total_turns\": 0,\n            \"total_execution_time\": 0.0,\n            \"successful_executions\": 0,\n            \"failed_executions\": 0,\n        }\n    \n    def update(self, success: bool, token_usage: Dict[str, int], \n               turn_count: int, execution_time: float):\n        \"\"\"\n        Update usage statistics.\n        \n        Args:\n            success: Whether execution was successful\n            token_usage: Token usage dict with input_tokens, output_tokens, total_tokens\n            turn_count: Number of conversation turns\n            execution_time: Execution time in seconds\n        \"\"\"\n        if success:\n            self._stats[\"successful_executions\"] += 1\n        else:\n            self._stats[\"failed_executions\"] += 1\n        \n        self._stats[\"total_input_tokens\"] += token_usage.get(\"input_tokens\", 0)\n        self._stats[\"total_output_tokens\"] += token_usage.get(\"output_tokens\", 0)\n        self._stats[\"total_tokens\"] += token_usage.get(\"total_tokens\", 0)\n        self._stats[\"total_turns\"] += turn_count\n        self._stats[\"total_execution_time\"] += execution_time\n    \n    def get_stats(self) -> Dict[str, Any]:\n        \"\"\"\n        Get usage statistics with calculated averages.\n        \n        Returns:\n            Dictionary containing usage statistics\n        \"\"\"\n        stats = self._stats.copy()\n        \n        # Calculate 
averages\n        total_executions = stats[\"successful_executions\"] + stats[\"failed_executions\"]\n        if total_executions > 0:\n            stats[\"avg_input_tokens\"] = stats[\"total_input_tokens\"] / total_executions\n            stats[\"avg_output_tokens\"] = stats[\"total_output_tokens\"] / total_executions\n            stats[\"avg_total_tokens\"] = stats[\"total_tokens\"] / total_executions\n            stats[\"avg_turns\"] = stats[\"total_turns\"] / total_executions\n            stats[\"avg_execution_time\"] = stats[\"total_execution_time\"] / total_executions\n            stats[\"success_rate\"] = (stats[\"successful_executions\"] / total_executions * 100)\n        else:\n            stats.update({\n                \"avg_input_tokens\": 0.0,\n                \"avg_output_tokens\": 0.0,\n                \"avg_total_tokens\": 0.0,\n                \"avg_turns\": 0.0,\n                \"avg_execution_time\": 0.0,\n                \"success_rate\": 0.0,\n            })\n        \n        return stats"
  },
  {
    "path": "src/aggregators/aggregate_results.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nSimplified MCPMark Results Aggregator\nAggregates evaluation results and generates summary with pass@k metrics.\n\"\"\"\n\nimport json\nimport os\nimport argparse\nimport subprocess\nimport shutil\nimport tempfile\nfrom pathlib import Path\nfrom collections import defaultdict\nfrom typing import Dict, List, Any, Tuple, Optional\nfrom datetime import datetime\nimport sys\nsys.path.append(str(Path(__file__).parent.parent.parent))\nfrom src.errors import is_retryable_error\nfrom src.aggregators.pricing import compute_cost_usd\n\n\n# Supported difficulty splits in ./tasks/<service>/<task_set>/\nSUPPORTED_TASK_SETS = {\"standard\", \"easy\"}\n\n\ndef discover_tasks(task_set: str = \"standard\") -> Dict[str, List[str]]:\n    \"\"\"Discover all tasks from ./tasks directory filtered by task set.\"\"\"\n    tasks_dir = Path(\"./tasks\")\n\n    all_tasks = {}\n\n    # Handle each MCP service\n    # Note: playwright and playwright_webarena both map to \"playwright\" MCP\n    service_mappings = {\n        \"filesystem\": [\"filesystem\"],\n        \"github\": [\"github\"],\n        \"notion\": [\"notion\"],\n        \"playwright\": [\"playwright\", \"playwright_webarena\"],  # Both count as playwright\n        \"postgres\": [\"postgres\"],  # supabase and insforge are variants with same tasks, don't merge\n    }\n\n    for mcp_service, task_dirs in service_mappings.items():\n        tasks: List[str] = []\n        for task_dir_name in task_dirs:\n            service_path = tasks_dir / task_dir_name\n            if not service_path.exists():\n                continue\n\n            selected_root = service_path / task_set\n\n            # Detect if this service has partitioned task sets (e.g. 
standard/easy)\n            has_partitioned_layout = any(\n                child.is_dir() and child.name in SUPPORTED_TASK_SETS\n                for child in service_path.iterdir()\n            )\n\n            if selected_root.exists():\n                search_roots = [selected_root]\n            elif has_partitioned_layout:\n                # Requested task set missing for this service; skip it for this run\n                print(f\"  ⚠️ No '{task_set}' tasks found under {service_path}\")\n                search_roots = []\n            else:\n                # Legacy layout without task sets – fall back to original structure\n                search_roots = [service_path]\n\n            for root in search_roots:\n                for category_dir in root.iterdir():\n                    if not category_dir.is_dir() or category_dir.name.startswith(\"__\"):\n                        continue\n\n                    for task_dir in category_dir.iterdir():\n                        if task_dir.is_dir() and not task_dir.name.startswith(\"__\"):\n                            tasks.append(f\"{category_dir.name}__{task_dir.name}\")\n\n        all_tasks[mcp_service] = sorted(tasks)\n    \n    return all_tasks\n\n\ndef collect_results(exp_dir: Path, k: int) -> Dict[str, Dict[str, Any]]:\n    \"\"\"Collect all results from experiment directory.\"\"\"\n    results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))\n    \n    # Current layout: results/<exp>/<model>__<service>/run-N/<category>__<task>/\n    # Some pipelines include task-set suffix in service dir (e.g., \"filesystem-easy\").\n    # Normalize such names back to canonical service keys used by tasks/ (filesystem, github, notion, playwright, postgres).\n\n    def normalize_service_name(name: str) -> str:\n        # Strip known task-set suffixes like \"-easy\" or \"-standard\"\n        if name.endswith(\"-easy\") or name.endswith(\"-standard\"):\n            base = name.rsplit(\"-\", 1)[0]\n        else:\n       
     base = name\n\n        # Map variant names to canonical service\n        if base == \"playwright_webarena\":\n            return \"playwright\"\n        return base\n    for model_service_dir in exp_dir.iterdir():\n        if not model_service_dir.is_dir() or \"__\" not in model_service_dir.name:\n            continue\n        \n        model, service = model_service_dir.name.split(\"__\", 1)\n        # Normalize service names\n        if service == \"playwright_webarena\":\n            service = \"playwright\"\n        elif service in [\"supabase\", \"insforge\"]:\n            service = \"postgres\"\n        \n        for run_idx in range(1, k + 1):\n            run_dir = model_service_dir / f\"run-{run_idx}\"\n            if not run_dir.exists():\n                continue\n            \n            for task_dir in run_dir.iterdir():\n                if not task_dir.is_dir() or \"__\" not in task_dir.name:\n                    continue\n                \n                meta_path = task_dir / \"meta.json\"\n                if meta_path.exists():\n                    with open(meta_path) as f:\n                        meta = json.load(f)\n                        task_name = task_dir.name\n                        results[model][service][f\"run-{run_idx}\"][task_name] = meta\n    \n    return results\n\n\ndef check_completeness_and_validity(\n    results: Dict, all_tasks: Dict, k: int, single_run_models: List[str]\n) -> Tuple[Dict, Dict, Dict]:\n    \"\"\"Check completeness and validity of results.\"\"\"\n    complete_models = {}\n    incomplete_models = {}\n    invalid_models = {}\n    \n    for model, model_results in results.items():\n        is_single_run = any(srm in model for srm in single_run_models)\n        required_runs = 1 if is_single_run else k\n        \n        missing_info = []\n        invalid_info = []\n        \n        # Check each service\n        for service, service_tasks in all_tasks.items():\n            if service not in 
model_results:\n                missing_info.append(f\"Missing entire service: {service}\")\n                continue\n            \n            service_results = model_results[service]\n            \n            # Check runs\n            for run_idx in range(1, required_runs + 1):\n                run_name = f\"run-{run_idx}\"\n                if run_name not in service_results:\n                    missing_info.append(f\"Missing {run_name} for {service}\")\n                    continue\n                \n                run_results = service_results[run_name]\n                \n                # Check tasks\n                missing_tasks = []\n                invalid_tasks = []\n                \n                for task in service_tasks:\n                    if task not in run_results:\n                        missing_tasks.append(task)\n                    else:\n                        # Check for retryable errors only if the task did not succeed\n                        meta = run_results[task]\n                        success = bool(meta.get(\"execution_result\", {}).get(\"success\", False))\n                        error_msg = meta.get(\"execution_result\", {}).get(\"error_message\", \"\")\n                        if (not success) and error_msg and is_retryable_error(error_msg):\n                            invalid_tasks.append(f\"{task}: {error_msg[:50]}...\")\n                \n                if missing_tasks:\n                    missing_info.append(f\"{service}/{run_name}: missing {len(missing_tasks)} tasks\")\n                if invalid_tasks:\n                    invalid_info.extend([f\"{service}/{run_name}/{t}\" for t in invalid_tasks])\n        \n        if missing_info:\n            incomplete_models[model] = missing_info\n        elif invalid_info:\n            invalid_models[model] = invalid_info\n        else:\n            complete_models[model] = model_results\n    \n    return complete_models, incomplete_models, invalid_models\n\n\ndef 
calculate_metrics(complete_models: Dict, all_tasks: Dict, k: int, single_run_models: List[str]) -> Dict:\n    \"\"\"Calculate rich metrics (totals, averages, per-run aggregates, pass@k) for complete models.\"\"\"\n    summary = {\n        \"generated_at\": datetime.now().isoformat(),\n        \"k\": k,\n        \"overall\": {},\n    }\n\n    # Initialize per-service sections mirroring overall structure\n    for service in all_tasks.keys():\n        summary[service] = {}\n\n    # Helper to safely extract token usage numbers\n    def get_token_counts(meta: Dict[str, Any]) -> Tuple[int, int, int]:\n        tu = meta.get(\"token_usage\", {}) or {}\n        input_tokens = int(tu.get(\"input_tokens\", 0) or 0)\n        output_tokens = int(tu.get(\"output_tokens\", 0) or 0)\n        total_tokens = int(tu.get(\"total_tokens\", input_tokens + output_tokens) or (input_tokens + output_tokens))\n        return input_tokens, output_tokens, total_tokens\n\n    for model, model_results in complete_models.items():\n        is_single_run = any(srm in model for srm in single_run_models)\n        runs_count = 1 if is_single_run else k\n\n        total_tasks = sum(len(tasks) for tasks in all_tasks.values())\n\n        # Aggregates across all services and runs\n        total_agent_execution_time = 0.0\n        total_input_tokens = 0\n        total_output_tokens = 0\n        total_tokens = 0\n        total_turns = 0\n        # For optional fields\n        actual_model_name: Optional[str] = None\n        # If cost info is not present in metas, leave as None\n        per_run_cost: Optional[float] = None\n        # Model-level flags (to be inferred from meta.json)\n        is_open_source_model: Optional[bool] = None\n        is_reasoning_model: Optional[bool] = None\n\n        # For pass@1 per-run statistics across all services\n        pass1_rates_per_run_overall: List[float] = []\n\n        # For pass@k and pass^k across all services\n        pass_k_task_success_any = 0\n        
pass_power_k_task_success_all = 0\n\n        # Precompute successes per task across runs for overall\n        # Also accumulate totals for tokens/time/turns\n        for run_idx in range(1, runs_count + 1):\n            run_name = f\"run-{run_idx}\"\n            successes_this_run = 0\n\n            for service, service_tasks in all_tasks.items():\n                # service-level aggregates for this model (will compute fully below)\n                for task in service_tasks:\n                    meta = (\n                        model_results\n                        .get(service, {})\n                        .get(run_name, {})\n                        .get(task)\n                    )\n\n                    # In complete_models, meta should exist; still guard\n                    if not meta:\n                        continue\n\n                    success = bool(meta.get(\"execution_result\", {}).get(\"success\", False))\n                    if success:\n                        successes_this_run += 1\n\n                    # totals accumulation\n                    total_agent_execution_time += float(meta.get(\"agent_execution_time\", 0.0) or 0.0)\n                    in_tok, out_tok, ttl_tok = get_token_counts(meta)\n                    total_input_tokens += in_tok\n                    total_output_tokens += out_tok\n                    total_tokens += ttl_tok\n                    total_turns += int(meta.get(\"turn_count\", 0) or 0)\n\n                    # capture actual model name if present\n                    if actual_model_name is None:\n                        actual_model_name = meta.get(\"actual_model_name\") or None\n\n                    # capture cost if present in any meta as per-run cost token (rare)\n                    if per_run_cost is None:\n                        # A few possible fields people use; if none present, stays None\n                        possible_cost = meta.get(\"per_run_cost\") or meta.get(\"run_cost\") or 
meta.get(\"cost\")\n                        if isinstance(possible_cost, (int, float)):\n                            per_run_cost = float(possible_cost)\n\n                    # capture model flags if present\n                    if is_open_source_model is None and \"is_open_source_model\" in meta:\n                        is_open_source_model = bool(meta.get(\"is_open_source_model\"))\n                    if is_reasoning_model is None and \"is_reasoning_model\" in meta:\n                        is_reasoning_model = bool(meta.get(\"is_reasoning_model\"))\n\n            pass1_rates_per_run_overall.append(round(successes_this_run / total_tasks, 6))\n\n        # Compute pass@k and pass^k across tasks (overall)\n        if not is_single_run:\n            for service, service_tasks in all_tasks.items():\n                for task in service_tasks:\n                    successes = []\n                    for run_idx in range(1, runs_count + 1):\n                        run_name = f\"run-{run_idx}\"\n                        meta = (\n                            model_results\n                            .get(service, {})\n                            .get(run_name, {})\n                            .get(task)\n                        )\n                        success = bool(meta.get(\"execution_result\", {}).get(\"success\", False)) if meta else False\n                        successes.append(success)\n                    if any(successes):\n                        pass_k_task_success_any += 1\n                    if all(successes):\n                        pass_power_k_task_success_all += 1\n\n        # Build overall metrics entry\n        denom = total_tasks * runs_count if total_tasks > 0 else 1\n        avg_agent_execution_time = total_agent_execution_time / denom\n        avg_input_tokens = total_input_tokens / denom\n        avg_output_tokens = total_output_tokens / denom\n        avg_total_tokens = total_tokens / denom\n        avg_turns = total_turns / denom\n\n     
   # pass@1 stats across runs\n        if pass1_rates_per_run_overall:\n            avg_pass1 = sum(pass1_rates_per_run_overall) / len(pass1_rates_per_run_overall)\n            mean = avg_pass1\n            variance = (\n                sum((r - mean) ** 2 for r in pass1_rates_per_run_overall) / len(pass1_rates_per_run_overall)\n            )\n            std_pass1 = variance ** 0.5\n        else:\n            avg_pass1 = 0.0\n            std_pass1 = 0.0\n\n        # Compute per-run tokens and cost\n        per_run_input_tokens = total_input_tokens / runs_count if runs_count else 0\n        per_run_output_tokens = total_output_tokens / runs_count if runs_count else 0\n        model_for_pricing = actual_model_name or model\n        computed_per_run_cost = compute_cost_usd(model_for_pricing, per_run_input_tokens, per_run_output_tokens)\n\n        overall_metrics = {\n            \"total_tasks\": total_tasks,\n            \"total_agent_execution_time\": total_agent_execution_time,\n            \"total_input_tokens\": total_input_tokens,\n            \"total_output_tokens\": total_output_tokens,\n            \"total_tokens\": total_tokens,\n            \"total_turns\": total_turns,\n            \"avg_agent_execution_time\": round(avg_agent_execution_time, 4),\n            \"avg_input_tokens\": round(avg_input_tokens, 4),\n            \"avg_output_tokens\": round(avg_output_tokens, 4),\n            \"avg_total_tokens\": round(avg_total_tokens, 4),\n            \"avg_turns\": round(avg_turns, 4),\n            \"per_run_input_tokens\": per_run_input_tokens,\n            \"per_run_output_tokens\": per_run_output_tokens,\n            \"per_run_cost\": computed_per_run_cost if computed_per_run_cost is not None else (per_run_cost if per_run_cost is not None else None),\n            \"actual_model_name\": actual_model_name or \"\",\n            \"is_open_source_model\": (is_open_source_model if is_open_source_model is not None else False),\n            \"is_reasoning_model\": 
(is_reasoning_model if is_reasoning_model is not None else False),\n            \"pass@1\": {\n                \"avg\": round(avg_pass1, 4),\n                \"std\": round(std_pass1, 4),\n            },\n        }\n        if not is_single_run:\n            overall_metrics[f\"pass@{k}\"] = round(pass_k_task_success_any / total_tasks, 4)\n            overall_metrics[f\"pass^{k}\"] = round(pass_power_k_task_success_all / total_tasks, 4)\n\n        summary[\"overall\"][model] = overall_metrics\n\n        # Per-service detailed metrics mirroring overall\n        for service, service_tasks in all_tasks.items():\n            service_total_tasks = len(service_tasks)\n            if service_total_tasks == 0:\n                continue\n\n            s_total_agent_execution_time = 0.0\n            s_total_input_tokens = 0\n            s_total_output_tokens = 0\n            s_total_tokens = 0\n            s_total_turns = 0\n\n            # per-run pass@1 for this service\n            s_pass1_rates_per_run: List[float] = []\n\n            # pass@k for this service\n            s_pass_k_task_success_any = 0\n            s_pass_power_k_task_success_all = 0\n\n            for run_idx in range(1, runs_count + 1):\n                run_name = f\"run-{run_idx}\"\n                s_successes_this_run = 0\n\n                for task in service_tasks:\n                    meta = (\n                        model_results\n                        .get(service, {})\n                        .get(run_name, {})\n                        .get(task)\n                    )\n                    if not meta:\n                        continue\n\n                    success = bool(meta.get(\"execution_result\", {}).get(\"success\", False))\n                    if success:\n                        s_successes_this_run += 1\n\n                    s_total_agent_execution_time += float(meta.get(\"agent_execution_time\", 0.0) or 0.0)\n                    in_tok, out_tok, ttl_tok = get_token_counts(meta)\n 
                   s_total_input_tokens += in_tok\n                    s_total_output_tokens += out_tok\n                    s_total_tokens += ttl_tok\n                    s_total_turns += int(meta.get(\"turn_count\", 0) or 0)\n\n                s_pass1_rates_per_run.append(round(s_successes_this_run / service_total_tasks, 6))\n\n            if not is_single_run:\n                for task in service_tasks:\n                    successes = []\n                    for run_idx in range(1, runs_count + 1):\n                        run_name = f\"run-{run_idx}\"\n                        meta = (\n                            model_results\n                            .get(service, {})\n                            .get(run_name, {})\n                            .get(task)\n                        )\n                        success = bool(meta.get(\"execution_result\", {}).get(\"success\", False)) if meta else False\n                        successes.append(success)\n                    if any(successes):\n                        s_pass_k_task_success_any += 1\n                    if all(successes):\n                        s_pass_power_k_task_success_all += 1\n\n            s_denom = service_total_tasks * runs_count if service_total_tasks > 0 else 1\n            s_avg_agent_execution_time = s_total_agent_execution_time / s_denom\n            s_avg_input_tokens = s_total_input_tokens / s_denom\n            s_avg_output_tokens = s_total_output_tokens / s_denom\n            s_avg_total_tokens = s_total_tokens / s_denom\n            s_avg_turns = s_total_turns / s_denom\n\n            if s_pass1_rates_per_run:\n                s_mean = sum(s_pass1_rates_per_run) / len(s_pass1_rates_per_run)\n                s_var = sum((r - s_mean) ** 2 for r in s_pass1_rates_per_run) / len(s_pass1_rates_per_run)\n                s_std = s_var ** 0.5\n            else:\n                s_mean = 0.0\n                s_std = 0.0\n\n            # Compute per-run tokens and cost for this service\n 
           s_per_run_input_tokens = s_total_input_tokens / runs_count if runs_count else 0\n            s_per_run_output_tokens = s_total_output_tokens / runs_count if runs_count else 0\n            s_computed_per_run_cost = compute_cost_usd(model_for_pricing, s_per_run_input_tokens, s_per_run_output_tokens)\n\n            service_metrics = {\n                \"total_tasks\": service_total_tasks,\n                \"total_agent_execution_time\": s_total_agent_execution_time,\n                \"total_input_tokens\": s_total_input_tokens,\n                \"total_output_tokens\": s_total_output_tokens,\n                \"total_tokens\": s_total_tokens,\n                \"total_turns\": s_total_turns,\n                \"avg_agent_execution_time\": round(s_avg_agent_execution_time, 4),\n                \"avg_input_tokens\": round(s_avg_input_tokens, 4),\n                \"avg_output_tokens\": round(s_avg_output_tokens, 4),\n                \"avg_total_tokens\": round(s_avg_total_tokens, 4),\n                \"avg_turns\": round(s_avg_turns, 4),\n                \"per_run_input_tokens\": s_per_run_input_tokens,\n                \"per_run_output_tokens\": s_per_run_output_tokens,\n                \"per_run_cost\": s_computed_per_run_cost if s_computed_per_run_cost is not None else (per_run_cost if per_run_cost is not None else None),\n                \"actual_model_name\": actual_model_name or \"\",\n                \"is_open_source_model\": (is_open_source_model if is_open_source_model is not None else False),\n                \"is_reasoning_model\": (is_reasoning_model if is_reasoning_model is not None else False),\n                \"pass@1\": {\n                    \"avg\": round(s_mean, 4),\n                    \"std\": round(s_std, 4),\n                },\n            }\n\n            if not is_single_run:\n                service_metrics[f\"pass@{k}\"] = round(s_pass_k_task_success_any / service_total_tasks, 4)\n                service_metrics[f\"pass^{k}\"] = 
round(s_pass_power_k_task_success_all / service_total_tasks, 4)\n\n            summary[service][model] = service_metrics\n\n    return summary\n\n\ndef generate_model_results(exp_dir: Path, complete_models: Dict, all_tasks: Dict):\n    \"\"\"Generate model_results directory.\"\"\"\n    model_results_dir = exp_dir / \"model_results\"\n    if model_results_dir.exists():\n        shutil.rmtree(model_results_dir)\n    model_results_dir.mkdir()\n    \n    for model, model_data in complete_models.items():\n        model_dir = model_results_dir / model\n        model_dir.mkdir()\n        \n        # Create a file for each task\n        for service, service_tasks in all_tasks.items():\n            if service not in model_data:\n                continue\n            \n            for task in service_tasks:\n                task_data = {\n                    \"model\": model,\n                    \"service\": service,\n                    \"task\": task,\n                    \"runs\": {}\n                }\n                \n                # Collect data from all runs\n                for run_name, run_data in model_data[service].items():\n                    if task in run_data:\n                        meta = run_data[task]\n                        task_data[\"runs\"][run_name] = {\n                            \"success\": meta.get(\"execution_result\", {}).get(\"success\", False),\n                            \"error_message\": meta.get(\"execution_result\", {}).get(\"error_message\"),\n                            \"execution_time\": meta.get(\"agent_execution_time\", 0),\n                            \"token_usage\": meta.get(\"token_usage\", {}),\n                            \"turn_count\": meta.get(\"turn_count\", 0)\n                        }\n                \n                # Save task file\n                task_file = model_dir / f\"{task}.json\"\n                with open(task_file, \"w\") as f:\n                    json.dump(task_data, f, indent=2)\n\n\ndef 
generate_task_results(exp_dir: Path, complete_models: Dict, all_tasks: Dict):\n    \"\"\"Generate task_results directory.\"\"\"\n    task_results_dir = exp_dir / \"task_results\"\n    if task_results_dir.exists():\n        shutil.rmtree(task_results_dir)\n    task_results_dir.mkdir()\n    \n    # For each task, collect results across all models\n    for service, service_tasks in all_tasks.items():\n        for task in service_tasks:\n            task_data = {\n                \"task\": task,\n                \"service\": service,\n                \"models\": {}\n            }\n            \n            for model, model_data in complete_models.items():\n                if service not in model_data:\n                    continue\n                \n                model_task_data = {\"runs\": []}\n                \n                for run_name, run_data in model_data[service].items():\n                    if task in run_data:\n                        meta = run_data[task]\n                        agent_time = float(meta.get(\"agent_execution_time\", 0.0) or 0.0)\n                        token_usage = meta.get(\"token_usage\", {}) or {}\n                        turn_count = int(meta.get(\"turn_count\", 0) or 0)\n                        success = bool(meta.get(\"execution_result\", {}).get(\"success\", False))\n                        model_task_data[\"runs\"].append({\n                            \"run\": run_name,\n                            \"success\": success,\n                            \"execution_time\": agent_time,\n                            \"agent_execution_time\": agent_time,\n                            \"token_usage\": token_usage,\n                            \"turn_count\": turn_count,\n                        })\n                \n                if model_task_data[\"runs\"]:\n                    # Compute per-model summary across runs for this task\n                    runs_list = model_task_data[\"runs\"]\n                    runs_count = 
len(runs_list)\n                    successful_runs = sum(1 for r in runs_list if r.get(\"success\"))\n\n                    # Averages\n                    total_agent_time = sum(float(r.get(\"agent_execution_time\", r.get(\"execution_time\", 0.0)) or 0.0) for r in runs_list)\n                    avg_agent_time = round(total_agent_time / runs_count, 2)\n\n                    def _tok(r, key):\n                        tu = r.get(\"token_usage\") or {}\n                        return int(tu.get(key, 0) or 0)\n\n                    total_input_tokens = 0\n                    total_output_tokens = 0\n                    total_total_tokens = 0\n                    for r in runs_list:\n                        in_tok = _tok(r, \"input_tokens\")\n                        out_tok = _tok(r, \"output_tokens\")\n                        ttl_tok = int((r.get(\"token_usage\") or {}).get(\"total_tokens\", in_tok + out_tok) or (in_tok + out_tok))\n                        total_input_tokens += in_tok\n                        total_output_tokens += out_tok\n                        total_total_tokens += ttl_tok\n\n                    avg_input_tokens = round(total_input_tokens / runs_count, 1)\n                    avg_output_tokens = round(total_output_tokens / runs_count, 1)\n                    avg_total_tokens = round(total_total_tokens / runs_count, 1)\n\n                    total_turns = sum(int(r.get(\"turn_count\", 0) or 0) for r in runs_list)\n                    avg_turn_count = round(total_turns / runs_count, 2)\n\n                    summary_obj = {\n                        \"total_runs\": runs_count,\n                        \"successful_runs\": successful_runs,\n                        \"avg_agent_execution_time\": avg_agent_time,\n                        \"avg_input_tokens\": avg_input_tokens,\n                        \"avg_output_tokens\": avg_output_tokens,\n                        \"avg_total_tokens\": avg_total_tokens,\n                        \"avg_turn_count\": 
avg_turn_count,\n                    }\n\n                    # Include pass@k and pass^k only for multi-run models\n                    if runs_count > 1:\n                        summary_obj[f\"pass@{runs_count}\"] = 1.0 if successful_runs > 0 else 0.0\n                        summary_obj[f\"pass^{runs_count}\"] = 1.0 if successful_runs == runs_count else 0.0\n\n                    model_task_data[\"summary\"] = summary_obj\n                    task_data[\"models\"][model] = model_task_data\n            \n            # Save task file\n            task_file = task_results_dir / f\"{task}.json\"\n            with open(task_file, \"w\") as f:\n                json.dump(task_data, f, indent=2)\n\n\ndef generate_readme(exp_name: str, summary: Dict, k: int) -> str:\n    \"\"\"Generate README.md content: one overall table plus one table per service found in the summary.\n    Each table includes Total Tasks, Pass@1 (avg ± std), Pass@k/Pass^k (if k > 1), Per-Run Cost (USD), and Avg Agent Time (s).\n    \"\"\"\n\n    def get_pass1_avg_std(metrics: Dict[str, Any]) -> Tuple[float, float]:\n        p1 = metrics.get(\"pass@1\")\n        if isinstance(p1, dict):\n            return float(p1.get(\"avg\", 0.0) or 0.0), float(p1.get(\"std\", 0.0) or 0.0)\n        # Back-compat if older summaries exist\n        return float(p1 or 0.0), 0.0\n\n    def render_section(title: str, section_data: Dict[str, Any]) -> List[str]:\n        lines_sec: List[str] = [\n            f\"## {title}\",\n            \"\",\n        ]\n\n        header = \"| Model | Total Tasks | Pass@1 (avg ± std) |\"\n        sep = \"|-------|-------------|--------------------|\"\n        # include pass@k headers if present (k>1)\n        include_k = k > 1\n        if include_k:\n            header += f\" Pass@{k} | Pass^{k} |\"\n            sep += \"----------|----------|\"\n        # Add Per-Run Cost (USD) and Avg Agent Time (s) at the end\n        header += \" Per-Run Cost (USD) |\"\n        sep += \"---------------------|\"\n        header += \" Avg Agent 
Time (s) |\"\n        sep += \"--------------------|\"\n\n        lines_sec.append(header)\n        lines_sec.append(sep)\n\n        # Sort by Pass@1 avg\n        sorted_items = sorted(\n            section_data.items(),\n            key=lambda x: get_pass1_avg_std(x[1])[0],\n            reverse=True\n        )\n\n        for model, metrics in sorted_items:\n            pass1_avg, pass1_std = get_pass1_avg_std(metrics)\n            avg_time = float(metrics.get(\"avg_agent_execution_time\", 0.0) or 0.0)\n            # Format per-run cost (up to 2 decimal places, trim trailing zeros)\n            cost_val = metrics.get(\"per_run_cost\")\n            if isinstance(cost_val, (int, float)):\n                rounded_cost = round(float(cost_val), 2)\n                formatted_cost = f\"{rounded_cost:.2f}\".rstrip('0').rstrip('.')\n                cost_str = f\"${formatted_cost}\"\n            else:\n                cost_str = \"/\"\n            row = (\n                f\"| {model} | {metrics.get('total_tasks', 0)} | \"\n                f\"{pass1_avg * 100:.1f}% ± {pass1_std * 100:.1f}% |\"\n            )\n            if include_k:\n                if f\"pass@{k}\" in metrics and f\"pass^{k}\" in metrics:\n                    row += f\" {metrics[f'pass@{k}'] * 100:.1f}% | {metrics[f'pass^{k}'] * 100:.1f}% |\"\n                else:\n                    # Single-run models do not have pass@k or pass^k; show placeholders\n                    row += \" / | / |\"\n            # Append cost and avg agent time at the end\n            row += f\" {cost_str} |\"\n            row += f\" {avg_time:.1f} |\"\n            lines_sec.append(row)\n\n        lines_sec.append(\"\")\n        return lines_sec\n\n    lines: List[str] = [\n        f\"# {exp_name} - Evaluation Results\",\n        \"\",\n        f\"Generated: {summary['generated_at']}\",\n    ]\n\n    task_set = summary.get(\"task_set\")\n    if task_set:\n        lines.append(f\"Task set: {task_set}\")\n\n    
lines.append(\"\")\n\n    # Overall table\n    lines.extend(render_section(\"Overall Performance\", summary.get(\"overall\", {})))\n\n    # Service tables: infer service keys from summary\n    reserved = {\"overall\", \"generated_at\", \"k\", \"experiment_name\", \"task_set\"}\n    service_keys = [key for key in summary.keys() if key not in reserved]\n    # Keep stable order\n    for service in sorted(service_keys):\n        title = f\"{service.capitalize()} Performance\"\n        lines.extend(render_section(title, summary.get(service, {})))\n\n    return \"\\n\".join(lines)\n\n\ndef push_to_github(exp_dir: Path, exp_name: str, branch: Optional[str] = None):\n    \"\"\"Push results to GitHub repository.\"\"\"\n    try:\n        with tempfile.TemporaryDirectory() as temp_dir:\n            temp_path = Path(temp_dir)\n            \n            print(\"📥 Cloning experiments repository...\")\n            subprocess.run([\n                \"git\", \"clone\",\n                \"git@github.com:eval-sys/mcpmark-experiments.git\",\n                str(temp_path)\n            ], check=True, capture_output=True)\n            \n            # Copy files\n            for item in [\"summary.json\", \"README.md\", \"model_results\", \"task_results\"]:\n                src = exp_dir / item\n                if src.exists():\n                    dst = temp_path / item\n                    if src.is_dir():\n                        if dst.exists():\n                            shutil.rmtree(dst)\n                        shutil.copytree(src, dst)\n                    else:\n                        shutil.copy2(src, dst)\n                    print(f\"  📄 {item}\")\n            \n            # Git operations\n            os.chdir(temp_path)\n\n            # If a branch is specified, create/checkout it before staging changes. 
Otherwise, ensure main.\n            if branch:\n                try:\n                    subprocess.run([\"git\", \"fetch\", \"origin\"], check=True)\n                except subprocess.CalledProcessError:\n                    # Non-fatal if fetch fails in some environments\n                    pass\n                subprocess.run([\"git\", \"checkout\", \"-B\", branch], check=True)\n                print(f\"  🌿 Using branch '{branch}'\")\n            else:\n                # Default to main branch\n                try:\n                    subprocess.run([\"git\", \"fetch\", \"origin\"], check=True)\n                except subprocess.CalledProcessError:\n                    pass\n                # Prefer main; if it doesn't exist locally, create tracking from origin/main\n                result = subprocess.run([\"git\", \"rev-parse\", \"--verify\", \"main\"], capture_output=True)\n                if result.returncode != 0:\n                    # Try to checkout origin/main\n                    try:\n                        subprocess.run([\"git\", \"checkout\", \"-B\", \"main\", \"origin/main\"], check=True)\n                    except subprocess.CalledProcessError:\n                        # Fallback: create main if no origin/main\n                        subprocess.run([\"git\", \"checkout\", \"-B\", \"main\"], check=True)\n                else:\n                    subprocess.run([\"git\", \"checkout\", \"main\"], check=True)\n            subprocess.run([\"git\", \"add\", \".\"], check=True)\n            \n            # Check for changes\n            result = subprocess.run(\n                [\"git\", \"diff\", \"--staged\", \"--name-only\"],\n                capture_output=True, text=True\n            )\n            \n            if not result.stdout.strip():\n                print(\"✅ No changes to push\")\n                return True\n            \n            # Commit and push\n            subprocess.run([\n                \"git\", \"commit\", \"-m\", 
f\"Update results for {exp_name}\"\n            ], check=True)\n            if branch:\n                subprocess.run([\"git\", \"push\", \"--set-upstream\", \"origin\", branch], check=True)\n            else:\n                subprocess.run([\"git\", \"push\", \"--set-upstream\", \"origin\", \"main\"], check=True)\n            print(\"✅ Successfully pushed to GitHub\")\n            \n            return True\n            \n    except subprocess.CalledProcessError as e:\n        print(f\"❌ Git operation failed: {e}\")\n        return False\n\n\ndef print_validation_report(complete: Dict, incomplete: Dict, invalid: Dict, all_tasks: Dict, k: int, single_run_models: List[str], raw_results: Dict):\n    \"\"\"Print structured validation report with summary table.\"\"\"\n    \n    # Combine all models\n    all_models = {}\n    for model in complete:\n        all_models[model] = {\"status\": \"complete\", \"data\": complete[model]}\n    for model in incomplete:\n        all_models[model] = {\"status\": \"incomplete\", \"issues\": incomplete[model]}\n    for model in invalid:\n        all_models[model] = {\"status\": \"invalid\", \"issues\": invalid[model]}\n    \n    # Calculate expected counts\n    total_expected_tasks = sum(len(tasks) for tasks in all_tasks.values())\n    \n    # Summary table\n    print(\"\\n\" + \"=\" * 100)\n    print(\"COMPLETENESS SUMMARY TABLE\")\n    print(\"=\" * 100)\n    print()\n    print(f\"{'Model':<30} {'Expected':<12} {'Actual':<12} {'Missing':<12} {'Status':<30}\")\n    print(\"-\" * 100)\n    \n    sorted_models = sorted(all_models.keys())\n    \n    for model_name in sorted_models:\n        model_info = all_models[model_name]\n        \n        # Determine expected runs and tasks\n        is_single_run = any(srm in model_name for srm in single_run_models)\n        expected_runs = 1 if is_single_run else k\n        expected_total = total_expected_tasks * expected_runs\n        \n        if model_info[\"status\"] == \"complete\":\n       
     # Count actual tasks from complete model data\n            actual_total = 0\n            for service, service_data in model_info[\"data\"].items():\n                for run_name, run_data in service_data.items():\n                    actual_total += len(run_data)\n            missing = 0\n            status = \"✅ Complete\"\n        else:\n            # For incomplete/invalid models, count from raw results\n            actual_total = 0\n            if model_name in raw_results:\n                for service, service_data in raw_results[model_name].items():\n                    for run_name, run_data in service_data.items():\n                        actual_total += len(run_data)\n            \n            missing = expected_total - actual_total\n            \n            if model_info[\"status\"] == \"incomplete\":\n                # Find which services have issues\n                problem_services = set()\n                for issue in model_info[\"issues\"]:\n                    if \"Missing entire service:\" in issue:\n                        service = issue.split(\": \")[1]\n                        problem_services.add(service)\n                    elif \"/\" in issue:\n                        service = issue.split(\"/\")[0]\n                        problem_services.add(service)\n                    elif \"Missing run\" in issue:\n                        service = issue.split(\" for \")[1]\n                        problem_services.add(service)\n                \n                if problem_services:\n                    services_str = \", \".join(sorted(problem_services))\n                    status = f\"❌ Incomplete ({services_str})\"\n                else:\n                    status = \"❌ Incomplete\"\n            else:  # invalid\n                status = \"⚠️  Invalid (retryable errors)\"\n        \n        # Format the row\n        print(f\"{model_name:<30} {expected_total:<12} {actual_total:<12} {missing:<12} {status:<30}\")\n    \n    print()\n    \n   
 # Overall statistics\n    complete_count = len(complete)\n    incomplete_count = len(incomplete)\n    invalid_count = len(invalid)\n    total_models = complete_count + incomplete_count + invalid_count\n    \n    print(\"=\" * 100)\n    print(\"OVERALL STATISTICS\")\n    print(\"=\" * 100)\n    print(f\"Total models analyzed: {total_models}\")\n    print(f\"Complete models: {complete_count}\")\n    print(f\"Incomplete models: {incomplete_count}\")\n    print(f\"Invalid models (with retryable errors): {invalid_count}\")\n    print(f\"Total tasks per MCP: {total_expected_tasks}\")\n    print(f\"Expected runs (k): {k}\")\n    \n    if not complete:\n        print(\"\\n❌ No models have complete and valid results!\")\n    else:\n        print(f\"\\n✅ {complete_count} model(s) ready for aggregation: {', '.join(sorted(complete.keys()))}\")\n\n\ndef main():\n    # Extra parser for push-related options\n    push_parent = argparse.ArgumentParser(add_help=False)\n    push_parent.add_argument(\n        \"--branch\",\n        type=str,\n        help=\"If provided with --push, push to this new branch\"\n    )\n\n    parser = argparse.ArgumentParser(\n        description=\"Simplified MCPMark results aggregator\"\n    , parents=[push_parent])\n    parser.add_argument(\"--exp-name\", required=True, help=\"Experiment name\")\n    parser.add_argument(\"--k\", type=int, default=4, help=\"Number of runs (default: 4)\")\n    parser.add_argument(\n        \"--single-run-models\",\n        type=str,\n        help=\"Comma-separated list of models that only need run-1\"\n    )\n    parser.add_argument(\n        \"--task-set\",\n        choices=sorted(SUPPORTED_TASK_SETS),\n        default=\"standard\",\n        help=\"Which task subset to aggregate (default: standard)\"\n    )\n    parser.add_argument(\"--push\", action=\"store_true\", help=\"Push to GitHub (default to main)\")\n\n    args = parser.parse_args()\n\n    # Parse single-run models\n    single_run_models = []\n    if 
args.single_run_models:\n        single_run_models = [m.strip() for m in args.single_run_models.split(\",\")]\n        print(f\"📌 Single-run models: {', '.join(single_run_models)}\")\n\n    # Setup paths\n    exp_dir = Path(\"./results\") / args.exp_name\n    if not exp_dir.exists():\n        print(f\"❌ Experiment directory {exp_dir} does not exist\")\n        return 1\n\n    print(f\"🔄 Processing experiment: {args.exp_name}\")\n\n    # Discover all tasks\n    print(f\"📋 Discovering tasks (task set: {args.task_set})...\")\n    all_tasks = discover_tasks(args.task_set)\n    total_tasks = sum(len(tasks) for tasks in all_tasks.values())\n    print(f\"  Found {total_tasks} tasks across {len(all_tasks)} services\")\n    \n    print(\"📥 Collecting results...\")\n    results = collect_results(exp_dir, args.k)\n    print(f\"  Found results for {len(results)} models\")\n    \n    # Check completeness and validity\n    print(\"✓ Checking completeness and validity...\")\n    complete_models, incomplete_models, invalid_models = check_completeness_and_validity(\n        results, all_tasks, args.k, single_run_models\n    )\n    \n    # Print validation report with summary table\n    print_validation_report(complete_models, incomplete_models, invalid_models, \n                           all_tasks, args.k, single_run_models, results)\n\n    # Determine which models to include in output (strict: only complete models)\n    models_for_output = dict(complete_models)\n    if not models_for_output:\n        return 1\n    \n    # Calculate metrics\n    print(\"\\n📊 Calculating metrics...\")\n    summary = calculate_metrics(models_for_output, all_tasks, args.k, single_run_models)\n    summary[\"experiment_name\"] = args.exp_name\n    summary[\"task_set\"] = args.task_set\n    \n    # Save summary\n    summary_path = exp_dir / \"summary.json\"\n    with open(summary_path, \"w\") as f:\n        json.dump(summary, f, indent=2)\n    print(f\"  📄 Saved summary.json\")\n    \n    # Generate 
model_results\n    print(\"📁 Generating model_results...\")\n    generate_model_results(exp_dir, models_for_output, all_tasks)\n    print(f\"  Created {len(models_for_output)} model directories\")\n    \n    # Generate task_results\n    print(\"📁 Generating task_results...\")\n    generate_task_results(exp_dir, models_for_output, all_tasks)\n    print(f\"  Created {total_tasks} task files\")\n    \n    # Generate README\n    readme_content = generate_readme(args.exp_name, summary, args.k)\n    readme_path = exp_dir / \"README.md\"\n    with open(readme_path, \"w\") as f:\n        f.write(readme_content)\n    print(\"  📄 Generated README.md\")\n    \n    # Push to GitHub if requested\n    if args.push:\n        print(\"\\n🚀 Pushing to GitHub...\")\n        push_to_github(exp_dir, args.exp_name, branch=args.branch)\n    \n    print(f\"\\n🎉 Successfully processed {args.exp_name}\")\n    return 0\n\n\nif __name__ == \"__main__\":\n    exit(main())\n"
  },
  {
    "path": "src/aggregators/aggregate_specific_results.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nSimple Results Aggregator - Aggregate specific result directories\nUsage: python -m src.aggregators.aggregate_specific_results --result-dir results/exp/model__service --k 4\n\"\"\"\n\nimport json\nimport argparse\nfrom pathlib import Path\nfrom collections import defaultdict\nfrom typing import Dict, Any, Tuple, List\nfrom datetime import datetime\nimport sys\nsys.path.append(str(Path(__file__).parent.parent.parent))\nfrom src.aggregators.pricing import compute_cost_usd\n\n\ndef collect_results_from_dir(result_dir: Path, k: int) -> Dict[str, Any]:\n    \"\"\"Collect all results from a specific result directory.\"\"\"\n    results = {}\n\n    for run_idx in range(1, k + 1):\n        run_dir = result_dir / f\"run-{run_idx}\"\n        if not run_dir.exists():\n            print(f\"⚠️  Warning: {run_dir} does not exist, skipping\")\n            continue\n\n        run_results = {}\n        for task_dir in run_dir.iterdir():\n            if not task_dir.is_dir():\n                continue\n\n            meta_path = task_dir / \"meta.json\"\n            if meta_path.exists():\n                with open(meta_path) as f:\n                    meta = json.load(f)\n                    run_results[task_dir.name] = meta\n\n        results[f\"run-{run_idx}\"] = run_results\n\n    return results\n\n\ndef get_token_counts(meta: Dict[str, Any]) -> Tuple[int, int, int]:\n    \"\"\"Extract token counts from meta.\"\"\"\n    tu = meta.get(\"token_usage\", {}) or {}\n    input_tokens = int(tu.get(\"input_tokens\", 0) or 0)\n    output_tokens = int(tu.get(\"output_tokens\", 0) or 0)\n    total_tokens = int(tu.get(\"total_tokens\", input_tokens + output_tokens) or (input_tokens + output_tokens))\n    return input_tokens, output_tokens, total_tokens\n\n\ndef calculate_metrics(results: Dict, k: int, model_name: str) -> Dict:\n    \"\"\"Calculate metrics from results.\"\"\"\n\n    # Get all unique task names\n    all_tasks = set()\n    for 
run_name, run_data in results.items():\n        all_tasks.update(run_data.keys())\n    all_tasks = sorted(all_tasks)\n\n    total_tasks = len(all_tasks)\n    actual_runs = len(results)\n\n    print(f\"\\n📊 Analysis:\")\n    print(f\"  Total unique tasks: {total_tasks}\")\n    print(f\"  Runs found: {actual_runs} (expected: {k})\")\n\n    # Aggregates\n    total_agent_execution_time = 0.0\n    total_input_tokens = 0\n    total_output_tokens = 0\n    total_tokens = 0\n    total_turns = 0\n\n    actual_model_name = None\n\n    # Per-run pass@1\n    pass1_rates_per_run = []\n\n    # For pass@k\n    pass_k_task_success_any = 0\n    pass_power_k_task_success_all = 0\n\n    for run_idx in range(1, actual_runs + 1):\n        run_name = f\"run-{run_idx}\"\n        successes_this_run = 0\n\n        for task in all_tasks:\n            meta = results.get(run_name, {}).get(task)\n\n            if not meta:\n                continue\n\n            success = bool(meta.get(\"execution_result\", {}).get(\"success\", False))\n            if success:\n                successes_this_run += 1\n\n            total_agent_execution_time += float(meta.get(\"agent_execution_time\", 0.0) or 0.0)\n            in_tok, out_tok, ttl_tok = get_token_counts(meta)\n            total_input_tokens += in_tok\n            total_output_tokens += out_tok\n            total_tokens += ttl_tok\n            total_turns += int(meta.get(\"turn_count\", 0) or 0)\n\n            if actual_model_name is None:\n                actual_model_name = meta.get(\"actual_model_name\") or None\n\n        pass1_rate = successes_this_run / total_tasks if total_tasks > 0 else 0\n        pass1_rates_per_run.append(pass1_rate)\n        print(f\"  Run {run_idx}: {successes_this_run}/{total_tasks} = {pass1_rate*100:.1f}%\")\n\n    # Calculate pass@k\n    for task in all_tasks:\n        successes = []\n        for run_idx in range(1, actual_runs + 1):\n            run_name = f\"run-{run_idx}\"\n            meta = 
results.get(run_name, {}).get(task)\n            success = bool(meta.get(\"execution_result\", {}).get(\"success\", False)) if meta else False\n            successes.append(success)\n\n        if any(successes):\n            pass_k_task_success_any += 1\n        if all(successes):\n            pass_power_k_task_success_all += 1\n\n    # Averages\n    denom = total_tasks * actual_runs if total_tasks > 0 else 1\n    avg_agent_execution_time = total_agent_execution_time / denom\n    avg_input_tokens = total_input_tokens / denom\n    avg_output_tokens = total_output_tokens / denom\n    avg_total_tokens = total_tokens / denom\n    avg_turns = total_turns / denom\n\n    # Pass@1 stats\n    if pass1_rates_per_run:\n        avg_pass1 = sum(pass1_rates_per_run) / len(pass1_rates_per_run)\n        mean = avg_pass1\n        variance = sum((r - mean) ** 2 for r in pass1_rates_per_run) / len(pass1_rates_per_run)\n        std_pass1 = variance ** 0.5\n    else:\n        avg_pass1 = 0.0\n        std_pass1 = 0.0\n\n    # Cost calculation\n    per_run_input_tokens = total_input_tokens / actual_runs if actual_runs else 0\n    per_run_output_tokens = total_output_tokens / actual_runs if actual_runs else 0\n    model_for_pricing = actual_model_name or model_name\n    per_run_cost = compute_cost_usd(model_for_pricing, per_run_input_tokens, per_run_output_tokens)\n\n    summary = {\n        \"generated_at\": datetime.now().isoformat(),\n        \"model\": model_name,\n        \"actual_model_name\": actual_model_name or model_name,\n        \"runs\": actual_runs,\n        \"total_tasks\": total_tasks,\n        \"total_agent_execution_time\": round(total_agent_execution_time, 2),\n        \"total_input_tokens\": total_input_tokens,\n        \"total_output_tokens\": total_output_tokens,\n        \"total_tokens\": total_tokens,\n        \"total_turns\": total_turns,\n        \"avg_agent_execution_time\": round(avg_agent_execution_time, 4),\n        \"avg_input_tokens\": 
round(avg_input_tokens, 2),\n        \"avg_output_tokens\": round(avg_output_tokens, 2),\n        \"avg_total_tokens\": round(avg_total_tokens, 2),\n        \"avg_turns\": round(avg_turns, 2),\n        \"per_run_input_tokens\": round(per_run_input_tokens, 2),\n        \"per_run_output_tokens\": round(per_run_output_tokens, 2),\n        \"per_run_cost\": round(per_run_cost, 4) if per_run_cost else None,\n        \"pass@1\": {\n            \"avg\": round(avg_pass1, 4),\n            \"std\": round(std_pass1, 4),\n            \"per_run\": [round(r, 4) for r in pass1_rates_per_run]\n        },\n    }\n\n    if actual_runs > 1:\n        summary[f\"pass@{actual_runs}\"] = round(pass_k_task_success_any / total_tasks, 4)\n        summary[f\"pass^{actual_runs}\"] = round(pass_power_k_task_success_all / total_tasks, 4)\n\n    return summary\n\n\ndef main():\n    parser = argparse.ArgumentParser(description=\"Simple results aggregator for specific directories\")\n    parser.add_argument(\"--result-dir\", required=True, help=\"Path to result directory (e.g., results/exp/model__service)\")\n    parser.add_argument(\"--k\", type=int, default=4, help=\"Number of runs (default: 4)\")\n    parser.add_argument(\"--output\", help=\"Output JSON file path (default: <result-dir>/summary.json)\")\n\n    args = parser.parse_args()\n\n    result_dir = Path(args.result_dir)\n    if not result_dir.exists():\n        print(f\"❌ Result directory {result_dir} does not exist\")\n        return 1\n\n    # Extract model name from directory name\n    model_name = result_dir.name.replace(\"__\", \"-\")\n\n    print(f\"🔄 Processing: {result_dir}\")\n    print(f\"📋 Model: {model_name}\")\n\n    # Collect results\n    results = collect_results_from_dir(result_dir, args.k)\n\n    if not results:\n        print(\"❌ No results found\")\n        return 1\n\n    # Calculate metrics\n    summary = calculate_metrics(results, args.k, model_name)\n\n    # Save summary\n    output_path = Path(args.output) if 
args.output else result_dir / \"summary.json\"\n    with open(output_path, \"w\") as f:\n        json.dump(summary, f, indent=2)\n\n    print(f\"\\n✅ Summary saved to: {output_path}\")\n    print(f\"\\n📈 Results:\")\n    print(f\"  Pass@1: {summary['pass@1']['avg']*100:.1f}% ± {summary['pass@1']['std']*100:.1f}%\")\n    if f\"pass@{args.k}\" in summary:\n        print(f\"  Pass@{args.k}: {summary[f'pass@{args.k}']*100:.1f}%\")\n        print(f\"  Pass^{args.k}: {summary[f'pass^{args.k}']*100:.1f}%\")\n    print(f\"  Per-run cost: ${summary['per_run_cost']:.4f}\" if summary['per_run_cost'] else \"  Per-run cost: N/A\")\n    print(f\"  Avg agent time: {summary['avg_agent_execution_time']:.2f}s\")\n    print(f\"  Avg turns: {summary['avg_turns']:.2f}\")\n    print(f\"\\n📊 Token Usage:\")\n    avg_tokens_per_run = summary['total_tokens'] / summary['runs'] if summary['runs'] > 0 else 0\n    print(f\"  Avg tokens per run: {avg_tokens_per_run:,.0f}\")\n    print(f\"  Avg tokens per turn: {summary['avg_total_tokens'] / summary['avg_turns']:.0f}\" if summary['avg_turns'] > 0 else \"  Avg tokens per turn: N/A\")\n    print(f\"  Total tokens (all runs): {summary['total_tokens']:,}\")\n    print(f\"  Total turns (all runs): {summary['total_turns']:,}\")\n\n    return 0\n\n\nif __name__ == \"__main__\":\n    exit(main())\n"
  },
  {
    "path": "src/aggregators/aggregate_task_meta.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nTask Meta Aggregator for MCPBench\nAggregates all meta.json files from the tasks directory into a single JSON file.\n\"\"\"\n\nimport json\nimport os\nimport argparse\nimport subprocess\nimport shutil\nfrom pathlib import Path\nfrom typing import Dict, List, Any, Set\n\n\ndef find_all_meta_files(tasks_root: Path = Path(\"tasks\")) -> List[Path]:\n    \"\"\"Find all meta.json files in the tasks directory\"\"\"\n    meta_files = []\n    for root, dirs, files in os.walk(tasks_root):\n        if \"meta.json\" in files:\n            meta_files.append(Path(root) / \"meta.json\")\n    return meta_files\n\n\ndef parse_meta_file(meta_path: Path) -> Dict[str, Any]:\n    \"\"\"Parse a single meta.json file\"\"\"\n    try:\n        with open(meta_path, \"r\", encoding=\"utf-8\") as f:\n            return json.load(f)\n    except Exception as e:\n        print(f\"Error parsing {meta_path}: {e}\")\n        return {}\n\n\ndef aggregate_task_meta(meta_files: List[Path]) -> Dict[str, Any]:\n    \"\"\"Aggregate all meta.json files into the required structure\"\"\"\n    all_data = []\n    categories_dict = {}  # Use dict to track unique categories\n    all_tags_set = set()  # Set to collect all unique tags\n\n    for meta_path in meta_files:\n        meta_data = parse_meta_file(meta_path)\n        if meta_data:\n            # Exclude model_results field from aggregated data\n            filtered_data = {k: v for k, v in meta_data.items() if k != \"model_results\"}\n            all_data.append(filtered_data)\n\n            # Collect categories using category_id and category_name\n            if \"category_id\" in filtered_data and \"category_name\" in filtered_data:\n                category_id = filtered_data[\"category_id\"]\n                category_name = filtered_data[\"category_name\"]\n                # Use category_id as the key to ensure uniqueness\n                categories_dict[category_id] = {\n                    \"id\": 
category_id,\n                    \"name\": category_name,\n                }\n\n            # Collect all unique tags\n            if \"tags\" in filtered_data and isinstance(filtered_data[\"tags\"], list):\n                all_tags_set.update(filtered_data[\"tags\"])\n\n    # Convert categories dict to sorted list\n    categories_list = sorted(categories_dict.values(), key=lambda x: x[\"id\"])\n\n    # Convert tags set to sorted list\n    all_tags_list = sorted(all_tags_set)\n\n    return {\n        \"data\": all_data,\n        \"count\": len(all_data),\n        \"categories\": categories_list,\n        \"tags\": all_tags_list,\n    }\n\n\ndef create_individual_task_files(meta_files: List[Path]) -> List[Dict[str, Any]]:\n    \"\"\"Create individual task JSON files with instruction and verify content\"\"\"\n    task_files = []\n\n    for meta_path in meta_files:\n        meta_data = parse_meta_file(meta_path)\n        if not meta_data or \"task_id\" not in meta_data:\n            continue\n\n        # Get the task directory\n        task_dir = meta_path.parent\n\n        # Read description.md if exists\n        description_path = task_dir / \"description.md\"\n        instruction_content = \"\"\n        if description_path.exists():\n            try:\n                with open(description_path, \"r\", encoding=\"utf-8\") as f:\n                    instruction_content = f.read()\n            except Exception as e:\n                print(f\"Warning: Could not read {description_path}: {e}\")\n\n        # Read verify.py if exists\n        verify_path = task_dir / \"verify.py\"\n        verify_content = \"\"\n        if verify_path.exists():\n            try:\n                with open(verify_path, \"r\", encoding=\"utf-8\") as f:\n                    verify_content = f.read()\n            except Exception as e:\n                print(f\"Warning: Could not read {verify_path}: {e}\")\n\n        # Create combined task data, excluding model_results\n        task_data = 
{\n            k: v for k, v in meta_data.items() if k != \"model_results\"\n        }\n        task_data[\"instruction\"] = instruction_content\n        task_data[\"verify\"] = verify_content\n\n        task_files.append({\"filename\": f\"{meta_data['task_id']}.json\", \"data\": task_data})\n\n    return task_files\n\n\ndef push_to_file(\n    output_file: Path,\n    data: Dict[str, Any],\n    task_files: List[Dict[str, Any]] = None,\n    push_to_repo: bool = False,\n) -> bool:\n    \"\"\"Save the aggregated data to file and optionally push to repo\"\"\"\n    try:\n        # Create parent directory if it doesn't exist\n        output_file.parent.mkdir(parents=True, exist_ok=True)\n\n        # Write the aggregated data\n        with open(output_file, \"w\", encoding=\"utf-8\") as f:\n            json.dump(data, f, indent=2, ensure_ascii=False)\n\n        print(f\"✅ Task meta data saved to: {output_file}\")\n        print(f\"📊 Summary:\")\n        print(f\"   - Total tasks with meta.json: {data['count']}\")\n        print(f\"   - Categories: {len(data['categories'])}\")\n        print(f\"   - Unique tags: {len(data['tags'])}\")\n\n        if push_to_repo:\n            return push_to_experiments_repo(output_file, task_files)\n\n        return True\n\n    except Exception as e:\n        print(f\"❌ Error saving file: {e}\")\n        return False\n\n\ndef push_to_experiments_repo(\n    file_path: Path, task_files: List[Dict[str, Any]] = None\n) -> bool:\n    \"\"\"Push the task meta file and individual task files to eval-sys/mcpmark-experiments repo\"\"\"\n    if not file_path.exists():\n        print(\"⚠️  File does not exist\")\n        return False\n\n    repo_url = \"https://github.com/eval-sys/mcpmark-experiments.git\"\n    temp_dir = Path(\"./temp_experiments_repo\")\n\n    try:\n        print(f\"\\n🔄 Preparing to push task meta to experiments repo...\")\n\n        # Clean up any existing temp directory\n        if temp_dir.exists():\n            
shutil.rmtree(temp_dir)\n\n        # Clone the repo\n        print(\"📥 Cloning experiments repo...\")\n        subprocess.run(\n            [\"git\", \"clone\", repo_url, str(temp_dir)], check=True, capture_output=True\n        )\n\n        # Copy the main task_meta.json file\n        target_path = temp_dir / \"task_meta.json\"\n        print(f\"📁 Copying task meta file: task_meta.json\")\n        shutil.copy2(file_path, target_path)\n\n        # Create tasks directory and copy individual task files\n        if task_files:\n            tasks_dir = temp_dir / \"tasks\"\n            tasks_dir.mkdir(exist_ok=True)\n            print(f\"📁 Creating individual task files in ./tasks directory...\")\n\n            for task_file in task_files:\n                task_file_path = tasks_dir / task_file[\"filename\"]\n                with open(task_file_path, \"w\", encoding=\"utf-8\") as f:\n                    json.dump(task_file[\"data\"], f, indent=2, ensure_ascii=False)\n\n            print(f\"   - Created {len(task_files)} individual task files\")\n\n        # Change to repo directory for git operations\n        original_dir = os.getcwd()\n        os.chdir(temp_dir)\n\n        # Add all changes\n        subprocess.run([\"git\", \"add\", \".\"], check=True)\n\n        # Check if there are changes to commit\n        result = subprocess.run(\n            [\"git\", \"status\", \"--porcelain\"], capture_output=True, text=True\n        )\n\n        if not result.stdout.strip():\n            print(\"✅ No changes to push (files are up to date)\")\n            return True\n\n        # Commit changes\n        commit_msg = \"Update task meta data and individual task files\"\n        subprocess.run([\"git\", \"commit\", \"-m\", commit_msg], check=True)\n\n        # Push changes\n        print(\"🚀 Pushing to remote repository...\")\n        subprocess.run([\"git\", \"push\"], check=True)\n\n        print(\"✅ Successfully pushed task meta and individual task files to repo!\")\n        
return True\n\n    except subprocess.CalledProcessError as e:\n        print(f\"❌ Git operation failed: {e}\")\n        return False\n    except Exception as e:\n        print(f\"❌ Error pushing to repo: {e}\")\n        return False\n    finally:\n        # Change back to original directory\n        os.chdir(original_dir)\n        # Clean up temp directory\n        if temp_dir.exists():\n            shutil.rmtree(temp_dir)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description=\"Aggregate all task meta.json files\")\n    parser.add_argument(\n        \"--output\",\n        type=str,\n        default=\"task_meta.json\",\n        help=\"Output file path (default: task_meta.json)\",\n    )\n    parser.add_argument(\n        \"--push\",\n        action=\"store_true\",\n        help=\"Push results to eval-sys/mcpmark-experiments repo\",\n    )\n    args = parser.parse_args()\n\n    print(\"🔍 Searching for meta.json files in tasks directory...\")\n\n    # Find all meta.json files\n    meta_files = find_all_meta_files()\n\n    if not meta_files:\n        print(\"❌ No meta.json files found in tasks directory\")\n        return 1\n\n    print(f\"📁 Found {len(meta_files)} meta.json files\")\n\n    # Aggregate the data\n    print(\"🔄 Aggregating task meta data...\")\n    aggregated_data = aggregate_task_meta(meta_files)\n\n    # Create individual task files if pushing to repo\n    task_files = None\n    if args.push:\n        print(\"🔄 Creating individual task files...\")\n        task_files = create_individual_task_files(meta_files)\n        print(f\"📝 Prepared {len(task_files)} individual task files\")\n\n    # Save to file\n    output_path = Path(args.output)\n    success = push_to_file(output_path, aggregated_data, task_files, args.push)\n\n    if not success:\n        return 1\n\n    if args.push:\n        print(\n            f\"🚀 Task meta data and individual task files pushed to eval-sys/mcpmark-experiments repo\"\n        )\n\n    return 0\n\n\nif __name__ 
== \"__main__\":\n    exit(main())\n"
  },
  {
    "path": "src/aggregators/pricing.py",
    "content": "\"\"\"\nPricing utilities for computing per-run cost from token usage.\n\nAll prices are specified per 1,000,000 tokens (M tokens) in USD.\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Dict, Optional\n\n\n# Price map keyed by canonical model name (lowercased)\n# Values are dicts with per-M token prices for input and output tokens\nMODEL_PRICES_PER_M: Dict[str, Dict[str, float]] = {\n    # Use exact actual_model_name keys (lowercased) provided by the user\n    # Anthropic\n    \"claude-opus-4-1-20250805\": {\"input\": 15.0, \"output\": 75.0},\n    \"claude-opus-4-5-20251101\": {\"input\": 5.0, \"output\": 25.0},\n    \"claude-sonnet-4-20250514\": {\"input\": 3.0, \"output\": 15.0},\n    \"claude-sonnet-4-5-20250929\": {\"input\": 3.0, \"output\": 15.0},\n\n    # DeepSeek\n    \"deepseek-v3.1-non-think\": {\"input\": 0.56, \"output\": 1.68},\n    \"deepseek-v3.2-chat\": {\"input\": 0.27, \"output\": 0.40},\n    \"deepseek-v3.2-reasoner\": {\"input\": 0.27, \"output\": 0.40},\n    \"deepseek-v3.1-terminus-thinking\": {\"input\": 0.21, \"output\": 0.79},\n    \"deepseek-v3.1-terminus\": {\"input\": 0.21, \"output\": 0.79},\n\n    # Google Gemini\n    \"gemini-2.5-pro\": {\"input\": 2.5, \"output\": 15.0},\n    \"gemini-2.5-flash\": {\"input\": 0.3, \"output\": 2.5},\n    \"gemini-3-pro\": {\"input\": 2.0, \"output\": 12.0},\n\n    # Z.AI\n    \"glm-4.5\": {\"input\": 0.33, \"output\": 1.32},\n\n    # OpenAI\n    \"gpt-5-2025-08-07\": {\"input\": 1.25, \"output\": 10.0},\n    \"gpt-5.2-2025-12-11\": {\"input\": 1.75, \"output\": 14.0},\n    \"gpt-5-mini-2025-08-07\": {\"input\": 0.25, \"output\": 2.0},\n    \"gpt-5-nano-2025-08-07\": {\"input\": 0.05, \"output\": 0.4},\n    \"gpt-4.1-2025-04-14\": {\"input\": 2.0, \"output\": 8.0},\n    \"gpt-4.1-mini-2025-04-14\": {\"input\": 0.4, \"output\": 1.6},\n    \"gpt-4.1-nano-2025-04-14\": {\"input\": 0.1, \"output\": 0.4},\n    \"o3-2025-04-16\": {\"input\": 2.0, \"output\": 8.0},\n    
\"o4-mini-2025-04-16\": {\"input\": 1.1, \"output\": 4.4},\n    \"gpt-oss-120b\": {\"input\": 0.072, \"output\": 0.28},\n\n    # Qwen\n    \"qwen3-coder-480b-a35b-instruct\": {\"input\": 0.2, \"output\": 0.8},\n    \"qwen3-max-preview\": {\"input\": 1.2, \"output\": 6},\n    \n    # Xai\n    \"grok-4-0709\": {\"input\": 3.0, \"output\": 15.0},\n    \"grok-code-fast-1\": {\"input\": 0.2, \"output\": 1.5},\n    \"grok-4-fast\": {\"input\": 0.2, \"output\": 0.5},\n\n    # Moonshot\n    \"kimi-k2-0711-preview\": {\"input\": 0.6, \"output\": 2.5},\n    \"kimi-k2-0905-preview\": {\"input\": 0.6, \"output\": 2.5},\n}\n\n\ndef normalize_model_name(model_name: str) -> str:\n    \"\"\"Normalize model name for pricing lookup.\n\n    Lowercases only.\n    \"\"\"\n    return (model_name or \"\").strip().lower()\n\n\ndef get_price_per_m(model_name: str) -> Optional[Dict[str, float]]:\n    \"\"\"Return per-M token prices for given model, or None if unknown.\"\"\"\n    key = normalize_model_name(model_name)\n    return MODEL_PRICES_PER_M.get(key)\n\n\ndef compute_cost_usd(model_name: str, input_tokens: float, output_tokens: float) -> Optional[float]:\n    \"\"\"Compute cost in USD given token usage and model pricing.\n\n    Prices are per 1,000,000 tokens. If pricing unknown, returns None.\n    \"\"\"\n    prices = get_price_per_m(model_name)\n    if not prices:\n        return None\n    input_cost = (input_tokens / 1_000_000.0) * prices[\"input\"]\n    output_cost = (output_tokens / 1_000_000.0) * prices[\"output\"]\n    return float(round(input_cost + output_cost, 6))\n\n\n"
  },
  {
    "path": "src/base/__init__.py",
    "content": ""
  },
  {
    "path": "src/base/login_helper.py",
    "content": "from abc import ABC, abstractmethod\n\n\nclass BaseLoginHelper(ABC):\n    \"\"\"Abstract base class for login helpers.\"\"\"\n\n    def __init__(self):\n        pass\n\n    @abstractmethod\n    def login(self, **kwargs):\n        pass\n"
  },
  {
    "path": "src/base/state_manager.py",
    "content": "import time\nfrom abc import ABC, abstractmethod\nfrom dataclasses import dataclass\nfrom typing import Any, Dict, List, Optional\n\nfrom src.logger import get_logger\nfrom .task_manager import BaseTask\n\n# Initialize logger\nlogger = get_logger(__name__)\n\n\n@dataclass\nclass InitialStateInfo:\n    \"\"\"Information about created initial state for a task.\"\"\"\n\n    state_id: str\n    state_url: Optional[str] = None\n    metadata: Optional[Dict[str, Any]] = None\n\n\nclass BaseStateManager(ABC):\n    \"\"\"\n    Simplified abstract base class for state management in MCP services.\n\n    This class provides essential functionality for initial state creation and cleanup\n    while allowing service-specific implementations through template methods.\n    \"\"\"\n\n    def __init__(self, service_name: str):\n        self.service_name = service_name\n        # Simple resource tracking for cleanup\n        self.tracked_resources: List[Dict[str, Any]] = []\n\n    # Note: Initialization is now handled in service-specific constructors\n\n    def set_up(self, task: BaseTask) -> bool:\n        \"\"\"Set up initial state for a specific task.\n\n        Args:\n            task: The task for which to set up the initial state\n\n        Returns:\n            True if setup successful, False otherwise\n        \"\"\"\n        try:\n            logger.info(\n                f\"| Setting up initial state for {self.service_name} task: {task.name}\"\n            )\n\n            # Create initial state\n            initial_state_info = self._create_initial_state(task)\n            if not initial_state_info:\n                logger.error(f\"| Failed to create initial state for {task.name}\")\n                return False\n\n            # Store initial state info in task\n            self._store_initial_state_info(task, initial_state_info)\n\n            logger.info(f\"| ✓ Initial state setup completed for {task.name}\")\n            return True\n\n        except 
Exception as e:\n            logger.error(f\"| Setup failed for {task.name}: {e}\")\n            return False\n\n    def clean_up(self, task: BaseTask = None) -> bool:\n        \"\"\"Clean up resources with common patterns and service-specific hooks.\n\n        Args:\n            task: Optional task to clean up specific resources for\n\n        Returns:\n            True if cleanup successful, False otherwise\n        \"\"\"\n        try:\n            cleanup_success = True\n\n            # Task-specific cleanup\n            if task:\n                logger.info(\n                    f\"| ○ Cleaning up initial state for {self.service_name} task: {task.name}\"\n                )\n                if not self._cleanup_task_initial_state(task):\n                    cleanup_success = False\n\n            # Clean up all tracked resources\n            if not self._cleanup_tracked_resources():\n                cleanup_success = False\n\n            if cleanup_success:\n                logger.info(f\"| ✓ Cleanup completed for {self.service_name}\")\n            else:\n                logger.warning(\n                    f\"| Cleanup completed with some failures for {self.service_name}\"\n                )\n\n            return cleanup_success\n\n        except Exception as e:\n            logger.error(f\"Cleanup failed for {self.service_name}: {e}\")\n            return False\n\n    def track_resource(\n        self,\n        resource_type: str,\n        identifier: str,\n        metadata: Optional[Dict[str, Any]] = None,\n    ) -> None:\n        \"\"\"Track a resource for later cleanup.\n\n        Args:\n            resource_type: Type of resource (e.g., 'repository', 'page')\n            identifier: Unique identifier for the resource\n            metadata: Additional metadata about the resource\n        \"\"\"\n        resource = {\n            \"type\": resource_type,\n            \"id\": identifier,\n            \"created_at\": time.time(),\n            \"metadata\": 
metadata or {},\n        }\n        self.tracked_resources.append(resource)\n        logger.debug(f\"Tracked {resource_type} resource: {identifier}\")\n\n    def get_service_config_for_agent(self) -> dict:\n        \"\"\"\n        Get service-specific configuration for agent execution.\n\n        This method should be overridden by service implementations that need\n        to provide additional configuration to the agent.\n\n        Returns:\n            Dictionary containing configuration needed by the agent/MCP server\n        \"\"\"\n        return {}\n\n    def set_verification_environment(self, messages_path: str = None) -> None:\n        \"\"\"\n        Set environment variables needed for verification scripts.\n\n        Args:\n            messages_path: Optional path to messages.json file for verification\n\n        This method can be overridden by service implementations that need\n        to set specific environment variables for their verification scripts.\n        The default implementation sets MCP_MESSAGES if provided.\n        \"\"\"\n        import os\n        if messages_path:\n            os.environ[\"MCP_MESSAGES\"] = str(messages_path)\n\n    def _cleanup_tracked_resources(self) -> bool:\n        \"\"\"Clean up all tracked resources.\"\"\"\n        cleanup_success = True\n\n        for resource in self.tracked_resources:\n            try:\n                if not self._cleanup_single_resource(resource):\n                    cleanup_success = False\n            except Exception as e:\n                logger.error(f\"Failed to cleanup resource {resource}: {e}\")\n                cleanup_success = False\n\n        # Clear resources after cleanup attempt\n        self.tracked_resources.clear()\n        return cleanup_success\n\n    # =========================================================================\n    # Abstract methods for service-specific behavior (simplified)\n    # 
=========================================================================\n\n    # Note: Service-specific initialization is now handled in constructors\n\n    @abstractmethod\n    def _create_initial_state(self, task: BaseTask) -> Optional[InitialStateInfo]:\n        \"\"\"Create initial state for a task (e.g., duplicate page, fork repo).\n\n        Args:\n            task: Task for which to create initial state\n\n        Returns:\n            InitialStateInfo object or None if creation failed\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def _store_initial_state_info(\n        self, task: BaseTask, state_info: InitialStateInfo\n    ) -> None:\n        \"\"\"Store initial state information in the task object.\n\n        Args:\n            task: Task object to update\n            state_info: Initial state information to store\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def _cleanup_task_initial_state(self, task: BaseTask) -> bool:\n        \"\"\"Clean up initial state for a specific task.\n\n        Args:\n            task: Task whose initial state should be cleaned up\n\n        Returns:\n            True if cleanup successful, False otherwise\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def _cleanup_single_resource(self, resource: Dict[str, Any]) -> bool:\n        \"\"\"Clean up a single tracked resource.\n\n        Args:\n            resource: Resource dictionary with type, id, and metadata\n\n        Returns:\n            True if cleanup successful, False otherwise\n        \"\"\"\n        pass\n"
  },
  {
    "path": "src/base/task_manager.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nEnhanced Base Task Manager with Common Task Discovery Logic\n===========================================================\n\nThis module provides an improved base class for task managers that consolidates\ncommon task discovery patterns while maintaining flexibility for service-specific needs.\n\"\"\"\n\nimport json\nimport subprocess\nimport sys\nfrom abc import ABC\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom src.logger import get_logger\nfrom src.results_reporter import TaskResult\n\nlogger = get_logger(__name__)\n\n\n@dataclass\nclass BaseTask:\n    \"\"\"Base class for evaluation tasks.\"\"\"\n\n    task_instruction_path: Path\n    task_verification_path: Path\n    service: str\n    category_id: str  # From meta.json if available, otherwise directory name\n    task_id: str  # From meta.json if available, otherwise directory name\n\n    @property\n    def name(self) -> str:\n        \"\"\"Return the task name using '__' separator format: 'category_id__task_id'.\"\"\"\n        return f\"{self.category_id}__{self.task_id}\"\n\n    def get_task_instruction(self) -> str:\n        \"\"\"Return the full text content of the task instruction file.\"\"\"\n        if not self.task_instruction_path.exists():\n            raise FileNotFoundError(\n                f\"Task instruction file not found: {self.task_instruction_path}\"\n            )\n\n        return self.task_instruction_path.read_text(encoding=\"utf-8\")\n\n\nclass BaseTaskManager(ABC):\n    \"\"\"Enhanced base class for service-specific task managers with common discovery logic.\"\"\"\n\n    def __init__(\n        self,\n        tasks_root: Path,\n        mcp_service: str = None,\n        task_class: type = None,\n        task_organization: str = None,\n        task_suite: str | None = \"standard\",\n    ):\n        \"\"\"Initialize the base task manager.\n\n        Args:\n            
tasks_root: Root directory containing all tasks\n            mcp_service: MCP service name (e.g., 'notion', 'github', 'filesystem')\n            task_class: Custom task class to use (defaults to BaseTask)\n            task_organization: 'file' or 'directory' based task organization\n            task_suite: Logical task suite (e.g., 'standard', 'easy')\n        \"\"\"\n        self.tasks_root = tasks_root\n        self.mcp_service = mcp_service or self.__class__.__name__.lower().replace(\n            \"taskmanager\", \"\"\n        )\n        self.task_class = task_class or BaseTask\n        self.task_organization = task_organization\n        self.task_suite = task_suite\n        self._tasks_cache = None\n\n    # =========================================================================\n    # Common Task Discovery Implementation\n    # =========================================================================\n\n    def discover_all_tasks(self) -> List[BaseTask]:\n        \"\"\"Discover all available tasks for this service (common implementation).\"\"\"\n        if self._tasks_cache is not None:\n            return self._tasks_cache\n\n        tasks = []\n        service_dir = self.tasks_root / (\n            self.mcp_service or self._get_service_directory_name()\n        )\n        if self.task_suite:\n            service_dir = service_dir / self.task_suite\n\n        if not service_dir.exists():\n            logger.warning(\n                f\"{self.mcp_service.title()} tasks directory does not exist: {service_dir}\"\n            )\n            return tasks\n\n        # Scan categories\n        for category_dir in service_dir.iterdir():\n            if not self._is_valid_category_dir(category_dir):\n                continue\n\n            category_id = category_dir.name\n            logger.info(\"Discovering tasks in category: %s\", category_id)\n\n            # Find tasks using service-specific logic\n            task_files = self._find_task_files(category_dir)\n   
         for task_files_info in task_files:\n                task = self._create_task_from_files(category_id, task_files_info)\n                if task:\n                    tasks.append(task)\n                    logger.debug(\"Found task: %s\", task.name)\n\n        # Sort and cache\n        # Sort by category_id and a stringified task_id to handle both numeric IDs and slugs uniformly\n        self._tasks_cache = sorted(tasks, key=lambda t: (t.category_id, str(t.task_id)))\n        logger.info(\n            \"Discovered %d %s tasks across all categories (suite=%s)\",\n            len(self._tasks_cache),\n            self.mcp_service.title(),\n            self.task_suite or \"default\",\n        )\n        return self._tasks_cache\n\n    def get_categories(self) -> List[str]:\n        \"\"\"Get a list of all task categories (common implementation).\"\"\"\n        tasks = self.discover_all_tasks()\n        return sorted(list(set(task.category_id for task in tasks)))\n\n    def filter_tasks(self, task_filter: str) -> List[BaseTask]:\n        \"\"\"Filter tasks based on category or specific task pattern (common implementation).\"\"\"\n        all_tasks = self.discover_all_tasks()\n\n        if not task_filter or task_filter.lower() == \"all\":\n            return all_tasks\n\n        # Check if it's a category filter\n        categories = self.get_categories()\n        if task_filter in categories:\n            return [task for task in all_tasks if task.category_id == task_filter]\n\n        # Check for specific task pattern (category_id/task_id)\n        if \"/\" in task_filter:\n            try:\n                category, task_part = task_filter.split(\"/\", 1)\n\n                # First try to match by task_id (could be numeric or string)\n                for task in all_tasks:\n                    if task.category_id == category:\n                        # Check if task_id matches (as string or as specific pattern)\n                        if str(task.task_id) == 
task_part:\n                            return [task]\n            except (ValueError, IndexError):\n                pass\n\n        # Fallback: check for partial matches in task names or categories\n        filtered_tasks = []\n        for task in all_tasks:\n            if (\n                task_filter in task.category_id\n                or task_filter in task.name\n                or task_filter == str(task.task_id)\n            ):\n                filtered_tasks.append(task)\n\n        return filtered_tasks\n\n    # =========================================================================\n    # Common Helper Methods\n    # =========================================================================\n\n    def get_task_instruction(self, task: BaseTask) -> str:\n        \"\"\"Get formatted task instruction (template method).\"\"\"\n        base_instruction = self._read_task_instruction(task)\n        return self._format_task_instruction(base_instruction)\n\n    def execute_task(self, task: BaseTask, agent_result: Dict[str, Any]) -> TaskResult:\n        \"\"\"Execute task verification (template method).\"\"\"\n        logger.info(f\"| Verifying task ({self.mcp_service.title()}): {task.name}\")\n\n        # Track agent success separately\n        agent_success = agent_result.get(\"success\", False)\n        agent_error = None\n        verification_success = False\n        verification_error = None\n        verification_output = None\n\n        # Handle agent failure (but still continue to verification)\n        if not agent_success:\n            agent_error = agent_result.get(\"error\", \"Agent execution failed\")\n            # Standardize MCP network errors\n            agent_error = self._standardize_error_message(agent_error)\n            \n            logger.error(f\"| ✗ Agent execution failed for task\")\n            logger.error(f\"| ⚠️ Error: {agent_error}\")\n            logger.info(f\"| - Proceeding with verification despite agent failure\")\n\n        
try:\n            # Always run verification regardless of agent success\n            verify_result = self.run_verification(task)\n\n            # Process verification results\n            verification_success = verify_result.returncode == 0\n            verification_output = verify_result.stdout\n            \n            # Log verification output\n            if verification_output:\n                print(verification_output)\n            \n            # Capture verification error if failed\n            if not verification_success:\n                verification_error = verify_result.stderr if verify_result.stderr else \"Verification failed with no error message\"\n\n            if verification_success:\n                logger.info(f\"| Verification Result: \\033[92m✓ PASSED\\033[0m\")\n            else:\n                logger.error(f\"| Verification Result: \\033[91m✗ FAILED\\033[0m\")\n                if verification_error:\n                    logger.error(f\"| Verification Error: {verification_error}\")\n\n            return TaskResult(\n                task_name=task.name,\n                success=verification_success,\n                error_message=agent_error,  # Agent execution error\n                verification_error=verification_error,  # Verification error\n                verification_output=verification_output,  # Verification output\n                model_output=agent_result.get(\"output\", \"\"),\n                category_id=task.category_id,\n                task_id=task.task_id,\n                token_usage=agent_result.get(\"token_usage\", {}),\n                turn_count=agent_result.get(\"turn_count\", -1),\n            )\n\n        except Exception as e:\n            logger.error(f\"| Task verification failed: {e}\", exc_info=True)\n            return TaskResult(\n                task_name=task.name,\n                success=False,\n                error_message=agent_error,  # Keep agent error if any\n                
verification_error=str(e),  # Verification exception\n                verification_output=None,\n                category_id=task.category_id,\n                task_id=task.task_id,\n                model_output=agent_result.get(\"output\", \"\"),\n                token_usage=agent_result.get(\"token_usage\", {}),\n                turn_count=agent_result.get(\"turn_count\", 0),\n            )\n\n    def run_verification(self, task: BaseTask) -> subprocess.CompletedProcess:\n        \"\"\"Run the verification script for a task (can be overridden).\n\n        Default implementation runs the verification command.\n        Services can override this to add environment variables or custom logic.\n        \"\"\"\n        return subprocess.run(\n            self._get_verification_command(task),\n            capture_output=True,  # Capture stdout and stderr for logging\n            text=True,\n            timeout=300,\n        )\n\n    # =========================================================================\n    # Abstract Methods - Minimal Set Required\n    # =========================================================================\n\n    def _get_service_directory_name(self) -> str:\n        \"\"\"Return the service directory name (e.g., 'notion', 'github').\n\n        Default implementation uses the service parameter if provided.\n        \"\"\"\n        if self.mcp_service:\n            return self.mcp_service\n        raise NotImplementedError(\n            \"Must provide service parameter or implement _get_service_directory_name\"\n        )\n\n    def _get_task_organization(self) -> str:\n        \"\"\"Return task organization type: 'directory' or 'file'.\n\n        - 'directory': Tasks organized as task_X/description.md (Notion)\n        - 'file': Tasks organized as task_X.md (GitHub, Filesystem)\n\n        Default implementation uses the task_organization parameter if provided.\n        \"\"\"\n        if self.task_organization:\n            return 
self.task_organization\n        raise NotImplementedError(\n            \"Must provide task_organization parameter or implement _get_task_organization\"\n        )\n\n    # Note: _create_task_instance is no longer needed - use task_class parameter instead\n\n    # =========================================================================\n    # Hook Methods with Smart Defaults\n    # =========================================================================\n\n    def _is_valid_category_dir(self, category_dir: Path) -> bool:\n        \"\"\"Check if a directory is a valid category directory.\"\"\"\n        return (\n            category_dir.is_dir()\n            and not category_dir.name.startswith(\".\")\n            and category_dir.name != \"utils\"\n            and category_dir.name != \"__pycache__\"\n        )\n\n    def _find_task_files(self, category_dir: Path) -> List[Dict[str, Any]]:\n        \"\"\"Find task files in a category directory (smart default implementation).\n\n        Automatically handles both directory-based and file-based organization.\n        \"\"\"\n        task_files: List[Dict[str, Any]] = []\n\n        for task_dir in category_dir.iterdir():\n            # Skip anything that is not a directory or is hidden\n            if not task_dir.is_dir() or task_dir.name.startswith(\".\"):\n                continue\n\n            description_path = task_dir / \"description.md\"\n            verify_path = task_dir / \"verify.py\"\n\n            # We consider a directory a valid task only if the two mandatory files exist\n            if not (description_path.exists() and verify_path.exists()):\n                logger.warning(\n                    \"Skipping %s – missing description.md or verify.py\", task_dir\n                )\n                continue\n\n            task_files.append(\n                {\n                    \"task_id\": task_dir.name,\n                    \"instruction_path\": description_path,\n                    
\"verification_path\": verify_path,\n                }\n            )\n\n        return task_files\n\n    def _create_task_from_files(\n        self, category_id: str, task_files_info: Dict[str, Any]\n    ) -> Optional[BaseTask]:\n        \"\"\"Create a task from file information with meta.json support.\"\"\"\n        # Check for meta.json\n        meta_path = task_files_info[\"instruction_path\"].parent / \"meta.json\"\n        # Default to directory names\n        task_id = task_files_info[\"task_id\"]\n        final_category_id = category_id\n        \n        if meta_path.exists():\n            try:\n                with open(meta_path, 'r') as f:\n                    meta_data = json.load(f)\n                    # Use values from meta.json if available\n                    final_category_id = meta_data.get(\"category_id\", category_id)\n                    task_id = meta_data.get(\"task_id\", task_id)\n            except Exception as e:\n                logger.warning(f\"Failed to load meta.json from {meta_path}: {e}\")\n        \n        return self.task_class(\n            task_instruction_path=task_files_info[\"instruction_path\"],\n            task_verification_path=task_files_info[\"verification_path\"],\n            service=self.mcp_service,\n            category_id=final_category_id,\n            task_id=task_id,\n        )\n\n    def _read_task_instruction(self, task: BaseTask) -> str:\n        \"\"\"Read and return the task instruction content.\"\"\"\n        return task.get_task_instruction()\n\n    def _format_task_instruction(self, base_instruction: str) -> str:\n        \"\"\"Format task instruction with Notion-specific additions.\"\"\"\n        return (\n            base_instruction\n            + \"\\n\\nNote: Based on your understanding, solve the task all at once by yourself, don't ask for my opinions on anything.\"\n        )\n\n    def _get_verification_command(self, task: BaseTask) -> List[str]:\n        \"\"\"Get the command to run task 
verification (default implementation).\"\"\"\n        return [sys.executable, str(task.task_verification_path)]\n\n    def _standardize_error_message(self, error_message: str) -> str:\n        \"\"\"Standardize error messages for consistent reporting.\"\"\"\n        from src.errors import standardize_error_message\n\n        return standardize_error_message(error_message, mcp_service=self.mcp_service)\n"
  },
  {
    "path": "src/config/__init__.py",
    "content": ""
  },
  {
    "path": "src/config/config_schema.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nCentralized Configuration Schema for MCPMark\n=============================================\n\nThis module provides a unified configuration system with validation,\ntype safety, and support for multiple configuration sources.\n\"\"\"\n\nimport os\nfrom abc import ABC, abstractmethod\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Any, Dict, Optional\n\nimport yaml\nfrom dotenv import load_dotenv\n\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\n# Lazy import to avoid circular dependencies\ndef get_service_definition(service_name: str) -> dict:\n    from src.services import get_service_definition as _get_service_def\n\n    return _get_service_def(service_name)\n\n\n@dataclass\nclass ConfigValue:\n    \"\"\"Represents a configuration value with metadata.\"\"\"\n\n    key: str\n    value: Any\n    source: str  # 'env', 'file', 'default'\n    required: bool = True\n    description: str = \"\"\n    validator: Optional[callable] = None\n\n    def validate(self) -> bool:\n        \"\"\"Validate the configuration value.\"\"\"\n        if self.required and self.value is None:\n            raise ValueError(f\"Required configuration '{self.key}' is missing\")\n\n        if self.validator and self.value is not None:\n            if not self.validator(self.value):\n                raise ValueError(f\"Invalid value for '{self.key}': {self.value}\")\n\n        return True\n\n\nclass ConfigSchema(ABC):\n    \"\"\"Abstract base class for service configuration schemas.\"\"\"\n\n    def __init__(self, service_name: str):\n        self.service_name = service_name\n        self._values: Dict[str, ConfigValue] = {}\n        self._load_dotenv()\n        self._define_schema()\n        self._load_values()\n        self._validate()\n\n    @abstractmethod\n    def _define_schema(self) -> None:\n        \"\"\"Define the configuration schema for this service.\"\"\"\n        pass\n\n    def 
_load_dotenv(self) -> None:\n        \"\"\"Load environment variables from .mcp_env file.\"\"\"\n        load_dotenv(dotenv_path=\".mcp_env\", override=False)\n\n    def _add_config(\n        self,\n        key: str,\n        env_var: Optional[str] = None,\n        default: Any = None,\n        required: bool = True,\n        description: str = \"\",\n        validator: Optional[callable] = None,\n        transform: Optional[callable] = None,\n    ) -> None:\n        \"\"\"Add a configuration value to the schema.\"\"\"\n        # Try to get value from environment first\n        value = None\n        source = \"default\"\n\n        if env_var:\n            env_value = os.getenv(env_var)\n            if env_value is not None:\n                value = transform(env_value) if transform else env_value\n                source = \"env\"\n\n        # Use default if no environment value\n        if value is None and default is not None:\n            value = default\n            source = \"default\"\n\n        self._values[key] = ConfigValue(\n            key=key,\n            value=value,\n            source=source,\n            required=required,\n            description=description,\n            validator=validator,\n        )\n\n    def _load_values(self) -> None:\n        \"\"\"Load configuration values from file if available.\"\"\"\n        config_file = Path(f\"config/{self.service_name}.yaml\")\n        if config_file.exists():\n            with open(config_file) as f:\n                file_config = yaml.safe_load(f)\n\n            for key, value in file_config.items():\n                if key in self._values and self._values[key].value is None:\n                    self._values[key].value = value\n                    self._values[key].source = \"file\"\n\n    def _validate(self) -> None:\n        \"\"\"Validate all configuration values.\"\"\"\n        for config_value in self._values.values():\n            config_value.validate()\n\n    def get(self, key: str, 
default: Any = None) -> Any:\n        \"\"\"Get a configuration value.\"\"\"\n        if key in self._values:\n            return self._values[key].value\n        return default\n\n    def get_all(self) -> Dict[str, Any]:\n        \"\"\"Get all configuration values as a dictionary.\"\"\"\n        return {k: v.value for k, v in self._values.items()}\n\n    def get_debug_info(self) -> Dict[str, Dict[str, Any]]:\n        \"\"\"Get detailed configuration information for debugging.\"\"\"\n        return {\n            k: {\n                \"value\": v.value,\n                \"source\": v.source,\n                \"required\": v.required,\n                \"description\": v.description,\n            }\n            for k, v in self._values.items()\n        }\n\n\nclass GenericConfigSchema(ConfigSchema):\n    \"\"\"Generic configuration schema that reads from service definitions.\"\"\"\n\n    def __init__(self, service_name: str):\n        # Get service definition before calling parent init\n        self.service_definition = get_service_definition(service_name)\n        super().__init__(service_name)\n\n    def _define_schema(self) -> None:\n        \"\"\"Define schema from service definition.\"\"\"\n        config_schema = self.service_definition.get(\"config_schema\", {})\n\n        for key, config in config_schema.items():\n            # Handle transform strings\n            transform = None\n            transform_str = config.get(\"transform\")\n            if transform_str == \"bool\":\n                transform = lambda x: x.lower() in [\"true\", \"1\", \"yes\"]\n            elif transform_str == \"int\":\n                transform = int\n            elif transform_str == \"path\":\n                transform = lambda x: Path(x) if x else None\n            elif transform_str == \"list\":\n                transform = lambda x: [t.strip() for t in x.split(\",\")] if x else []\n\n            # Handle validator strings\n            validator = None\n            
validator_str = config.get(\"validator\")\n            if validator_str == \"port\":\n                validator = lambda x: 1 <= x <= 65535\n            elif validator_str and validator_str.startswith(\"in:\"):\n                valid_values = validator_str[3:].split(\",\")\n                validator = lambda x, values=valid_values: x in values\n\n            self._add_config(\n                key=key,\n                env_var=config.get(\"env_var\"),\n                default=config.get(\"default\"),\n                required=config.get(\"required\", True),\n                description=config.get(\"description\", \"\"),\n                validator=validator,\n                transform=transform,\n            )\n\n\n# Configuration Registry\n\n\nclass ConfigRegistry:\n    \"\"\"Central registry for all service configurations.\"\"\"\n\n    _instances: Dict[str, ConfigSchema] = {}\n\n    @classmethod\n    def get_config(cls, service_name: str) -> ConfigSchema:\n        \"\"\"Get or create configuration for a service.\"\"\"\n        if service_name not in cls._instances:\n            cls._instances[service_name] = GenericConfigSchema(service_name)\n        return cls._instances[service_name]\n\n    @classmethod\n    def validate_all(cls) -> Dict[str, bool]:\n        \"\"\"Validate all registered configurations.\"\"\"\n        from src.services import get_supported_mcp_services\n\n        results = {}\n        for service_name in get_supported_mcp_services():\n            try:\n                cls.get_config(service_name)\n                results[service_name] = True\n            except Exception as e:\n                logger.error(f\"Configuration validation failed for {service_name}: {e}\")\n                results[service_name] = False\n        return results\n\n    @classmethod\n    def export_template(cls, service_name: str, output_path: Path) -> None:\n        \"\"\"Export a configuration template for a service.\"\"\"\n        config = 
cls.get_config(service_name)\n\n        template = {\"service\": service_name, \"configuration\": {}}\n\n        for key, config_value in config._values.items():\n            template[\"configuration\"][key] = {\n                \"value\": config_value.value\n                if config_value.source == \"default\"\n                else None,\n                \"description\": config_value.description,\n                \"required\": config_value.required,\n                \"env_var\": f\"${{{key.upper()}}}\",\n            }\n\n        with open(output_path, \"w\") as f:\n            yaml.dump(template, f, default_flow_style=False, sort_keys=False)\n\n\n# Utility Functions\n\n\ndef get_service_config(service_name: str) -> Dict[str, Any]:\n    \"\"\"Get service configuration as a dictionary.\"\"\"\n    return ConfigRegistry.get_config(service_name).get_all()\n"
  },
  {
    "path": "src/errors.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nSimple Error Handling for MCPMark\n==================================\n\nProvides basic error standardization and retry logic.\n\"\"\"\n\nfrom typing import Optional\n\n\n\"\"\"Retryable error detection via minimal substring matching (lower-case).\"\"\"\n\n# Keep this list short and generic; aim to catch API/infrastructure issues only.\nRETRYABLE_PATTERNS = {\n    \"ratelimit\",              # e.g., RateLimitError, too many requests\n    # \"connection\",             # connection refused/reset/error\n    \"agent execution failed\",\n    \"unavailable\",            # service unavailable\n    # \"execution timed out\",    # timeout\n    \"internal server error\",  # 500s\n    \"network error\",          # generic network issue\n    \"quota\",                  # budget/quota exceeded\n    # \"llm provider not provided\",  # litellm error\n    # pipeline infra signals\n    \"account balance\",\n    \"mcp network error\",\n    \"state duplication error\",\n    \"thought_signature\",\n    \"overloaded.\"\n}\n\n\ndef is_retryable_error(error: str) -> bool:\n    \"\"\"Return True if the error string contains any retryable pattern.\"\"\"\n    error_lower = str(error or \"\").lower()\n    return any(pattern in error_lower for pattern in RETRYABLE_PATTERNS)\n\n\ndef standardize_error_message(error: str, mcp_service: Optional[str] = None) -> str:\n    \"\"\"Standardize error messages for consistent reporting.\"\"\"\n    error_str = str(error).strip()\n\n    # Common standardizations\n    if \"timeout\" in error_str.lower():\n        base_msg = \"Operation timed out\"\n    elif (\n        \"connection refused\" in error_str.lower() or \"econnrefused\" in error_str.lower()\n    ):\n        base_msg = \"Connection refused\"\n    elif \"not found\" in error_str.lower():\n        base_msg = \"Resource not found\"\n    elif \"already exists\" in error_str.lower():\n        base_msg = \"Resource already exists\"\n    else:\n        # 
Return original message if no standardization applies\n        return error_str\n\n    # Add MCP service prefix if provided\n    if mcp_service:\n        return f\"{mcp_service.title()} {base_msg}\"\n\n    return base_msg\n"
  },
  {
    "path": "src/evaluator.py",
    "content": "import time\nimport json\nimport shutil\n\nfrom datetime import datetime\nfrom pathlib import Path\nfrom typing import List, Optional\n\nfrom src.logger import get_logger\nfrom src.factory import MCPServiceFactory\nfrom src.model_config import ModelConfig\nfrom src.results_reporter import EvaluationReport, ResultsReporter, TaskResult\nfrom src.errors import is_retryable_error\nfrom src.agents import AGENT_REGISTRY\n\n# Initialize logger\nlogger = get_logger(__name__)\n\n\nclass MCPEvaluator:\n    def __init__(\n        self,\n        mcp_service: str,\n        model: str,\n        timeout: int = 300,\n        exp_name: str = \"test-run\",\n        output_dir: Path = None,\n        reasoning_effort: str = \"default\",\n        agent_name: str = \"mcpmark\",\n        task_suite: str = \"standard\",\n        compaction_token: int = 0,\n    ):\n        # Main configuration\n        self.mcp_service = mcp_service\n        self.timeout = timeout\n        self.agent_name = (agent_name or \"mcpmark\").lower()\n        self.task_suite = (task_suite or \"standard\").lower()\n        if self.agent_name not in AGENT_REGISTRY:\n            raise ValueError(f\"Unsupported agent '{agent_name}'. 
Available: {sorted(AGENT_REGISTRY)}\")\n        \n        # Initialize model configuration\n        self.reasoning_effort = reasoning_effort\n        self.model_name = model\n        \n        model_config = ModelConfig(self.model_name)\n        self.api_key = model_config.api_key\n        self.base_url = model_config.base_url\n        self.litellm_input_model_name = model_config.litellm_input_model_name\n        \n        # Track the actual model name from LiteLLM responses\n        self.litellm_run_model_name = None\n\n        # Initialize managers using the factory pattern (simplified)\n        self.task_manager = MCPServiceFactory.create_task_manager(\n            mcp_service, task_suite=self.task_suite\n        )\n        self.state_manager = MCPServiceFactory.create_state_manager(mcp_service)\n\n        # Obtain static service configuration from state manager (e.g., notion_key)\n        self.service_config = self.state_manager.get_service_config_for_agent()\n\n        # Initialize agent for LLM and MCP server management. 
The agent will\n        # automatically refresh its service configuration from the state\n        # manager before each execution, so per-task manual updates are no\n        # longer needed.\n        agent_cls = AGENT_REGISTRY[self.agent_name]\n        self.agent = agent_cls(\n            litellm_input_model_name=self.litellm_input_model_name,\n            api_key=self.api_key,\n            base_url=self.base_url,\n            mcp_service=mcp_service,\n            timeout=timeout,\n            service_config=self.service_config,\n            service_config_provider=self.state_manager.get_service_config_for_agent,\n            reasoning_effort=self.reasoning_effort,\n            compaction_token=compaction_token,\n        )\n\n        # Initialize results reporter\n        self.results_reporter = ResultsReporter()\n\n        # Output directory handling\n        if self.reasoning_effort != \"default\":\n            model_slug = self.model_name.replace(\".\", \"-\") + \"-\" + self.reasoning_effort\n        else:\n            model_slug = self.model_name.replace(\".\", \"-\")\n\n        service_for_dir = \"playwright\" if mcp_service == \"playwright_webarena\" else mcp_service\n        suite_suffix = \"\" if self.task_suite in (\"standard\", \"\", None) else f\"-{self.task_suite}\"\n        service_dir_name = f\"{service_for_dir}{suite_suffix}\"\n        self.base_experiment_dir = output_dir / f\"{model_slug}__{service_dir_name}\" / exp_name\n        self.base_experiment_dir.mkdir(parents=True, exist_ok=True)\n\n    def _format_duration(self, seconds: float) -> str:\n        \"\"\"Format duration: <1s as ms, otherwise seconds.\"\"\"\n        return f\"{(seconds * 1000):.2f}ms\" if seconds < 1 else f\"{seconds:.2f}s\"\n\n    def _get_task_output_dir(self, task) -> Path:\n        \"\"\"Return the directory path for storing this task's reports using '__' separator.\"\"\"\n        # Use category_id and task_id with '__' separator\n        category_id = task.category_id if 
task.category_id else \"uncategorized\"\n        task_id = str(task.task_id)\n\n        return self.base_experiment_dir / f\"{category_id}__{task_id}\"\n\n    # ------------------------------------------------------------------\n    # Resuming helpers\n    # ------------------------------------------------------------------\n\n    def _load_latest_task_result(self, task) -> Optional[TaskResult]:\n        \"\"\"Return the most recent TaskResult for *task* if it has been run before.\"\"\"\n        task_dir = self._get_task_output_dir(task)\n        if not task_dir.exists():\n            return None\n\n        meta_path = task_dir / \"meta.json\"\n        if not meta_path.exists():\n            return None\n\n        try:\n            with meta_path.open(\"r\", encoding=\"utf-8\") as f:\n                meta_data = json.load(f)\n\n            return TaskResult(\n                task_name=meta_data[\"task_name\"],\n                success=meta_data[\"execution_result\"][\"success\"],\n                error_message=meta_data[\"execution_result\"].get(\"error_message\"),\n                verification_error=meta_data[\"execution_result\"].get(\"verification_error\"),\n                verification_output=meta_data[\"execution_result\"].get(\"verification_output\"),\n                category_id=task.category_id,\n                task_id=task.task_id,\n                model_output=None,\n                token_usage=meta_data.get(\"token_usage\", {}),\n                turn_count=meta_data.get(\"turn_count\"),\n                agent_execution_time=meta_data.get(\"agent_execution_time\", 0.0),\n                task_execution_time=meta_data.get(\"task_execution_time\", 0.0),\n            )\n        except Exception as exc:\n            logger.warning(\"Failed to load existing result for %s: %s\", task.name, exc)\n        return None\n\n    def _gather_all_task_results(self) -> List[TaskResult]:\n        \"\"\"Scan *all* task sub-directories and collect the latest TaskResult from 
each.\"\"\"\n        results: list[TaskResult] = []\n        if not self.base_experiment_dir.exists():\n            return results\n\n        for task_dir in self.base_experiment_dir.iterdir():\n            if not task_dir.is_dir():\n                continue\n            meta_path = task_dir / \"meta.json\"\n            if not meta_path.exists():\n                continue\n            try:\n                with meta_path.open(\"r\", encoding=\"utf-8\") as f:\n                    meta_data = json.load(f)\n\n                category_id, task_id = task_dir.name.split(\"__\", 1)\n\n                result = TaskResult(\n                    task_name=meta_data[\"task_name\"],\n                    success=meta_data[\"execution_result\"][\"success\"],\n                    error_message=meta_data[\"execution_result\"].get(\"error_message\"),\n                    verification_error=meta_data[\"execution_result\"].get(\"verification_error\"),\n                    verification_output=meta_data[\"execution_result\"].get(\"verification_output\"),\n                    category_id=category_id,\n                    task_id=task_id,\n                    model_output=None,\n                    token_usage=meta_data.get(\"token_usage\", {}),\n                    turn_count=meta_data.get(\"turn_count\"),\n                    agent_execution_time=meta_data.get(\"agent_execution_time\", 0.0),\n                    task_execution_time=meta_data.get(\"task_execution_time\", 0.0),\n                )\n                results.append(result)\n            except Exception as exc:\n                logger.warning(\n                    \"Failed to parse existing report in %s: %s\", task_dir, exc\n                )\n        return results\n\n    def _run_single_task(self, task) -> TaskResult:\n        \"\"\"\n        Runs a single task, including setup, agent execution, verification, and cleanup.\n        \"\"\"\n        # Track overall task start time\n        task_start_time = time.time()\n\n      
  # ------------------------------------------------------------------\n        # Stage 1: Set up the initial state for the task\n        # ------------------------------------------------------------------\n        setup_start_time = time.time()\n        logger.info(\n            \"\\n┌─ Stage 1: Setup ─────────────────────────────────────────────────────\"\n        )\n        setup_success = self.state_manager.set_up(task)\n        setup_time = time.time() - setup_start_time\n\n        if not setup_success:\n            logger.error(f\"| State setup failed for task: {task.name}\")\n            task_total_time = time.time() - task_start_time\n            return TaskResult(\n                task_name=task.name,\n                success=False,\n                error_message=\"State Duplication Error\",\n                verification_error=None,\n                verification_output=None,\n                category_id=task.category_id,\n                task_id=task.task_id,\n                agent_execution_time=0.0,\n                task_execution_time=task_total_time,\n            )\n        display_time = self._format_duration(setup_time)\n        logger.info(f\"└─ Completed in {display_time}\\n\")\n        \n        # ------------------------------------------------------------------\n        # Stage 2: Execute the task using the agent\n        # ------------------------------------------------------------------\n        logger.info(\n            \"┌─ Stage 2: Execute ───────────────────────────────────────────────────\"\n        )\n\n        agent_execution_start_time = time.time()\n\n        # Get task instruction from task manager\n        task_instruction = self.task_manager.get_task_instruction(task)\n\n        # Prepare task_output_dir and tool call log file\n        task_output_dir = self._get_task_output_dir(task)\n        task_output_dir.mkdir(parents=True, exist_ok=True)\n        execution_log_path = task_output_dir / \"execution.log\"\n\n        # Remove 
existing execution.log to ensure clean start\n        if execution_log_path.exists():\n            execution_log_path.unlink()\n\n        # Execute with agent\n        agent_result = self.agent.execute_sync(\n            task_instruction, str(execution_log_path)\n        )\n\n        agent_execution_time = time.time() - agent_execution_start_time\n        \n        # Extract actual model name from LiteLLM response\n        if agent_result.get(\"litellm_run_model_name\"):\n            self.litellm_run_model_name = agent_result[\"litellm_run_model_name\"]\n\n        # Write messages.json to task_output_dir\n        messages_path = task_output_dir / \"messages.json\"\n        self.results_reporter.save_messages_json(\n            agent_result.get(\"output\", []), messages_path\n        )\n\n        # Set service-specific environment variables for verification scripts\n        self.state_manager.set_verification_environment(str(messages_path))\n        logger.info(f\"└─ Completed in {self._format_duration(agent_execution_time)}\\n\")\n\n        # ------------------------------------------------------------------\n        # Stage 3: Verify\n        # ------------------------------------------------------------------\n        logger.info(\n            \"┌─ Stage 3: Verify ────────────────────────────────────────────────────\"\n        )\n        verify_start_time = time.time()\n        try:\n            result = self.task_manager.execute_task(task, agent_result)\n        finally:\n            # Clean up environment variables\n            import os\n\n            os.environ.pop(\"MCP_MESSAGES\", None)\n            os.environ.pop(\"MCP_GITHUB_TOKEN\", None)\n            \n        verify_time = time.time() - verify_start_time\n        logger.info(f\"└─ Completed in {self._format_duration(verify_time)}\\n\")\n\n        # ------------------------------------------------------------------\n        # Stage 4: Clean up\n        # 
------------------------------------------------------------------\n        logger.info(\n            \"┌─ Stage 4: Cleanup ───────────────────────────────────────────────────\"\n        )\n        cleanup_start_time = time.time()\n        self.state_manager.clean_up(task)\n        cleanup_time = time.time() - cleanup_start_time\n        logger.info(f\"└─ Completed in {self._format_duration(cleanup_time)}\\n\")\n\n        # Calculate total task execution time\n        task_total_time = time.time() - task_start_time\n\n        # Add timing information to the result\n        result.agent_execution_time = agent_execution_time\n        result.task_execution_time = task_total_time\n\n        return result\n\n    def run_evaluation(self, task_filter: str) -> EvaluationReport:\n        \"\"\"\n        Runs the full evaluation for the specified tasks.\n        \"\"\"\n        tasks = self.task_manager.filter_tasks(task_filter)\n\n        results = []\n\n        for task in tasks:\n            # --------------------------------------------------------------\n            # Resume check\n            # --------------------------------------------------------------\n            existing_result = self._load_latest_task_result(task)\n\n            # Decide whether to skip or retry this task\n            retry_due_to_error = (\n                existing_result is not None\n                and not existing_result.success\n                and is_retryable_error(existing_result.error_message)\n            )\n\n            if existing_result and not retry_due_to_error:\n                # Existing result is either successful or failed with a non-retryable error – skip.\n                logger.info(\n                    \"↩️  Skipping already-completed task (resume): %s\", task.name\n                )\n                results.append(existing_result)\n                continue\n\n            if retry_due_to_error:\n                # Clean previous artifacts so that new results fully 
replace them.\n                task_output_dir = self._get_task_output_dir(task)\n                if task_output_dir.exists():\n                    shutil.rmtree(task_output_dir)\n                logger.info(\n                    \"🔄 Retrying task due to pipeline error (%s): %s\",\n                    existing_result.error_message,\n                    task.name,\n                )\n\n            # --------------------------------------------------------------\n            # Execute new task\n            # --------------------------------------------------------------\n            task_start = time.time()\n            task_result = self._run_single_task(task)\n            task_end = time.time()\n\n            results.append(task_result)\n            \n            # Prepare directory & save\n            task_output_dir = self._get_task_output_dir(task)\n            task_output_dir.mkdir(parents=True, exist_ok=True)\n\n            # Save messages.json (conversation trajectory)\n            messages_path = task_output_dir / \"messages.json\"\n\n            if not messages_path.exists():  # skip if already written\n                messages = (\n                    task_result.model_output\n                    if getattr(task_result, \"model_output\", None)\n                    else []\n                )\n                self.results_reporter.save_messages_json(messages, messages_path)\n\n            # Save meta.json (all other metadata)\n            meta_path = task_output_dir / \"meta.json\"\n            model_config = {\n                \"mcp_service\": self.mcp_service,\n                \"model_name\": self.model_name,\n                \"litellm_run_model_name\": self.litellm_run_model_name,\n                \"reasoning_effort\": self.reasoning_effort,\n                \"timeout\": self.timeout,\n                \"agent_name\": self.agent_name,\n            }\n            self.results_reporter.save_meta_json(\n                task_result,\n                model_config,\n            
    datetime.fromtimestamp(task_start),\n                datetime.fromtimestamp(task_end),\n                meta_path,\n            )\n\n        # --------------------------------------------------------------\n        # Aggregate results – combine current `results` with any previously\n        # saved TaskResults that ALSO match the current task_filter.\n        # --------------------------------------------------------------\n\n        # Helper: determine if a TaskResult matches the filter string\n        def _matches_filter(tr: TaskResult, flt: str) -> bool:\n            if flt.lower() == \"all\":\n                return True\n            if \"/\" in flt:\n                # specific task (category_id/task_id)\n                category_id, task_id = flt.split(\"/\", 1)\n                return tr.category_id == category_id and str(tr.task_id) == task_id\n            # category level\n            return tr.category_id == flt\n\n        # Pull existing reports from disk and merge\n        existing_results = [\n            r\n            for r in self._gather_all_task_results()\n            if _matches_filter(r, task_filter)\n        ]\n\n        # Merge, giving preference to fresh `results` (avoids duplicates)\n        merged: dict[str, TaskResult] = {r.task_name: r for r in existing_results}\n        merged.update({r.task_name: r for r in results})  # overwrite with latest run\n\n        final_results = list(merged.values())\n\n        aggregated_report = EvaluationReport(\n            model_name=self.model_name,\n            model_config={\n                \"mcp_service\": self.mcp_service,\n                \"model_name\": self.model_name,\n                \"litellm_run_model_name\": self.litellm_run_model_name,\n                \"reasoning_effort\": self.reasoning_effort,\n                \"timeout\": self.timeout,\n                \"agent_name\": self.agent_name,\n            },\n            total_tasks=len(final_results),\n            successful_tasks=sum(1 for 
r in final_results if r.success),\n            failed_tasks=sum(1 for r in final_results if not r.success),\n            task_results=final_results,\n            tasks_filter=task_filter,\n        )\n\n        # Save model-level summary\n        summary_path = self.base_experiment_dir / \"summary.json\"\n        self.results_reporter.save_model_summary(aggregated_report, summary_path)\n\n        logger.info(\n            \"\\n============================================================\"\n            \"\\nResults Summary\"\n            \"\\n============================================================\"\n        )\n        logger.info(\n            f\"✓ Tasks passed: {aggregated_report.successful_tasks}/{aggregated_report.total_tasks} ({aggregated_report.success_rate:.1f}%)\"\n        )\n        logger.info(f\"⏱ Total time: {aggregated_report.total_task_execution_time:.1f}s\")\n\n        return aggregated_report\n"
  },
  {
    "path": "src/factory.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nMCP Service Factory for MCPMark\n=================================\n\nThis module provides a simplified factory pattern for creating service-specific managers\nwith centralized configuration management.\n\nFeatures:\n- Dynamic service loading from definitions\n- Centralized configuration\n- Simplified service registration\n\"\"\"\n\nimport importlib\nfrom dataclasses import dataclass\nfrom typing import Dict, Type\n\nfrom src.base.login_helper import BaseLoginHelper\nfrom src.base.state_manager import BaseStateManager\nfrom src.base.task_manager import BaseTaskManager\nfrom src.config.config_schema import ConfigRegistry\nfrom src.services import get_service_definition, get_supported_mcp_services\n\n\n@dataclass\nclass ServiceComponents:\n    \"\"\"All components required for an MCP service.\"\"\"\n\n    task_manager_class: Type[BaseTaskManager]\n    state_manager_class: Type[BaseStateManager]\n    login_helper_class: Type[BaseLoginHelper]\n    config_mapping: Dict[str, Dict[str, str]]\n\n\ndef import_class(module_path: str):\n    \"\"\"Dynamically import a class from module path string.\"\"\"\n    if not module_path:\n        return None\n    module_name, class_name = module_path.rsplit(\".\", 1)\n    module = importlib.import_module(module_name)\n    return getattr(module, class_name)\n\n\ndef apply_config_mapping(config: dict, mapping: dict) -> dict:\n    \"\"\"Apply config mapping to transform config keys to constructor params.\"\"\"\n    if not mapping:\n        return {}\n\n    result = {}\n    for param_name, config_key in mapping.items():\n        if config_key in config:\n            result[param_name] = config[config_key]\n    return result\n\n\nclass ServiceRegistry:\n    \"\"\"Central registry that loads MCP services from definitions.\"\"\"\n\n    # Cache for loaded components\n    _components_cache: Dict[str, ServiceComponents] = {}\n\n    @classmethod\n    def get_components(cls, service_name: str) -> 
ServiceComponents:\n        \"\"\"Get MCP service components from definition.\"\"\"\n        if service_name in cls._components_cache:\n            return cls._components_cache[service_name]\n\n        definition = get_service_definition(service_name)\n\n        # Import classes dynamically\n        components = ServiceComponents(\n            task_manager_class=import_class(definition[\"components\"][\"task_manager\"]),\n            state_manager_class=import_class(definition[\"components\"][\"state_manager\"]),\n            login_helper_class=import_class(definition[\"components\"][\"login_helper\"]),\n            config_mapping=definition.get(\"config_mapping\", {}),\n        )\n\n        cls._components_cache[service_name] = components\n        return components\n\n\nclass GenericServiceFactory:\n    \"\"\"Generic factory that works with any MCP service.\"\"\"\n\n    def __init__(self, components: ServiceComponents, service_name: str):\n        self.components = components\n        self.service_name = service_name\n\n    def create_task_manager(self, **kwargs) -> BaseTaskManager:\n        \"\"\"Create task manager instance.\"\"\"\n        return self.components.task_manager_class(**kwargs)\n\n    def create_state_manager(self, config) -> BaseStateManager:\n        \"\"\"Create state manager with config mapping.\"\"\"\n        mapping = self.components.config_mapping.get(\"state_manager\", {})\n        # Handle both dict and config schema objects\n        config_dict = config.get_all() if hasattr(config, \"get_all\") else config\n        kwargs = apply_config_mapping(config_dict, mapping)\n        return self.components.state_manager_class(**kwargs)\n\n    def create_login_helper(self, config) -> BaseLoginHelper:\n        \"\"\"Create login helper with config mapping.\"\"\"\n        mapping = self.components.config_mapping.get(\"login_helper\", {})\n        # Handle both dict and config schema objects\n        config_dict = config.get_all() if hasattr(config, 
\"get_all\") else config\n        kwargs = apply_config_mapping(config_dict, mapping)\n        \n        # Special handling for GitHub login helper - it needs a single token\n        if self.service_name == \"github\" and \"token\" in kwargs:\n            tokens_list = kwargs[\"token\"]\n            if isinstance(tokens_list, list) and tokens_list:\n                kwargs[\"token\"] = tokens_list[0]  # Use first token for login helper\n                \n        return self.components.login_helper_class(**kwargs)\n\n\nclass MCPServiceFactory:\n    \"\"\"Main factory interface.\"\"\"\n\n    @classmethod\n    def create_service_config(cls, service_name: str):\n        \"\"\"Create MCP service configuration (backward compatible).\"\"\"\n        config = ConfigRegistry.get_config(service_name)\n\n        # Create a backward-compatible ServiceConfig-like object\n        class ServiceConfigCompat:\n            def __init__(self, service_name: str, config_dict: dict):\n                self.service_name = service_name\n                self.config = config_dict\n                self.api_key = config_dict.get(\"api_key\")\n\n        return ServiceConfigCompat(service_name, config.get_all())\n\n    @classmethod\n    def create_task_manager(cls, service_name: str, **kwargs) -> BaseTaskManager:\n        \"\"\"Create task manager for the specified MCP service.\"\"\"\n        components = ServiceRegistry.get_components(service_name)\n        return components.task_manager_class(**kwargs)\n\n    @classmethod\n    def create_state_manager(cls, service_name: str, **kwargs) -> BaseStateManager:\n        \"\"\"Create state manager for the specified MCP service.\"\"\"\n        components = ServiceRegistry.get_components(service_name)\n        config = ConfigRegistry.get_config(service_name).get_all()\n\n        # Use provided kwargs or apply config mapping\n        if not kwargs:\n            mapping = components.config_mapping.get(\"state_manager\", {})\n            kwargs = 
apply_config_mapping(config, mapping)\n\n        return components.state_manager_class(**kwargs)\n\n    @classmethod\n    def create_login_helper(cls, service_name: str, **kwargs) -> BaseLoginHelper:\n        \"\"\"Create login helper for the specified MCP service.\"\"\"\n        components = ServiceRegistry.get_components(service_name)\n        config = ConfigRegistry.get_config(service_name).get_all()\n\n        # Use provided kwargs or apply config mapping\n        if not kwargs:\n            mapping = components.config_mapping.get(\"login_helper\", {})\n            kwargs = apply_config_mapping(config, mapping)\n            \n            # Special handling for GitHub login helper - it needs a single token\n            if service_name == \"github\" and \"token\" in kwargs:\n                tokens_list = kwargs[\"token\"]\n                if isinstance(tokens_list, list) and tokens_list:\n                    kwargs[\"token\"] = tokens_list[0]  # Use first token for login helper\n\n        return components.login_helper_class(**kwargs)\n\n    @classmethod\n    def get_supported_mcp_services(cls) -> list:\n        \"\"\"Get list of supported MCP services.\"\"\"\n        return get_supported_mcp_services()\n\n    @classmethod\n    def get_config_info(cls, service_name: str) -> dict:\n        \"\"\"Get detailed configuration information for debugging.\"\"\"\n        config = ConfigRegistry.get_config(service_name)\n        return config.get_debug_info()\n\n    @classmethod\n    def export_config_template(cls, service_name: str, output_path: str) -> None:\n        \"\"\"Export a configuration template for an MCP service.\"\"\"\n        from pathlib import Path\n\n        ConfigRegistry.export_template(service_name, Path(output_path))\n"
  },
  {
    "path": "src/logger.py",
    "content": "#!/usr/bin/env python3\n\"\"\"Logger configuration for MCPMark.\"\"\"\n\nimport logging\nimport sys\n\n\ndef get_logger(name: str) -> logging.Logger:\n    \"\"\"Get a configured logger instance.\"\"\"\n    logger = logging.getLogger(name)\n\n    if not logger.handlers:\n        handler = logging.StreamHandler(sys.stdout)\n        formatter = logging.Formatter(\"%(message)s\")\n        handler.setFormatter(formatter)\n        logger.addHandler(handler)\n        logger.setLevel(logging.INFO)\n\n    return logger\n"
  },
  {
    "path": "src/mcp_services/filesystem/__init__.py",
    "content": "\"\"\"\nFilesystem MCP Service for MCPMark\n===================================\n\nThis module provides filesystem-specific MCP server integration for MCPMark evaluation.\nUses the official filesystem MCP server for local file operations.\n\"\"\"\n\nfrom .filesystem_login_helper import FilesystemLoginHelper\nfrom .filesystem_state_manager import FilesystemStateManager\nfrom .filesystem_task_manager import FilesystemTaskManager, FilesystemTask\n\n__all__ = [\n    \"FilesystemLoginHelper\",\n    \"FilesystemStateManager\",\n    \"FilesystemTaskManager\",\n    \"FilesystemTask\",\n]\n"
  },
  {
    "path": "src/mcp_services/filesystem/filesystem_login_helper.py",
    "content": "\"\"\"\nFilesystem Login Helper for MCPMark\n====================================\n\nThis module provides a minimal login helper for the filesystem MCP service.\nSince filesystem operations don't require authentication, this is a simple\npass-through implementation that satisfies the interface requirements.\n\"\"\"\n\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom src.base.login_helper import BaseLoginHelper\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass FilesystemLoginHelper(BaseLoginHelper):\n    \"\"\"\n    Login helper for filesystem MCP service.\n\n    The filesystem MCP server doesn't require authentication, so this\n    implementation simply returns success for all login operations.\n    \"\"\"\n\n    def __init__(self, state_path: Optional[Path] = None):\n        \"\"\"\n        Initialize the filesystem login helper.\n\n        Args:\n            state_path: Path to save state (not used for filesystem)\n        \"\"\"\n        super().__init__()\n        self.state_path = (\n            state_path or Path.home() / \".mcpmark\" / \"filesystem_state.json\"\n        )\n        logger.info(\"Initialized FilesystemLoginHelper (no auth required)\")\n\n    def login(self, **kwargs) -> bool:\n        \"\"\"\n        Perform login operation.\n\n        Since filesystem doesn't require authentication, this always returns True.\n\n        Returns:\n            bool: Always True for filesystem service\n        \"\"\"\n        logger.info(\"Filesystem service does not require authentication\")\n        return True\n\n    def is_authenticated(self) -> bool:\n        \"\"\"\n        Check if authenticated.\n\n        Returns:\n            bool: Always True for filesystem service\n        \"\"\"\n        return True\n\n    def get_credentials(self) -> dict:\n        \"\"\"\n        Get credentials for the service.\n\n        Returns:\n            dict: Empty dict as no credentials needed\n        \"\"\"\n      
  return {}\n"
  },
  {
    "path": "src/mcp_services/filesystem/filesystem_state_manager.py",
    "content": "\"\"\"\nFilesystem State Manager for MCPMark\n=====================================\n\nThis module handles filesystem state management for consistent task evaluation.\nIt manages test directories, file creation/cleanup, and environment isolation.\n\"\"\"\n\nimport os\nimport shutil\nimport tempfile\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom src.base.state_manager import BaseStateManager\nfrom src.base.task_manager import BaseTask\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass FilesystemStateManager(BaseStateManager):\n    \"\"\"\n    Manages filesystem state for task evaluation.\n\n    This includes creating isolated test directories, tracking created resources,\n    and cleaning up after task completion.\n    \"\"\"\n\n    def _get_project_root(self) -> Path:\n        \"\"\"Find project root by looking for marker files.\"\"\"\n        current = Path(__file__).resolve()\n\n        # Look for project root markers\n        for parent in current.parents:\n            if (parent / \"pyproject.toml\").exists() or (parent / \"pipeline.py\").exists():\n                return parent\n\n        # Fallback to old method if markers not found\n        return Path(__file__).parent / \"../../../\"\n\n    def __init__(self, test_root: Optional[Path] = None, cleanup_on_exit: bool = False):\n        \"\"\"\n        Initialize filesystem state manager.\n\n        Args:\n            test_root: Root directory for test operations (from FILESYSTEM_TEST_ROOT env var)\n            cleanup_on_exit: Whether to clean up test directories after tasks (default False for persistent environment)\n        \"\"\"\n        super().__init__(service_name=\"filesystem\")\n\n        # Use provided test root or default to persistent test environment\n        if test_root:\n            self.test_root = Path(test_root)\n        else:\n            # Default to persistent test environment\n            project_root = 
self._get_project_root()\n            self.test_root = (project_root / \"test_environments/desktop\").resolve()\n\n        self.cleanup_on_exit = cleanup_on_exit\n        self.current_task_dir: Optional[Path] = None\n        self.created_resources: List[Path] = []\n\n        # Backup and restore functionality\n        self.backup_dir: Optional[Path] = None\n        self.backup_enabled = (\n            True  # Enable backup/restore by default for task isolation\n        )\n\n        logger.info(\n            f\"Initialized FilesystemStateManager with persistent test environment: {self.test_root}\"\n        )\n\n    def initialize(self, **kwargs) -> bool:\n        \"\"\"\n        Initialize the filesystem environment.\n\n        Ensures the persistent test environment exists and is accessible.\n\n        Returns:\n            bool: True if initialization successful\n        \"\"\"\n        try:\n            # Ensure test environment directory exists\n            if not self.test_root.exists():\n                logger.error(f\"Persistent test environment not found: {self.test_root}\")\n                logger.error(\n                    \"Please ensure test_environments/desktop/ exists in the repository\"\n                )\n                return False\n\n            logger.info(f\"Using persistent test environment: {self.test_root}\")\n\n            # Verify we can write to the directory\n            test_file = self.test_root / \".mcpbench_test\"\n            test_file.write_text(\"test\")\n            test_file.unlink()\n\n            return True\n\n        except Exception as e:\n            logger.error(f\"Failed to initialize filesystem environment: {e}\")\n            return False\n\n    def set_up(self, task: BaseTask) -> bool:\n        \"\"\"\n        Set up filesystem environment for a specific task.\n\n        Creates a backup of the current environment, then uses the backup\n        as the working directory to keep the original unchanged.\n\n        
Args:\n            task: The task for which to set up the state\n\n        Returns:\n            bool: True if setup successful\n        \"\"\"\n        try:\n            # Dynamically set test root based on task category\n            self._set_dynamic_test_root(task)\n\n            # Create backup of current test environment before task execution\n            if self.backup_enabled:\n                if not self._create_backup(task):\n                    logger.error(f\"Failed to create backup for task {task.name}\")\n                    return False\n\n            # Use the backup directory as the working directory instead of the original\n            self.current_task_dir = (\n                self.backup_dir\n            )  # Use backup directory for operations\n\n            logger.info(\n                f\"| ✓ Using the backup environment for operations\"\n            )\n\n            # Store the test directory path in the task object for use by task manager\n            if hasattr(task, \"__dict__\"):\n                task.test_directory = str(self.current_task_dir)\n\n            # Set environment variable for verification scripts and MCP server\n            os.environ[\"FILESYSTEM_TEST_DIR\"] = str(self.current_task_dir)\n\n            return True\n\n        except Exception as e:\n            logger.error(f\"Failed to set up filesystem state for {task.name}: {e}\")\n            return False\n\n    def _set_dynamic_test_root(self, task: BaseTask) -> None:\n        \"\"\"\n        Dynamically set the test root directory based on the task category.\n\n        Args:\n            task: The task for which to set the test root\n        \"\"\"\n        # Get the base test environments directory from environment variable\n        base_test_root = os.getenv(\"FILESYSTEM_TEST_ROOT\")\n        if not base_test_root:\n            # Fallback to default path\n            project_root = self._get_project_root()\n            base_test_root = str(project_root / 
\"test_environments\")\n\n        base_test_path = Path(base_test_root)\n\n        # If task has a category_id, append it to the base path\n        if task.category_id:\n            self.test_root = base_test_path / task.category_id\n            # Store the current task category for URL selection\n            self._current_task_category = task.category_id\n            logger.info(f\"| ✓ Setting test root to category-specific directory: {self.test_root}\")\n        else:\n            # Use the base test environments directory\n            self.test_root = base_test_path\n            # For base directory, use 'desktop' as default category\n            self._current_task_category = 'desktop'\n            logger.info(f\"| Setting test root to base directory: {self.test_root}\")\n\n        # Ensure the directory exists by downloading and extracting if needed\n        if not self.test_root.exists():\n            logger.warning(f\"| Test directory does not exist: {self.test_root}\")\n            if not self._download_and_extract_test_environment():\n                logger.error(f\"Failed to download and extract test environment for: {self.test_root}\")\n                raise RuntimeError(f\"Test environment not available: {self.test_root}\")\n            logger.info(f\"| Downloaded and extracted test environment: {self.test_root}\")\n\n\n    def clean_up(self, task: Optional[BaseTask] = None, **kwargs) -> bool:\n        \"\"\"\n        Clean up filesystem resources created during task execution.\n\n        Since we operate on the backup directory, we just need to clean up the backup.\n\n        Args:\n            task: The task to clean up after (optional)\n            **kwargs: Additional cleanup options\n\n        Returns:\n            bool: True if cleanup successful\n        \"\"\"\n        try:\n            cleanup_success = True\n\n            # Clean up the backup directory since we operated on it\n            if self.backup_enabled and self.backup_dir and 
self.backup_dir.exists():\n                try:\n                    shutil.rmtree(self.backup_dir)\n                    logger.info(\n                        f\"| ✓ Cleaned up backup directory for task {task.name if task else 'unknown'}\"\n                    )\n                    self.backup_dir = None\n                except Exception as e:\n                    logger.error(f\"Failed to clean up backup directory: {e}\")\n                    cleanup_success = False\n            else:\n                logger.info(\"No backup directory to clean up\")\n\n            # Clear the resources list\n            self.created_resources.clear()\n\n            return cleanup_success\n\n        except Exception as e:\n            logger.error(f\"Filesystem cleanup failed: {e}\")\n            return False\n\n    def get_test_directory(self) -> Optional[Path]:\n        \"\"\"\n        Get the current test directory path.\n\n        Returns:\n            Path to the current test directory, or None if not set up\n        \"\"\"\n        return self.current_task_dir\n\n    def get_service_config_for_agent(self) -> dict:\n        \"\"\"\n        Get service-specific configuration for agent execution.\n\n        Returns:\n            Dictionary containing configuration needed by the agent/MCP server\n        \"\"\"\n        service_config = {}\n\n        # Add test directory if available\n        if self.current_task_dir:\n            service_config[\"test_directory\"] = str(self.current_task_dir)\n\n        return service_config\n\n    def track_resource(self, resource_path: Path):\n        \"\"\"\n        Track a resource for cleanup.\n\n        Args:\n            resource_path: Path to the resource to track\n        \"\"\"\n        if resource_path not in self.created_resources:\n            self.created_resources.append(resource_path)\n            logger.debug(f\"Tracking resource for cleanup: {resource_path}\")\n\n    def reset_test_environment(self) -> bool:\n        \"\"\"\n  
      Reset the test environment to its original state.\n\n        This method can be used for development/debugging purposes.\n        In normal operation, the persistent environment is maintained.\n\n        Returns:\n            bool: True if reset successful\n        \"\"\"\n        try:\n            # Remove any sorting directories that might have been created\n            sorting_dirs = [\"has_test\", \"no_test\", \"organized\", \"backup\"]\n            for dir_name in sorting_dirs:\n                dir_path = self.test_root / dir_name\n                if dir_path.exists():\n                    shutil.rmtree(dir_path)\n                    logger.info(f\"Removed sorting directory: {dir_path}\")\n\n            # Remove any temporary files that might have been created\n            temp_files = [\"hello_world.txt\", \"new_file.txt\", \"temp.txt\"]\n            for file_name in temp_files:\n                file_path = self.test_root / file_name\n                if file_path.exists():\n                    file_path.unlink()\n                    logger.info(f\"Removed temporary file: {file_path}\")\n\n            logger.info(\"Test environment reset completed\")\n            return True\n        except Exception as e:\n            logger.error(f\"Test environment reset failed: {e}\")\n            return False\n\n    # =========================================================================\n    # Backup and Restore Methods for Task Isolation\n    # =========================================================================\n\n    def _create_backup(self, task: BaseTask) -> bool:\n        \"\"\"\n        Create a complete backup of the test environment before task execution.\n\n        Args:\n            task: The task for which to create backup\n\n        Returns:\n            bool: True if backup successful\n        \"\"\"\n        try:\n            # Create backup directory with task-specific name\n            project_root = self._get_project_root()\n            
backup_root = (project_root / \".mcpmark_backups\").resolve()\n            backup_root.mkdir(exist_ok=True)\n\n            task_id = f\"{task.service}_{task.category_id}_{task.task_id}\"\n            self.backup_dir = backup_root / f\"backup_{task_id}_{os.getpid()}\"\n\n            # Remove existing backup if it exists\n            if self.backup_dir.exists():\n                shutil.rmtree(self.backup_dir)\n\n            # Create fresh backup by copying entire test environment\n            shutil.copytree(self.test_root, self.backup_dir)\n\n            logger.info(f\"| ✓ Created backup for task {task.name}: {self.backup_dir}\")\n            return True\n\n        except Exception as e:\n            logger.error(f\"Failed to create backup for task {task.name}: {e}\")\n            return False\n\n    def _restore_from_backup(self, task: Optional[BaseTask] = None) -> bool:\n        \"\"\"\n        Restore the test environment from backup.\n\n        Args:\n            task: The task to restore after (optional, for logging)\n\n        Returns:\n            bool: True if restore successful\n        \"\"\"\n        try:\n            if not self.backup_dir or not self.backup_dir.exists():\n                logger.error(\"No backup directory available for restore\")\n                return False\n\n            # Remove current test environment\n            if self.test_root.exists():\n                shutil.rmtree(self.test_root)\n\n            # Restore from backup\n            shutil.copytree(self.backup_dir, self.test_root)\n\n            # Clean up backup directory\n            shutil.rmtree(self.backup_dir)\n            self.backup_dir = None\n\n            task_name = task.name if task else \"unknown\"\n            logger.info(\n                f\"✅ Restored test environment from backup after task {task_name}\"\n            )\n            return True\n\n        except Exception as e:\n            task_name = task.name if task else \"unknown\"\n            
logger.error(f\"Failed to restore from backup after task {task_name}: {e}\")\n            return False\n\n    # =========================================================================\n    # Abstract Method Implementations Required by BaseStateManager\n    # =========================================================================\n\n    def _create_initial_state(self, task: BaseTask) -> Optional[Dict[str, Any]]:\n        \"\"\"Create initial state for a task.\n\n        For filesystem, this is handled in set_up() method by creating task directories.\n        Returns the task directory path as state info.\n        \"\"\"\n        if self.current_task_dir and self.current_task_dir.exists():\n            return {\"task_directory\": str(self.current_task_dir)}\n        return None\n\n    def _store_initial_state_info(\n        self, task: BaseTask, state_info: Dict[str, Any]\n    ) -> None:\n        \"\"\"Store initial state information in the task object.\n\n        For filesystem, we store the test directory path.\n        \"\"\"\n        if state_info and \"task_directory\" in state_info:\n            if hasattr(task, \"__dict__\"):\n                task.test_directory = state_info[\"task_directory\"]\n\n    def _cleanup_task_initial_state(self, task: BaseTask) -> bool:\n        \"\"\"Clean up initial state for a specific task.\n\n        For filesystem, this means removing the task directory.\n        \"\"\"\n        if hasattr(task, \"test_directory\") and task.test_directory:\n            task_dir = Path(task.test_directory)\n            if task_dir.exists():\n                try:\n                    shutil.rmtree(task_dir)\n                    logger.info(f\"Cleaned up task directory: {task_dir}\")\n                    return True\n                except Exception as e:\n                    logger.error(f\"Failed to clean up task directory: {e}\")\n                    return False\n        return True\n\n    def _cleanup_single_resource(self, resource: 
Dict[str, Any]) -> bool:\n        \"\"\"Clean up a single tracked resource.\n\n        For filesystem, resources are paths to files/directories.\n        \"\"\"\n        if \"path\" in resource:\n            resource_path = Path(resource[\"path\"])\n            if resource_path.exists():\n                try:\n                    if resource_path.is_dir():\n                        shutil.rmtree(resource_path)\n                    else:\n                        resource_path.unlink()\n                    logger.info(f\"Cleaned up resource: {resource_path}\")\n                    return True\n                except Exception as e:\n                    logger.error(f\"Failed to clean up {resource_path}: {e}\")\n                    return False\n        return True\n\n    def _download_and_extract_test_environment(self) -> bool:\n        \"\"\"\n        Download and extract test environment using wget and unzip commands.\n        \n        This approach preserves original file timestamps and is simpler than Python zipfile.\n\n        Returns:\n            bool: True if download and extraction successful\n        \"\"\"\n        try:\n            import subprocess\n            import sys\n            \n            # Define URL mapping for different test environment categories\n            url_mapping = {\n                'desktop': 'https://storage.mcpmark.ai/filesystem/desktop.zip',\n                'file_context': 'https://storage.mcpmark.ai/filesystem/file_context.zip',\n                'file_property': 'https://storage.mcpmark.ai/filesystem/file_property.zip',\n                'folder_structure': 'https://storage.mcpmark.ai/filesystem/folder_structure.zip',\n                'papers': 'https://storage.mcpmark.ai/filesystem/papers.zip',\n                'student_database': 'https://storage.mcpmark.ai/filesystem/student_database.zip',\n                'threestudio': 'https://storage.mcpmark.ai/filesystem/threestudio.zip',\n                'votenet': 
'https://storage.mcpmark.ai/filesystem/votenet.zip',\n                'legal_document': 'https://storage.mcpmark.ai/filesystem/legal_document.zip',\n                'desktop_template': 'https://storage.mcpmark.ai/filesystem/desktop_template.zip'\n            }\n\n            # Get the category from the current task context\n            category = getattr(self, '_current_task_category', None)\n            if not category:\n                logger.error(\"| No task category available for URL selection\")\n                return False\n\n            # Select the appropriate URL based on category\n            if category in url_mapping:\n                test_env_url = url_mapping[category]\n                logger.info(f\"| ○ Selected URL for category '{category}': {test_env_url}\")\n            else:\n                logger.error(f\"| No URL mapping found for category: {category}\")\n                return False\n\n            # Allow override via environment variable\n            test_env_url = os.getenv('TEST_ENVIRONMENT_URL', test_env_url)\n\n            logger.info(f\"| ○ Downloading test environment from: {test_env_url}\")\n\n            # Create a temporary directory for the download\n            with tempfile.TemporaryDirectory() as temp_dir:\n                temp_path = Path(temp_dir)\n                zip_path = temp_path / \"test_environment.zip\"\n\n                # Step 1: Download using wget\n                logger.info(\"| ○ Downloading test environment zip file...\")\n                try:\n                    # Use wget if available, otherwise fall back to curl\n                    if sys.platform == \"win32\":\n                        # Windows: try wget, fall back to curl\n                        try:\n                            result = subprocess.run(\n                                [\"wget\", \"-O\", str(zip_path), test_env_url],\n                                capture_output=True, text=True, check=True\n                            )\n             
           except (subprocess.CalledProcessError, FileNotFoundError):\n                            # Fall back to curl\n                            result = subprocess.run(\n                                [\"curl\", \"-L\", \"-o\", str(zip_path), test_env_url],\n                                capture_output=True, text=True, check=True\n                            )\n                    else:\n                        # Unix-like systems: try wget, fall back to curl\n                        try:\n                            result = subprocess.run(\n                                [\"wget\", \"-O\", str(zip_path), test_env_url],\n                                capture_output=True, text=True, check=True\n                            )\n                        except (subprocess.CalledProcessError, FileNotFoundError):\n                            # Fall back to curl\n                            result = subprocess.run(\n                                [\"curl\", \"-L\", \"-o\", str(zip_path), test_env_url],\n                                capture_output=True, text=True, check=True\n                            )\n                    \n                    logger.info(\"| ✓ Download completed successfully\")\n                except Exception as e:\n                    logger.error(f\"| Download failed: {e}\")\n                    return False\n\n                # Step 2: Extract using unzip\n                logger.info(\"| ○ Extracting test environment...\")\n                try:\n                    # Extract to parent directory to maintain expected structure\n                    result = subprocess.run(\n                        [\"unzip\", \"-o\", str(zip_path), \"-d\", str(self.test_root.parent)],\n                        capture_output=True, text=True, check=True\n                    )\n                    logger.info(\"| ✓ Extraction completed successfully\")\n                except Exception as e:\n                    logger.error(f\"| Extraction failed: {e}\")\n 
                   return False\n\n                # Step 3: Remove __MACOSX folder if it exists\n                logger.info(\"| ○ Cleaning up macOS metadata...\")\n                macosx_path = self.test_root.parent / \"__MACOSX\"\n                if macosx_path.exists():\n                    try:\n                        shutil.rmtree(macosx_path)\n                        logger.info(\"| ✓ Removed __MACOSX folder\")\n                    except Exception as e:\n                        logger.warning(f\"| Failed to remove __MACOSX folder: {e}\")\n\n                # Verify the extracted directory exists\n                if not self.test_root.exists():\n                    logger.error(f\"| Extracted directory not found at expected path: {self.test_root}\")\n                    return False\n\n                logger.info(f\"| ✓ Successfully downloaded and extracted test environment to: {self.test_root}\")\n                return True\n\n        except Exception as e:\n            logger.error(f\"| Failed to download and extract test environment: {e}\")\n            return False\n"
  },
  {
    "path": "src/mcp_services/filesystem/filesystem_task_manager.py",
    "content": "\"\"\"\nSimplified Filesystem Task Manager using Enhanced Base Class\n============================================================\n\nThis module shows how the filesystem task manager can be simplified\nusing the enhanced base task manager.\n\"\"\"\n\nimport os\nimport subprocess\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import List, Optional, Dict, Any\n\nfrom src.base.task_manager import BaseTask, BaseTaskManager\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\n@dataclass\nclass FilesystemTask(BaseTask):\n    \"\"\"Filesystem-specific task with additional fields.\"\"\"\n\n    test_directory: Optional[str] = None\n    expected_files: Optional[List[str]] = None\n    expected_directories: Optional[List[str]] = None\n\n\nclass FilesystemTaskManager(BaseTaskManager):\n    \"\"\"Simplified filesystem task manager using enhanced base class.\"\"\"\n\n    def __init__(self, tasks_root: Path = None, task_suite: str = \"standard\"):\n        \"\"\"Initialize filesystem task manager.\"\"\"\n        if tasks_root is None:\n            tasks_root = Path(__file__).resolve().parents[3] / \"tasks\"\n\n        super().__init__(\n            tasks_root,\n            mcp_service=\"filesystem\",\n            task_class=FilesystemTask,\n            task_organization=\"directory\",\n            task_suite=task_suite,\n        )\n\n    # Override only what's needed for filesystem-specific behavior\n    def _create_task_from_files(\n        self, category_id: str, task_files_info: Dict[str, Any]\n    ) -> BaseTask:\n        \"\"\"Instantiate a `BaseTask` from the dictionary returned by `_find_task_files`.\"\"\"\n        import json\n        \n        # Support arbitrary task names, not just task_n format\n        task_name = task_files_info[\"task_id\"]\n\n        # Use task_name as default task_id\n        task_id = task_name\n\n        # Check for meta.json\n        meta_path = 
task_files_info[\"instruction_path\"].parent / \"meta.json\"\n        final_category_id = category_id\n        \n        if meta_path.exists():\n            try:\n                with open(meta_path, 'r') as f:\n                    meta_data = json.load(f)\n                    # Use values from meta.json if available\n                    final_category_id = meta_data.get(\"category_id\", category_id)\n                    task_id = meta_data.get(\"task_id\", task_id)\n            except Exception as e:\n                logger.warning(f\"Failed to load meta.json from {meta_path}: {e}\")\n\n        return self.task_class(\n            task_instruction_path=task_files_info[\"instruction_path\"],\n            task_verification_path=task_files_info[\"verification_path\"],\n            service=\"filesystem\",\n            category_id=final_category_id,\n            task_id=task_id,\n        )\n\n    def run_verification(self, task: BaseTask) -> subprocess.CompletedProcess:\n        \"\"\"Run verification with filesystem-specific environment.\"\"\"\n        env = os.environ.copy()\n\n        # Pass test directory to verification script\n        # Priority: task.test_directory (set by state manager) > environment variable\n        test_dir = None\n        if hasattr(task, \"test_directory\") and task.test_directory:\n            test_dir = task.test_directory\n        else:\n            test_dir = os.getenv(\"FILESYSTEM_TEST_DIR\")\n\n        if test_dir:\n            env[\"FILESYSTEM_TEST_DIR\"] = test_dir\n            logger.debug(f\"Setting FILESYSTEM_TEST_DIR to: {test_dir}\")\n\n        return subprocess.run(\n            self._get_verification_command(task),\n            capture_output=True,\n            text=True,\n            timeout=300,\n            env=env,\n        )\n\n    def filter_tasks(self, task_filter: str) -> List[BaseTask]:\n        \"\"\"Filter tasks based on category or specific task pattern with support for arbitrary task names.\"\"\"\n        
all_tasks = self.discover_all_tasks()\n\n        if not task_filter or task_filter.lower() == \"all\":\n            return all_tasks\n\n        # Check if it's a category filter\n        categories = self.get_categories()\n        if task_filter in categories:\n            return [task for task in all_tasks if task.category_id == task_filter]\n\n        # Check for specific task pattern (category_id/task_X or category_id/arbitrary_name)\n        if \"/\" in task_filter:\n            try:\n                category_id, task_id = task_filter.split(\"/\", 1)\n                # Direct string matching for task_id\n                for task in all_tasks:\n                    if task.category_id == category_id and str(task.task_id) == task_id:\n                        return [task]\n            except (ValueError, IndexError):\n                pass\n\n        # Fallback: check for partial matches in task names or categories\n        filtered_tasks = []\n        for task in all_tasks:\n            if (\n                task_filter in task.category_id\n                or task_filter in task.name\n                or task_filter == str(task.task_id)\n            ):\n                filtered_tasks.append(task)\n\n        return filtered_tasks\n"
  },
  {
    "path": "src/mcp_services/github/__init__.py",
    "content": "\"\"\"\nGitHub MCP Service for MCPMark\n===============================\n\nThis module provides GitHub-specific MCP server integration for MCPMark evaluation.\nUses GitHub's official remote MCP server for streamable HTTP/SSE communication.\n\nUpdated to include initial state-based environment replication mechanism.\n\"\"\"\n\nfrom .github_login_helper import GitHubLoginHelper\nfrom .github_task_manager import GitHubTaskManager, GitHubTask\nfrom .github_state_manager import GitHubStateManager\n\n__all__ = [\"GitHubLoginHelper\", \"GitHubTaskManager\", \"GitHubTask\", \"GitHubStateManager\"]\n"
  },
  {
    "path": "src/mcp_services/github/github_login_helper.py",
    "content": "\"\"\"\nGitHub Login Helper for MCPMark\n================================\n\nThis module provides GitHub token authentication and validation utilities.\nUnlike browser-based services, GitHub uses token-based authentication.\n\"\"\"\n\nimport json\nimport requests\nfrom pathlib import Path\nfrom typing import Optional, Dict, Any\n\nfrom src.base.login_helper import BaseLoginHelper\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass GitHubLoginHelper(BaseLoginHelper):\n    \"\"\"\n    Utility helper for GitHub token authentication and validation.\n    \"\"\"\n\n    def __init__(\n        self,\n        token: Optional[str] = None,\n        state_path: Optional[Path] = None,\n    ) -> None:\n        \"\"\"\n        Initialize the GitHub login helper.\n\n        Args:\n            token: GitHub Personal Access Token\n            state_path: Path to save authentication state\n        \"\"\"\n        self.token = token\n        self.state_path = state_path or Path.home() / \".mcpmark\" / \"github_auth.json\"\n\n        # Ensure state directory exists\n        self.state_path.parent.mkdir(parents=True, exist_ok=True)\n\n    def login_and_save_state(self, **kwargs) -> bool:\n        \"\"\"\n        Validate GitHub token and save authentication state.\n\n        Returns:\n            bool: True if authentication successful, False otherwise\n        \"\"\"\n        if not self.token:\n            logger.error(\"No GitHub token provided\")\n            return False\n\n        try:\n            # Validate token by making an authenticated request\n            session = requests.Session()\n            session.headers.update(\n                {\n                    \"Authorization\": f\"Bearer {self.token}\",\n                    \"Accept\": \"application/vnd.github.v3+json\",\n                    \"X-GitHub-Api-Version\": \"2022-11-28\",\n                    \"User-Agent\": \"MCPMark/1.0\",\n                }\n            )\n\n          
  # Get user information\n            response = session.get(\"https://api.github.com/user\")\n\n            if response.status_code != 200:\n                logger.error(\n                    f\"GitHub authentication failed: {response.status_code} {response.text}\"\n                )\n                return False\n\n            user_info = response.json()\n            logger.info(\n                f\"GitHub authentication successful for user: {user_info['login']}\"\n            )\n\n            # Get token scopes\n            token_scopes = self._get_token_scopes(session)\n\n            # Save authentication state\n            auth_state = {\n                \"user\": user_info,\n                \"token_scopes\": token_scopes,\n                \"authenticated_at\": self._get_current_timestamp(),\n            }\n            self._save_auth_state(auth_state)\n\n            # Verify required permissions\n            if not self._verify_required_permissions(token_scopes):\n                logger.warning(\"GitHub token may not have all required permissions\")\n                return False\n\n            return True\n\n        except Exception as e:\n            logger.error(f\"GitHub authentication error: {e}\")\n            return False\n\n    def _get_token_scopes(self, session: requests.Session) -> list:\n        \"\"\"Get the scopes available to the current token.\"\"\"\n        try:\n            response = session.get(\"https://api.github.com/user\")\n            scopes_header = response.headers.get(\"X-OAuth-Scopes\", \"\")\n            if scopes_header:\n                return [\n                    scope.strip() for scope in scopes_header.split(\",\") if scope.strip()\n                ]\n            return []\n        except Exception as e:\n            logger.warning(f\"Could not determine token scopes: {e}\")\n            return []\n\n    def _verify_required_permissions(self, scopes: list) -> bool:\n        \"\"\"\n        Verify that the token has the 
minimum required permissions.\n\n        For MCPMark GitHub tasks, we typically need:\n        - repo (for repository access)\n        - read:user (for user information)\n        \"\"\"\n        required_scopes = [\"repo\"]  # Minimum requirement\n        recommended_scopes = [\"repo\", \"read:user\", \"read:org\"]\n\n        has_required = all(scope in scopes for scope in required_scopes)\n        if not has_required:\n            logger.error(\n                f\"Token missing required scopes. Required: {required_scopes}, Available: {scopes}\"\n            )\n            return False\n\n        has_recommended = all(scope in scopes for scope in recommended_scopes)\n        if not has_recommended:\n            logger.warning(\n                f\"Token missing some recommended scopes. Recommended: {recommended_scopes}, Available: {scopes}\"\n            )\n\n        return True\n\n    def _save_auth_state(self, auth_state: Dict[str, Any]):\n        \"\"\"Save authentication state to local file.\"\"\"\n        try:\n            with open(self.state_path, \"w\") as f:\n                json.dump(auth_state, f, indent=2, default=str)\n\n            # Set restrictive permissions (user read/write only)\n            self.state_path.chmod(0o600)\n            logger.info(f\"Authentication state saved to: {self.state_path}\")\n\n        except Exception as e:\n            logger.error(f\"Failed to save authentication state: {e}\")\n\n    def _get_current_timestamp(self) -> str:\n        \"\"\"Get current timestamp in ISO format.\"\"\"\n        from datetime import datetime\n\n        return datetime.utcnow().isoformat() + \"Z\"\n\n    def get_saved_auth_state(self) -> Optional[Dict[str, Any]]:\n        \"\"\"Load and return saved authentication state.\"\"\"\n        try:\n            if self.state_path.exists():\n                with open(self.state_path, \"r\") as f:\n                    return json.load(f)\n        except Exception as e:\n            logger.error(f\"Failed 
to load authentication state: {e}\")\n        return None\n\n    def is_token_valid(self) -> bool:\n        \"\"\"Check if the current token is still valid.\"\"\"\n        if not self.token:\n            return False\n\n        try:\n            session = requests.Session()\n            session.headers.update(\n                {\n                    \"Authorization\": f\"Bearer {self.token}\",\n                    \"Accept\": \"application/vnd.github.v3+json\",\n                }\n            )\n\n            response = session.get(\"https://api.github.com/user\")\n            return response.status_code == 200\n\n        except Exception:\n            return False\n\n    def get_rate_limit_info(self) -> Dict[str, Any]:\n        \"\"\"Get current rate limit information for the token.\"\"\"\n        if not self.token:\n            return {}\n\n        try:\n            session = requests.Session()\n            session.headers.update(\n                {\n                    \"Authorization\": f\"Bearer {self.token}\",\n                    \"Accept\": \"application/vnd.github.v3+json\",\n                }\n            )\n\n            response = session.get(\"https://api.github.com/rate_limit\")\n            if response.status_code == 200:\n                return response.json()\n\n        except Exception as e:\n            logger.warning(f\"Failed to get rate limit info: {e}\")\n\n        return {}\n\n    def test_repository_access(self, owner: str, repo: str) -> bool:\n        \"\"\"Test if the token has access to a specific repository.\"\"\"\n        if not self.token:\n            return False\n\n        try:\n            session = requests.Session()\n            session.headers.update(\n                {\n                    \"Authorization\": f\"Bearer {self.token}\",\n                    \"Accept\": \"application/vnd.github.v3+json\",\n                }\n            )\n\n            response = session.get(f\"https://api.github.com/repos/{owner}/{repo}\")\n     
       return response.status_code == 200\n\n        except Exception:\n            return False\n"
  },
  {
    "path": "src/mcp_services/github/github_state_manager.py",
    "content": "\"\"\"\nGitHub State Manager for MCPMark\n=================================\n\nThis module handles GitHub repository state management for consistent task evaluation.\nManages test repositories, branches, and cleanup after evaluation.\n\"\"\"\n\nimport requests\nfrom typing import Optional, List, Union\nfrom pathlib import Path\n\nfrom src.base.state_manager import BaseStateManager, InitialStateInfo\nfrom src.base.task_manager import BaseTask\nfrom src.logger import get_logger\nfrom src.mcp_services.github.token_pool import GitHubTokenPool\n\nlogger = get_logger(__name__)\n\n\nclass GitHubStateManager(BaseStateManager):\n    \"\"\"\n    Manages GitHub repository state for task evaluation.\n    \"\"\"\n\n    def __init__(\n        self,\n        github_token: Union[str, List[str]],\n        # Name of the evaluation organisation / user where temporary test repositories are created\n        eval_org: str = \"mcpmark-eval\",\n        # Local directory that stores *exported* repository templates (produced by repo_exporter.py)\n        templates_root: str = \"./github_state\",\n    ):\n        \"\"\"\n        Initialize GitHub state manager.\n\n        Args:\n            github_token: GitHub Personal Access Token(s). 
Can be a single token string or a list of tokens for round-robin usage.\n            eval_org: Organisation / user used to host **ephemeral evaluation repositories**.\n        \"\"\"\n        super().__init__(service_name=\"github\")\n\n        # Track repos created via template import so we can delete them afterwards\n        self._repos_to_cleanup: list[tuple[str, str]] = []  # (owner, repo_name)\n\n        # Initialize token pool\n        if isinstance(github_token, str):\n            # Single token - create pool with one token\n            self.token_pool = GitHubTokenPool([github_token])\n            self.github_token = github_token  # Keep for backward compatibility\n        else:\n            # Multiple tokens - use token pool\n            self.token_pool = GitHubTokenPool(github_token)\n            self.github_token = (\n                self.token_pool.get_current_token()\n            )  # For backward compatibility\n\n        # Store evaluation context (consistent naming)\n        self.eval_org = eval_org  # evaluation organisation / user\n\n        # Local path that contains exported repository templates\n        self.templates_root = Path(templates_root).expanduser().resolve()\n\n        # Set up HTTP session for GitHub API\n        self.session = requests.Session()\n        # Note: We'll update the Authorization header before each request\n        self.session.headers.update(\n            {\n                \"Accept\": \"application/vnd.github.v3+json\",\n                \"X-GitHub-Api-Version\": \"2022-11-28\",\n                \"User-Agent\": \"MCPMark/1.0\",\n            }\n        )\n\n        # Validate GitHub configuration during initialization\n        try:\n            # Set initial token for validation\n            self._update_session_token()\n\n            response = self.session.get(\"https://api.github.com/user\")\n            if response.status_code != 200:\n                raise ValueError(\n                    f\"Invalid GitHub token: 
{response.status_code} {response.text}\"\n                )\n\n            user_info = response.json()\n            logger.info(f\"GitHub authenticated as: {user_info['login']}\")\n            logger.info(f\"Using token pool with {self.token_pool.pool_size} token(s)\")\n\n            # Check if evaluation organisation exists (optional)\n            if self.eval_org:\n                org_response = self.session.get(\n                    f\"https://api.github.com/orgs/{self.eval_org}\"\n                )\n                if org_response.status_code == 200:\n                    logger.info(f\"Using evaluation organisation: {self.eval_org}\")\n                else:\n                    logger.warning(\n                        f\"Evaluation organisation {self.eval_org} not accessible, using user account\"\n                    )\n                    # Fall back to user account\n                    self.eval_org = user_info[\"login\"]\n\n            logger.info(\"GitHub state manager initialized successfully\")\n\n        except Exception as e:\n            raise RuntimeError(f\"GitHub initialization failed: {e}\")\n\n        # Initial state mapping - categories to initial state repositories\n        self.initial_state_mapping = {\n            \"build_your_own_x\": \"codecrafters-io-build-your-own-x\",\n            \"missing-semester\": \"missing-semester-missing-semester\",\n            \"mcpmark-cicd\": \"zjwu0522-mcpmark-cicd\",\n            \"harmony\": \"openai-harmony\",\n            \"claude-code\": \"anthropics-claude-code\",\n            \"easyr1\": \"hiyouga-EasyR1\",\n        }\n\n        # CDN URL mapping for downloading GitHub templates\n        self.github_template_url_mapping = {\n            \"codecrafters-io-build-your-own-x\": \"https://storage.mcpmark.ai/github/codecrafters-io-build-your-own-x.zip\",\n            \"missing-semester-missing-semester\": \"https://storage.mcpmark.ai/github/missing-semester-missing-semester.zip\",\n            
\"zjwu0522-mcpmark-cicd\": \"https://storage.mcpmark.ai/github/zjwu0522-mcpmark-cicd.zip\",\n            \"openai-harmony\": \"https://storage.mcpmark.ai/github/openai-harmony.zip\",\n            \"anthropics-claude-code\": \"https://storage.mcpmark.ai/github/anthropics-claude-code.zip\",\n            \"hiyouga-EasyR1\": \"https://storage.mcpmark.ai/github/hiyouga-EasyR1.zip\",\n        }\n\n    # =========================================================================\n    # Core Template Methods (Required by BaseStateManager)\n    # =========================================================================\n\n    # ---------------------------------------------------------------------\n    # Internal helper – template importer (replicates repo_importer logic)\n    # ---------------------------------------------------------------------\n\n    def _import_template_repo(\n        self, template_dir: Path, owner: str, private: bool = True\n    ) -> str:\n        \"\"\"Import repository from local template directory to GitHub (simplified).\"\"\"\n\n        import json\n        import subprocess\n        import time\n\n        # ------------------------------------------------------------------\n        # Helper functions (stripped-down versions of repo_importer utilities)\n        # ------------------------------------------------------------------\n\n        def _list_refs(repo_dir: str) -> list[str]:\n            result = subprocess.run(\n                [\"git\", \"-C\", repo_dir, \"for-each-ref\", \"--format=%(refname)\"],\n                check=True,\n                capture_output=True,\n                text=True,\n            )\n            return result.stdout.strip().splitlines()\n\n        def _push_repo(\n            repo_path: Path, repo_owner: str, repo_name: str, required_refs: list[str]\n        ):\n            \"\"\"Push repo to GitHub: try mirror, else per-ref.\"\"\"\n            token = self.github_token\n            dst_url = 
f\"https://x-access-token:{token}@github.com/{repo_owner}/{repo_name}.git\"\n\n            try:\n                subprocess.run(\n                    [\"git\", \"-C\", str(repo_path), \"push\", \"--mirror\", dst_url],\n                    check=True,\n                    capture_output=True,\n                )\n                return\n            except subprocess.CalledProcessError as err:\n                logger.warning(\n                    \"| [push] Mirror push failed – falling back: %s\",\n                    err.stderr.decode(errors=\"ignore\"),\n                )\n\n            refs = required_refs or _list_refs(str(repo_path))\n            for ref in refs:\n                for attempt in range(3):\n                    try:\n                        subprocess.run(\n                            [\n                                \"git\",\n                                \"-C\",\n                                str(repo_path),\n                                \"push\",\n                                dst_url,\n                                f\"{ref}:{ref}\",\n                            ],\n                            check=True,\n                            capture_output=True,\n                        )\n                        break\n                    except subprocess.CalledProcessError as ref_err:\n                        if attempt == 2:\n                            raise RuntimeError(\n                                f\"Failed to push ref {ref}: {ref_err.stderr}\"\n                            ) from ref_err\n                        time.sleep(2 * (attempt + 1))\n\n        # ------------------------------------------------------------------\n        # Phase 0 – read template metadata\n        # ------------------------------------------------------------------\n        meta = json.loads((template_dir / \"meta.json\").read_text())\n        repo_name: str = meta[\"repo\"]\n        pr_head_refs = meta.get(\"pr_head_refs\", [])\n        default_branch = 
meta.get(\"default_branch\", \"main\")\n\n        pulls_data = json.loads((template_dir / \"pulls.json\").read_text())\n        fork_branches = [\n            pr[\"local_branch\"]\n            for pr in pulls_data\n            if pr.get(\"is_from_fork\") and \"local_branch\" in pr\n        ]\n        needed_refs = (\n            [f\"refs/heads/{default_branch}\"]\n            + [f\"refs/heads/{h}\" for h in pr_head_refs]\n            + [f\"refs/heads/{b}\" for b in fork_branches]\n        )\n\n        # ------------------------------------------------------------------\n        # Phase 1 – create empty repo under owner\n        # ------------------------------------------------------------------\n        create_payload = {\n            \"name\": repo_name,\n            \"description\": f\"Restored template repo {repo_name}\",\n            \"private\": private,\n            \"auto_init\": False,\n            \"has_issues\": True,\n            \"has_projects\": True,\n            \"has_wiki\": False,\n            \"default_branch\": default_branch,  # Set the correct default branch\n        }\n\n        auth_user = self._get_authenticated_user()\n        create_url = (\n            \"https://api.github.com/user/repos\"\n            if owner == auth_user\n            else f\"https://api.github.com/orgs/{owner}/repos\"\n        )\n\n        resp = self._request_with_retry(\"POST\", create_url, json=create_payload)\n        if resp.status_code == 422 and \"name already exists\" in resp.text:\n            # Attempt to delete and recreate\n            self._delete_repository(owner, repo_name)\n            resp = self._request_with_retry(\"POST\", create_url, json=create_payload)\n\n        if resp.status_code not in (200, 201):\n            raise RuntimeError(f\"Failed to create repo: {resp.status_code} {resp.text}\")\n\n        html_url = resp.json()[\"html_url\"]\n        logger.info(\"| [import] Target repository created: %s\", html_url)\n\n        # Safety check: 
Prevent importing to public repositories\n        # Public repos would send @ mention notifications to real users, causing spam\n        if not private:\n            error_msg = (\n                \"ERROR: Cannot import template to a public repository.\\n\\n\"\n                \"Reason: The template contains @ mentions of real GitHub users from the original\\n\"\n                \"repository. Importing to a public repository would send notifications to these\\n\"\n                \"users, which is disruptive and inappropriate.\\n\\n\"\n                \"Solution: Set private=True when calling _import_template_repo().\"\n            )\n            logger.error(error_msg)\n            # Clean up the created repo before raising\n            self._delete_repository(owner, repo_name)\n            raise RuntimeError(error_msg)\n\n        # Immediately disable GitHub Actions for ALL repositories to prevent any accidental triggers\n        # We'll re-enable it later only for mcpmark-cicd\n        logger.info(\n            \"| [import] Disabling GitHub Actions immediately after repo creation...\"\n        )\n        self._disable_github_actions(owner, repo_name)\n\n        # ------------------------------------------------------------------\n        # Phase 2 – push git history\n        # ------------------------------------------------------------------\n        repo_path = template_dir / \"repo\"\n\n        logger.info(\"| [import] Pushing git history …\")\n        _push_repo(repo_path, owner, repo_name, needed_refs)\n\n        # Remove .github directory after pushing with a new commit\n        import shutil\n\n        github_dir = repo_path / \".github\"\n        if github_dir.exists():\n            logger.info(\"| [import] Removing .github directory after push …\")\n            shutil.rmtree(github_dir)\n            # Commit the deletion\n            subprocess.run(\n                [\"git\", \"-C\", str(repo_path), \"add\", \"-A\"],\n                check=True,\n       
         capture_output=True,\n            )\n            subprocess.run(\n                [\n                    \"git\",\n                    \"-C\",\n                    str(repo_path),\n                    \"commit\",\n                    \"-m\",\n                    \"Remove .github directory\",\n                ],\n                capture_output=True,\n            )\n            # Push the new commit\n            token = self.github_token\n            dst_url = (\n                f\"https://x-access-token:{token}@github.com/{owner}/{repo_name}.git\"\n            )\n            subprocess.run(\n                [\"git\", \"-C\", str(repo_path), \"push\", dst_url],\n                check=True,\n                capture_output=True,\n            )\n\n        # ------------------------------------------------------------------\n        # Phase 3 – recreate issues & PRs\n        # ------------------------------------------------------------------\n\n        def _create_comment(issue_number: int, body: str):\n            self._request_with_retry(\n                \"POST\",\n                f\"https://api.github.com/repos/{owner}/{repo_name}/issues/{issue_number}/comments\",\n                json={\"body\": body},\n            )\n\n        def _create_issue(item: dict) -> Optional[int]:\n            data = {\n                \"title\": item[\"title\"],\n                \"body\": self._obfuscate_mentions(item.get(\"body\", \"\")),\n                \"labels\": item.get(\"labels\", []),\n            }\n            r = self._request_with_retry(\n                \"POST\",\n                f\"https://api.github.com/repos/{owner}/{repo_name}/issues\",\n                json=data,\n            )\n            if r.status_code not in (200, 201):\n                return None\n            new_no = r.json()[\"number\"]\n            if item.get(\"state\") == \"closed\":\n                self._request_with_retry(\n                    \"PATCH\",\n                    
f\"https://api.github.com/repos/{owner}/{repo_name}/issues/{new_no}\",\n                    json={\"state\": \"closed\"},\n                )\n            return new_no\n\n        def _create_pull(pr_itm: dict) -> Optional[int]:\n            body = self._obfuscate_mentions(pr_itm.get(\"body\", \"\"))\n            if pr_itm.get(\"is_from_fork\", False):\n                fork_note = f\"\\n\\n---\\n_This PR was originally from a fork: **{pr_itm.get('fork_owner')}/{pr_itm.get('fork_repo')}** (branch: `{pr_itm['head']}`)_\"\n                body = body + fork_note if body else fork_note[2:]\n            payload = {\n                \"title\": pr_itm[\"title\"],\n                \"body\": body,\n                \"head\": pr_itm.get(\"local_branch\", pr_itm[\"head\"]),\n                \"base\": pr_itm[\"base\"],\n            }\n            r = self._request_with_retry(\n                \"POST\",\n                f\"https://api.github.com/repos/{owner}/{repo_name}/pulls\",\n                json=payload,\n            )\n            if r.status_code not in (200, 201):\n                return None\n            return r.json()[\"number\"]\n\n        # Issues\n        issues_data = json.loads((template_dir / \"issues.json\").read_text())\n        created_issues = 0\n        logger.info(\"| [phase] Re-creating issues …\")\n        for itm in issues_data:\n            new_no = _create_issue(itm)\n            if new_no:\n                created_issues += 1\n                for c in itm.get(\"comments\", []):\n                    _create_comment(\n                        new_no,\n                        self._obfuscate_mentions(\n                            f\"*Original author: @{c['user']}*\\n\\n{c['body']}\"\n                        ),\n                    )\n        logger.info(\n            \"| [phase] Created %d out of %d issues\", created_issues, len(issues_data)\n        )\n\n        # Pull requests\n        logger.info(\"| [phase] Re-creating pull requests …\")\n        
created_prs = 0\n        skipped_prs = 0\n        for pr in pulls_data:\n            new_pr_no = _create_pull(pr)\n            if new_pr_no:\n                created_prs += 1\n                for c in pr.get(\"comments\", []):\n                    _create_comment(\n                        new_pr_no,\n                        self._obfuscate_mentions(\n                            f\"*Original author: @{c['user']}*\\n\\n{c['body']}\"\n                        ),\n                    )\n                for rc in pr.get(\"review_comments\", []):\n                    _create_comment(\n                        new_pr_no,\n                        self._obfuscate_mentions(\n                            f\"*Original author: @{rc['user']}* (review)\\n\\n{rc['body']}\"\n                        ),\n                    )\n            else:\n                skipped_prs += 1\n        logger.info(\n            \"| [phase] Created %d PRs, skipped %d PRs\", created_prs, skipped_prs\n        )\n\n        # Re-enable GitHub Actions ONLY for mcpmark-cicd repository\n        # All other repos remain disabled (as set immediately after creation)\n        if \"mcpmark-cicd\" in template_dir.name:\n            logger.info(\"| [import] Re-enabling GitHub Actions for CI/CD repository…\")\n            self._enable_github_actions(owner, repo_name)\n\n        # Disable notifications to prevent email spam\n        logger.info(\"| [import] Disabling repository notifications …\")\n        self._disable_repository_notifications(owner, repo_name)\n\n        logger.info(\"| [import] Repository import complete: %s\", html_url)\n        return html_url\n\n    # ---------------------------------------------------------------------\n    # Public – create initial state using local template import\n    # ---------------------------------------------------------------------\n\n    def _create_initial_state(self, task: \"BaseTask\") -> Optional[InitialStateInfo]:\n        \"\"\"\n        Set up GitHub environment 
for a specific task.\n\n        This may involve:\n        1. Creating/forking test repositories\n        2. Setting up branches\n        3. Creating issues or PRs if needed\n        \"\"\"\n        try:\n            logger.info(f\"| Setting up GitHub state for task: {task.name}\")\n\n            template_name = self.select_initial_state_for_task(task.category_id)\n            if template_name is None:\n                raise RuntimeError(\n                    f\"No template configured for task category: {task.category_id}\"\n                )\n\n            template_dir = (self.templates_root / template_name).resolve()\n            if not template_dir.exists():\n                logger.warning(\n                    \"| Template directory %s not found locally, attempting to download from CDN\",\n                    template_dir,\n                )\n                if not self._download_and_extract_github_template(template_name):\n                    logger.error(\n                        \"| Failed to download template %s from CDN\", template_name\n                    )\n                    return None\n                logger.info(\"| Template %s downloaded successfully\", template_name)\n\n            logger.info(f\"| Importing repository template from {template_dir} …\")\n            owner = self.eval_org if self.eval_org else self._get_authenticated_user()\n\n            if \"mcpmark-cicd\" in template_name:\n                repo_url = self._import_template_repo(template_dir, owner, False)\n            else:\n                repo_url = self._import_template_repo(template_dir, owner, True)\n\n            # Record for cleanup later\n            repo_name = repo_url.rstrip(\"/\").split(\"/\")[-1]\n            self._repos_to_cleanup.append((owner, repo_name))\n\n            # Build InitialStateInfo\n            return InitialStateInfo(\n                state_id=f\"{owner}/{repo_name}\",\n                state_url=repo_url,\n                metadata={\n                 
   \"owner\": owner,\n                    \"repo_name\": repo_name,\n                    \"category\": task.category_id,\n                    \"task_id\": task.task_id,\n                },\n            )\n\n        except Exception as e:\n            logger.error(f\"| GitHub setup failed for {task.name}: {e}\")\n            return None\n\n    # ---------------------------------------------------------------------\n    # BaseStateManager required hooks\n    # ---------------------------------------------------------------------\n\n    def _store_initial_state_info(self, task, state_info: InitialStateInfo) -> None:  # type: ignore[override]\n        if hasattr(task, \"repository_url\"):\n            task.repository_url = state_info.state_url\n\n    def _cleanup_task_initial_state(self, task) -> bool:  # type: ignore[override]\n        \"\"\"No-op – cleanup is handled by self.clean_up which deletes imported repos.\"\"\"\n        return True\n\n    def _cleanup_single_resource(self, resource) -> bool:  # type: ignore[override]\n        \"\"\"No-op – we don't use BaseStateManager's tracked_resources anymore.\"\"\"\n        return True\n\n    # ---------------------------------------------------------------------\n    def clean_up(self, task=None, **kwargs) -> bool:\n        \"\"\"Delete repositories that were imported for tasks.\"\"\"\n        success = True\n        for owner, repo_name in self._repos_to_cleanup:\n            try:\n                self._delete_repository(owner, repo_name)\n                logger.info(\"| Deleted repository: %s/%s\", owner, repo_name)\n            except Exception as err:\n                logger.error(\n                    \"| Failed to delete repository %s/%s: %s\", owner, repo_name, err\n                )\n                success = False\n\n        self._repos_to_cleanup.clear()\n        return success\n\n    # =========================================================================\n    # Repository Creation and Setup Operations\n   
 # =========================================================================\n\n    def _delete_repository(self, owner: str, repo_name: str):\n        \"\"\"Delete a repository (use with caution).\"\"\"\n        delete_url = f\"https://api.github.com/repos/{owner}/{repo_name}\"\n        response = self.session.delete(delete_url)\n\n        if response.status_code not in [200, 204]:\n            logger.warning(\n                f\"| Failed to delete repository {owner}/{repo_name}: {response.text}\"\n            )\n            raise Exception(\n                f\"| Failed to delete repository {owner}/{repo_name}: {response.status_code} {response.text}\"\n            )\n        else:\n            logger.info(f\"| Successfully deleted repository {owner}/{repo_name}\")\n\n    def _obfuscate_mentions(self, text: str) -> str:\n        \"\"\"\n        Obfuscate @ mentions to prevent notifications to real users.\n\n        Replaces @username with @username_XXXX (random suffix) to ensure the mentioned\n        user does not exist on GitHub. 
This prevents notification spam when importing\n        templates that contain @ mentions from original repositories.\n\n        Args:\n            text: The text content that may contain @ mentions\n\n        Returns:\n            Text with obfuscated @ mentions\n        \"\"\"\n        import re\n        import random\n        import string\n\n        if not text:\n            return text\n\n        # Pattern matches @username (GitHub usernames: alphanumeric, hyphens, max 39 chars)\n        # Negative lookbehind (?<![a-zA-Z0-9]) ensures @ is not preceded by alphanumeric,\n        # which excludes emails like user@example.com\n        pattern = r\"(?<![a-zA-Z0-9])@([a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)\"\n\n        def replace_mention(match):\n            username = match.group(1)\n            # Generate random 4-char suffix\n            suffix = \"\".join(\n                random.choices(string.ascii_lowercase + string.digits, k=4)\n            )\n            return f\"@{username}_{suffix}\"\n\n        return re.sub(pattern, replace_mention, text)\n\n    # ---------------------------------------------------------------------\n    # Helper utilities (organisation vs user)\n    # ---------------------------------------------------------------------\n\n    def _get_authenticated_user(self) -> str:\n        \"\"\"Return cached authenticated username or fetch once from GitHub.\"\"\"\n        if hasattr(self, \"_auth_user\") and self._auth_user:\n            return self._auth_user\n\n        response = self.session.get(\"https://api.github.com/user\")\n        if response.status_code == 200:\n            self._auth_user = response.json()[\"login\"]\n            return self._auth_user\n        return None\n\n    # ---------------------------------------------------------------------\n    # Token management helpers\n    # ---------------------------------------------------------------------\n    def _update_session_token(self):\n        \"\"\"Update the session 
Authorization header with the current token.\"\"\"\n        current_token = self.token_pool.get_current_token()\n        self.session.headers.update({\"Authorization\": f\"Bearer {current_token}\"})\n        # Update backward compatibility attribute\n        self.github_token = current_token\n\n    def _rotate_token(self):\n        \"\"\"Rotate to the next token in the pool and update session.\"\"\"\n        next_token = self.token_pool.get_next_token()\n        self.session.headers.update({\"Authorization\": f\"Bearer {next_token}\"})\n        # Update backward compatibility attribute\n        self.github_token = next_token\n        logger.debug(f\"| Rotated to next token in pool\")\n\n    # ---------------------------------------------------------------------\n    # Generic request helper with rate-limit (403) retry handling\n    # ---------------------------------------------------------------------\n    def _request_with_retry(\n        self,\n        method: str,\n        url: str,\n        *,\n        max_retries: int = 2,\n        sleep_seconds: int = 120,\n        **kwargs,\n    ):\n        \"\"\"Send a GitHub API request with basic rate-limit handling and token rotation.\n\n        If a request receives HTTP 403 (rate limit):\n        1. First try rotating to the next token in the pool\n        2. If still rate limited, sleep and retry\n        3. 
After max_retries are exhausted, raise RuntimeError\n        \"\"\"\n        import time  # local import to avoid adding global dependency\n\n        attempt = 0\n        tokens_tried = 0\n\n        while True:\n            # Ensure we have the current token set\n            self._update_session_token()\n\n            resp = self.session.request(method, url, **kwargs)\n            # Successful or non-rate-limited response – return immediately\n            if resp.status_code != 403:\n                return resp\n\n            # 403 – very likely rate-limited\n            # First, try rotating tokens if we have multiple\n            if (\n                self.token_pool.pool_size > 1\n                and tokens_tried < self.token_pool.pool_size\n            ):\n                logger.warning(\n                    \"| GitHub API rate limit encountered. Rotating to next token (tried %d/%d tokens)\",\n                    tokens_tried + 1,\n                    self.token_pool.pool_size,\n                )\n                self._rotate_token()\n                tokens_tried += 1\n                continue\n\n            # All tokens exhausted or single token, resort to sleep/retry\n            if attempt >= max_retries:\n                raise RuntimeError(\n                    f\"GitHub API rate limited after {attempt + 1} attempts with {self.token_pool.pool_size} token(s): {resp.status_code} {resp.text}\"\n                )\n\n            logger.warning(\n                \"| All tokens rate limited (attempt %d/%d). 
Sleeping %d seconds before retrying …\",\n                attempt + 1,\n                max_retries + 1,\n                sleep_seconds,\n            )\n            time.sleep(sleep_seconds)\n            attempt += 1\n            tokens_tried = 0  # Reset token counter for next attempt\n\n    # =========================================================================\n    # Initial State Selection and Repository Creation\n    # =========================================================================\n\n    # Initial state for each task category is resolved via self.initial_state_mapping\n    def select_initial_state_for_task(self, task_category: str) -> Optional[str]:\n        \"\"\"Resolve template name for a task category with light normalization.\"\"\"\n        if not task_category:\n            return None\n\n        candidate_keys = []\n        candidate_keys.append(task_category)\n\n        # Allow users to swap between hyphen/underscore naming conventions.\n        hyphen_to_underscore = task_category.replace(\"-\", \"_\")\n        if hyphen_to_underscore not in candidate_keys:\n            candidate_keys.append(hyphen_to_underscore)\n\n        underscore_to_hyphen = task_category.replace(\"_\", \"-\")\n        if underscore_to_hyphen not in candidate_keys:\n            candidate_keys.append(underscore_to_hyphen)\n\n        for key in candidate_keys:\n            template = self.initial_state_mapping.get(key)\n            if template:\n                if key != task_category:\n                    logger.debug(\n                        \"| Resolved GitHub template for %s via alias %s -> %s\",\n                        task_category,\n                        key,\n                        template,\n                    )\n                return template\n\n        return None\n\n    def extract_repo_info_from_url(self, repo_url: str) -> tuple[str, str]:\n        \"\"\"Extract owner and repo name from GitHub URL.\"\"\"\n        try:\n            from 
urllib.parse import urlparse\n\n            # Support https://github.com/owner/repo format\n            if \"github.com\" in repo_url:\n                path = urlparse(repo_url).path.strip(\"/\")\n                parts = path.split(\"/\")\n                if len(parts) >= 2:\n                    return parts[0], parts[1]\n\n            raise ValueError(f\"Invalid GitHub URL format: {repo_url}\")\n\n        except Exception as e:\n            logger.error(f\"| Failed to extract repo info from URL {repo_url}: {e}\")\n            raise\n\n    def get_service_config_for_agent(self) -> dict:\n        \"\"\"\n        Get service-specific configuration for agent execution.\n\n        Rotates to the next token in the pool before returning config\n        to distribute load across tokens.\n\n        Returns:\n            Dictionary containing configuration needed by the agent/MCP server\n        \"\"\"\n        service_config = {}\n\n        # Add GitHub token if available\n        if self.github_token:\n            service_config[\"github_token\"] = self.github_token\n\n        return service_config\n\n    def set_verification_environment(self, messages_path: str = None) -> None:\n        \"\"\"\n        Set GitHub-specific environment variables for verification scripts.\n\n        This ensures verification scripts use the same token as the current\n        agent execution, maintaining consistency across the evaluation flow.\n\n        Args:\n            messages_path: Optional path to messages.json file for verification\n        \"\"\"\n        import os\n\n        # Set common MCP_MESSAGES if provided\n        if messages_path:\n            os.environ[\"MCP_MESSAGES\"] = str(messages_path)\n\n        # Set GitHub-specific token\n        current_token = self.token_pool.get_current_token()\n        os.environ[\"MCP_GITHUB_TOKEN\"] = current_token\n        logger.info(\"| Set MCP_GITHUB_TOKEN for verification scripts\")\n\n    def _enable_github_actions(self, owner: str, 
repo_name: str):\n        \"\"\"Enable GitHub Actions for the repository using REST API.\"\"\"\n        try:\n            # Enable GitHub Actions\n            url = (\n                f\"https://api.github.com/repos/{owner}/{repo_name}/actions/permissions\"\n            )\n            response = self.session.put(\n                url, json={\"enabled\": True, \"allowed_actions\": \"all\"}\n            )\n\n            if response.status_code in [200, 204]:\n                logger.info(\n                    \"| Successfully enabled GitHub Actions for %s/%s\", owner, repo_name\n                )\n            else:\n                logger.warning(\n                    \"| Failed to enable GitHub Actions: %s %s\",\n                    response.status_code,\n                    response.text,\n                )\n\n        except Exception as e:\n            logger.error(\"| Failed to enable GitHub Actions: %s\", e)\n\n    def _disable_github_actions(self, owner: str, repo_name: str):\n        \"\"\"Disable GitHub Actions for the repository using REST API.\"\"\"\n        try:\n            # Disable GitHub Actions\n            url = (\n                f\"https://api.github.com/repos/{owner}/{repo_name}/actions/permissions\"\n            )\n            response = self.session.put(url, json={\"enabled\": False})\n\n            if response.status_code in [200, 204]:\n                logger.info(\n                    \"| Successfully disabled GitHub Actions for %s/%s\", owner, repo_name\n                )\n            else:\n                logger.warning(\n                    \"| Failed to disable GitHub Actions: %s %s\",\n                    response.status_code,\n                    response.text,\n                )\n\n        except Exception as e:\n            logger.error(\"| Failed to disable GitHub Actions: %s\", e)\n\n    def _disable_repository_notifications(self, owner: str, repo_name: str):\n        \"\"\"Disable repository notifications to prevent email 
spam.\"\"\"\n        try:\n            # Set repository notification subscription to ignore\n            url = f\"https://api.github.com/repos/{owner}/{repo_name}/subscription\"\n            response = self.session.put(\n                url, json={\"subscribed\": False, \"ignored\": True}\n            )\n\n            if response.status_code in [200, 201]:\n                logger.info(\n                    \"| Successfully disabled notifications for %s/%s\", owner, repo_name\n                )\n            elif response.status_code == 403:\n                # This is expected if the token doesn't have notifications scope\n                logger.debug(\n                    \"| Cannot disable notifications for %s/%s (token lacks notifications scope - this is OK)\",\n                    owner,\n                    repo_name,\n                )\n            else:\n                logger.warning(\n                    \"| Failed to disable repository notifications: %s %s\",\n                    response.status_code,\n                    response.text,\n                )\n\n        except Exception as e:\n            logger.error(\"| Failed to disable repository notifications: %s\", e)\n\n    def _download_and_extract_github_template(self, template_name: str) -> bool:\n        \"\"\"\n        Download and extract GitHub template from CDN using wget and unzip commands.\n\n        This approach preserves original file timestamps and is simpler than Python zipfile.\n\n        Args:\n            template_name: Name of the template to download (e.g., \"anthropics-claude-code\")\n\n        Returns:\n            bool: True if download and extraction successful\n        \"\"\"\n        try:\n            import subprocess\n            import sys\n            import tempfile\n            import shutil\n            import os\n\n            # Get the URL from mapping\n            if template_name not in self.github_template_url_mapping:\n                logger.error(f\"| No URL 
mapping found for template: {template_name}\")\n                return False\n\n            template_url = self.github_template_url_mapping[template_name]\n            # Allow override via environment variable\n            template_url = os.getenv(\"GITHUB_TEMPLATE_URL\", template_url)\n\n            logger.info(f\"| ○ Downloading GitHub template from: {template_url}\")\n\n            # Create a temporary directory for the download\n            with tempfile.TemporaryDirectory() as temp_dir:\n                temp_path = Path(temp_dir)\n                zip_path = temp_path / \"github_template.zip\"\n\n                # Step 1: Download using wget/curl\n                logger.info(\"| ○ Downloading GitHub template zip file...\")\n                try:\n                    # Use wget if available, otherwise fall back to curl\n                    if sys.platform == \"win32\":\n                        # Windows: try wget, fall back to curl\n                        try:\n                            result = subprocess.run(\n                                [\"wget\", \"-O\", str(zip_path), template_url],\n                                capture_output=True,\n                                text=True,\n                                check=True,\n                            )\n                        except (subprocess.CalledProcessError, FileNotFoundError):\n                            # Fall back to curl\n                            result = subprocess.run(\n                                [\"curl\", \"-L\", \"-o\", str(zip_path), template_url],\n                                capture_output=True,\n                                text=True,\n                                check=True,\n                            )\n                    else:\n                        # Unix-like systems: try wget, fall back to curl\n                        try:\n                            result = subprocess.run(\n                                [\"wget\", \"-O\", str(zip_path), 
template_url],\n                                capture_output=True,\n                                text=True,\n                                check=True,\n                            )\n                        except (subprocess.CalledProcessError, FileNotFoundError):\n                            # Fall back to curl\n                            result = subprocess.run(\n                                [\"curl\", \"-L\", \"-o\", str(zip_path), template_url],\n                                capture_output=True,\n                                text=True,\n                                check=True,\n                            )\n\n                    logger.info(\"| ✓ Download completed successfully\")\n                except Exception as e:\n                    logger.error(f\"| Download failed: {e}\")\n                    return False\n\n                # Step 2: Extract using unzip\n                logger.info(\"| ○ Extracting GitHub template...\")\n                try:\n                    # Extract to templates root directory\n                    result = subprocess.run(\n                        [\"unzip\", \"-o\", str(zip_path), \"-d\", str(self.templates_root)],\n                        capture_output=True,\n                        text=True,\n                        check=True,\n                    )\n                    logger.info(\"| ✓ Extraction completed successfully\")\n                except Exception as e:\n                    logger.error(f\"| Extraction failed: {e}\")\n                    return False\n\n                # Step 3: Remove __MACOSX folder if it exists\n                macosx_path = self.templates_root / \"__MACOSX\"\n                if macosx_path.exists():\n                    logger.info(\"| ○ Cleaning up macOS metadata...\")\n                    try:\n                        shutil.rmtree(macosx_path)\n                        logger.info(\"| ✓ Removed __MACOSX folder\")\n                    except Exception as e:\n             
           logger.warning(f\"| Failed to remove __MACOSX folder: {e}\")\n\n                # Verify the extracted template directory exists\n                template_path = self.templates_root / template_name\n                if not template_path.exists():\n                    logger.error(\n                        f\"| Extracted template directory not found at expected path: {template_path}\"\n                    )\n                    return False\n\n                logger.info(\n                    f\"| ✓ Successfully downloaded and extracted GitHub template to: {template_path}\"\n                )\n                return True\n\n        except Exception as e:\n            logger.error(f\"| Failed to download and extract GitHub template: {e}\")\n            return False\n"
  },
  {
    "path": "src/mcp_services/github/github_task_manager.py",
    "content": "\"\"\"\nGitHub Task Manager for MCPMark Evaluation Pipeline\n====================================================\n\nThis module provides utilities for discovering, filtering, and managing\nGitHub-based evaluation tasks.\n\nThe task manager is responsible for:\n- Task discovery and filtering\n- Task verification and result processing\n- Task-specific logic (NOT LLM execution)\n\"\"\"\n\nimport sys\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom src.base.task_manager import BaseTask, BaseTaskManager\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\n@dataclass\nclass GitHubTask(BaseTask):\n    \"\"\"Represents a single evaluation task for GitHub service.\"\"\"\n\n    # GitHub-specific fields\n    repository_url: Optional[str] = None\n    branch_name: Optional[str] = None\n    pr_number: Optional[int] = None\n    issue_number: Optional[int] = None\n    expected_actions: Optional[List[str]] = None  # Expected GitHub actions to verify\n\n    # Directory-based task slug (e.g., \"update_readme\")\n    task_name: str = \"\"\n\n    # No need to override name property, inherited from BaseTask\n\n\nclass GitHubTaskManager(BaseTaskManager):\n    \"\"\"Manages task discovery, filtering, and verification for GitHub-based MCPMark evaluation.\"\"\"\n\n    def __init__(self, tasks_root: Path = None, task_suite: str = \"standard\"):\n        \"\"\"Initialize GitHub task manager.\n\n        Args:\n            tasks_root: Path to the tasks directory\n        \"\"\"\n        if tasks_root is None:\n            tasks_root = Path(__file__).resolve().parents[3] / \"tasks\"\n\n        # Call parent constructor\n        super().__init__(\n            tasks_root,\n            mcp_service=\"github\",\n            task_class=GitHubTask,\n            task_organization=\"file\",\n            task_suite=task_suite,\n        )  # GitHub uses file-based tasks\n\n    # 
=========================================================================\n    # Service-specific implementations\n    # =========================================================================\n    # No custom task discovery methods needed; relying entirely on BaseTaskManager defaults.\n\n    def _create_task_from_files(\n        self, category_id: str, task_files_info: Dict[str, Any]\n    ) -> Optional[GitHubTask]:\n        \"\"\"Instantiate a GitHubTask from the dictionary yielded by _find_task_files.\"\"\"\n        import json\n        \n        # Check for meta.json\n        meta_path = task_files_info[\"instruction_path\"].parent / \"meta.json\"\n        final_category_id = category_id\n        task_id = task_files_info[\"task_id\"]\n        \n        if meta_path.exists():\n            try:\n                with open(meta_path, 'r') as f:\n                    meta_data = json.load(f)\n                    # Use values from meta.json if available\n                    final_category_id = meta_data.get(\"category_id\", category_id)\n                    task_id = meta_data.get(\"task_id\", task_id)\n            except Exception as e:\n                logger.warning(f\"Failed to load meta.json from {meta_path}: {e}\")\n\n        return GitHubTask(\n            task_instruction_path=task_files_info[\"instruction_path\"],\n            task_verification_path=task_files_info[\"verification_path\"],\n            service=\"github\",\n            category_id=final_category_id,\n            task_id=task_id,\n            task_name=task_files_info[\"task_id\"],\n        )\n\n    def _get_verification_command(self, task: GitHubTask) -> List[str]:\n        \"\"\"Get the verification command for GitHub tasks.\"\"\"\n        return [sys.executable, str(task.task_verification_path)]\n\n    def get_task_instruction(self, task: GitHubTask) -> str:\n        \"\"\"Return task instruction prefixed with repository context.\n\n        Adds an English prefix to every GitHub task 
instruction so that the\n        agent knows **exactly** which repository to operate on, following the\n        pattern requested by the user:\n\n            Please execute the following task in my repository {owner}/{repo_name}:\n\n        If the repository URL has not yet been injected into the ``task`` (for\n        example when the state manager has not run), we fall back to a more\n        generic prefix without owner/repo placeholder.\n        \"\"\"\n        # Read the original task description first\n        base_instruction = task.get_task_instruction()\n\n        # Derive the owner/repo pair from the repository URL if available\n        prefix: str\n        if task.repository_url:\n            # Example URL: https://github.com/owner/repo_name.git (or without .git)\n            url_parts = task.repository_url.rstrip(\"/\").replace(\".git\", \"\").split(\"/\")\n            if len(url_parts) >= 2:\n                owner, repo_name = url_parts[-2], url_parts[-1]\n                prefix = f\"Please execute the following task in my repository {owner}/{repo_name}:\"\n            else:\n                prefix = \"Please execute the following task:\"\n        else:\n            prefix = \"Please execute the following task:\"\n\n        # Compose instruction with prefix\n        instruction_with_prefix = f\"{prefix}\\n\\n{base_instruction.strip()}\"\n        \n        # Apply the common formatting suffix from base class\n        return self._format_task_instruction(instruction_with_prefix)\n"
  },
  {
    "path": "src/mcp_services/github/repo_exporter.py",
    "content": "\"\"\"\nrepo_exporter.py – Export public GitHub repository *and* its Issues/PRs\n=====================================================================\nWorkflow\n--------\n1. Clone the public repository (full working clone of all branches, not a\n   bare mirror) to ``${out_dir}/${owner}-${repo}/repo``.\n2. Fetch Issues (any state) and *open* Pull-Requests via GitHub REST API (no\n   auth needed for public repos, but a token can be provided to increase the\n   rate limit) and serialise them as JSON under the same folder:\n   • ``issues.json`` – list[Issue]\n   • ``pulls.json`` – list[PullRequest]\n   • ``meta.json``  – {\"owner\": owner, \"repo\": repo,\n     \"default_branch\": branch, \"pr_head_refs\": [refs]}\n\nUsage (CLI)\n-----------\n$ python -m src.mcp_services.github.repo_exporter \\\n    --source_repo_url https://github.com/octocat/Hello-World \\\n    --out-dir ./github_state\n\nOptionally set the ``GITHUB_TOKEN`` environment variable (the CLI also loads\n``.mcp_env``) to avoid the 60-req/h anonymous limit.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport logging\nimport os\nfrom dotenv import load_dotenv\nimport subprocess\nfrom pathlib import Path\nfrom tempfile import mkdtemp\nfrom typing import Optional\nfrom urllib.parse import urlparse\n\nimport requests\n\nlogging.basicConfig(level=logging.INFO)\nlogger = logging.getLogger(__name__)\n\n_API_ROOT = \"https://api.github.com\"\n_DEFAULT_HEADERS = {\n    \"Accept\": \"application/vnd.github.v3+json\",\n    \"User-Agent\": \"MCPMark/RepoExporter/1.0\",\n}\n\n\n# ---------------------------------------------------------------------------\n# Helper utilities\n# ---------------------------------------------------------------------------\n\n\ndef _make_session(token: Optional[str] = None) -> requests.Session:\n    sess = requests.Session()\n    sess.headers.update(_DEFAULT_HEADERS)\n    if token:\n        sess.headers[\"Authorization\"] = f\"Bearer {token}\"\n    return sess\n\n\ndef _parse_repo(url: str) -> tuple[str, str]:\n    parsed = urlparse(url)\n    parts = parsed.path.strip(\"/\").split(\"/\")\n 
   if len(parts) < 2:\n        raise ValueError(f\"Invalid GitHub repo URL: {url}\")\n    return parts[0], parts[1]\n\n\n# ---------------------------------------------------------------------------\n# Core export logic\n# ---------------------------------------------------------------------------\n\n\ndef export_repository(\n    source_repo_url: str,\n    out_dir: str = \"./github_state\",\n    github_token: str | None = None,\n    max_issues: int | None = None,\n    max_pulls: int | None = None,\n) -> str:\n    \"\"\"Export repository code plus Issues/PRs to ``out_dir``.\n\n    ``max_issues`` / ``max_pulls`` – when supplied, export **only** the most\n    recently created *open* Issues or Pull Requests (respectively).\n\n    Returns the absolute path of the export folder.\n    \"\"\"\n\n    owner, repo = _parse_repo(source_repo_url)\n    export_root = Path(out_dir).expanduser().resolve()\n    repo_dir = export_root / f\"{owner}-{repo}\"\n    repo_dir.mkdir(parents=True, exist_ok=True)\n\n    # ------------------------------------------------------------------\n    # 1. 
Clone repository – full or shallow *working* clone (no bare repo)\n    # ------------------------------------------------------------------\n    repo_path = repo_dir / \"repo\"\n    if repo_path.exists():\n        logger.info(\"[clone] Repository already exists, skipping clone: %s\", repo_path)\n    else:\n        logger.info(\"[clone] Cloning %s/%s to %s\", owner, repo, repo_path)\n        env = {\n            **os.environ,\n            \"GIT_TERMINAL_PROMPT\": \"0\",\n            \"GIT_LFS_SKIP_SMUDGE\": \"1\",\n        }\n        tmp_dir = mkdtemp(prefix=\"mcp_export_\")\n        try:\n            # Always perform a full clone (no shallow depth limitation).\n            clone_cmd = [\n                \"git\",\n                \"clone\",\n                \"--no-single-branch\",\n                f\"https://github.com/{owner}/{repo}.git\",\n                tmp_dir,\n            ]\n\n            subprocess.run(clone_cmd, check=True, capture_output=True, env=env)\n            subprocess.run([\"mv\", tmp_dir, str(repo_path)], check=True)\n            logger.info(\"[clone] Clone completed\")\n        finally:\n            # tmp_dir moved if success; remove if left\n            if os.path.isdir(tmp_dir):\n                subprocess.run([\"rm\", \"-rf\", tmp_dir])\n\n    # ------------------------------------------------------------------\n    # 2. 
Dump Issues & Pull Requests\n    # ------------------------------------------------------------------\n    sess = _make_session(github_token)\n\n    def _paginate(url: str, state: str = \"all\", extra_params: dict | None = None):\n        page = 1\n        while True:\n            params = {\"state\": state, \"per_page\": 100, \"page\": page}\n            if extra_params:\n                params.update(extra_params)\n            resp = sess.get(url, params=params)\n            if resp.status_code != 200:\n                logger.warning(\"Failed to list: %s – %s\", url, resp.text)\n                break\n            items = resp.json()\n            if not items:\n                break\n            yield from items\n            page += 1\n\n    # --------------------------------------------------------------\n    # Helper: fetch all issue comments for a given issue / PR number\n    # --------------------------------------------------------------\n    def _fetch_issue_comments(number: int) -> list[dict]:\n        \"\"\"Return a list of {user, body} comment dicts for the given issue/PR.\"\"\"\n        comments = []\n        for c in _paginate(\n            f\"{_API_ROOT}/repos/{owner}/{repo}/issues/{number}/comments\"\n        ):\n            comments.append(\n                {\n                    \"user\": c.get(\"user\", {}).get(\"login\", \"unknown\"),\n                    \"body\": c.get(\"body\", \"\"),\n                }\n            )\n        return comments\n\n    # --------------------------------------------------------------\n    # Helper: fetch all *review* comments (code comments) for a PR\n    # --------------------------------------------------------------\n    def _fetch_review_comments(number: int) -> list[dict]:\n        \"\"\"Return a list of {user, body} review comments for the given PR.\"\"\"\n        comments = []\n        for c in _paginate(f\"{_API_ROOT}/repos/{owner}/{repo}/pulls/{number}/comments\"):\n            comments.append(\n           
     {\n                    \"user\": c.get(\"user\", {}).get(\"login\", \"unknown\"),\n                    \"body\": c.get(\"body\", \"\"),\n                }\n            )\n        return comments\n\n    # Issues (non-PR)\n    issues = []\n    # If max_issues is 0, skip fetching issues entirely\n    if max_issues == 0:\n        logger.info(\"[export] Skipping issues (max_issues=0)\")\n    else:\n        for itm in _paginate(\n            f\"{_API_ROOT}/repos/{owner}/{repo}/issues\",\n            extra_params={\"sort\": \"created\", \"direction\": \"desc\"},\n        ):\n            if \"pull_request\" in itm:\n                continue\n            issues.append(\n                {\n                    \"title\": itm.get(\"title\"),\n                    \"body\": itm.get(\"body\", \"\"),\n                    \"labels\": [lbl.get(\"name\") for lbl in itm.get(\"labels\", [])],\n                    \"state\": itm.get(\"state\", \"open\"),  # Store issue state\n                    \"number\": itm.get(\"number\"),  # Store issue number for reference\n                    \"comments\": _fetch_issue_comments(itm.get(\"number\")),\n                }\n            )\n\n            if max_issues is not None and len(issues) >= max_issues:\n                break\n    (repo_dir / \"issues.json\").write_text(json.dumps(issues, indent=2))\n    logger.info(\"[export] Saved %d issues\", len(issues))\n\n    # Pull requests – include *all* PRs including those from forks\n    pulls = []\n    pr_head_refs: set[str] = set()\n    fork_pr_branches: dict[str, dict] = {}  # Maps PR branch names to fork info\n\n    # If max_pulls is 0, skip fetching pull requests entirely\n    if max_pulls == 0:\n        logger.info(\"[export] Skipping pull requests (max_pulls=0)\")\n    else:\n        for pr in _paginate(\n            f\"{_API_ROOT}/repos/{owner}/{repo}/pulls\",\n            state=\"open\",\n            extra_params={\"sort\": \"created\", \"direction\": \"desc\"},\n        ):\n            
pr_number = pr.get(\"number\")\n            head = pr.get(\"head\", {})\n            if head is None:\n                logger.warning(\"PR #%s has no head (deleted fork), skipping\", pr_number)\n                continue  # skip PRs with missing head (deleted fork)\n\n            head_repo = head.get(\"repo\")\n            head_ref = head.get(\"ref\")\n            head_sha = head.get(\"sha\")\n\n            if head_repo is None:\n                logger.warning(\"PR #%s source repo was deleted, skipping\", pr_number)\n                continue  # skip PRs where source repo was deleted\n\n            head_repo_full = head_repo.get(\"full_name\")\n            is_from_fork = head_repo_full != f\"{owner}/{repo}\"\n\n            # Create PR data with fork information\n            pr_data = {\n                \"number\": pr_number,\n                \"title\": pr.get(\"title\"),\n                \"body\": pr.get(\"body\", \"\"),\n                \"head\": head_ref,\n                \"base\": pr.get(\"base\", {}).get(\"ref\"),\n                \"is_from_fork\": is_from_fork,\n            }\n\n            if is_from_fork:\n                # Store additional metadata for forked PRs\n                pr_data[\"fork_owner\"] = head_repo.get(\"owner\", {}).get(\"login\")\n                pr_data[\"fork_repo\"] = head_repo.get(\"name\")\n                pr_data[\"head_sha\"] = head_sha\n\n                # Create a unique branch name for this forked PR\n                fork_branch_name = f\"pr/{pr_number}-{pr_data['fork_owner']}-{head_ref}\"\n                pr_data[\"local_branch\"] = fork_branch_name\n\n                fork_pr_branches[fork_branch_name] = {\n                    \"clone_url\": head_repo.get(\"clone_url\"),\n                    \"ref\": head_ref,\n                    \"sha\": head_sha,\n                    \"pr_number\": pr_number,\n                }\n            else:\n                # For non-fork PRs, keep the original branch reference\n                
pr_head_refs.add(head_ref)\n\n            # Attach comments\n            pr_data[\"comments\"] = _fetch_issue_comments(pr_number)\n            pr_data[\"review_comments\"] = _fetch_review_comments(pr_number)\n\n            pulls.append(pr_data)\n\n            if max_pulls is not None and len(pulls) >= max_pulls:\n                break\n    (repo_dir / \"pulls.json\").write_text(json.dumps(pulls, indent=2))\n    logger.info(\"[export] Saved %d pull requests\", len(pulls))\n\n    # Get default branch info first (needed for fetching)\n    sess = _make_session(github_token)\n    try:\n        repo_info = sess.get(f\"{_API_ROOT}/repos/{owner}/{repo}\")\n        default_branch = repo_info.json().get(\"default_branch\", \"main\")\n    except Exception:\n        default_branch = \"main\"\n\n    # Fetch branches from non-fork PRs (branches from the same repository)\n    non_fork_branches = list(pr_head_refs)  # These are branches from the same repo\n    # Always include the default branch in the branches to fetch\n    if default_branch not in non_fork_branches:\n        non_fork_branches.append(default_branch)\n        pr_head_refs.add(default_branch)\n\n    if non_fork_branches:\n        logger.info(\n            \"[fetch] Fetching %d branches from same repository (including default branch '%s')\",\n            len(non_fork_branches),\n            default_branch,\n        )\n        try:\n            # Fetch all remote branches to ensure we have the PR branches\n            subprocess.run(\n                [\"git\", \"-C\", str(repo_path), \"fetch\", \"origin\", \"--no-tags\"],\n                check=True,\n                capture_output=True,\n            )\n\n            # Create local branches for each PR branch\n            for branch in non_fork_branches:\n                try:\n                    # Create local branch tracking the remote branch\n                    subprocess.run(\n                        [\n                            \"git\",\n                      
      \"-C\",\n                            str(repo_path),\n                            \"branch\",\n                            \"--track\",\n                            branch,\n                            f\"origin/{branch}\",\n                        ],\n                        check=False,\n                        capture_output=True,\n                    )  # check=False because branch might already exist\n                    logger.info(\"[fetch] Created local branch %s\", branch)\n                except subprocess.CalledProcessError:\n                    # Branch might already exist, which is fine\n                    pass\n\n        except subprocess.CalledProcessError as e:\n            logger.warning(\n                \"[fetch] Failed to fetch branches from origin: %s\",\n                e.stderr.decode(errors=\"ignore\") if e.stderr else str(e),\n            )\n\n    # Fetch branches from forks for PRs\n    if fork_pr_branches:\n        logger.info(\n            \"[fetch] Fetching branches from %d forked PRs\", len(fork_pr_branches)\n        )\n\n        for branch_name, fork_info in fork_pr_branches.items():\n            try:\n                logger.info(\n                    \"[fetch] Fetching branch %s from fork %s\",\n                    fork_info[\"ref\"],\n                    fork_info[\"clone_url\"],\n                )\n\n                # Add fork as remote and fetch the specific branch\n                remote_name = f\"fork-pr-{fork_info['pr_number']}\"\n\n                # Add remote\n                subprocess.run(\n                    [\n                        \"git\",\n                        \"-C\",\n                        str(repo_path),\n                        \"remote\",\n                        \"add\",\n                        remote_name,\n                        fork_info[\"clone_url\"],\n                    ],\n                    check=True,\n                    capture_output=True,\n                )\n\n                # Fetch 
the specific branch from the fork\n                subprocess.run(\n                    [\n                        \"git\",\n                        \"-C\",\n                        str(repo_path),\n                        \"fetch\",\n                        remote_name,\n                        f\"{fork_info['ref']}:refs/heads/{branch_name}\",\n                    ],\n                    check=True,\n                    capture_output=True,\n                )\n\n                # Remove the remote after fetching\n                subprocess.run(\n                    [\"git\", \"-C\", str(repo_path), \"remote\", \"remove\", remote_name],\n                    check=True,\n                    capture_output=True,\n                )\n\n                # Add the fork branch to pr_head_refs so it gets pushed\n                pr_head_refs.add(branch_name)\n\n                logger.info(\"[fetch] Successfully fetched branch %s\", branch_name)\n\n            except subprocess.CalledProcessError as e:\n                logger.warning(\n                    \"[fetch] Failed to fetch branch from fork PR #%s: %s\",\n                    fork_info[\"pr_number\"],\n                    e.stderr.decode(errors=\"ignore\") if e.stderr else str(e),\n                )\n            except Exception as e:\n                logger.warning(\n                    \"[fetch] Unexpected error fetching fork PR #%s: %s\",\n                    fork_info[\"pr_number\"],\n                    str(e),\n                )\n\n    meta = {\n        \"owner\": owner,\n        \"repo\": repo,\n        \"default_branch\": default_branch,\n        \"pr_head_refs\": sorted(pr_head_refs),\n    }\n    (repo_dir / \"meta.json\").write_text(json.dumps(meta, indent=2))\n\n    logger.info(\"[done] Export finished – data stored at %s\", repo_dir)\n    return str(repo_dir)\n\n\n# ---------------------------------------------------------------------------\n# CLI\n# 
---------------------------------------------------------------------------\nif __name__ == \"__main__\":\n    import argparse\n\n    load_dotenv(\".mcp_env\")\n\n    parser = argparse.ArgumentParser(\n        description=\"Export public GitHub repository with Issues/PRs\"\n    )\n    parser.add_argument(\n        \"--source_repo_url\", required=True, help=\"HTTPS URL of the public repository\"\n    )\n    parser.add_argument(\n        \"--out-dir\", default=\"./github_state\", help=\"Output directory root\"\n    )\n    parser.add_argument(\n        \"--max-issues\",\n        type=int,\n        default=20,\n        help=\"Export only the latest N issues (optional)\",\n    )\n    parser.add_argument(\n        \"--max-pulls\",\n        type=int,\n        default=5,\n        help=\"Export only the latest N pull requests (optional)\",\n    )\n    args = parser.parse_args()\n\n    token = os.getenv(\"GITHUB_TOKEN\")\n\n    export_repository(\n        args.source_repo_url, args.out_dir, token, args.max_issues, args.max_pulls\n    )\n"
  },
  {
    "path": "src/mcp_services/github/repo_importer.py",
    "content": "\"\"\"\nrepo_importer.py – Restore previously exported GitHub repository into an org/user\n===============================================================================\nGiven a local export folder created by ``repo_exporter.py`` that contains\n``repo.git`` (bare mirror) and JSON files for Issues/PRs, this script:\n1. Creates an empty repository under the specified owner (user/org) using the\n   provided GitHub token.\n2. Pushes *all* Git history from the local bare repository to the target repo\n   (fallback to per-ref push to avoid timeouts).\n3. Re-creates the open Issues & Pull Requests from the JSON dump.\n\nCLI usage\n---------\n$ python -m src.mcp_services.github.repo_importer \\\n    ./github_template_repo/octocat-Hello-World \\\n    --token YOUR_GH_PAT \\\n    --target-owner EvalOrg \\\n    --private\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport logging\nimport os\nimport subprocess\nimport time\nfrom pathlib import Path\nfrom typing import Iterable\n\nimport requests\nfrom dotenv import load_dotenv\n\nlogging.basicConfig(level=logging.INFO)\nlogger = logging.getLogger(__name__)\n\n_API_ROOT = \"https://api.github.com\"\n_HEADERS = {\n    \"Accept\": \"application/vnd.github.v3+json\",\n    \"User-Agent\": \"MCPMark/RepoImporter/1.0\",\n}\n\n# ---------------------------------------------------------------------------\n# Helper functions copied / simplified from repo_mirror (shallow clone logic removed)\n# ---------------------------------------------------------------------------\n\n\ndef _make_session(token: str) -> requests.Session:\n    sess = requests.Session()\n    sess.headers.update(_HEADERS | {\"Authorization\": f\"Bearer {token}\"})\n    return sess\n\n\ndef _create_target_repo(\n    sess: requests.Session, owner: str, repo_name: str, description: str, private: bool\n) -> str:\n    data = {\n        \"name\": repo_name,\n        \"description\": description,\n        \"private\": private,\n        
\"auto_init\": False,\n        \"has_issues\": True,\n        \"has_projects\": True,\n        \"has_wiki\": False,\n    }\n\n    # Determine if owner == auth user\n    auth_user = _get_authenticated_user(sess)\n    create_url = (\n        f\"{_API_ROOT}/user/repos\"\n        if owner == auth_user\n        else f\"{_API_ROOT}/orgs/{owner}/repos\"\n    )\n\n    resp = sess.post(create_url, json=data)\n    if resp.status_code == 422 and \"name already exists\" in resp.text:\n        logger.warning(\"Repository already exists; attempting to delete and recreate …\")\n        _delete_repo(sess, owner, repo_name)\n        resp = sess.post(create_url, json=data)\n\n    if resp.status_code not in (200, 201):\n        raise RuntimeError(f\"Failed to create repo: {resp.status_code} {resp.text}\")\n\n    html_url = resp.json()[\"html_url\"]\n    logger.info(\"[init] Target repository created: %s\", html_url)\n    return html_url\n\n\ndef _get_authenticated_user(sess: requests.Session) -> str:\n    resp = sess.get(f\"{_API_ROOT}/user\")\n    resp.raise_for_status()\n    return resp.json()[\"login\"]\n\n\ndef _delete_repo(sess: requests.Session, owner: str, repo: str):\n    sess.delete(f\"{_API_ROOT}/repos/{owner}/{repo}\")\n\n\ndef _list_refs(repo_dir: str) -> list[str]:\n    result = subprocess.run(\n        [\"git\", \"-C\", repo_dir, \"for-each-ref\", \"--format=%(refname)\"],\n        check=True,\n        capture_output=True,\n        text=True,\n    )\n    return result.stdout.strip().splitlines()\n\n\ndef _push_repo(\n    repo_path: Path,\n    owner: str,\n    repo_name: str,\n    token: str,\n    required_refs: Iterable[str] | None = None,\n):\n    \"\"\"Push repository to GitHub.\n\n    Strategy:\n    1. Attempt a full `git push --mirror`.\n    2. If that fails (e.g. 
due to large repo), fall back to pushing refs one-by-one.\n    \"\"\"\n\n    dst_url = f\"https://x-access-token:{token}@github.com/{owner}/{repo_name}.git\"\n\n    # First try mirror push (fast path)\n    try:\n        subprocess.run(\n            [\"git\", \"-C\", str(repo_path), \"push\", \"--mirror\", dst_url],\n            check=True,\n            capture_output=True,\n        )\n        logger.info(\"[push] Mirror push succeeded\")\n        return\n    except subprocess.CalledProcessError as err:\n        logger.warning(\n            \"[push] Mirror push failed (%s). Falling back to per-ref\",\n            err.stderr.decode(errors=\"ignore\"),\n        )\n\n    # ------------------------------------------------------------------\n    # Fallback: push each ref individually (robust but slower)\n    # ------------------------------------------------------------------\n    refs = required_refs or _list_refs(str(repo_path))\n    logger.info(\"[push] Pushing %d refs individually …\", len(refs))\n    for ref in refs:\n        for attempt in range(3):\n            try:\n                subprocess.run(\n                    [\"git\", \"-C\", str(repo_path), \"push\", dst_url, f\"{ref}:{ref}\"],\n                    check=True,\n                    capture_output=True,\n                )\n                break\n            except subprocess.CalledProcessError as ref_err:\n                if attempt == 2:\n                    raise RuntimeError(\n                        f\"Failed to push ref {ref}: {ref_err.stderr}\"\n                    ) from ref_err\n                time.sleep(2 * (attempt + 1))\n\n\ndef _create_comment(\n    sess: requests.Session, owner: str, repo: str, issue_number: int, body: str\n):\n    \"\"\"Create a comment on an Issue or Pull Request. 
Returns True on success.\"\"\"\n    resp = sess.post(\n        f\"{_API_ROOT}/repos/{owner}/{repo}/issues/{issue_number}/comments\",\n        json={\"body\": body},\n    )\n    if resp.status_code not in (200, 201):\n        logger.debug(\"Failed to create comment on #%s: %s\", issue_number, resp.text)\n        return False\n    return True\n\n\ndef _create_issue(\n    sess: requests.Session,\n    owner: str,\n    repo: str,\n    title: str,\n    body: str,\n    labels: list[str],\n    state: str = \"open\",\n    number: int = None,\n):\n    \"\"\"Create a new Issue and return the *new* issue number (or None on failure).\"\"\"\n    data = {\"title\": title, \"body\": body, \"labels\": labels}\n    resp = sess.post(f\"{_API_ROOT}/repos/{owner}/{repo}/issues\", json=data)\n    if resp.status_code not in (200, 201):\n        logger.debug(\"Failed to create issue #%s: %s\", number, resp.text)\n        return None\n\n    new_number = resp.json().get(\"number\")\n\n    # Close issue if original state was closed\n    if state == \"closed\":\n        close_resp = sess.patch(\n            f\"{_API_ROOT}/repos/{owner}/{repo}/issues/{new_number}\",\n            json={\"state\": \"closed\"},\n        )\n        if close_resp.status_code not in (200, 201):\n            logger.debug(\"Failed to close issue #%s: %s\", new_number, close_resp.text)\n\n    return new_number\n\n\ndef _create_pull(\n    sess: requests.Session,\n    owner: str,\n    repo: str,\n    title: str,\n    body: str,\n    head: str,\n    base: str,\n    pr_number: int = None,\n):\n    \"\"\"Create a Pull Request and return the *new* PR number (or None on failure).\"\"\"\n    data = {\"title\": title, \"body\": body, \"head\": head, \"base\": base}\n    resp = sess.post(f\"{_API_ROOT}/repos/{owner}/{repo}/pulls\", json=data)\n    if resp.status_code not in (200, 201):\n        logger.warning(\n            \"Failed to create PR #%s (head: %s, base: %s): %s\",\n            pr_number,\n            head,\n          
  base,\n            resp.text,\n        )\n        return None\n    return resp.json().get(\"number\")\n\n\ndef _enable_github_actions(sess: requests.Session, owner: str, repo_name: str):\n    \"\"\"Enable GitHub Actions for the repository using REST API.\"\"\"\n    try:\n        url = f\"{_API_ROOT}/repos/{owner}/{repo_name}/actions/permissions\"\n        response = sess.put(url, json={\"enabled\": True, \"allowed_actions\": \"all\"})\n\n        if response.status_code in [200, 204]:\n            logger.info(\n                \"Successfully enabled GitHub Actions for %s/%s\", owner, repo_name\n            )\n        else:\n            logger.warning(\n                \"Failed to enable GitHub Actions: %s %s\",\n                response.status_code,\n                response.text,\n            )\n\n    except Exception as e:\n        logger.error(\"Failed to enable GitHub Actions: %s\", e)\n\n\ndef _disable_repository_notifications(\n    sess: requests.Session, owner: str, repo_name: str\n):\n    \"\"\"Disable repository notifications to prevent email spam.\"\"\"\n    try:\n        url = f\"{_API_ROOT}/repos/{owner}/{repo_name}/subscription\"\n        response = sess.put(url, json={\"subscribed\": False, \"ignored\": True})\n\n        if response.status_code in [200, 201]:\n            logger.info(\n                \"Successfully disabled notifications for %s/%s\", owner, repo_name\n            )\n        elif response.status_code == 403:\n            # This is expected if the token doesn't have notifications scope\n            logger.debug(\n                \"Cannot disable notifications for %s/%s (token lacks notifications scope - this is OK)\",\n                owner,\n                repo_name,\n            )\n        else:\n            logger.warning(\n                \"Failed to disable repository notifications: %s %s\",\n                response.status_code,\n                response.text,\n            )\n\n    except Exception as e:\n        
logger.error(\"Failed to disable repository notifications: %s\", e)\n\n\ndef _set_default_branch(\n    sess: requests.Session, owner: str, repo_name: str, default_branch: str\n):\n    \"\"\"Set the default branch for a repository.\"\"\"\n    if default_branch != \"main\":  # Only update if not already main\n        logger.info(\"[import] Setting default branch to '%s'\", default_branch)\n        url = f\"{_API_ROOT}/repos/{owner}/{repo_name}\"\n        data = {\"default_branch\": default_branch}\n        resp = sess.patch(url, json=data)\n        if resp.status_code in (200, 201):\n            logger.info(\n                \"[import] Successfully set default branch to '%s'\", default_branch\n            )\n        else:\n            logger.warning(\n                \"[import] Failed to set default branch: %s %s\",\n                resp.status_code,\n                resp.text,\n            )\n\n\ndef _remove_github_directory(repo_path: Path, owner: str, repo_name: str, token: str):\n    \"\"\"Remove .github directory after pushing and commit the deletion.\"\"\"\n    import shutil\n\n    github_dir = repo_path / \".github\"\n    if github_dir.exists():\n        logger.info(\"[import] Removing .github directory after push …\")\n        shutil.rmtree(github_dir)\n        # Commit the deletion\n        subprocess.run(\n            [\"git\", \"-C\", str(repo_path), \"add\", \"-A\"], check=True, capture_output=True\n        )\n        subprocess.run(\n            [\"git\", \"-C\", str(repo_path), \"commit\", \"-m\", \"Remove .github directory\"],\n            capture_output=True,\n        )\n        # Push the new commit\n        dst_url = f\"https://x-access-token:{token}@github.com/{owner}/{repo_name}.git\"\n        subprocess.run(\n            [\"git\", \"-C\", str(repo_path), \"push\", dst_url],\n            check=True,\n            capture_output=True,\n        )\n\n\n# ---------------------------------------------------------------------------\n# Main import 
logic\n# ---------------------------------------------------------------------------\n\n\ndef import_repository(\n    template_dir: str, github_token: str, target_owner: str, private: bool = False\n) -> str:\n    \"\"\"Import repository from a local template directory to GitHub.\"\"\"\n\n    # ------------------------------------------------------------------\n    # Ensure Git HTTP buffer large enough to avoid 400 errors on big pushes\n    # ------------------------------------------------------------------\n    try:\n        subprocess.run(\n            [\n                \"git\",\n                \"config\",\n                \"--global\",\n                \"http.postBuffer\",\n                \"157286400\",  # 150 MiB\n            ],\n            check=True,\n            capture_output=True,\n        )\n        logger.debug(\"[init] Set git http.postBuffer to 150MiB globally\")\n    except subprocess.CalledProcessError as cfg_err:\n        logger.warning(\n            \"[init] Failed to set http.postBuffer – proceeding anyway: %s\",\n            cfg_err.stderr.decode(errors=\"ignore\"),\n        )\n\n    tdir = Path(template_dir).expanduser().resolve()\n    meta = json.loads((tdir / \"meta.json\").read_text())\n    repo_name = meta[\"repo\"]\n    pr_head_refs = meta.get(\"pr_head_refs\", [])\n    default_branch = meta.get(\"default_branch\", \"main\")\n\n    # Also include fork PR branches that were fetched\n    pulls = json.loads((tdir / \"pulls.json\").read_text())\n    fork_branches = [\n        pr[\"local_branch\"]\n        for pr in pulls\n        if pr.get(\"is_from_fork\", False) and \"local_branch\" in pr\n    ]\n\n    needed_refs = (\n        [f\"refs/heads/{default_branch}\"]\n        + [f\"refs/heads/{h}\" for h in pr_head_refs]\n        + [f\"refs/heads/{b}\" for b in fork_branches]\n    )\n\n    sess = _make_session(github_token)\n\n    # 1. 
Create target repo\n    html_url = _create_target_repo(\n        sess, target_owner, repo_name, f\"Restored mirror of {repo_name}\", private\n    )\n\n    # 2. Push code\n    repo_path = tdir / \"repo\"\n    logger.info(\"[phase] Pushing git history …\")\n    _push_repo(repo_path, target_owner, repo_name, github_token, needed_refs)\n\n    # Set the default branch if it's not 'main'\n    _set_default_branch(sess, target_owner, repo_name, default_branch)\n\n    # Remove .github directory right after pushing, before creating issues/PRs\n    _remove_github_directory(repo_path, target_owner, repo_name, github_token)\n\n    # 3. Re-create issues & PRs\n    logger.info(\"[phase] Re-creating issues …\")\n    issues = json.loads((tdir / \"issues.json\").read_text())\n    created_issues = 0\n    for itm in issues:\n        new_issue_no = _create_issue(\n            sess,\n            target_owner,\n            repo_name,\n            itm[\"title\"],\n            itm.get(\"body\", \"\"),\n            itm.get(\"labels\", []),\n            itm.get(\"state\", \"open\"),\n            itm.get(\"number\"),\n        )\n        if new_issue_no:\n            created_issues += 1\n            for c in itm.get(\"comments\", []):\n                comment_body = f\"*Original author: @{c['user']}*\\n\\n{c['body']}\"\n                _create_comment(\n                    sess, target_owner, repo_name, new_issue_no, comment_body\n                )\n    logger.info(\"[phase] Created %d out of %d issues\", created_issues, len(issues))\n\n    logger.info(\"[phase] Re-creating pull requests …\")\n    pulls = json.loads((tdir / \"pulls.json\").read_text())\n    created_prs = 0\n    skipped_prs = 0\n\n    for pr in pulls:\n        # Use local_branch for forked PRs, otherwise use original head\n        head_branch = pr.get(\"local_branch\", pr[\"head\"])\n\n        # Add note to PR body if it's from a fork\n        body = pr.get(\"body\", \"\")\n        if pr.get(\"is_from_fork\", False):\n          
  fork_note = f\"\\n\\n---\\n_This PR was originally from a fork: **{pr.get('fork_owner')}/{pr.get('fork_repo')}** (branch: `{pr['head']}`)_\"\n            body = (\n                body + fork_note if body else fork_note[2:]\n            )  # Remove leading newlines if body is empty\n\n        new_pr_number = _create_pull(\n            sess,\n            target_owner,\n            repo_name,\n            pr[\"title\"],\n            body,\n            head_branch,\n            pr[\"base\"],\n            pr.get(\"number\"),\n        )\n\n        if new_pr_number:\n            created_prs += 1\n            for c in pr.get(\"comments\", []):\n                comment_body = f\"*Original author: @{c['user']}*\\n\\n{c['body']}\"\n                _create_comment(\n                    sess, target_owner, repo_name, new_pr_number, comment_body\n                )\n            for rc in pr.get(\"review_comments\", []):\n                comment_body = (\n                    f\"*Original author: @{rc['user']}* (review)\\n\\n{rc['body']}\"\n                )\n                _create_comment(\n                    sess, target_owner, repo_name, new_pr_number, comment_body\n                )\n        else:\n            skipped_prs += 1\n\n    logger.info(\"[phase] Created %d PRs, skipped %d PRs\", created_prs, skipped_prs)\n\n    # Enable GitHub Actions after creating issues and PRs\n    logger.info(\"[import] Enabling GitHub Actions …\")\n    _enable_github_actions(sess, target_owner, repo_name)\n\n    # Disable notifications to prevent email spam\n    logger.info(\"[import] Disabling repository notifications …\")\n    _disable_repository_notifications(sess, target_owner, repo_name)\n\n    logger.info(\"[done] Import complete: %s\", html_url)\n    return html_url\n\n\n# ---------------------------------------------------------------------------\n# CLI\n# ---------------------------------------------------------------------------\nif __name__ == \"__main__\":\n    import 
argparse\n\n    load_dotenv(\".mcp_env\")\n\n    parser = argparse.ArgumentParser(\n        description=\"Import repository from local template into GitHub\"\n    )\n    parser.add_argument(\"--template_dir\", help=\"Path to exported template directory\")\n    parser.add_argument(\n        \"--target-owner\",\n        \"-o\",\n        default=\"mcpmark-eval\",\n        help=\"User or organisation that will own the new repository\",\n    )\n    args = parser.parse_args()\n\n    token = os.getenv(\"GITHUB_TOKEN\")\n    if not token:\n        parser.error(\"GITHUB_TOKEN not set in environment or .mcp_env\")\n\n    # Always create the target repository as private\n    import_repository(args.template_dir, token, args.target_owner, True)\n"
  },
  {
    "path": "src/mcp_services/github/token_pool.py",
    "content": "\"\"\"\nGitHub Token Pool Manager\n=========================\n\nSimple round-robin token pool for distributing API requests across multiple tokens\nto avoid rate limit issues.\n\"\"\"\n\nfrom typing import List\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass GitHubTokenPool:\n    \"\"\"\n    Manages a pool of GitHub tokens with round-robin selection.\n    \"\"\"\n    \n    def __init__(self, tokens: List[str]):\n        \"\"\"\n        Initialize token pool.\n        \n        Args:\n            tokens: List of GitHub personal access tokens\n        \"\"\"\n        if not tokens:\n            raise ValueError(\"Token pool must contain at least one token\")\n            \n        self.tokens = tokens\n        self.current_index = 0\n        logger.info(f\"Initialized GitHub token pool with {len(tokens)} token(s)\")\n    \n    def get_next_token(self) -> str:\n        \"\"\"\n        Get the next token in round-robin fashion.\n        \n        Returns:\n            The next GitHub token to use\n        \"\"\"\n        token = self.tokens[self.current_index]\n        self.current_index = (self.current_index + 1) % len(self.tokens)\n        return token\n    \n    def get_current_token(self) -> str:\n        \"\"\"\n        Get the current token without advancing the index.\n        \n        Returns:\n            The current GitHub token\n        \"\"\"\n        return self.tokens[self.current_index]\n    \n    @property\n    def pool_size(self) -> int:\n        \"\"\"Get the number of tokens in the pool.\"\"\"\n        return len(self.tokens)"
  },
  {
    "path": "src/mcp_services/insforge/__init__.py",
    "content": "\"\"\"Insforge MCP Service Implementation for MCPMark.\"\"\"\n"
  },
  {
    "path": "src/mcp_services/insforge/insforge_login_helper.py",
    "content": "\"\"\"\nInsforge Login Helper for MCPMark\n==================================\n\nHandles Insforge backend authentication and connection validation.\n\"\"\"\n\nimport json\nimport requests\nfrom pathlib import Path\nfrom typing import Optional, Dict, Any\n\nfrom src.base.login_helper import BaseLoginHelper\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass InsforgeLoginHelper(BaseLoginHelper):\n    \"\"\"Handles Insforge backend authentication and connection validation.\"\"\"\n\n    def __init__(\n        self,\n        api_key: str,\n        backend_url: str,\n        state_path: Optional[Path] = None,\n    ):\n        \"\"\"Initialize Insforge login helper.\n\n        Args:\n            api_key: Insforge backend API key for authentication\n            backend_url: Insforge backend URL (e.g., https://your-app.insforge.app)\n            state_path: Path to save connection state\n        \"\"\"\n        super().__init__()\n        self.api_key = api_key\n        self.backend_url = backend_url.rstrip('/')\n        self.state_path = state_path or Path.home() / \".mcpbench\" / \"insforge_auth.json\"\n\n        # Ensure state directory exists\n        self.state_path.parent.mkdir(parents=True, exist_ok=True)\n\n    def login(self, **kwargs) -> bool:\n        \"\"\"Test Insforge backend connection and validate API key.\n\n        Returns:\n            bool: True if connection successful and API key valid\n        \"\"\"\n        try:\n            # Test 1: Basic connectivity - try to get backend metadata\n            logger.info(f\"Testing connection to Insforge backend: {self.backend_url}\")\n\n            headers = {\n                \"Authorization\": f\"Bearer {self.api_key}\",\n                \"Content-Type\": \"application/json\",\n            }\n\n            # Test with a simple API endpoint - get current user or backend info\n            # Try the auth current session endpoint first\n            test_url = 
f\"{self.backend_url}/api/auth/sessions/current\"\n\n            response = requests.get(\n                test_url,\n                headers=headers,\n                timeout=10,\n            )\n\n            if response.status_code == 200:\n                # API key is valid and can authenticate\n                logger.info(\"✓ Insforge API key authentication successful\")\n                connection_info = {\n                    \"backend_url\": self.backend_url,\n                    \"authenticated\": True,\n                    \"authenticated_at\": self._get_current_timestamp(),\n                }\n            elif response.status_code == 401:\n                # Invalid API key\n                logger.error(\"✗ Invalid Insforge API key\")\n                return False\n            else:\n                # API key might be admin key, try a different endpoint\n                # Try listing tables/backend metadata as a test\n                logger.info(\"Testing with backend metadata endpoint...\")\n\n                # Simple connectivity test - just check if backend is reachable\n                health_url = f\"{self.backend_url}/api/health\"\n                try:\n                    health_response = requests.get(health_url, timeout=5)\n                    if health_response.status_code in [200, 404]:  # 404 is ok, backend is reachable\n                        logger.info(\"✓ Insforge backend is reachable\")\n                        connection_info = {\n                            \"backend_url\": self.backend_url,\n                            \"api_key_type\": \"admin\",\n                            \"authenticated\": True,\n                            \"authenticated_at\": self._get_current_timestamp(),\n                        }\n                    else:\n                        logger.warning(f\"Unexpected response from backend: {health_response.status_code}\")\n                        connection_info = {\n                            \"backend_url\": 
self.backend_url,\n                            \"authenticated\": True,\n                            \"authenticated_at\": self._get_current_timestamp(),\n                        }\n                except Exception as e:\n                    logger.warning(f\"Health check failed, but proceeding: {e}\")\n                    # Still consider it successful if we have credentials\n                    connection_info = {\n                        \"backend_url\": self.backend_url,\n                        \"authenticated\": True,\n                        \"authenticated_at\": self._get_current_timestamp(),\n                    }\n\n            # Save connection state\n            self._save_connection_state(connection_info)\n\n            logger.info(f\"Insforge backend connection validated: {self.backend_url}\")\n            return True\n\n        except requests.exceptions.Timeout:\n            logger.error(f\"Connection timeout to Insforge backend: {self.backend_url}\")\n            return False\n        except requests.exceptions.ConnectionError:\n            logger.error(f\"Cannot connect to Insforge backend: {self.backend_url}\")\n            return False\n        except Exception as e:\n            logger.error(f\"Unexpected error during Insforge authentication: {e}\")\n            return False\n\n    def _save_connection_state(self, state: Dict[str, Any]):\n        \"\"\"Save connection state to file.\"\"\"\n        try:\n            # Don't save API key\n            safe_state = {k: v for k, v in state.items() if k not in [\"api_key\", \"access_token\"]}\n\n            with open(self.state_path, \"w\") as f:\n                json.dump(safe_state, f, indent=2)\n\n            # Set restrictive permissions\n            self.state_path.chmod(0o600)\n            logger.info(f\"Connection state saved to: {self.state_path}\")\n\n        except Exception as e:\n            logger.error(f\"Failed to save connection state: {e}\")\n\n    def _get_current_timestamp(self) -> 
str:\n        \"\"\"Get current timestamp in ISO format.\"\"\"\n        from datetime import datetime, timezone\n\n        return datetime.now(timezone.utc).isoformat()\n\n    def is_connected(self) -> bool:\n        \"\"\"Check if we can connect to Insforge backend.\"\"\"\n        return self.login()\n\n    def get_connection_params(self) -> Dict[str, Any]:\n        \"\"\"Get connection parameters (without API key).\"\"\"\n        return {\n            \"backend_url\": self.backend_url,\n        }\n"
  },
  {
    "path": "src/mcp_services/insforge/insforge_state_manager.py",
    "content": "\"\"\"\nInsforge State Manager for MCPMark\n===================================\n\nManages backend state for Insforge tasks including setup via prepare_environment.py\nand resource cleanup tracking.\n\"\"\"\n\nimport os\nimport sys\nimport subprocess\nimport requests\nfrom pathlib import Path\nfrom typing import Optional, Dict, Any, List\n\nfrom src.base.state_manager import BaseStateManager, InitialStateInfo\nfrom src.base.task_manager import BaseTask\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass InsforgeStateManager(BaseStateManager):\n    \"\"\"Manages Insforge backend state for task evaluation.\"\"\"\n\n    def __init__(\n        self,\n        api_key: str,\n        backend_url: str,\n    ):\n        \"\"\"Initialize Insforge state manager.\n\n        Args:\n            api_key: Insforge backend API key for authentication\n            backend_url: Insforge backend URL (e.g., https://your-app.insforge.app)\n        \"\"\"\n        super().__init__(service_name=\"insforge\")\n\n        self.api_key = api_key\n        self.backend_url = backend_url.rstrip('/')\n\n        # HTTP headers for API requests\n        self.headers = {\n            \"Authorization\": f\"Bearer {self.api_key}\",\n            \"Content-Type\": \"application/json\",\n        }\n\n        # Track current task context for agent configuration\n        self._current_task_context: Optional[Dict[str, Any]] = None\n\n        # Validate connection on initialization\n        try:\n            self._test_connection()\n            logger.info(\"Insforge state manager initialized successfully\")\n        except Exception as e:\n            raise RuntimeError(f\"Insforge initialization failed: {e}\")\n\n        # Store baseline tables (system tables that exist before any tasks run)\n        self._baseline_tables = set(\n            (t['schema'], t['name']) for t in self._get_all_tables()\n        )\n        logger.debug(f\"Stored baseline: 
{len(self._baseline_tables)} tables\")\n\n    def _test_connection(self):\n        \"\"\"Test backend connection.\"\"\"\n        try:\n            # Simple connectivity test - try any endpoint\n            response = requests.get(\n                f\"{self.backend_url}/api/health\",\n                timeout=5,\n            )\n            # Any response (even 404) means backend is reachable\n            logger.debug(f\"Insforge backend connectivity test: {response.status_code}\")\n        except requests.exceptions.RequestException:\n            # Try with API key\n            try:\n                response = requests.get(\n                    f\"{self.backend_url}/api/auth/sessions/current\",\n                    headers=self.headers,\n                    timeout=5,\n                )\n                logger.debug(f\"Insforge backend auth test: {response.status_code}\")\n            except Exception as inner_e:\n                raise RuntimeError(f\"Cannot connect to Insforge backend: {inner_e}\")\n\n    def _create_initial_state(self, task: BaseTask) -> Optional[InitialStateInfo]:\n        \"\"\"Create initial backend state for a task.\n\n        Restores from backup which may place tables in public or task-specific schema.\n\n        Args:\n            task: Task for which to create initial state\n\n        Returns:\n            InitialStateInfo object or None if creation failed\n        \"\"\"\n        try:\n            # Generate unique state ID for this task run\n            state_id = f\"{task.category_id}_{task.task_id}_{self._get_timestamp()}\"\n            schema_name = task.category_id\n\n            logger.info(f\"| Creating initial state for Insforge task: {task.name}\")\n\n            # Drop schema first (cleanup from previous runs)\n            self._drop_schema(schema_name)\n\n            # Get list of existing tables before restore (to track what we create)\n            tables_before = self._get_all_tables()\n            logger.info(f\"| Tables 
before restore: {len(tables_before)}\")\n\n            # Note: Don't create schema here - pg_restore will create it from the backup\n\n            # Restore from backup if backup exists (may create tables in public or task schema)\n            if self._restore_from_backup(schema_name):\n                logger.info(f\"| ✓ Restored '{schema_name}' from backup\")\n            else:\n                logger.info(f\"| ○ No backup found for '{schema_name}'\")\n                # Run prepare_environment.py if it exists\n                task_prepared = self._run_prepare_environment(task)\n                if not task_prepared:\n                    logger.debug(f\"| No prepare_environment.py found for task {task.name}\")\n\n            # Get list of tables after restore (to track what we need to clean up)\n            tables_after = self._get_all_tables()\n\n            # Track ALL new tables created by the restore (compare before/after)\n            tables_before_set = {(t['schema'], t['name']) for t in tables_before}\n            created_tables = [\n                t for t in tables_after\n                if (t['schema'], t['name']) not in tables_before_set\n            ]\n\n            logger.info(f\"| Tracked {len(created_tables)} new tables for cleanup\")\n            for t in created_tables:\n                logger.debug(f\"|   - {t['schema']}.{t['name']}\")\n\n            # Track the task context including created tables\n            context = {\n                \"state_id\": state_id,\n                \"category_id\": task.category_id,\n                \"task_id\": task.task_id,\n                \"task_name\": task.name,\n                \"schema\": schema_name,\n                \"created_tables\": created_tables,  # Track all created tables\n            }\n\n            return InitialStateInfo(\n                state_id=state_id,\n                state_url=self.backend_url,\n                metadata=context,\n            )\n\n        except Exception as e:\n           
 logger.error(f\"Failed to create initial state for {task.name}: {e}\")\n            return None\n\n    def _store_initial_state_info(\n        self, task: BaseTask, state_info: InitialStateInfo\n    ) -> None:\n        \"\"\"Store backend info in task object for agent access.\"\"\"\n        if hasattr(task, \"__dict__\"):\n            task.backend_url = self.backend_url\n            task.api_key = self.api_key\n            task.state_id = state_info.state_id\n\n            # Store current task context for agent configuration\n            self._current_task_context = state_info.metadata\n\n    def _cleanup_task_initial_state(self, task: BaseTask) -> bool:\n        \"\"\"Clean up task-specific resources.\n\n        Drops ALL tables created during task (both setup and agent-created)\n        by comparing against baseline.\n\n        Args:\n            task: Task whose initial state should be cleaned up\n\n        Returns:\n            True if cleanup successful\n        \"\"\"\n        try:\n            logger.info(f\"| Cleaning up initial state for task: {task.name}\")\n\n            if self._current_task_context:\n                schema_name = self._current_task_context.get(\"schema\")\n\n                # Get ALL current tables\n                all_current_tables = self._get_all_tables()\n\n                # Find tables to drop: anything not in baseline\n                tables_to_drop = [\n                    t for t in all_current_tables\n                    if (t['schema'], t['name']) not in self._baseline_tables\n                ]\n\n                logger.info(f\"| Found {len(tables_to_drop)} tables to clean up (setup + agent-created)\")\n\n                # Drop individual tables\n                for table_info in tables_to_drop:\n                    try:\n                        self._drop_table(table_info[\"schema\"], table_info[\"name\"])\n                        logger.debug(f\"| ✓ Dropped table: {table_info['schema']}.{table_info['name']}\")\n            
        except Exception as e:\n                        logger.warning(f\"| Failed to drop table {table_info}: {e}\")\n\n                # Drop the task schema (may be empty if all tables were in public)\n                if schema_name:\n                    try:\n                        self._drop_schema(schema_name)\n                        logger.info(f\"| ✓ Dropped schema: {schema_name}\")\n                    except Exception as e:\n                        logger.warning(f\"| Failed to drop schema {schema_name}: {e}\")\n\n                # Clear task context\n                if self._current_task_context.get(\"task_name\") == task.name:\n                    self._current_task_context = None\n\n            logger.info(f\"| ✓ Initial state cleanup completed for {task.name}\")\n            return True\n\n        except Exception as e:\n            logger.error(f\"Failed to cleanup task initial state for {task.name}: {e}\")\n            return False\n\n    def _cleanup_single_resource(self, resource: Dict[str, Any]) -> bool:\n        \"\"\"Clean up a single tracked resource.\n\n        This is a placeholder for resource-specific cleanup logic.\n        Tasks should handle their own cleanup via cleanup scripts.\n\n        Args:\n            resource: Resource dictionary with type, id, and metadata\n\n        Returns:\n            True if cleanup successful\n        \"\"\"\n        resource_type = resource[\"type\"]\n        resource_id = resource[\"id\"]\n\n        logger.debug(f\"| Cleanup for {resource_type} {resource_id} (handled by task scripts)\")\n        return True\n\n    def _run_prepare_environment(self, task: BaseTask) -> bool:\n        \"\"\"Run prepare_environment.py script if it exists in the task directory.\n\n        The script should use Insforge MCP tools or HTTP API to set up required state.\n\n        Args:\n            task: Task for which to prepare environment\n\n        Returns:\n            True if script ran successfully, False if script 
doesn't exist\n        \"\"\"\n        task_dir = task.task_instruction_path.parent\n        prepare_script = task_dir / \"prepare_environment.py\"\n\n        if not prepare_script.exists():\n            logger.debug(f\"No prepare_environment.py found for task {task.name}\")\n            return False\n\n        logger.info(f\"| Running prepare_environment.py for task {task.name}\")\n\n        # Set up environment variables for the script\n        env = os.environ.copy()\n        env.update({\n            \"INSFORGE_BACKEND_URL\": self.backend_url,\n            \"INSFORGE_API_KEY\": self.api_key,\n        })\n\n        try:\n            # Run the prepare_environment.py script\n            result = subprocess.run(\n                [sys.executable, str(prepare_script)],\n                cwd=str(task_dir),  # Run from task directory\n                env=env,\n                capture_output=True,\n                text=True,\n                timeout=300,  # 5 minute timeout\n            )\n\n            if result.returncode == 0:\n                logger.info(f\"| ✓ Environment preparation completed for {task.name}\")\n                if result.stdout.strip():\n                    logger.debug(f\"| prepare_environment.py output: {result.stdout}\")\n                return True\n            else:\n                logger.error(f\"| ✗ Environment preparation failed for {task.name}\")\n                logger.error(f\"| Error output: {result.stderr}\")\n                raise RuntimeError(f\"prepare_environment.py failed with exit code {result.returncode}\")\n\n        except subprocess.TimeoutExpired:\n            logger.error(f\"✗ Environment preparation timed out for {task.name}\")\n            raise RuntimeError(\"prepare_environment.py execution timed out\")\n        except Exception as e:\n            logger.error(f\"✗ Failed to run prepare_environment.py for {task.name}: {e}\")\n            raise\n\n    def _get_timestamp(self) -> str:\n        \"\"\"Get timestamp for 
unique naming.\"\"\"\n        from datetime import datetime\n\n        return datetime.now().strftime(\"%Y%m%d%H%M%S\")\n\n    def _drop_schema(self, schema_name: str) -> None:\n        \"\"\"Drop schema and all its contents.\"\"\"\n        import psycopg2\n        from psycopg2 import sql\n\n        conn_params = {\n            \"host\": \"localhost\",\n            \"port\": 5432,\n            \"user\": \"postgres\",\n            \"password\": \"postgres\",\n            \"database\": \"insforge\",\n        }\n\n        conn = psycopg2.connect(**conn_params)\n        conn.autocommit = True\n        try:\n            with conn.cursor() as cur:\n                cur.execute(\n                    sql.SQL(\"DROP SCHEMA IF EXISTS {} CASCADE\").format(\n                        sql.Identifier(schema_name)\n                    )\n                )\n                logger.debug(f\"| Dropped schema: {schema_name}\")\n        finally:\n            conn.close()\n\n    def _create_schema(self, schema_name: str) -> None:\n        \"\"\"Create empty schema.\"\"\"\n        import psycopg2\n        from psycopg2 import sql\n\n        conn_params = {\n            \"host\": \"localhost\",\n            \"port\": 5432,\n            \"user\": \"postgres\",\n            \"password\": \"postgres\",\n            \"database\": \"insforge\",\n        }\n\n        conn = psycopg2.connect(**conn_params)\n        conn.autocommit = True\n        try:\n            with conn.cursor() as cur:\n                cur.execute(\n                    sql.SQL(\"CREATE SCHEMA {}\").format(sql.Identifier(schema_name))\n                )\n                logger.debug(f\"| Created schema: {schema_name}\")\n        finally:\n            conn.close()\n\n    def _get_all_tables(self) -> List[Dict[str, str]]:\n        \"\"\"Get list of all user tables.\n\n        Returns:\n            List of dicts with 'schema' and 'name' keys\n        \"\"\"\n        import psycopg2\n\n        conn_params = {\n            
\"host\": \"localhost\",\n            \"port\": 5432,\n            \"user\": \"postgres\",\n            \"password\": \"postgres\",\n            \"database\": \"insforge\",\n        }\n\n        conn = psycopg2.connect(**conn_params)\n        try:\n            with conn.cursor() as cur:\n                cur.execute(\"\"\"\n                    SELECT table_schema, table_name\n                    FROM information_schema.tables\n                    WHERE table_type = 'BASE TABLE'\n                    AND table_schema NOT IN ('information_schema', 'pg_catalog')\n                    AND table_schema NOT LIKE 'pg_%'\n                    AND table_name NOT LIKE '\\\\_%'\n                    ORDER BY table_schema, table_name\n                \"\"\")\n                rows = cur.fetchall()\n                return [{\"schema\": row[0], \"name\": row[1]} for row in rows]\n        finally:\n            conn.close()\n\n    def _drop_table(self, schema_name: str, table_name: str) -> None:\n        \"\"\"Drop a specific table or materialized view.\"\"\"\n        import psycopg2\n        from psycopg2 import sql\n\n        conn_params = {\n            \"host\": \"localhost\",\n            \"port\": 5432,\n            \"user\": \"postgres\",\n            \"password\": \"postgres\",\n            \"database\": \"insforge\",\n        }\n\n        conn = psycopg2.connect(**conn_params)\n        conn.autocommit = True\n        try:\n            with conn.cursor() as cur:\n                # Try dropping as table first\n                cur.execute(\n                    sql.SQL(\"DROP TABLE IF EXISTS {}.{} CASCADE\").format(\n                        sql.Identifier(schema_name),\n                        sql.Identifier(table_name)\n                    )\n                )\n                # Also try dropping as materialized view (in case agent created one)\n                cur.execute(\n                    sql.SQL(\"DROP MATERIALIZED VIEW IF EXISTS {}.{} CASCADE\").format(\n                   
     sql.Identifier(schema_name),\n                        sql.Identifier(table_name)\n                    )\n                )\n                logger.debug(f\"| Dropped table/view: {schema_name}.{table_name}\")\n        finally:\n            conn.close()\n\n    def _restore_from_backup(self, category_name: str) -> bool:\n        \"\"\"Restore from backup file.\n\n        Tables may be restored into public schema or category-specific schema\n        depending on how the backup was created.\n\n        Args:\n            category_name: Name of category (e.g., 'employees', 'chinook', 'lego')\n\n        Returns:\n            True if backup was restored, False if no backup exists\n        \"\"\"\n        # Path to backup file\n        backup_dir = Path(__file__).parent.parent.parent.parent / \"postgres_state\"\n        backup_file = backup_dir / f\"{category_name}.backup\"\n\n        logger.debug(f\"| Looking for backup at: {backup_file}\")\n        logger.debug(f\"| Backup exists: {backup_file.exists()}\")\n\n        if not backup_file.exists():\n            logger.info(f\"| ○ No backup file found: {backup_file}\")\n            return False\n\n        logger.info(f\"| Restoring {category_name} from backup...\")\n\n        # Set up environment for pg_restore\n        env = os.environ.copy()\n        env[\"PGPASSWORD\"] = \"postgres\"\n\n        try:\n            # Restore backup without schema filter (tables go to whatever schema they're in)\n            result = subprocess.run(\n                [\n                    \"pg_restore\",\n                    \"-h\", \"localhost\",\n                    \"-p\", \"5432\",\n                    \"-U\", \"postgres\",\n                    \"-d\", \"insforge\",\n                    \"-v\",\n                    str(backup_file),\n                ],\n                env=env,\n                capture_output=True,\n                text=True,\n                timeout=120,  # 2 minute timeout\n            )\n\n            if 
result.returncode != 0 and \"ERROR\" in result.stderr:\n                logger.warning(f\"| pg_restore had errors for {category_name}: {result.stderr}\")\n                return False\n\n            logger.info(f\"| ✓ {category_name} restored successfully\")\n            return True\n\n        except subprocess.TimeoutExpired:\n            logger.error(f\"| ✗ Restore timed out for {category_name}\")\n            return False\n        except Exception as e:\n            logger.error(f\"| ✗ Failed to restore {category_name}: {e}\")\n            return False\n\n    def get_service_config_for_agent(self) -> dict:\n        \"\"\"Get configuration for agent execution.\n\n        This configuration is passed to the agent/MCP server so it can\n        connect to the Insforge backend.\n\n        Returns:\n            Dictionary containing backend URL and API key\n        \"\"\"\n        config = {\n            \"backend_url\": self.backend_url,\n            \"api_key\": self.api_key,\n        }\n\n        # Include current task context if available\n        if self._current_task_context:\n            config[\"task_context\"] = self._current_task_context\n\n        return config\n\n    def set_verification_environment(self, messages_path: str = None) -> None:\n        \"\"\"Set environment variables needed for verification scripts.\n\n        Args:\n            messages_path: Optional path to messages.json file for verification\n        \"\"\"\n        os.environ[\"INSFORGE_BACKEND_URL\"] = self.backend_url\n        os.environ[\"INSFORGE_API_KEY\"] = self.api_key\n\n        # Set PostgreSQL connection details for direct database verification\n        # (Insforge exposes its internal postgres database for verification)\n        os.environ[\"POSTGRES_HOST\"] = \"localhost\"\n        os.environ[\"POSTGRES_PORT\"] = \"5432\"\n        os.environ[\"POSTGRES_DATABASE\"] = \"insforge\"\n        os.environ[\"POSTGRES_USERNAME\"] = \"postgres\"\n        
os.environ[\"POSTGRES_PASSWORD\"] = \"postgres\"\n\n        if messages_path:\n            os.environ[\"MCP_MESSAGES\"] = str(messages_path)\n\n        logger.debug(\"Verification environment variables set for Insforge (including direct postgres access)\")\n"
  },
  {
    "path": "src/mcp_services/insforge/insforge_task_manager.py",
    "content": "\"\"\"\nInsforge Task Manager for MCPMark\n===================================\n\nManages Insforge task discovery, execution, and verification.\n\"\"\"\n\nimport os\nimport subprocess\nimport sys\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom src.base.task_manager import BaseTask, BaseTaskManager\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\n@dataclass\nclass InsforgeTask(BaseTask):\n    \"\"\"Insforge-specific task with backend information.\"\"\"\n\n    task_name: str = \"\"\n    backend_url: Optional[str] = None\n    api_key: Optional[str] = None\n\n\nclass InsforgeTaskManager(BaseTaskManager):\n    \"\"\"Manages Insforge tasks for MCPMark evaluation.\"\"\"\n\n    def __init__(self, tasks_root: Path = None):\n        \"\"\"Initialize Insforge task manager.\n\n        Args:\n            tasks_root: Path to tasks directory\n        \"\"\"\n        if tasks_root is None:\n            tasks_root = Path(__file__).resolve().parents[3] / \"tasks\"\n\n        super().__init__(\n            tasks_root,\n            mcp_service=\"insforge\",\n            task_class=InsforgeTask,\n            task_organization=\"file\",  # Insforge uses file-based tasks\n        )\n\n    def _create_task_from_files(\n        self, category_id: str, task_files_info: Dict[str, Any]\n    ) -> Optional[InsforgeTask]:\n        \"\"\"Instantiate an `InsforgeTask` from the dictionary returned by `_find_task_files`.\"\"\"\n        import json\n\n        # Check for meta.json\n        meta_path = task_files_info[\"instruction_path\"].parent / \"meta.json\"\n        final_category_id = category_id\n        task_id = task_files_info[\"task_id\"]\n\n        if meta_path.exists():\n            try:\n                with open(meta_path, 'r') as f:\n                    meta_data = json.load(f)\n                    # Use values from meta.json if available\n                    final_category_id = 
meta_data.get(\"category_id\", category_id)\n                    task_id = meta_data.get(\"task_id\", task_id)\n            except Exception as e:\n                logger.warning(f\"Failed to load meta.json from {meta_path}: {e}\")\n\n        return InsforgeTask(\n            task_instruction_path=task_files_info[\"instruction_path\"],\n            task_verification_path=task_files_info[\"verification_path\"],\n            service=\"insforge\",\n            category_id=final_category_id,\n            task_id=task_id,\n            task_name=task_files_info[\"task_id\"],\n        )\n\n    def _get_verification_command(self, task: InsforgeTask) -> List[str]:\n        \"\"\"Get verification command with Insforge backend info.\"\"\"\n        cmd = [sys.executable, str(task.task_verification_path)]\n        return cmd\n\n    def run_verification(self, task: BaseTask) -> subprocess.CompletedProcess:\n        \"\"\"Run verification with Insforge environment.\"\"\"\n        env = os.environ.copy()\n\n        # Pass Insforge connection info to verification script\n        if hasattr(task, \"backend_url\") and task.backend_url:\n            env[\"INSFORGE_BACKEND_URL\"] = task.backend_url\n\n        if hasattr(task, \"api_key\") and task.api_key:\n            env[\"INSFORGE_API_KEY\"] = task.api_key\n\n        return subprocess.run(\n            self._get_verification_command(task),\n            capture_output=True,\n            text=True,\n            timeout=300,\n            env=env,\n        )\n\n    def _format_task_instruction(self, base_instruction: str) -> str:\n        \"\"\"Add Insforge-specific instructions.\"\"\"\n        return (\n            base_instruction\n            + \"\\n\\nNote: Use Insforge MCP tools to complete this task. The backend connection is already configured.\"\n        )\n"
  },
  {
    "path": "src/mcp_services/notion/__init__.py",
    "content": "\"\"\"\nNotion-specific modules for MCPMark.\n\"\"\"\n\nfrom .notion_task_manager import NotionTaskManager, NotionTask\nfrom .notion_state_manager import NotionStateManager\n\n__all__ = [\"NotionTaskManager\", \"NotionTask\", \"NotionStateManager\"]\n"
  },
  {
    "path": "src/mcp_services/notion/notion_login_helper.py",
    "content": "\"\"\"\nNotion Login Helper for MCPMark\n=================================\n\nThis module provides a utility class and CLI script for logging into Notion\nusing Playwright. It saves the authenticated session state to a file,\nwhich can be used for subsequent automated tasks.\n\"\"\"\n\nimport argparse\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom playwright.sync_api import (\n    BrowserContext,\n    Page,\n    TimeoutError as PlaywrightTimeoutError,\n    sync_playwright,\n)\n\nfrom src.base.login_helper import BaseLoginHelper\nfrom src.logger import get_logger\n\n# Initialize logger\nlogger = get_logger(__name__)\n\n\nclass NotionLoginHelper(BaseLoginHelper):\n    \"\"\"\n    Utility helper for logging into Notion using Playwright.\n    \"\"\"\n\n    SUPPORTED_BROWSERS = {\"chromium\", \"firefox\"}\n\n    def __init__(\n        self,\n        *,\n        url: Optional[str] = None,\n        headless: bool = True,\n        state_path: Optional[str | Path] = None,\n        browser: str = \"firefox\",\n    ) -> None:\n        \"\"\"\n        Initializes the Notion login helper.\n\n        Args:\n            url: The Notion URL to open after launching the browser.\n            headless: Whether to run Playwright in headless mode.\n            state_path: The path to save the authenticated session state.\n            browser: The browser engine to use ('chromium' or 'firefox').\n        \"\"\"\n        super().__init__()\n        if browser not in self.SUPPORTED_BROWSERS:\n            raise ValueError(\n                f\"Unsupported browser '{browser}'. 
Supported browsers are: {', '.join(self.SUPPORTED_BROWSERS)}\"\n            )\n\n        self.url = url or \"https://www.notion.so/login\"\n        self.headless = headless\n        self.browser_name = browser\n        self.state_path = (\n            Path(state_path or Path.cwd() / \"notion_state.json\").expanduser().resolve()\n        )\n        self._browser_context: Optional[BrowserContext] = None\n        self._playwright = None\n        self._browser = None\n\n    def login(self) -> BrowserContext:\n        \"\"\"\n        Launches a browser, performs login, and saves the session state.\n        \"\"\"\n        if self.state_path.exists():\n            try:\n                self.state_path.unlink()\n            except OSError as e:\n                logger.warning(\"Unable to remove existing state file: %s\", e)\n\n        if self._playwright is None:\n            self._playwright = sync_playwright().start()\n\n        browser_type = getattr(self._playwright, self.browser_name)\n        self._browser = browser_type.launch(headless=self.headless)\n        context = self._browser.new_context()\n        page = context.new_page()\n\n        logger.info(\"Navigating to Notion URL: %s\", self.url)\n        page.goto(self.url, wait_until=\"load\")\n\n        if self.headless:\n            self._handle_headless_login(context)\n        else:\n            logger.info(\n                \"A browser window has been opened. 
Please complete the Notion login.\"\n            )\n            logger.info(\n                \"After you see your workspace, return to this terminal and press <ENTER>.\"\n            )\n            initial_url = page.url\n            input()\n            try:\n                page.wait_for_url(lambda u: u != initial_url, timeout=10_000)\n            except PlaywrightTimeoutError:\n                pass  # It's okay if the URL doesn't change\n\n        try:\n            page.wait_for_load_state(\"domcontentloaded\", timeout=5_000)\n        except PlaywrightTimeoutError:\n            pass\n\n        context.storage_state(path=str(self.state_path))\n        logger.info(\"✅ Login successful! Session state saved to %s\", self.state_path)\n\n        self._browser_context = context\n        return context\n\n    def close(self) -> None:\n        \"\"\"Closes the underlying browser and Playwright instance.\"\"\"\n        if self._browser_context:\n            try:\n                self._browser_context.close()\n            finally:\n                self._browser_context = None\n        if self._browser:\n            try:\n                self._browser.close()\n            finally:\n                self._browser = None\n        if self._playwright:\n            self._playwright.stop()\n            self._playwright = None\n\n    def _handle_headless_login(self, context: BrowserContext) -> None:\n        \"\"\"\n        Guides the user through the login process in headless mode.\n        \"\"\"\n        page: Page = context.pages[0]\n        login_url = \"https://www.notion.so/login\"\n        page.goto(login_url, wait_until=\"domcontentloaded\")\n\n        email = input(\"Enter your Notion email address: \").strip()\n        try:\n            email_input = page.locator(\n                'input[placeholder=\"Enter your email address...\"]'\n            )\n            email_input.wait_for(state=\"visible\", timeout=120_000)\n            email_input.fill(email)\n            
email_input.press(\"Enter\")\n        except PlaywrightTimeoutError:\n            raise RuntimeError(\"Timed out waiting for the email input field.\")\n        except Exception:\n            page.get_by_role(\"button\", name=\"Continue\", exact=True).click()\n\n        try:\n            code_input = page.locator('input[placeholder=\"Enter code\"]')\n            code_input.wait_for(state=\"visible\", timeout=120_000)\n            code = input(\"Enter the verification code from your email: \").strip()\n            code_input.fill(code)\n            code_input.press(\"Enter\")\n        except PlaywrightTimeoutError:\n            raise RuntimeError(\"Timed out waiting for the verification code input.\")\n        except Exception:\n            page.get_by_role(\"button\", name=\"Continue\", exact=True).click()\n\n        try:\n            page.wait_for_url(lambda url: url != login_url, timeout=180_000)\n        except PlaywrightTimeoutError:\n            logger.warning(\"Login redirect timed out, but proceeding to save state.\")\n\n        if self.url and self.url != login_url:\n            page.goto(self.url, wait_until=\"domcontentloaded\")\n\n    def __enter__(self) -> \"NotionLoginHelper\":\n        self.login()\n        return self\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        self.close()\n\n\ndef main():\n    \"\"\"Main entry point for the Notion login CLI script.\"\"\"\n    parser = argparse.ArgumentParser(\n        description=\"Authenticate to Notion and generate a session state file.\",\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n    )\n    parser.add_argument(\n        \"--headless\",\n        action=\"store_true\",\n        help=\"Run the login flow in headless mode (prompts for credentials).\",\n    )\n    parser.add_argument(\n        \"--browser\",\n        default=\"firefox\",\n        choices=[\"chromium\", \"firefox\"],\n        help=\"The browser engine to use for Playwright.\",\n    )\n    args = 
parser.parse_args()\n\n    helper = NotionLoginHelper(headless=args.headless, browser=args.browser)\n    with helper:\n        logger.info(\"Login process completed.\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "src/mcp_services/notion/notion_state_manager.py",
    "content": "\"\"\"\nNotion State Manager for MCPMark\n=================================\n\nThis module handles the duplication and management of Notion initial states\nPages for consistent task evaluation using Playwright automation.\n\"\"\"\n\nimport time\nfrom pathlib import Path\nfrom typing import Optional, Tuple, Dict, Any, Set\n\nfrom notion_client import Client\nfrom playwright.sync_api import (\n    Browser,\n    BrowserContext,\n    Page,\n    Playwright,\n    TimeoutError as PlaywrightTimeoutError,\n    sync_playwright,\n)\n\nfrom src.base.state_manager import BaseStateManager, InitialStateInfo\nfrom src.base.task_manager import BaseTask\nfrom src.logger import get_logger\nfrom src.mcp_services.notion.notion_task_manager import NotionTask\nimport re\n\n# Initialize logger\nlogger = get_logger(__name__)\n\n# Pattern to match orphan pages with \"(n)\" suffix, e.g., \"Title (1)\", \"Title (2)\"\nORPHAN_PAGE_PATTERN = re.compile(r\".+\\s+\\(\\d+\\)$\")\n\n# Selectors for Notion UI elements\nPAGE_MENU_BUTTON_SELECTOR = '[data-testid=\"more-button\"], div.notion-topbar-more-button, [aria-label=\"More\"], button[aria-label=\"More\"]'\nDUPLICATE_MENU_ITEM_SELECTOR = 'text=\"Duplicate\"'\nDUPLICATE_WITH_CONTENT_SELECTOR = 'text=\"Duplicate with content\"'\nMOVE_TO_MENU_ITEM_SELECTOR = 'text=\"Move to\"'\nMOVE_TO_SEARCH_INPUT_SELECTOR = (\n    'input[placeholder*=\"Move page to\"], textarea[placeholder*=\"Move page to\"]'\n)\n\n\nclass NotionStateManager(BaseStateManager):\n    \"\"\"\n    Manages the state of Notion initial states using Playwright and the Notion API.\n    \"\"\"\n\n    def __init__(\n        self,\n        source_notion_key: str,\n        eval_notion_key: str,\n        headless: bool = True,\n        browser: str = \"firefox\",\n        eval_parent_page_title: str = \"MCPMark Eval Hub\",\n        source_parent_page_title: str = \"MCPMark Source Hub\",\n    ):\n        \"\"\"\n        Initializes the Notion state manager.\n\n        Args:\n     
       source_notion_key: The Notion API key for source workspace.\n            eval_notion_key: The Notion API key for evaluation workspace.\n            headless: Whether to run Playwright in headless mode.\n            browser: The browser engine to use ('chromium' or 'firefox').\n            eval_parent_page_title: Parent page title for evaluation workspace.\n        \"\"\"\n        super().__init__(service_name=\"notion\")\n        supported_browsers = {\"chromium\", \"firefox\"}\n        if browser not in supported_browsers:\n            raise ValueError(\n                f\"Unsupported browser '{browser}'. Supported browsers are: {', '.join(supported_browsers)}\"\n            )\n\n        self.browser_name = browser\n\n        # Initialize separate Notion clients with provided keys\n        if not source_notion_key or not eval_notion_key:\n            raise ValueError(\n                \"Both source_notion_key and eval_notion_key must be provided to NotionStateManager.\"\n            )\n\n        self.source_notion_client = Client(auth=source_notion_key)\n        self.eval_notion_client = Client(auth=eval_notion_key)\n\n        self.headless = headless\n        self.state_file = Path(\"notion_state.json\")\n        # Parent page under which duplicated pages should be moved for evaluation\n        self.eval_parent_page_title = eval_parent_page_title\n        # Source hub page that contains all initial-state templates\n        self.source_parent_page_title = source_parent_page_title\n\n        # Cache resolved parent page IDs to avoid repeated workspace-wide searches\n        self._eval_parent_page_id: Optional[str] = None\n        self._source_hub_page_id: Optional[str] = None\n\n        # Browser instance management for reuse within session\n        self._playwright: Optional[Playwright] = None\n        self._browser: Optional[Browser] = None\n        self._context: Optional[BrowserContext] = None\n\n        # Validate initialization\n        if not 
self.source_notion_client or not self.eval_notion_client:\n            raise ValueError(\n                \"Both source_notion_key and eval_notion_key must be provided and valid\"\n            )\n\n        if not self.state_file.exists():\n            raise FileNotFoundError(\n                \"Authentication state 'notion_state.json' not found. Run the Notion login helper first.\"\n            )\n\n        logger.info(\"Notion state manager initialized successfully\")\n\n    # =========================================================================\n    # Core Template Methods (Required by BaseStateManager)\n    # =========================================================================\n\n    def _cleanup_eval_hub_orphans(self) -> None:\n        \"\"\"Clean up all pages in MCPMark Eval Hub before creating new task state.\"\"\"\n        try:\n            parent_page_id = self._ensure_eval_parent_page_id()\n\n            if not parent_page_id:\n                logger.debug(\n                    \"| ✗ Parent page '%s' not found in eval workspace, skipping cleanup\",\n                    self.eval_parent_page_title,\n                )\n                return\n\n            # Get all child pages and archive them\n            children = self.eval_notion_client.blocks.children.list(\n                block_id=parent_page_id\n            )\n            orphan_count = 0\n            for child in children.get(\"results\", []):\n                if child.get(\"type\") == \"child_page\":\n                    try:\n                        self.eval_notion_client.pages.update(\n                            page_id=child[\"id\"], archived=True\n                        )\n                        orphan_count += 1\n                        logger.debug(\"| ✓ Archived orphan page: %s\", child[\"id\"])\n                    except Exception as e:\n                        logger.warning(\n                            \"| ✗ Failed to archive orphan page %s: %s\", child[\"id\"], e\n        
                )\n\n            if orphan_count > 0:\n                logger.info(\n                    \"| ✓ Cleaned up %d orphan page(s) from MCPMark Eval Hub\", orphan_count\n                )\n\n        except Exception as e:\n            logger.warning(\"Orphan cleanup failed (non-critical, continuing): %s\", e)\n            # Don't raise exception - allow execution to continue\n\n    def _cleanup_source_hub_orphans(self, exclude_page_ids: Optional[Set[str]] = None) -> int:\n        \"\"\"Clean up all orphan pages in source hub matching 'xxx (n)' pattern.\n\n        Args:\n            exclude_page_ids: Page IDs to exclude from cleanup (e.g., pages currently being operated on)\n\n        Returns:\n            Number of pages archived\n        \"\"\"\n        exclude_page_ids = exclude_page_ids or set()\n        source_hub_id = self._ensure_source_hub_page_id()\n        if not source_hub_id:\n            return 0\n\n        orphan_count = 0\n        next_cursor = None\n\n        try:\n            while True:\n                kwargs: Dict[str, Any] = {\"block_id\": source_hub_id}\n                if next_cursor:\n                    kwargs[\"start_cursor\"] = next_cursor\n\n                children = self.source_notion_client.blocks.children.list(**kwargs)\n\n                for child in children.get(\"results\", []):\n                    if child.get(\"type\") != \"child_page\":\n                        continue\n\n                    child_id = child.get(\"id\")\n                    if child_id in exclude_page_ids:\n                        continue\n\n                    child_title = (child.get(\"child_page\", {}) or {}).get(\"title\", \"\").strip()\n\n                    # Match \"xxx (n)\" pattern where n is any digit(s)\n                    if ORPHAN_PAGE_PATTERN.match(child_title):\n                        try:\n                            self.source_notion_client.pages.update(\n                                page_id=child_id, archived=True\n            
                )\n                            orphan_count += 1\n                            logger.info(\"| ✓ Archived source hub orphan: %s (%s)\", child_title, child_id)\n                        except Exception as e:\n                            logger.warning(\"| ✗ Failed to archive orphan %s: %s\", child_id, e)\n\n                if not children.get(\"has_more\"):\n                    break\n                next_cursor = children.get(\"next_cursor\")\n\n            if orphan_count > 0:\n                logger.info(\"| ✓ Cleaned up %d orphan page(s) from source hub\", orphan_count)\n\n        except Exception as e:\n            logger.warning(\"Source hub orphan cleanup failed (non-critical, continuing): %s\", e)\n\n        return orphan_count\n\n    def _ensure_eval_parent_page_id(self) -> Optional[str]:\n        \"\"\"Resolve and cache the evaluation hub parent page ID.\"\"\"\n        if self._eval_parent_page_id:\n            return self._eval_parent_page_id\n\n        try:\n            response = self.eval_notion_client.search(\n                query=self.eval_parent_page_title,\n                filter={\"property\": \"object\", \"value\": \"page\"},\n            )\n\n            for result in response.get(\"results\", []):\n                props = result.get(\"properties\", {})\n                title_prop = props.get(\"title\", {}).get(\"title\") or props.get(\n                    \"Name\", {}\n                ).get(\"title\")\n                if not title_prop:\n                    continue\n\n                title = \"\".join(t.get(\"plain_text\", \"\") for t in title_prop).strip()\n                if title == self.eval_parent_page_title:\n                    self._eval_parent_page_id = result.get(\"id\")\n                    break\n\n            if not self._eval_parent_page_id:\n                logger.debug(\n                    \"| ✗ Eval parent page '%s' not found via search\",\n                    self.eval_parent_page_title,\n                )\n  
      except Exception as e:\n            logger.error(\n                \"| ✗ Failed to resolve eval parent page '%s': %s\",\n                self.eval_parent_page_title,\n                e,\n            )\n\n        return self._eval_parent_page_id\n\n    def _ensure_source_hub_page_id(self) -> Optional[str]:\n        \"\"\"Resolve and cache the source hub parent page ID used for initial states.\"\"\"\n        if self._source_hub_page_id:\n            return self._source_hub_page_id\n\n        try:\n            hub_search = self.source_notion_client.search(\n                query=self.source_parent_page_title,\n                filter={\"property\": \"object\", \"value\": \"page\"},\n            )\n\n            for result in hub_search.get(\"results\", []):\n                props = result.get(\"properties\", {})\n                title_prop = props.get(\"title\", {}).get(\"title\") or props.get(\n                    \"Name\", {}\n                ).get(\"title\")\n                current_title = \"\".join(\n                    t.get(\"plain_text\", \"\") for t in (title_prop or [])\n                ).strip()\n                if current_title == self.source_parent_page_title:\n                    self._source_hub_page_id = result.get(\"id\")\n                    break\n\n            if not self._source_hub_page_id:\n                logger.error(\n                    \"| ✗ Source hub page '%s' not found.\",\n                    self.source_parent_page_title,\n                )\n        except Exception as e:\n            logger.error(\n                \"| ✗ Failed to resolve source hub page '%s': %s\",\n                self.source_parent_page_title,\n                e,\n            )\n\n        return self._source_hub_page_id\n\n    def _wait_for_database_ready(\n        self,\n        page_id: str,\n        max_retries: int = 10,\n        retry_delay: int = 2\n    ) -> bool:\n        \"\"\"\n        Wait for the database backend to be ready by checking page 
accessibility.\n\n        Args:\n            page_id: The ID of the page to check\n            max_retries: Maximum number of retry attempts\n            retry_delay: Delay between retries in seconds\n\n        Returns:\n            True if the database is ready, False if timeout\n        \"\"\"\n        logger.info(\"| ○ Starting heartbeat detection for page %s\", page_id)\n\n        for attempt in range(max_retries):\n            try:\n                # Try to retrieve the page from the evaluation workspace\n                result = self.eval_notion_client.pages.retrieve(page_id=page_id)\n\n                # Check if we got a valid response\n                if result and isinstance(result, dict):\n                    # Additional check: try to get page properties\n                    if \"properties\" in result:\n                        logger.info(\n                            \"| ✓ Database backend is ready (attempt %d/%d)\",\n                            attempt + 1,\n                            max_retries\n                        )\n                        return True\n\n            except Exception as e:\n                logger.debug(\n                    \"| ✗ Database not ready yet (attempt %d/%d): %s\",\n                    attempt + 1,\n                    max_retries,\n                    str(e)\n                )\n\n            # Wait before next retry\n            if attempt < max_retries - 1:\n                time.sleep(retry_delay)\n\n        logger.error(\n            \"| ✗ Database backend failed to become ready after %d attempts\",\n            max_retries\n        )\n        return False\n\n    def _create_initial_state(self, task: BaseTask) -> Optional[InitialStateInfo]:\n        \"\"\"Create initial state by duplicating Notion page.\"\"\"\n        if not isinstance(task, NotionTask):\n            logger.error(\"Task must be NotionTask for Notion state manager\")\n            return None\n\n        # Clean up any orphan pages in eval hub before 
creating new state\n        self._cleanup_eval_hub_orphans()\n\n        # Clean up orphan pages in source hub before duplication\n        self._cleanup_source_hub_orphans()\n\n        try:\n            initial_state_title = self._category_to_initial_state_title(task.category_id)\n            initial_state_info = self._find_initial_state_by_title(initial_state_title)\n\n            if not initial_state_info:\n                logger.error(\n                    \"| ✗ Initial state not found for category '%s' (title: '%s')\",\n                    task.category_id,\n                    initial_state_title,\n                )\n                return None\n\n            _, initial_state_url = initial_state_info\n\n            duplicated_url, duplicated_id = self._duplicate_initial_state_for_task(\n                initial_state_url, task.category_id, task.name\n            )\n\n            # Wait for database backend to be ready\n            logger.info(\"| ○ Checking database backend accessibility for duplicated page...\")\n            if not self._wait_for_database_ready(duplicated_id):\n                logger.error(\n                    \"| ✗ Database backend is not accessible after duplication for task %s\",\n                    task.name\n                )\n                # Clean up the duplicated page if database is not ready\n                try:\n                    self.eval_notion_client.pages.update(\n                        page_id=duplicated_id, archived=True\n                    )\n                    logger.info(\"| ✓ Cleaned up inaccessible duplicated page: %s\", duplicated_id)\n                except Exception as cleanup_error:\n                    logger.error(\"| ✗ Failed to clean up duplicated page: %s\", cleanup_error)\n\n                raise RuntimeError(\n                    f\"| ✗ Database backend failed to become ready for duplicated page {duplicated_id}\"\n                )\n\n            time.sleep(5) # allow the page to fully load\n\n          
  return InitialStateInfo(\n                state_id=duplicated_id,\n                state_url=duplicated_url,\n                metadata={\n                    \"original_url\": initial_state_url,\n                    \"category\": task.category_id,\n                    \"task_name\": task.name,\n                },\n            )\n\n        except Exception as e:\n            logger.error(f\"| ✗ Failed to create initial state for {task.name}: {e}\")\n            return None\n\n    def _store_initial_state_info(\n        self, task: BaseTask, state_info: InitialStateInfo\n    ) -> None:\n        \"\"\"Store initial state information in NotionTask object.\"\"\"\n        if isinstance(task, NotionTask):\n            task.duplicated_initial_state_id = state_info.state_id\n            task.duplicated_initial_state_url = state_info.state_url\n            task.original_initial_state_url = state_info.metadata.get(\"original_url\")\n\n            # Track the duplicated page for cleanup\n            self.track_resource(\"page\", state_info.state_id, state_info.metadata)\n\n    def _cleanup_task_initial_state(self, task: BaseTask) -> bool:\n        \"\"\"Clean up initial state for a specific Notion task.\"\"\"\n        if not isinstance(task, NotionTask):\n            return True  # Nothing to clean up for non-Notion tasks\n\n        initial_state_id = task.duplicated_initial_state_id\n        if not initial_state_id:\n            logger.warning(\n                \"| ✗ No duplicated initial state ID found for task %s, skipping cleanup.\",\n                task.name,\n            )\n            return False\n\n        try:\n            # Archive the duplicated page\n            self.eval_notion_client.pages.update(\n                page_id=initial_state_id, archived=True\n            )\n            logger.info(\"| ✓ Archived page initial state: %s\", initial_state_id)\n\n            # Remove from tracked resources to avoid duplicate cleanup\n            self.tracked_resources 
= [\n                r\n                for r in self.tracked_resources\n                if not (r[\"type\"] == \"page\" and r[\"id\"] == initial_state_id)\n            ]\n\n            return True\n        except Exception as e:\n            logger.error(\"| ✗ Failed to archive initial state %s: %s\", initial_state_id, e)\n            return False\n\n    def _cleanup_single_resource(self, resource: Dict[str, Any]) -> bool:\n        \"\"\"Clean up a single Notion resource.\"\"\"\n        if resource[\"type\"] == \"page\":\n            try:\n                self.eval_notion_client.pages.update(\n                    page_id=resource[\"id\"], archived=True\n                )\n                logger.info(f\"| ✓ Archived Notion page: {resource['id']}\")\n                return True\n            except Exception as e:\n                logger.error(f\"| ✗ Failed to archive Notion page {resource['id']}: {e}\")\n                return False\n\n        logger.warning(f\"| ? Unknown resource type for cleanup: {resource['type']}\")\n        return False\n\n    # =========================================================================\n    # Notion API Operations\n    # =========================================================================\n\n    def _rename_initial_state_via_api(\n        self, initial_state_id: str, new_title: str\n    ) -> None:\n        \"\"\"Renames a Notion page using the API.\"\"\"\n        try:\n            self.eval_notion_client.pages.update(\n                page_id=initial_state_id,\n                properties={\"title\": {\"title\": [{\"text\": {\"content\": new_title}}]}},\n            )\n        except Exception as e:\n            logger.error(\"| ✗ Failed to rename page via API: %s\", e)\n\n    # ------------------------------------------------------------------\n    # Playwright helpers\n    # ------------------------------------------------------------------\n\n    def _ensure_browser(self) -> Tuple[Browser, BrowserContext]:\n        
\"\"\"Ensure browser instance is available, reusing existing or creating new.\n\n        Returns:\n            Tuple of (Browser, BrowserContext)\n        \"\"\"\n        if self._playwright is None:\n            self._playwright = sync_playwright().start()\n\n        if self._browser is None:\n            browser_type = getattr(self._playwright, self.browser_name)\n            self._browser = browser_type.launch(headless=self.headless)\n\n        if self._context is None:\n            self._context = self._browser.new_context(\n                storage_state=str(self.state_file),\n                locale=\"en-US\",\n            )\n\n        return self._browser, self._context\n\n    def close(self) -> None:\n        \"\"\"Clean up browser resources. Should be called when session ends.\"\"\"\n        if self._context:\n            try:\n                # Save storage state before closing\n                self._context.storage_state(path=str(self.state_file))\n                self._context.close()\n            except Exception:\n                pass\n            self._context = None\n\n        if self._browser:\n            try:\n                self._browser.close()\n            except Exception:\n                pass\n            self._browser = None\n\n        if self._playwright:\n            try:\n                self._playwright.stop()\n            except Exception:\n                pass\n            self._playwright = None\n\n    def _recover_duplicate_via_ui(\n        self,\n        page: Page,\n        original_title: str,\n        *,\n        timeout: int = 30_000,\n    ) -> Optional[str]:\n        \"\"\"Recover duplicate page URL by navigating via UI when API-based recovery fails.\n\n        This method navigates to the source hub and locates the duplicate page\n        (e.g., \"Title (1)\") in the Notion sidebar, then clicks on it to obtain\n        the URL directly from the browser.\n\n        Args:\n            page: The Playwright page instance\n        
    original_title: The original page title (without suffix)\n            timeout: Timeout for UI operations in milliseconds\n\n        Returns:\n            The URL of the duplicate page if found, None otherwise\n        \"\"\"\n        try:\n            source_hub_id = self._ensure_source_hub_page_id()\n            if not source_hub_id:\n                logger.warning(\"| ✗ Cannot resolve source hub for UI-based recovery\")\n                return None\n\n            # Build URL to navigate to source hub\n            # Format: https://www.notion.so/<hub-id>\n            clean_hub_id = source_hub_id.replace(\"-\", \"\")\n            source_hub_url = f\"https://www.notion.so/{clean_hub_id}\"\n\n            logger.info(\"| ○ Navigating to source hub for UI-based recovery...\")\n            page.goto(source_hub_url, wait_until=\"domcontentloaded\", timeout=60_000)\n            time.sleep(3)  # Allow page to settle\n\n            # Look for page title with \"(n)\" suffix pattern in sidebar or page content\n            # The duplicate will be named \"Original Title (1)\" or similar\n            duplicate_pattern = re.compile(rf\"^{re.escape(original_title)}\\s*\\(\\d+\\)$\")\n\n            # Try to find the duplicate page in the page list/sidebar\n            # Notion uses different selectors for page links, try common patterns\n            page_link_selectors = [\n                f'a:has-text(\"{original_title} (1)\")',\n                f'div[data-block-id]:has-text(\"{original_title} (1)\")',\n                f'[role=\"treeitem\"]:has-text(\"{original_title} (1)\")',\n            ]\n\n            for selector in page_link_selectors:\n                try:\n                    locator = page.locator(selector).first\n                    if locator.is_visible(timeout=5000):\n                        logger.info(\"| ○ Found duplicate page in UI, clicking...\")\n                        locator.click()\n                        page.wait_for_load_state(\"domcontentloaded\", 
timeout=timeout)\n                        time.sleep(3)\n                        recovered_url = page.url\n                        logger.info(\"| ✓ Recovered duplicate URL via UI: %s\", recovered_url)\n                        return recovered_url\n                except Exception:\n                    continue\n\n            # If specific selectors didn't work, try a broader search\n            try:\n                # Look for any visible text matching the pattern and click it\n                all_text_elements = page.locator(f'text=\"{original_title} (\"')\n                count = all_text_elements.count()\n                if count > 0:\n                    for i in range(count):\n                        element = all_text_elements.nth(i)\n                        text_content = element.text_content() or \"\"\n                        if duplicate_pattern.match(text_content.strip()):\n                            logger.info(\"| ○ Found duplicate via text search, clicking...\")\n                            element.click()\n                            page.wait_for_load_state(\"domcontentloaded\", timeout=timeout)\n                            time.sleep(3)\n                            recovered_url = page.url\n                            logger.info(\"| ✓ Recovered duplicate URL via UI text search: %s\", recovered_url)\n                            return recovered_url\n            except Exception as e:\n                logger.debug(\"| ✗ Broad text search failed: %s\", e)\n\n            logger.warning(\"| ✗ Could not locate duplicate '%s (n)' in UI\", original_title)\n            return None\n\n        except Exception as e:\n            logger.warning(\"| ✗ UI-based recovery failed: %s\", e)\n            return None\n\n    # =========================================================================\n    # Playwright Automation Methods\n    # =========================================================================\n\n    def _move_current_page_to_env(\n        self, 
page: Page, *, wait_timeout: int = 60_000\n    ) -> None:\n        \"\"\"Moves the currently open page into the designated evaluation parent page.\n\n        This operation is done via Playwright UI automation because the Notion API\n        does not yet expose a direct \"move\" endpoint for pages. It relies on the\n        following sequence:\n\n        1. Open the page action menu (same selector as duplication).\n        2. Choose the \"Move to\" menu item.\n        3. In the search field that appears (placeholder starts with\n           \"Move page to\"), type the target parent page title.\n        4. Click the matching search result to complete the move.\n        \"\"\"\n\n        logger.info(\n            \"| ○ Moving duplicated page to evaluation parent '%s'...\",\n            self.eval_parent_page_title,\n        )\n\n        try:\n            # Step 1: Open the page menu\n            page.wait_for_selector(\n                PAGE_MENU_BUTTON_SELECTOR, state=\"visible\", timeout=30_000\n            )\n            page.click(PAGE_MENU_BUTTON_SELECTOR)\n\n            # Step 2: Select \"Move to\"\n            page.hover(MOVE_TO_MENU_ITEM_SELECTOR)\n            page.click(MOVE_TO_MENU_ITEM_SELECTOR)\n\n            # Step 3: Fill the destination title\n            page.wait_for_selector(\n                MOVE_TO_SEARCH_INPUT_SELECTOR, state=\"visible\", timeout=15_000\n            )\n\n            # Ensure focus then type the destination title – using type() triggers\n            # key events Notion relies on for search filtering.\n            search_input = page.locator(MOVE_TO_SEARCH_INPUT_SELECTOR).first\n            search_input.click()\n            search_input.fill(\"\")  # Clear any residual text (safety)\n            search_input.type(self.eval_parent_page_title, delay=50)\n\n            # Step 4: Wait for the search result matching the page title, then click it\n            # Selector for the menu item row – ensure we click the outer container, not a 
nested <div>\n            result_selector = (\n                f'div[role=\"menuitem\"]:has-text(\"{self.eval_parent_page_title}\")'\n            )\n            page.wait_for_selector(\n                result_selector, state=\"visible\", timeout=wait_timeout\n            )\n            page.locator(result_selector).first.click(force=True)\n\n            # Wait for the dialog to disappear – indicates move finished\n            page.wait_for_selector(\n                MOVE_TO_SEARCH_INPUT_SELECTOR, state=\"detached\", timeout=wait_timeout\n            )\n\n            # Give Notion a brief moment to process the move\n            time.sleep(3)\n        except PlaywrightTimeoutError as e:\n            logger.error(\n                \"| ✗ Playwright timed out while moving page to evaluation parent – move may have failed.\"\n            )\n            raise RuntimeError(\"Playwright timeout during move-to operation\") from e\n        except Exception as exc:\n            logger.error(\"| ✗ Unexpected error during move-to operation: %s\", exc)\n            # Propagate the error to allow retry logic at higher level if necessary\n            raise\n\n    def _category_to_initial_state_title(self, category: str) -> str:\n        \"\"\"Converts a category name to a capitalized initial state title.\"\"\"\n        return \" \".join(word.capitalize() for word in category.split(\"_\"))\n\n    def _extract_initial_state_id_from_url(self, url: str) -> str:\n        \"\"\"Extracts the initial state ID from a Notion URL.\"\"\"\n        slug = url.split(\"?\")[0].split(\"#\")[0].rstrip(\"/\").split(\"/\")[-1]\n        compact = \"\".join(c for c in slug if c.isalnum())\n        if len(compact) < 32:\n            raise ValueError(f\"Could not parse initial state ID from URL: {url}\")\n        compact = compact[-32:]\n        return f\"{compact[:8]}-{compact[8:12]}-{compact[12:16]}-{compact[16:20]}-{compact[20:]}\"\n\n    # 
=========================================================================\n    # URL and State Utilities\n    # =========================================================================\n\n    def _get_slug_base(self, url: str) -> str:\n        \"\"\"Returns the slug part without its trailing 32-char ID (hyphen separated).\"\"\"\n        slug = url.split(\"?\", 1)[0].split(\"#\", 1)[0].rstrip(\"/\").split(\"/\")[-1]\n        match = re.match(r\"^(.*)-([0-9a-fA-F]{32})$\", slug)\n        if match:\n            return match.group(1)\n        return slug\n\n    def _is_valid_duplicate_url(self, original_url: str, duplicated_url: str) -> bool:\n        \"\"\"Checks whether duplicated_url looks like a Notion duplicate (original slug + '-N').\"\"\"\n        orig_base = self._get_slug_base(original_url)\n        dup_base = self._get_slug_base(duplicated_url)\n        if not dup_base.startswith(orig_base + \"-\"):\n            return False\n        suffix = dup_base[len(orig_base) + 1 :]\n        return suffix.isdigit()\n\n    def _find_initial_state_by_title(self, title: str) -> Optional[Tuple[str, str]]:\n        \"\"\"Find a child page under the source hub by exact title.\n\n        Strategy:\n        - Locate the source hub page (\"MCPBench Source Hub\") via search to get its ID.\n        - List its first-level children via `blocks.children.list`.\n        - Find a `child_page` whose title exactly matches `title`.\n        - Return the page ID and URL (retrieved via `pages.retrieve`).\n        \"\"\"\n        try:\n            # 1) Resolve the source hub page once and reuse its ID\n            source_hub_id = self._ensure_source_hub_page_id()\n\n            if not source_hub_id:\n                return None\n\n            # 2) List first-level children of the hub page and find exact title match\n            matched_child_id: Optional[str] = None\n            next_cursor = None\n\n            while True:\n                kwargs = {\"block_id\": source_hub_id}\n          
      if next_cursor:\n                    kwargs[\"start_cursor\"] = next_cursor\n\n                children = self.source_notion_client.blocks.children.list(**kwargs)\n                for child in children.get(\"results\", []):\n                    if child.get(\"type\") != \"child_page\":\n                        continue  # Only consider child pages\n                    child_title = (child.get(\"child_page\", {}) or {}).get(\"title\", \"\").strip()\n                    if child_title == title:\n                        matched_child_id = child.get(\"id\")\n                        break\n\n                if matched_child_id or not children.get(\"has_more\"):\n                    break\n\n                next_cursor = children.get(\"next_cursor\")\n\n            if not matched_child_id:\n                logger.debug(\"| ✗ No child page titled '%s' under '%s'\", title, self.source_parent_page_title)\n                return None\n\n            # 3) Retrieve the page to get its canonical URL\n            try:\n                page_obj = self.source_notion_client.pages.retrieve(page_id=matched_child_id)\n                page_url = page_obj.get(\"url\")\n            except Exception as e:\n                logger.warning(\"| ✗ Failed to retrieve page URL for '%s' (%s): %s\", title, matched_child_id, e)\n                page_url = None\n\n            if not page_url:\n                # Fall back to returning just the ID if URL couldn't be retrieved\n                logger.debug(\"| ○ Returning page ID without URL for '%s'\", title)\n                return matched_child_id, \"\"\n\n            return matched_child_id, page_url\n        except Exception as e:\n            logger.error(\"| ✗ Error locating initial state '%s' via children listing: %s\", title, e)\n            return None\n\n    # =========================================================================\n    # Duplication and State Management\n    # 
=========================================================================\n    # NOTE: Initial state type detection logic has been removed because all initial states are pages.\n\n    def _duplicate_current_initial_state(\n        self,\n        page: Page,\n        new_title: Optional[str] = None,\n        *,\n        original_initial_state_id: str,\n        original_initial_state_title: str,\n        wait_timeout: int = 180_000,\n    ) -> str:\n        \"\"\"Duplicates the currently open Notion initial state using Playwright.\"\"\"\n        try:\n            logger.info(\"| ○ Opening page menu...\")\n            page.wait_for_selector(\n                PAGE_MENU_BUTTON_SELECTOR, state=\"visible\", timeout=30_000\n            )\n            page.click(PAGE_MENU_BUTTON_SELECTOR)\n\n            logger.info(\"| ○ Clicking 'Duplicate'...\")\n            page.hover(DUPLICATE_MENU_ITEM_SELECTOR)\n            page.click(DUPLICATE_MENU_ITEM_SELECTOR)\n\n            original_url = page.url\n            logger.info(\n                \"| ○ Waiting for duplicated initial state to load (up to %.1f s)...\",\n                wait_timeout / 1000,\n            )\n            page.wait_for_url(lambda url: url != original_url, timeout=wait_timeout)\n\n            # wait for the page to fully load\n            time.sleep(5)\n            duplicated_url = page.url\n            # Validate that the resulting URL is a genuine duplicate of the original template.\n            if not self._is_valid_duplicate_url(original_url, duplicated_url):\n                # Sometimes duplication succeeds but UI navigates to parent instead of the new page.\n                # In that case, try to find the most recently created page named exactly \"<title> (1)\".\n                logger.warning(\n                    \"| ✗ Duplicate URL pattern mismatch. 
Attempting recovery by searching for latest '%s (1)' page...\",\n                    original_initial_state_title,\n                )\n\n                target_title = f\"{original_initial_state_title} (1)\"\n                try:\n                    # Wait 5 seconds before the first search to allow Notion to index the new page\n                    time.sleep(5)\n\n                    attempts = 3\n                    source_hub_id = self._ensure_source_hub_page_id()\n                    if not source_hub_id:\n                        logger.error(\n                            \"| ✗ Cannot resolve source hub ID while locating '%s' duplicate.\",\n                            target_title,\n                        )\n                    else:\n                        for retry_idx in range(attempts):\n                            candidates = []\n                            next_cursor = None\n\n                            while True:\n                                kwargs: Dict[str, Any] = {\"block_id\": source_hub_id}\n                                if next_cursor:\n                                    kwargs[\"start_cursor\"] = next_cursor\n\n                                children = self.source_notion_client.blocks.children.list(**kwargs)\n                                for child in children.get(\"results\", []):\n                                    if child.get(\"type\") != \"child_page\":\n                                        continue\n                                    child_id = child.get(\"id\")\n                                    if child_id == original_initial_state_id:\n                                        continue\n\n                                    child_title = (\n                                        (child.get(\"child_page\", {}) or {})\n                                        .get(\"title\", \"\")\n                                        .strip()\n                                    )\n                                    if child_title 
!= target_title:\n                                        continue\n\n                                    created_time = child.get(\"created_time\") or child.get(\n                                        \"last_edited_time\"\n                                    )\n                                    candidates.append((created_time or \"\", child_id))\n\n                                if not children.get(\"has_more\"):\n                                    break\n\n                                next_cursor = children.get(\"next_cursor\")\n\n                            if candidates:\n                                latest_child_id = max(candidates, key=lambda x: x[0])[1]\n                                fallback_url = None\n                                try:\n                                    page_obj = self.source_notion_client.pages.retrieve(\n                                        page_id=latest_child_id\n                                    )\n                                    fallback_url = page_obj.get(\"url\")\n                                except Exception as retrieve_error:\n                                    logger.warning(\n                                        \"| ✗ Failed to resolve URL for duplicate '%s': %s\",\n                                        latest_child_id,\n                                        retrieve_error,\n                                    )\n\n                                if fallback_url:\n                                    logger.info(\n                                        \"| ○ Navigating directly to latest '%s' duplicate via children list...\",\n                                        target_title,\n                                    )\n                                    page.goto(fallback_url, wait_until=\"domcontentloaded\", timeout=120_000)\n                                    time.sleep(5)\n                                    duplicated_url = page.url\n                                    break\n\n      
                      if retry_idx < attempts - 1:\n                                logger.debug(\n                                    \"| ○ '%s' not visible yet via children listing. Waiting 5s before retry %d/%d...\",\n                                    target_title,\n                                    retry_idx + 1,\n                                    attempts - 1,\n                                )\n                                time.sleep(5)\n\n                    # Re-validate after attempted recovery\n                    if not self._is_valid_duplicate_url(original_url, duplicated_url):\n                        # API-based recovery failed, try UI-based recovery as last resort\n                        logger.warning(\n                            \"| ✗ API-based recovery failed. Trying UI-based recovery...\"\n                        )\n                        ui_recovered_url = self._recover_duplicate_via_ui(\n                            page,\n                            original_initial_state_title,\n                            timeout=wait_timeout,\n                        )\n                        if ui_recovered_url and self._is_valid_duplicate_url(original_url, ui_recovered_url):\n                            duplicated_url = ui_recovered_url\n                            logger.info(\"| ✓ UI-based recovery successful\")\n                        else:\n                            logger.error(\n                                \"| ✗ Could not locate a valid '%s' duplicate after all recovery attempts.\\n|  Original: %s\\n|  Observed: %s\",\n                                target_title,\n                                original_url,\n                                duplicated_url,\n                            )\n                            # Attempt to clean up stray duplicate before propagating error.\n                            self._cleanup_orphan_duplicate(\n                                original_initial_state_id, original_initial_state_title\n   
                         )\n                            raise RuntimeError(\n                                \"Duplicate URL pattern mismatch – duplication likely failed\"\n                            )\n                except Exception as search_exc:\n                    logger.error(\n                        \"| ✗ Failed during recovery search for '%s': %s\",\n                        target_title,\n                        search_exc,\n                    )\n                    # Attempt to clean up stray duplicate before propagating error.\n                    self._cleanup_orphan_duplicate(\n                        original_initial_state_id, original_initial_state_title\n                    )\n                    raise RuntimeError(\n                        \"Duplicate URL pattern mismatch – duplication likely failed\"\n                    ) from search_exc\n\n            duplicated_initial_state_id = self._extract_initial_state_id_from_url(\n                duplicated_url\n            )\n\n            # Always move to evaluation parent\n            self._move_current_page_to_env(page, wait_timeout=wait_timeout)\n\n            # Rename if new title is provided\n            if new_title:\n                self._rename_initial_state_via_api(\n                    duplicated_initial_state_id, new_title\n                )\n\n            # verify whether the page is moved to the evaluation parent page\n            try:\n                result = self.eval_notion_client.pages.retrieve(\n                    page_id=duplicated_initial_state_id\n                )\n                if not result or not isinstance(result, dict):\n                    logger.error(\n                        \"| ✗ Playwright move to error: Notion API did not return a valid page dict after move.\"\n                    )\n                    raise RuntimeError(\n                        \"Playwright move to error: Notion API did not return a valid page dict after move.\"\n                    )\n      
          logger.info(\n                    \"| ✓ Page moved to '%s' successfully.\", self.eval_parent_page_title\n                )\n            except Exception as move_exc:\n                logger.error(f\"Playwright move to error: {move_exc}\")\n                raise RuntimeError(\n                    \"Playwright move to error: Notion client failed to retrieve page after move.\"\n                ) from move_exc\n\n            return duplicated_initial_state_id\n        except PlaywrightTimeoutError as e:\n            logger.error(\"Playwright timed out while duplicating initial state.\")\n            raise RuntimeError(\"Playwright timeout during duplication\") from e\n\n    # =========================================================================\n    # Cleanup and Maintenance\n    # =========================================================================\n\n    def _cleanup_orphan_duplicate(\n        self,\n        original_initial_state_id: str,\n        initial_state_title: str,\n    ) -> bool:\n        \"\"\"Finds and archives a stray duplicate (\"orphan\") that matches pattern 'Title (n)'.\n\n        Returns True if at least one orphan duplicate was archived.\n        \"\"\"\n        try:\n            source_hub_id = self._ensure_source_hub_page_id()\n            if not source_hub_id:\n                logger.error(\n                    \"| ✗ Cannot resolve source hub while cleaning up duplicates for '%s'\",\n                    initial_state_title,\n                )\n                return False\n\n            # Match any numbered duplicate \"Title (n)\" where n is any digit(s)\n            title_regex = re.compile(rf\"^{re.escape(initial_state_title)}\\s*\\(\\d+\\)$\")\n\n            archived_any = False\n            next_cursor = None\n            while True:\n                kwargs: Dict[str, Any] = {\"block_id\": source_hub_id}\n                if next_cursor:\n                    kwargs[\"start_cursor\"] = next_cursor\n\n                children 
= self.source_notion_client.blocks.children.list(**kwargs)\n                for child in children.get(\"results\", []):\n                    if child.get(\"type\") != \"child_page\":\n                        continue\n\n                    dup_id = child.get(\"id\")\n                    if dup_id == original_initial_state_id:\n                        continue\n\n                    title_plain = (\n                        (child.get(\"child_page\", {}) or {}).get(\"title\", \"\")\n                    ).strip()\n                    if not title_regex.match(title_plain):\n                        continue  # not a numbered duplicate\n\n                    try:\n                        self.source_notion_client.pages.update(\n                            page_id=dup_id, archived=True\n                        )\n                        logger.info(\"| ✓ Archived orphan duplicate (%s): %s\", \"page\", dup_id)\n                        archived_any = True\n                    except Exception as exc:\n                        logger.warning(\"| ✗ Failed to archive orphan page %s: %s\", dup_id, exc)\n\n                if not children.get(\"has_more\"):\n                    break\n\n                next_cursor = children.get(\"next_cursor\")\n\n            return archived_any\n        except Exception as exc:\n            logger.warning(\n                \"Error while attempting to cleanup orphan duplicate: %s\", exc\n            )\n            return False\n\n    def _duplicate_initial_state_for_task(\n        self,\n        initial_state_url: str,\n        category: str,\n        task_name: str,\n        *,\n        max_retries: int = 2,\n        initial_wait_ms: int = 180_000,\n    ) -> Tuple[str, str]:\n        \"\"\"Duplicates an initial state for a task, with retries for reliability.\"\"\"\n        if not self.state_file.exists():\n            raise FileNotFoundError(\n                \"Authentication state 'notion_state.json' not found. 
\"\n                \"Run the Notion login helper first.\"\n            )\n\n        last_exc = None\n        for attempt in range(max_retries + 1):\n            wait_timeout = initial_wait_ms * (attempt + 1)\n            page = None\n            try:\n                # Reuse browser instance within session\n                _, context = self._ensure_browser()\n                page = context.new_page()\n\n                logger.info(\"| ○ Navigating to initial state for %s...\", category)\n                # Start timing from the moment we begin navigating to the initial state page.\n                start_time = time.time()\n                page.goto(initial_state_url, wait_until=\"domcontentloaded\", timeout=120_000)\n                context.storage_state(path=str(self.state_file))\n\n                initial_state_id = self._extract_initial_state_id_from_url(\n                    initial_state_url\n                )\n                initial_state_title = self._category_to_initial_state_title(\n                    category\n                )\n\n                duplicated_id = self._duplicate_current_initial_state(\n                    page,\n                    new_title=initial_state_title,  # Use original initial state name without (1) suffix\n                    original_initial_state_id=initial_state_id,\n                    original_initial_state_title=initial_state_title,\n                    wait_timeout=wait_timeout,\n                )\n                duplicated_url = page.url\n                # Validate URL pattern again at this higher level (should already be validated inside).\n                context.storage_state(path=str(self.state_file))\n                # Log how long the whole duplication (navigate → duplicate) took.\n                elapsed = time.time() - start_time\n                logger.info(\n                    \"| ✓ Initial state duplicated successfully in %.2f seconds (task: %s).\",\n                    elapsed,\n                    
task_name,\n                )\n                return duplicated_url, duplicated_id\n            except Exception as e:\n                # No additional cleanup here—handled inside _duplicate_current_initial_state.\n                last_exc = e\n                if attempt < max_retries:\n                    logger.warning(\n                        \"| ✗ Duplication attempt %d failed: %s. Retrying...\",\n                        attempt + 1,\n                        e,\n                    )\n                time.sleep(120 * attempt + 120)\n            finally:\n                # Close the page to prevent accumulation within reused context\n                if page:\n                    try:\n                        page.close()\n                    except Exception:\n                        pass\n\n        raise RuntimeError(\n            f\"Initial state duplication failed for task '{task_name}' after {max_retries + 1} attempts: {last_exc}\"\n        )\n\n    def get_service_config_for_agent(self) -> dict:\n        \"\"\"\n        Get service-specific configuration for agent execution.\n\n        Returns:\n            Dictionary containing configuration needed by the agent/MCP server\n        \"\"\"\n        from src.config.config_schema import ConfigRegistry\n\n        # Get the eval_api_key from config registry\n        config = ConfigRegistry.get_config(\"notion\").get_all()\n        service_config = {}\n\n        if \"eval_api_key\" in config:\n            service_config[\"notion_key\"] = config[\"eval_api_key\"]\n\n        return service_config\n\n"
  },
  {
    "path": "src/mcp_services/notion/notion_task_manager.py",
    "content": "\"\"\"\nNotion Task Manager for MCPMark Evaluation Pipeline\n====================================================\n\nThis module provides utilities for discovering, filtering, and managing\nevaluation tasks within the MCPMark project structure for Notion service.\n\nThe task manager is responsible for:\n- Task discovery and filtering\n- Task verification and result processing\n- Task-specific logic (NOT LLM execution)\n\"\"\"\n\nimport sys\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom src.base.task_manager import BaseTask, BaseTaskManager\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\n@dataclass\nclass NotionTask(BaseTask):\n    \"\"\"Represents a single evaluation task for Notion service.\"\"\"\n\n    # Additional Notion-specific fields\n    # A human-readable slug for the task directory (e.g. \"employee_onboarding\")\n    task_name: str = \"\"\n    original_initial_state_url: Optional[str] = None\n    duplicated_initial_state_url: Optional[str] = None\n    duplicated_initial_state_id: Optional[str] = None\n\n    def __post_init__(self):\n        # Ensure base class fields are set if not provided\n        if (\n            not hasattr(self, \"task_instruction_path\")\n            or self.task_instruction_path is None\n        ):\n            self.task_instruction_path = self.description_path\n        if (\n            not hasattr(self, \"task_verification_path\")\n            or self.task_verification_path is None\n        ):\n            self.task_verification_path = self.verify_path\n\n    @property\n    def description_path(self) -> Path:\n        \"\"\"Alias for task_instruction_path.\"\"\"\n        return self.task_instruction_path\n\n    @property\n    def verify_path(self) -> Path:\n        \"\"\"Alias for task_verification_path.\"\"\"\n        return self.task_verification_path\n\n\n    def get_description(self) -> str:\n        \"\"\"Read and 
return the task description.\"\"\"\n        if self.description_path.exists():\n            return self.description_path.read_text(encoding=\"utf-8\")\n        return \"\"\n\n\nclass NotionTaskManager(BaseTaskManager):\n    \"\"\"Manages task discovery, filtering, and verification for Notion-based MCPMark evaluation.\"\"\"\n\n    def __init__(self, tasks_root: Path = None, task_suite: str = \"standard\"):\n        \"\"\"Initialize with the tasks directory path.\n\n        Args:\n            tasks_root: Path to the tasks directory\n            task_suite: Logical task suite (e.g., 'standard', 'easy')\n        \"\"\"\n        if tasks_root is None:\n            tasks_root = Path(__file__).resolve().parents[3] / \"tasks\"\n\n        # Call parent constructor\n        super().__init__(tasks_root, mcp_service=\"notion\", task_suite=task_suite)\n\n    # =========================================================================\n    # Service-specific implementations for template methods\n    # =========================================================================\n    # No custom task discovery methods needed; relying entirely on BaseTaskManager defaults.\n\n    def _get_service_directory_name(self) -> str:\n        \"\"\"Return the service directory name for Notion.\"\"\"\n        return \"notion\"\n\n    def _create_task_from_files(\n        self, category_id: str, task_files_info: Dict[str, Any]\n    ) -> Optional[NotionTask]:\n        \"\"\"Instantiate a `NotionTask` from the dictionary returned by `_find_task_files`.\"\"\"\n        import json\n        \n        # Check for meta.json\n        meta_path = task_files_info[\"instruction_path\"].parent / \"meta.json\"\n        final_category_id = category_id\n        task_id = task_files_info[\"task_id\"]\n        \n        if meta_path.exists():\n            try:\n                with open(meta_path, 'r') as f:\n                    meta_data = json.load(f)\n                    # Use values from meta.json if 
available\n                    final_category_id = meta_data.get(\"category_id\", category_id)\n                    task_id = meta_data.get(\"task_id\", task_id)\n            except Exception as e:\n                logger.warning(f\"Failed to load meta.json from {meta_path}: {e}\")\n\n        return NotionTask(\n            task_instruction_path=task_files_info[\"instruction_path\"],\n            task_verification_path=task_files_info[\"verification_path\"],\n            service=\"notion\",\n            category_id=final_category_id,\n            task_id=task_id,\n            task_name=task_files_info[\"task_id\"],\n        )\n\n    def _get_verification_command(self, task: NotionTask) -> List[str]:\n        \"\"\"Get the verification command for Notion tasks.\n\n        Notion verification requires the duplicated initial state ID.\n        \"\"\"\n        return [\n            sys.executable,\n            str(task.task_verification_path),\n            task.duplicated_initial_state_id or \"\",\n        ]\n"
  },
  {
    "path": "src/mcp_services/playwright/__init__.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nPlaywright MCP Service for MCPMark\n==================================\n\nThis package provides Playwright MCP integration for web automation tasks.\n\"\"\"\n"
  },
  {
    "path": "src/mcp_services/playwright/playwright_login_helper.py",
    "content": "\"\"\"\nPlaywright Login Helper for MCPMark\n====================================\n\nThis module provides browser session management and authentication utilities\nfor Playwright-based web automation tasks. Handles browser context setup,\nsession persistence, and state management.\n\"\"\"\n\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom playwright.sync_api import (\n    BrowserContext,\n    sync_playwright,\n)\n\nfrom src.base.login_helper import BaseLoginHelper\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass PlaywrightLoginHelper(BaseLoginHelper):\n    \"\"\"\n    Login helper for Playwright web automation tasks.\n\n    Manages browser contexts, session persistence, and authentication state\n    for web automation scenarios.\n    \"\"\"\n\n    SUPPORTED_BROWSERS = {\"chromium\", \"firefox\"}\n\n    def __init__(\n        self,\n        *,\n        browser: str = \"chromium\",\n        headless: bool = True,\n        state_path: Optional[str | Path] = None,\n    ) -> None:\n        \"\"\"\n        Initialize the Playwright login helper.\n\n        Args:\n            browser: Browser engine to use ('chromium' or 'firefox')\n            headless: Whether to run browser in headless mode\n            state_path: Path to save browser session state\n        \"\"\"\n        super().__init__()\n\n        if browser not in self.SUPPORTED_BROWSERS:\n            raise ValueError(\n                f\"Unsupported browser '{browser}'. 
Supported: {', '.join(self.SUPPORTED_BROWSERS)}\"\n            )\n\n        self.browser_name = browser\n        self.headless = headless\n        self.state_path = (\n            Path(state_path or Path.cwd() / \"playwright_state.json\")\n            .expanduser()\n            .resolve()\n        )\n\n        # Browser management\n        self._playwright = None\n        self._browser = None\n        self._browser_context: Optional[BrowserContext] = None\n\n        logger.info(f\"Initialized PlaywrightLoginHelper with {browser} browser\")\n\n    def login(self, **kwargs) -> bool:\n        \"\"\"\n        Set up browser context and session state.\n\n        For most Playwright tasks, this creates a clean browser context\n        that can be used for web automation. More complex authentication\n        can be handled in specific implementations.\n\n        Returns:\n            bool: True if browser setup successful\n        \"\"\"\n        try:\n            # Clean up any existing browser instances\n            self.close()\n\n            # Start Playwright\n            self._playwright = sync_playwright().start()\n            browser_type = getattr(self._playwright, self.browser_name)\n            self._browser = browser_type.launch(headless=self.headless)\n\n            # Create browser context\n            context_options = {}\n\n            # Load existing state if available\n            if self.state_path.exists():\n                try:\n                    context_options[\"storage_state\"] = str(self.state_path)\n                    logger.info(f\"Loaded browser state from {self.state_path}\")\n                except Exception as e:\n                    logger.warning(f\"Failed to load browser state: {e}\")\n\n            self._browser_context = self._browser.new_context(**context_options)\n\n            # Save current state\n            self._save_browser_state()\n\n            logger.info(\"✅ Browser context setup successful\")\n            return True\n\n 
       except Exception as e:\n            logger.error(f\"Browser setup failed: {e}\")\n            self.close()\n            return False\n\n    def get_browser_context(self) -> Optional[BrowserContext]:\n        \"\"\"\n        Get the current browser context.\n\n        Returns:\n            BrowserContext or None if not initialized\n        \"\"\"\n        return self._browser_context\n\n    def is_authenticated(self) -> bool:\n        \"\"\"\n        Check if browser context is ready for use.\n\n        Returns:\n            bool: True if browser context is available\n        \"\"\"\n        return self._browser_context is not None\n\n    def get_credentials(self) -> dict:\n        \"\"\"\n        Get browser configuration for MCP integration.\n\n        Returns:\n            dict: Browser configuration parameters\n        \"\"\"\n        return {\n            \"browser\": self.browser_name,\n            \"headless\": self.headless,\n            \"state_path\": str(self.state_path),\n        }\n\n    def _save_browser_state(self) -> None:\n        \"\"\"Save current browser state to file.\"\"\"\n        if self._browser_context:\n            try:\n                self._browser_context.storage_state(path=str(self.state_path))\n                logger.debug(f\"Browser state saved to {self.state_path}\")\n            except Exception as e:\n                logger.warning(f\"Failed to save browser state: {e}\")\n\n    def close(self) -> None:\n        \"\"\"Clean up browser resources.\"\"\"\n        if self._browser_context:\n            try:\n                # Save state before closing\n                self._save_browser_state()\n                self._browser_context.close()\n            except Exception as e:\n                logger.warning(f\"Error closing browser context: {e}\")\n            finally:\n                self._browser_context = None\n\n        if self._browser:\n            try:\n                self._browser.close()\n            except Exception 
as e:\n                logger.warning(f\"Error closing browser: {e}\")\n            finally:\n                self._browser = None\n\n        if self._playwright:\n            try:\n                self._playwright.stop()\n            except Exception as e:\n                logger.warning(f\"Error stopping Playwright: {e}\")\n            finally:\n                self._playwright = None\n"
  },
  {
    "path": "src/mcp_services/playwright/playwright_state_manager.py",
    "content": "\"\"\"\nPlaywright State Manager for MCPMark\n======================================\n\nThis module manages browser contexts and test environments for Playwright-based\nweb automation tasks. Handles browser isolation, test page setup, and cleanup.\n\"\"\"\n\nimport time\nfrom pathlib import Path\nfrom typing import Optional, Dict, Any, List\n\nfrom playwright.sync_api import (\n    BrowserContext,\n    Page,\n    TimeoutError as PlaywrightTimeoutError,\n)\n\nfrom src.base.state_manager import BaseStateManager, InitialStateInfo\nfrom src.base.task_manager import BaseTask\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass PlaywrightStateManager(BaseStateManager):\n    \"\"\"\n    Manages browser state and test environments for Playwright tasks.\n\n    Provides browser context isolation, test page setup, and resource cleanup\n    for web automation evaluation.\n    \"\"\"\n\n    def __init__(\n        self,\n        browser: str = \"chromium\",\n        headless: bool = True,\n        state_path: Optional[Path] = None,\n        network_origins: str = \"*\",\n        user_profile: str = \"isolated\",\n        viewport_width: int = 1280,\n        viewport_height: int = 720,\n    ):\n        \"\"\"\n        Initialize Playwright state manager.\n\n        Args:\n            browser: Browser engine to use ('chromium' or 'firefox')\n            headless: Whether to run browser in headless mode\n            state_path: Path to browser state file\n            network_origins: Allowed network origins (comma-separated or *)\n            user_profile: User profile type (isolated or persistent)\n            viewport_width: Browser viewport width\n            viewport_height: Browser viewport height\n        \"\"\"\n        super().__init__(service_name=\"playwright\")\n\n        self.browser_name = browser\n        self.headless = headless\n        # self.headless = False\n        self.state_path = state_path or Path.cwd() / 
\"playwright_state.json\"\n        self.network_origins = network_origins\n        self.user_profile = user_profile\n        self.viewport_width = viewport_width\n        self.viewport_height = viewport_height\n\n        # Browser management\n        self._playwright = None\n        self._browser = None\n        self._current_context: Optional[BrowserContext] = None\n\n        # Task-specific tracking\n        self._current_task_pages: List[Page] = []\n\n        # Test environment URLs for different task categories\n        self.test_environments = {\n            \"element_extraction\": \"https://mcp-eval-website.vercel.app/extraction\",\n            \"form_interaction\": \"https://mcp-eval-website.vercel.app/forms/\",\n            \"web_navigation\": \"https://mcp-eval-website.vercel.app/navigation\",\n            \"authentication\": \"https://mcp-eval-website.vercel.app/auth/turnstile\",\n        }\n\n        logger.info(\"Playwright state manager initialized\")\n\n    def _create_initial_state(self, task: BaseTask) -> Optional[InitialStateInfo]:\n        \"\"\"\n        Create isolated browser context for task execution.\n\n        Args:\n            task: Task for which to create browser state\n\n        Returns:\n            InitialStateInfo with browser context details\n        \"\"\"\n        try:\n            logger.info(\n                \"| Skipping Playwright browser launch – no initial browser state \"\n                \"needed for task: %s\",\n                task.name,\n            )\n\n            # Generate a lightweight identifier to allow resource tracking even\n            # though no real browser context is created.\n            context_id = f\"noop_{task.category_id}_{task.task_id}_{int(time.time())}\"\n\n            # We still expose the canonical test URL (if any) because some\n            # consumers add it to the task metadata.\n            test_url = self.test_environments.get(task.category_id)\n\n            # Record a dummy resource so 
cleanup logic remains symmetrical.\n            self.track_resource(\n                \"browser_context\",\n                context_id,\n                {\n                    \"task_name\": task.name,\n                    \"task_category\": task.category_id,\n                    \"test_url\": test_url,\n                },\n            )\n\n            return InitialStateInfo(\n                state_id=context_id,\n                state_url=test_url,\n                metadata={\n                    \"browser\": self.browser_name,\n                    \"headless\": self.headless,\n                    \"test_url\": test_url,\n                    \"task_category\": task.category_id,\n                },\n            )\n\n        except Exception as e:\n            logger.error(f\"Failed to create stub initial state for {task.name}: {e}\")\n            return None\n\n    def _store_initial_state_info(\n        self, task: BaseTask, state_info: InitialStateInfo\n    ) -> None:\n        \"\"\"Store browser context information in task object.\"\"\"\n        if hasattr(task, \"__dict__\"):\n            task.browser_context_id = state_info.state_id\n            task.test_url = state_info.state_url\n            task.browser_config = state_info.metadata\n\n    def _cleanup_task_initial_state(self, task: BaseTask) -> bool:\n        \"\"\"Clean up browser context for specific task.\"\"\"\n        try:\n            success = True\n\n            # Close any open pages\n            if self._current_task_pages:\n                for page in self._current_task_pages:\n                    try:\n                        page.close()\n                    except Exception as e:\n                        logger.warning(f\"Failed to close page: {e}\")\n                        success = False\n                self._current_task_pages.clear()\n\n            # Close browser context\n            if self._current_context:\n                try:\n                    self._current_context.close()\n   
                 logger.info(\"Closed browser context\")\n                except Exception as e:\n                    logger.error(f\"Failed to close browser context: {e}\")\n                    success = False\n                finally:\n                    self._current_context = None\n\n            return success\n\n        except Exception as e:\n            logger.error(f\"Error during browser cleanup for {task.name}: {e}\")\n            return False\n\n    def _cleanup_single_resource(self, resource: Dict[str, Any]) -> bool:\n        \"\"\"Clean up a single browser resource.\"\"\"\n        try:\n            if resource[\"type\"] == \"browser_context\":\n                # Context cleanup is handled in _cleanup_task_initial_state\n                logger.debug(f\"Browser context {resource['id']} marked for cleanup\")\n                return True\n\n            logger.warning(f\"Unknown resource type for cleanup: {resource['type']}\")\n            return False\n\n        except Exception as e:\n            logger.error(f\"Failed to cleanup resource {resource}: {e}\")\n            return False\n\n    def _get_context_options(self, task: BaseTask) -> Dict[str, Any]:\n        \"\"\"Get browser context options based on task requirements.\"\"\"\n        options = {\n            \"viewport\": {\"width\": self.viewport_width, \"height\": self.viewport_height}\n        }\n\n        # Load browser state if available\n        if self.state_path.exists():\n            try:\n                options[\"storage_state\"] = str(self.state_path)\n            except Exception as e:\n                logger.warning(f\"Failed to load browser state: {e}\")\n\n        # Task-specific context options\n        if task.category_id == \"form_interaction\":\n            # Enable form interactions\n            options[\"permissions\"] = [\"geolocation\"]\n        elif task.category_id == \"web_navigation\":\n            # Allow navigation between pages\n            
options[\"accept_downloads\"] = False\n\n        return options\n\n    def _setup_test_environment(self, task: BaseTask) -> Optional[str]:\n        \"\"\"Set up test environment for task category.\"\"\"\n        try:\n            test_url = self.test_environments.get(task.category_id)\n            if not test_url:\n                logger.warning(\n                    f\"No test environment defined for category: {task.category_id}\"\n                )\n                return None\n\n            # Create a page and navigate to test environment\n            if self._current_context:\n                page = self._current_context.new_page()\n\n                # Navigate to test URL to ensure it's accessible\n                page.goto(test_url, wait_until=\"networkidle\", timeout=30000)\n                logger.info(f\"Test environment ready: {test_url}\")\n\n                # Track the page for cleanup\n                self._current_task_pages.append(page)\n\n                # Verify page loaded correctly\n                title = page.title()\n                if title:\n                    logger.debug(f\"Page loaded with title: {title}\")\n\n                return test_url\n\n        except PlaywrightTimeoutError:\n            logger.error(f\"Timeout loading test environment: {test_url}\")\n        except Exception as e:\n            logger.error(f\"Failed to setup test environment: {e}\")\n\n        return None\n\n    def get_current_context(self) -> Optional[BrowserContext]:\n        \"\"\"Get the current browser context.\"\"\"\n        return self._current_context\n\n    def get_test_page(self) -> Optional[Page]:\n        \"\"\"Get a page for testing (creates new one if needed).\"\"\"\n        if self._current_context:\n            try:\n                page = self._current_context.new_page()\n                self._current_task_pages.append(page)\n                return page\n            except Exception as e:\n                logger.error(f\"Failed to create test 
page: {e}\")\n        return None\n\n    def navigate_to_test_url(self, task: BaseTask) -> Optional[Page]:\n        \"\"\"Navigate to the test URL for a specific task.\"\"\"\n        test_url = self.test_environments.get(task.category_id)\n        if not test_url:\n            logger.error(f\"No test URL defined for category: {task.category_id}\")\n            return None\n\n        page = self.get_test_page()\n        if page:\n            try:\n                page.goto(test_url, wait_until=\"networkidle\", timeout=30000)\n                logger.info(f\"Navigated to test URL: {test_url}\")\n                return page\n            except Exception as e:\n                logger.error(f\"Failed to navigate to {test_url}: {e}\")\n\n        return None\n\n    def get_service_config_for_agent(self) -> dict:\n        \"\"\"\n        Get service-specific configuration for agent execution.\n\n        Returns:\n            Dictionary containing browser configuration for MCP server\n        \"\"\"\n        config = {\n            \"browser\": self.browser_name,\n            \"headless\": self.headless,\n        }\n\n        # Add browser state file if it exists\n        if self.state_path.exists():\n            config[\"browser_state\"] = str(self.state_path)\n\n        # Add test environment URLs\n        config[\"test_environments\"] = self.test_environments\n\n        return config\n\n    def close_all(self) -> None:\n        \"\"\"Close all browser resources.\"\"\"\n        try:\n            # Close all pages\n            for page in self._current_task_pages:\n                try:\n                    page.close()\n                except Exception:\n                    pass\n            self._current_task_pages.clear()\n\n            # Close context\n            if self._current_context:\n                self._current_context.close()\n                self._current_context = None\n\n            # Close browser\n            if self._browser:\n                
self._browser.close()\n                self._browser = None\n\n            # Stop Playwright\n            if self._playwright:\n                self._playwright.stop()\n                self._playwright = None\n\n            logger.info(\"All browser resources closed\")\n\n        except Exception as e:\n            logger.error(f\"Error closing browser resources: {e}\")\n\n    def set_verification_environment(self, messages_path: str = None) -> None:\n        \"\"\"\n        Set Playwright-specific environment variables for verification scripts.\n\n        Args:\n            messages_path: Optional path to messages.json file for verification\n        \"\"\"\n        import os\n\n        # Set common MCP_MESSAGES if provided\n        if messages_path:\n            os.environ[\"MCP_MESSAGES\"] = str(messages_path)\n            # Also set PLAYWRIGHT_WORK_DIR to the directory containing messages.json\n            work_dir = str(Path(messages_path).parent)\n            os.environ[\"PLAYWRIGHT_WORK_DIR\"] = work_dir\n            logger.info(f\"| Set PLAYWRIGHT_WORK_DIR to: {work_dir}\")\n            logger.info(f\"| Set MCP_MESSAGES to: {messages_path}\")\n\n    def __del__(self):\n        \"\"\"Ensure cleanup on deletion.\"\"\"\n        self.close_all()\n"
  },
  {
    "path": "src/mcp_services/playwright/playwright_task_manager.py",
    "content": "\"\"\"\nPlaywright Task Manager for MCPMark\n====================================\n\nSimple task manager for Playwright MCP tasks.\nFollows anti-over-engineering principles: keep it simple, do what's needed.\n\"\"\"\n\nimport sys\nimport os\nimport subprocess\nfrom pathlib import Path\nfrom typing import List, Dict, Any\n\nfrom src.base.task_manager import BaseTask, BaseTaskManager\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass PlaywrightTask(BaseTask):\n    \"\"\"Playwright-specific task that uses directory name as task name.\"\"\"\n    \n\n\nclass PlaywrightTaskManager(BaseTaskManager):\n    \"\"\"Simple task manager for Playwright MCP tasks.\"\"\"\n\n    def __init__(self, tasks_root: Path = None, task_suite: str = \"standard\"):\n        \"\"\"Initialize with tasks directory.\"\"\"\n        if tasks_root is None:\n            tasks_root = Path(__file__).resolve().parents[3] / \"tasks\"\n\n        super().__init__(\n            tasks_root,\n            mcp_service=\"playwright\",\n            task_class=PlaywrightTask,\n            task_organization=\"directory\",\n            task_suite=task_suite,\n        )\n\n    def _create_task_from_files(\n        self, category_id: str, task_files_info: Dict[str, Any]\n    ) -> PlaywrightTask:\n        \"\"\"Instantiate a `PlaywrightTask` from the dictionary returned by `_find_task_files`.\"\"\"\n        import json\n        \n        # Check for meta.json\n        meta_path = task_files_info[\"instruction_path\"].parent / \"meta.json\"\n        final_category_id = category_id\n        task_id = task_files_info[\"task_id\"]\n        \n        if meta_path.exists():\n            try:\n                with open(meta_path, 'r') as f:\n                    meta_data = json.load(f)\n                    # Use values from meta.json if available\n                    final_category_id = meta_data.get(\"category_id\", category_id)\n                    task_id = 
meta_data.get(\"task_id\", task_id)\n            except Exception as e:\n                logger.warning(f\"Failed to load meta.json from {meta_path}: {e}\")\n\n        return PlaywrightTask(\n            task_instruction_path=task_files_info[\"instruction_path\"],\n            task_verification_path=task_files_info[\"verification_path\"],\n            service=\"playwright\",\n            category_id=final_category_id,\n            task_id=task_id,\n        )\n\n    def _get_verification_command(self, task: BaseTask) -> List[str]:\n        \"\"\"Get verification command - just run the verify.py script.\"\"\"\n        return [sys.executable, str(task.task_verification_path)]\n\n    def run_verification(self, task: BaseTask) -> subprocess.CompletedProcess:\n        \"\"\"Run verification with Playwright-specific environment.\"\"\"\n        env = os.environ.copy()\n\n        # Pass messages.json path and working directory to verification script\n        messages_path = os.getenv(\"MCP_MESSAGES\")\n        work_dir = os.getenv(\"PLAYWRIGHT_WORK_DIR\")\n        \n        if messages_path:\n            env[\"MCP_MESSAGES\"] = messages_path\n            logger.debug(f\"Setting MCP_MESSAGES to: {messages_path}\")\n        \n        if work_dir:\n            env[\"PLAYWRIGHT_WORK_DIR\"] = work_dir\n            logger.debug(f\"Setting PLAYWRIGHT_WORK_DIR to: {work_dir}\")\n\n        return subprocess.run(\n            self._get_verification_command(task),\n            capture_output=True,\n            text=True,\n            timeout=90,\n            env=env,\n        )\n\n    def _format_task_instruction(self, base_instruction: str) -> str:\n        \"\"\"Add Playwright-specific note to instructions.\"\"\"\n        return (\n            base_instruction\n            + \"\\n\\nUse Playwright MCP tools to complete this web automation task.\"\n        )\n"
  },
  {
    "path": "src/mcp_services/playwright_webarena/playwright_login_helper.py",
    "content": "\"\"\"\nWebArena (Docker) Login Helper for MCPMark\n==========================================\n\nThis helper exposes basic browser configuration for agents. Authentication is\nnot required for the public WebArena environment; isolation is handled via\nDocker containerization in the state manager.\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom src.base.login_helper import BaseLoginHelper\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass PlaywrightLoginHelper(BaseLoginHelper):\n    \"\"\"\n    Minimal login helper. It does not launch browsers; that is handled by\n    the Playwright MCP client. It simply exposes configuration parameters such\n    as headless mode and an optional storage state file path.\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        browser: str = \"chromium\",\n        headless: bool = True,\n        state_path: Optional[str | Path] = None,\n        base_url: Optional[str] = None,\n    ) -> None:\n        super().__init__()\n        self.browser_name = browser\n        self.headless = headless\n        self.state_path = (\n            Path(state_path or Path.cwd() / \"playwright_state.json\")\n            .expanduser()\n            .resolve()\n        )\n        self.base_url = base_url\n        logger.info(\n            \"Initialized WebArenaLoginHelper (browser=%s, headless=%s)\",\n            browser,\n            headless,\n        )\n\n    def login(self, **kwargs) -> bool:\n        \"\"\"\n        No-op login. 
For WebArena we don't need credentials; we only provide\n        configuration for the MCP to open a browser.\n        \"\"\"\n        logger.info(\"WebArenaLoginHelper login: no-op\")\n        return True\n\n    def is_authenticated(self) -> bool:\n        return True\n\n    def get_credentials(self) -> dict:\n        return {\n            \"browser\": self.browser_name,\n            \"headless\": self.headless,\n            \"state_path\": str(self.state_path),\n            \"base_url\": self.base_url,\n        }\n\n    def close(self) -> None:\n        # No resources to release\n        pass\n"
  },
  {
    "path": "src/mcp_services/playwright_webarena/playwright_state_manager.py",
    "content": "\"\"\"\nWebArena (Docker) State Manager for MCPMark\n===========================================\n\nThis module manages a WebArena environment that runs inside a Docker container.\nIt is responsible for starting the container in the initial state phase and\nstopping/removing it during cleanup. It exposes the target URL (e.g.\nhttp://localhost:9999) for Playwright MCP-based automation.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport socket\nimport subprocess\nimport time\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Optional, Dict, Any\nfrom urllib.parse import urlparse\n\nimport requests\n\nfrom src.base.state_manager import BaseStateManager, InitialStateInfo\nfrom src.base.task_manager import BaseTask\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\n@dataclass\nclass DockerConfig:\n    image_name: str = \"shopping_admin_final_0719\"\n    image_tar_path: Optional[Path] = None\n    container_name: str = \"shopping_admin\"\n    host_port: int = 7780\n    container_port: int = 80\n    readiness_path: str = \"/admin\"\n    readiness_timeout_seconds: int = 600\n    readiness_poll_interval_seconds: float = 2.0\n\n    @property\n    def base_url(self) -> str:\n        return f\"http://localhost:{self.host_port}\"\n\n\nclass PlaywrightStateManager(BaseStateManager):\n    \"\"\"\n    Manage Docker lifecycle for WebArena-backed tasks.\n\n    - Initial state: ensure image is present (optionally load from tar), then\n      run container and wait until HTTP endpoint is ready.\n    - Cleanup: stop and remove the container.\n    \"\"\"\n    \n    # Category-specific Docker configurations\n    CATEGORY_CONFIGS = {\n        \"reddit\": {\n            \"image_name\": \"postmill-populated-exposed-withimg\",\n            \"container_name\": \"forum\",\n            \"host_port\": 9999,\n            \"readiness_path\": \"/\"\n        },\n        \"shopping\": {\n            \"image_name\": 
\"shopping_final_0712\",\n            \"container_name\": \"shopping\",\n            \"host_port\": 7770,\n            \"readiness_path\": \"/\"\n        },\n        \"shopping_admin\": {\n            \"image_name\": \"shopping_admin_final_0719\",\n            \"container_name\": \"shopping_admin\",\n            \"host_port\": 7780,\n            \"readiness_path\": \"/admin\"\n        }\n    }\n\n    def __init__(\n        self,\n        *,\n        docker_image_name: str = \"shopping_admin_final_0719\",\n        docker_container_name: str = \"shopping_admin\",\n        host_port: int = 7780,\n        container_port: int = 80,\n        image_tar_path: Optional[str | Path] = None,\n        readiness_path: str = \"/admin\",\n        readiness_timeout_seconds: int = 600,\n        readiness_poll_interval_seconds: float = 2.0,\n        # Playwright browser config params (ignored by this state manager)\n        browser: Optional[str] = None,\n        headless: Optional[bool] = None,\n        network_origins: Optional[str] = None,\n        user_profile: Optional[str] = None,\n        viewport_width: Optional[int] = None,\n        viewport_height: Optional[int] = None,\n        # Debug mode - skip container cleanup\n        skip_cleanup: bool = False,\n    ) -> None:\n        super().__init__(service_name=\"playwright_webarena\")\n\n        self.config = DockerConfig(\n            image_name=docker_image_name,\n            image_tar_path=Path(image_tar_path).expanduser().resolve()\n            if image_tar_path\n            else None,\n            container_name=docker_container_name,\n            host_port=host_port,\n            container_port=container_port,\n            readiness_path=readiness_path,\n            readiness_timeout_seconds=readiness_timeout_seconds,\n            readiness_poll_interval_seconds=readiness_poll_interval_seconds,\n        )\n\n        self.skip_cleanup = skip_cleanup\n\n        logger.info(\n            \"Initialized WebArenaStateManager 
(image=%s, container=%s, port=%s, skip_cleanup=%s)\",\n            self.config.image_name,\n            self.config.container_name,\n            self.config.host_port,\n            self.skip_cleanup,\n        )\n\n    # ---- Helpers ---------------------------------------------------------\n\n    def _run_cmd(\n        self, args: list[str], *, check: bool = False\n    ) -> subprocess.CompletedProcess:\n        logger.debug(\"| Running command: %s\", \" \".join(args))\n        return subprocess.run(\n            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=check\n        )\n\n    def _image_exists(self, image: str) -> bool:\n        result = self._run_cmd(\n            [\"docker\", \"images\", \"--format\", \"{{.Repository}}:{{.Tag}}\"]\n        )\n        lines = [line.strip() for line in result.stdout.splitlines() if line.strip()]\n        # Parse target image (allow optional tag; default latest)\n        if \":\" in image:\n            target_repo, target_tag = image.split(\":\", 1)\n        else:\n            target_repo, target_tag = image, \"latest\"\n\n        for repo_tag in lines:\n            if \":\" in repo_tag:\n                repo, tag = repo_tag.split(\":\", 1)\n            else:\n                repo, tag = repo_tag, \"latest\"\n            if repo == target_repo and tag == target_tag:\n                logger.debug(\"| Found Docker image %s:%s\", repo, tag)\n                return True\n        logger.debug(\"| Docker image not found: %s:%s\", target_repo, target_tag)\n        return False\n\n    def _load_image_from_tar_if_needed(self) -> None:\n        if self.config.image_tar_path and not self._image_exists(\n            self.config.image_name\n        ):\n            logger.info(\"| Loading Docker image from tar: %s\", self.config.image_tar_path)\n            result = self._run_cmd(\n                [\"docker\", \"load\", \"--input\", str(self.config.image_tar_path)]\n            )\n            if result.returncode != 
0:\n                logger.error(\"| Failed to load Docker image: %s\", result.stderr.strip())\n                raise RuntimeError(f\"docker load failed: {result.stderr}\")\n            logger.info(\"| Docker image loaded\")\n\n    def _stop_and_remove_container(self, name: str) -> None:\n        # Stop (ignore errors if not running)\n        self._run_cmd([\"docker\", \"stop\", name])\n        # Remove (ignore errors if not exists)\n        self._run_cmd([\"docker\", \"rm\", name])\n\n    def _container_is_running(self, name: str) -> bool:\n        result = self._run_cmd(\n            [\"docker\", \"ps\", \"--filter\", f\"name=^{name}$\", \"--format\", \"{{.Names}}\"]\n        )\n        running = any(line.strip() == name for line in result.stdout.splitlines())\n        logger.debug(\"| Container '%s' running: %s\", name, running)\n        return running\n\n    def _port_open(self, host: str, port: int) -> bool:\n        try:\n            with socket.create_connection((host, port), timeout=1.0):\n                return True\n        except OSError:\n            return False\n\n    def _http_ready(self, url: str) -> bool:\n        try:\n            resp = requests.get(url, timeout=3)\n            return resp.status_code < 500\n        except Exception:\n            return False\n\n    def _get_entry_url(self) -> str:\n        base = self.config.base_url.rstrip(\"/\")\n        path = self.config.readiness_path\n        if not path or path == \"/\":\n            return base\n        return f\"{base}{path}\"\n\n    def _wait_until_ready(self) -> bool:\n        deadline = time.time() + self.config.readiness_timeout_seconds\n        base_url = self.config.base_url.rstrip(\"/\")\n        url = self._get_entry_url()\n\n        # Determine host and port from URL for port checks\n        parsed = urlparse(base_url)\n        host = parsed.hostname or \"localhost\"\n        port = parsed.port or self.config.host_port\n\n        # First wait for port to open to avoid long HTTP 
errors\n        while time.time() < deadline:\n            if self._port_open(host, port):\n                break\n            time.sleep(self.config.readiness_poll_interval_seconds)\n\n        while time.time() < deadline:\n            if self._http_ready(url):\n                logger.info(\"| WebArena HTTP endpoint ready: %s\", url)\n                return True\n            time.sleep(self.config.readiness_poll_interval_seconds)\n\n        logger.error(\"| Timed out waiting for WebArena at %s\", url)\n        return False\n\n    def _wait_for_mysql_ready(self, max_wait_seconds: int = 120) -> bool:\n        \"\"\"Wait for MySQL to be ready in the container.\"\"\"\n        deadline = time.time() + max_wait_seconds\n        while time.time() < deadline:\n            result = self._run_cmd([\n                \"docker\", \"exec\", self.config.container_name,\n                \"mysql\", \"-u\", \"magentouser\", \"-pMyPassword\",\n                \"magentodb\", \"-e\", \"SELECT 1;\"\n            ])\n            if result.returncode == 0:\n                logger.info(\"| MySQL is ready in container %s\", self.config.container_name)\n                return True\n            time.sleep(2)\n        logger.warning(\"| MySQL not ready after %d seconds\", max_wait_seconds)\n        return False\n\n    def _wait_for_magento_ready(self, max_wait_seconds: int = 180) -> bool:\n        \"\"\"Wait for Magento to be fully initialized.\"\"\"\n        deadline = time.time() + max_wait_seconds\n        while time.time() < deadline:\n            # Check if Magento's setup is complete by trying to access config\n            result = self._run_cmd([\n                \"docker\", \"exec\", self.config.container_name,\n                \"/var/www/magento2/bin/magento\", \"config:show\", \"web/unsecure/base_url\"\n            ])\n            if result.returncode == 0:\n                logger.info(\"| Magento is ready in container %s\", self.config.container_name)\n                return True\n  
          time.sleep(5)\n        logger.warning(\"| Magento not ready after %d seconds\", max_wait_seconds)\n        return False\n\n    def _configure_shopping_post_start(self) -> None:\n        \"\"\"Run Magento-specific steps for shopping container.\n        Waits for services to be ready before configuring.\n        \"\"\"\n        logger.info(\"| Running shopping post-start setup\")\n        \n        # Wait for MySQL to be ready first\n        if not self._wait_for_mysql_ready():\n            logger.warning(\"| MySQL not ready, attempting configuration anyway\")\n        \n        # Wait for Magento to be ready\n        if not self._wait_for_magento_ready():\n            logger.warning(\"| Magento not ready, attempting configuration anyway\")\n        \n        base_url = f\"http://localhost:{self.config.host_port}\"\n\n        cmds = [\n            [\n                \"docker\",\n                \"exec\",\n                self.config.container_name,\n                \"/var/www/magento2/bin/magento\",\n                \"setup:store-config:set\",\n                f\"--base-url={base_url}\",\n            ],\n            [\n                \"docker\",\n                \"exec\",\n                self.config.container_name,\n                \"mysql\",\n                \"-u\",\n                \"magentouser\",\n                \"-pMyPassword\",\n                \"magentodb\",\n                \"-e\",\n                f\"UPDATE core_config_data SET value='{base_url}/' WHERE path IN ('web/secure/base_url', 'web/unsecure/base_url');\",\n            ],\n            [\n                \"docker\",\n                \"exec\",\n                self.config.container_name,\n                \"/var/www/magento2/bin/magento\",\n                \"cache:flush\",\n            ],\n        ]\n\n        for cmd in cmds:\n            result = self._run_cmd(cmd)\n            if result.returncode != 0:\n                logger.warning(\n                    \"| Shopping setup step failed 
(%s): %s\",\n                    \" \".join(cmd),\n                    result.stderr.strip(),\n                )\n            else:\n                logger.debug(\n                    \"| Shopping setup step ok (%s): %s\",\n                    \" \".join(cmd),\n                    result.stdout.strip(),\n                )\n\n\n    def _configure_shopping_admin_post_start(self) -> None:\n        \"\"\"Run Magento-specific steps for shopping_admin container.\n        Waits for services to be ready before configuring.\n        \"\"\"\n        logger.info(\"| Running shopping_admin post-start setup\")\n        \n        # Wait for MySQL to be ready first\n        if not self._wait_for_mysql_ready():\n            logger.warning(\"| MySQL not ready, attempting configuration anyway\")\n        \n        # Wait for Magento to be ready\n        if not self._wait_for_magento_ready():\n            logger.warning(\"| Magento not ready, attempting configuration anyway\")\n        \n        base_url = f\"http://localhost:{self.config.host_port}\"\n\n        cmds = [\n            [\n                \"docker\",\n                \"exec\",\n                self.config.container_name,\n                \"/var/www/magento2/bin/magento\",\n                \"setup:store-config:set\",\n                f\"--base-url={base_url}\",\n            ],\n            [\n                \"docker\",\n                \"exec\",\n                self.config.container_name,\n                \"mysql\",\n                \"-u\",\n                \"magentouser\",\n                \"-pMyPassword\",\n                \"magentodb\",\n                \"-e\",\n                f\"UPDATE core_config_data SET value='{base_url}/' WHERE path IN ('web/secure/base_url', 'web/unsecure/base_url');\",\n            ],\n            [\n                \"docker\",\n                \"exec\",\n                self.config.container_name,\n                \"/var/www/magento2/bin/magento\",\n                \"config:set\",\n         
       \"admin/security/password_is_forced\",\n                \"0\",\n            ],\n            [\n                \"docker\",\n                \"exec\",\n                self.config.container_name,\n                \"/var/www/magento2/bin/magento\",\n                \"config:set\",\n                \"admin/security/password_lifetime\",\n                \"0\",\n            ],\n            [\n                \"docker\",\n                \"exec\",\n                self.config.container_name,\n                \"/var/www/magento2/bin/magento\",\n                \"cache:flush\",\n            ],\n        ]\n\n        for cmd in cmds:\n            result = self._run_cmd(cmd)\n            if result.returncode != 0:\n                logger.warning(\n                    \"| Shopping_admin setup step failed (%s): %s\",\n                    \" \".join(cmd),\n                    result.stderr.strip(),\n                )\n            else:\n                logger.debug(\n                    \"| Shopping_admin setup step ok (%s): %s\",\n                    \" \".join(cmd),\n                    result.stdout.strip(),\n                )\n\n    # ---- BaseStateManager hooks -----------------------------------------\n\n    def _create_initial_state(self, task: BaseTask) -> Optional[InitialStateInfo]:\n        try:\n            # Dynamically update config based on task category\n            if hasattr(task, 'category_id') and task.category_id in self.CATEGORY_CONFIGS:\n                category_config = self.CATEGORY_CONFIGS[task.category_id]\n                logger.info(f\"| Using category-specific config for '{task.category_id}': {category_config}\")\n                \n                # Update the config with category-specific values\n                self.config.image_name = category_config[\"image_name\"]\n                self.config.container_name = category_config[\"container_name\"]\n                self.config.host_port = category_config[\"host_port\"]\n                
self.config.readiness_path = category_config[\"readiness_path\"]\n            \n            # Ensure image exists (load from tar if configured)\n            self._load_image_from_tar_if_needed()\n\n            # Ensure any stale container is gone\n            self._stop_and_remove_container(self.config.container_name)\n\n            # Run container\n            run_cmd = [\n                \"docker\",\n                \"run\",\n                \"--name\",\n                self.config.container_name,\n                \"-p\",\n                f\"{self.config.host_port}:{self.config.container_port}\",\n                \"-d\",\n                self.config.image_name,\n            ]\n            print(\"| Docker run command: \", run_cmd)\n            result = self._run_cmd(run_cmd)\n            if result.returncode != 0:\n                logger.error(\"| Failed to start container: %s\", result.stderr.strip())\n                return None\n            container_id = result.stdout.strip()\n            logger.info(\n                \"| Started container %s (%s)\", self.config.container_name, container_id\n            )\n\n            # Special handling for shopping and shopping_admin\n            if self.config.container_name == \"shopping\":\n                self._configure_shopping_post_start()\n            if self.config.container_name == \"shopping_admin\":\n                self._configure_shopping_admin_post_start()\n\n            # Wait for readiness\n            if not self._wait_until_ready():\n                # Cleanup on failure\n                self._stop_and_remove_container(self.config.container_name)\n                return None\n\n            entry_url = self._get_entry_url()\n\n            # Track resource for cleanup\n            self.track_resource(\n                \"docker_container\",\n                self.config.container_name,\n                {\n                    \"image\": self.config.image_name,\n                    \"host_port\": 
self.config.host_port,\n                    \"container_port\": self.config.container_port,\n                    \"base_url\": entry_url,\n                },\n            )\n\n            # Provide initial state info\n            return InitialStateInfo(\n                state_id=self.config.container_name,\n                state_url=entry_url,\n                metadata={\n                    \"docker_image\": self.config.image_name,\n                    \"container_name\": self.config.container_name,\n                    \"host_port\": self.config.host_port,\n                    \"container_port\": self.config.container_port,\n                    \"base_url\": entry_url,\n                    \"category\": task.category_id,\n                },\n            )\n        except Exception as exc:\n            logger.error(\"| Failed to create WebArena initial state: %s\", exc)\n            return None\n\n    def _store_initial_state_info(\n        self, task: BaseTask, state_info: InitialStateInfo\n    ) -> None:\n        if hasattr(task, \"__dict__\"):\n            task.docker_container_name = state_info.state_id\n            task.base_url = state_info.state_url\n            task.docker_metadata = state_info.metadata\n\n    def _cleanup_task_initial_state(self, task: BaseTask) -> bool:\n        if self.skip_cleanup:\n            logger.info(\"| Skipping container cleanup (skip_cleanup=True)\")\n            logger.info(\"| Container is still running at: %s\", self._get_entry_url())\n            logger.info(\n                \"| To manually stop: docker stop %s && docker rm %s\",\n                self.config.container_name,\n                self.config.container_name,\n            )\n            return True\n\n        try:\n            self._stop_and_remove_container(self.config.container_name)\n            return True\n        except Exception as exc:\n            logger.error(\"| Failed to cleanup container for %s: %s\", task.name, exc)\n            return False\n\n    
def _cleanup_single_resource(self, resource: Dict[str, Any]) -> bool:\n        if self.skip_cleanup:\n            logger.info(\n                \"| Skipping resource cleanup for %s (skip_cleanup=True)\",\n                resource.get(\"id\"),\n            )\n            return True\n\n        try:\n            if resource.get(\"type\") == \"docker_container\":\n                self._stop_and_remove_container(resource[\"id\"])\n                return True\n            logger.warning(\n                \"| Unknown resource type for cleanup: %s\", resource.get(\"type\")\n            )\n            return False\n        except Exception as exc:\n            logger.error(\"| Resource cleanup failed: %s\", exc)\n            return False\n\n    def get_service_config_for_agent(self) -> dict:\n        \"\"\"\n        Provide configuration to the agent. The key piece is the base URL that\n        agents should navigate to when starting tasks.\n        \"\"\"\n        return {\n            \"environment\": \"webarena-docker\",\n            \"base_url\": self._get_entry_url(),\n            \"docker\": {\n                \"image\": self.config.image_name,\n                \"container\": self.config.container_name,\n                \"host_port\": self.config.host_port,\n                \"container_port\": self.config.container_port,\n            },\n        }\n\n    def close_all(self) -> None:\n        if self.skip_cleanup:\n            logger.info(\"| Skipping container cleanup in close_all (skip_cleanup=True)\")\n            return\n\n        try:\n            self._stop_and_remove_container(self.config.container_name)\n        except Exception:\n            # Best effort\n            pass\n\n    def __del__(self) -> None:\n        if not self.skip_cleanup:\n            self.close_all()\n"
  },
  {
    "path": "src/mcp_services/playwright_webarena/playwright_task_manager.py",
    "content": "\"\"\"\nWebArena Playwright Task Manager for MCPMark\n============================================\n\nSimple task manager for WebArena-backed Playwright MCP tasks.\n\"\"\"\n\nfrom __future__ import annotations\nimport sys\nimport os\nimport subprocess\nfrom pathlib import Path\nfrom typing import List, Dict, Any\n\nfrom src.logger import get_logger\nfrom src.base.task_manager import BaseTask, BaseTaskManager\nlogger = get_logger(__name__)\n\nclass PlaywrightTaskManager(BaseTaskManager):\n    \"\"\"Task manager for Playwright tasks against a WebArena environment.\"\"\"\n\n    def __init__(\n        self,\n        tasks_root: Path | None = None,\n        base_url: str | None = None,\n        task_suite: str = \"standard\",\n    ):\n        if tasks_root is None:\n            tasks_root = Path(__file__).resolve().parents[3] / \"tasks\"\n        super().__init__(\n            tasks_root,\n            mcp_service=\"playwright_webarena\",\n            task_class=BaseTask,\n            task_organization=\"directory\",\n            task_suite=task_suite,\n        )\n\n    def _create_task_from_files(\n        self, category_id: str, task_files_info: Dict[str, Any]\n    ) -> BaseTask:\n        import json\n        \n        # Check for meta.json\n        meta_path = task_files_info[\"instruction_path\"].parent / \"meta.json\"\n        final_category_id = category_id\n        task_id = task_files_info[\"task_id\"]\n        \n        if meta_path.exists():\n            try:\n                with open(meta_path, 'r') as f:\n                    meta_data = json.load(f)\n                    # Use values from meta.json if available\n                    final_category_id = meta_data.get(\"category_id\", category_id)\n                    task_id = meta_data.get(\"task_id\", task_id)\n            except Exception as e:\n                logger.warning(f\"Failed to load meta.json from {meta_path}: {e}\")\n\n        task = BaseTask(\n            
task_instruction_path=task_files_info[\"instruction_path\"],\n            task_verification_path=task_files_info[\"verification_path\"],\n            service=\"playwright_webarena\",\n            category_id=final_category_id,\n            task_id=task_id,\n        )\n        \n        return task\n\n\n    # NEW: 注入统一前缀（基于 state manager 注入的 task.base_url）\n    def get_task_instruction(self, task: BaseTask) -> str:\n        base_instruction = task.get_task_instruction().strip()\n        base_url = getattr(task, \"base_url\", None)\n        prefix = f\"Navigate to {base_url.rstrip('/')} and complete the following task.\"\n        # 前缀 + 原始任务说明\n        return self._format_task_instruction(f\"{prefix}\\n\\n{base_instruction}\")\n\n    def _get_verification_command(self, task: BaseTask) -> List[str]:\n        return [sys.executable, str(task.task_verification_path)]\n\n    # 将 base_url 通过环境变量传给 verify.py\n    def run_verification(self, task: BaseTask) -> subprocess.CompletedProcess:\n        env = os.environ.copy()\n        base_url = getattr(task, \"base_url\", None)\n        if base_url:\n            env[\"WEBARENA_BASE_URL\"] = base_url.rstrip(\"/\")\n        return subprocess.run(\n            self._get_verification_command(task),\n            capture_output=True,\n            text=True,\n            timeout=300,\n            env=env,\n        )\n\n    def _format_task_instruction(self, base_instruction: str) -> str:\n        note = \"Use Playwright MCP tools to complete this task.\"\n        return (base_instruction \n                + \"\\n\\n\" \n                + note + \"\\n\\nNote: Based on your understanding, solve the task all at once by yourself, don't ask for my opinions on anything.\")\n"
  },
  {
    "path": "src/mcp_services/playwright_webarena/reddit_env_setup.md",
    "content": "# WebArena Reddit环境搭建指南\n\n本指南介绍如何搭建WebArena Reddit环境，用于Playwright MCP自动化测试。\n\n## 系统要求\n\n- Ubuntu 22.04+ 或其他Linux发行版\n- Docker环境\n- 至少50GB可用磁盘空间\n- 至少4GB内存\n\n## 快速设置步骤\n\n### 1. 下载Reddit Docker镜像\n\nWebArena提供3个镜像源，选择网络最快的：\n\n```bash\n# 选项1: Google Drive (通常最快)\npip install gdown\ngdown 17Qpp1iu_mPqzgO_73Z9BnFjHrzmX9DGf\n\n# 选项2: Archive.org\nwget https://archive.org/download/webarena-env-forum-image/postmill-populated-exposed-withimg.tar\n\n# 选项3: CMU服务器\nwget http://metis.lti.cs.cmu.edu/webarena-images/postmill-populated-exposed-withimg.tar\n```\n\n### 2. 安装Docker (如果尚未安装)\n\n```bash\nsudo apt update\nsudo apt install docker.io -y\nsudo systemctl start docker\nsudo systemctl enable docker\nsudo usermod -aG docker $USER\nnewgrp docker\n```\n\n### 3. 启动Reddit环境\n\n```bash\n# 加载Docker镜像 (约50GB，需要等待几分钟)\ndocker load --input postmill-populated-exposed-withimg.tar\n\n# 启动容器\ndocker run --name forum -p 9999:80 -d postmill-populated-exposed-withimg\n\n# 等待服务启动 (约1-2分钟)\nsleep 120\n\n# 验证服务状态\ndocker logs forum | tail -10\ncurl -I http://localhost:9999\n```\n\n### 4. 
验证环境\n\n访问 `http://localhost:9999` 应该看到Postmill论坛主页，包含：\n- 导航栏 (Forums, Wiki)\n- 搜索框\n- 登录/注册链接\n- 论坛列表 (AskReddit, technology, gaming等)\n\n## 端口开放策略\n\n根据使用场景选择合适的端口开放策略：\n\n### 策略1: GCP防火墙规则 (推荐 - 生产环境)\n\n**适用场景**: 长期使用、团队协作、稳定的公共访问\n\n```bash\n# 安装gcloud CLI (如果尚未安装)\ncurl https://sdk.cloud.google.com | bash\nexec -l $SHELL\n\n# 认证\ngcloud auth login\n\n# 创建防火墙规则\ngcloud compute firewall-rules create allow-reddit-9999 \\\n  --allow tcp:9999 \\\n  --source-ranges 0.0.0.0/0 \\\n  --description \"Allow access to WebArena Reddit on port 9999\"\n\n# 获取外部IP\ngcloud compute instances list\n```\n\n**优点**: 永久有效、稳定、无额外依赖  \n**缺点**: 需要GCP权限、公网完全开放\n\n### 策略2: ngrok隧道 (快速分享)\n\n**适用场景**: 临时演示、快速测试、无需GCP权限\n\n```bash\n# 安装ngrok\nwget https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz\ntar xvzf ngrok-v3-stable-linux-amd64.tgz\nsudo mv ngrok /usr/local/bin\n\n# 创建隧道\nngrok http 9999\n```\n\n**优点**: 即时生效、HTTPS支持、无需服务器配置  \n**缺点**: 临时URL、需要保持运行、免费版有限制\n\n### 策略3: Cloudflared隧道 (免费持久)\n\n**适用场景**: 长期免费使用、无需GCP、需要稳定访问\n\n```bash\n# 安装cloudflared\nwget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64\nsudo mv cloudflared-linux-amd64 /usr/local/bin/cloudflared\nsudo chmod +x /usr/local/bin/cloudflared\n\n# 创建临时隧道\ncloudflared tunnel --url http://localhost:9999\n\n# 或创建永久隧道 (需要Cloudflare账号)\ncloudflared tunnel login\ncloudflared tunnel create webarena-reddit\ncloudflared tunnel route dns webarena-reddit reddit.yourdomain.com\n```\n\n**优点**: 免费、持久、自定义域名  \n**缺点**: 需要Cloudflare账号、设置稍复杂\n\n### 策略4: SSH端口转发 (开发调试)\n\n**适用场景**: 本地开发、安全要求高、团队内部访问\n\n```bash\n# 在本地机器上执行\nssh -L 8080:localhost:9999 user@your-server-ip\n\n# 然后访问 http://localhost:8080\n```\n\n**优点**: 最安全、无需开放公网端口  \n**缺点**: 需要SSH访问、仅限本地使用\n\n## Playwright MCP测试\n\n环境搭建完成后，可以使用Playwright MCP进行自动化测试：\n\n```javascript\n// 基础连接测试\nawait page.goto('http://your-reddit-url:9999');\n\n// 导航测试\nawait page.click('text=Forums');\nawait page.click('text=AskReddit');\n\n// 
表单交互测试\nawait page.click('text=Log in');\nawait page.fill('[placeholder=\"Username\"]', 'testuser');\nawait page.fill('[placeholder=\"Password\"]', 'testpass');\n```\n\n## 故障排除\n\n### 容器启动失败\n```bash\n# 检查容器状态\ndocker ps -a\n\n# 查看详细日志\ndocker logs forum\n\n# 重启容器\ndocker restart forum\n```\n\n### 服务未就绪\n```bash\n# 检查PostgreSQL是否完全启动\ndocker logs forum | grep \"database system is ready\"\n\n# 等待更长时间 (数据库恢复需要时间)\nsleep 300\n```\n\n### 端口被占用\n```bash\n# 检查端口使用情况\nnetstat -tlnp | grep 9999\n\n# 使用不同端口\ndocker run --name forum -p 8888:80 -d postmill-populated-exposed-withimg\n```\n\n## 环境重置\n\n完成测试后重置环境：\n\n```bash\n# 停止并删除容器\ndocker stop forum\ndocker rm forum\n\n# 重新启动\ndocker run --name forum -p 9999:80 -d postmill-populated-exposed-withimg\n```\n\n## 高级配置\n\n### 环境变量设置 (WebArena标准)\n```bash\nexport REDDIT=\"your-server-hostname:9999\"\nexport REDDIT_URL=\"http://your-server-hostname:9999\"\n```\n\n### 批量任务测试\n```bash\n# 准备WebArena测试配置\nmkdir -p ~/.webarena\necho \"REDDIT=your-server-hostname:9999\" >> ~/.webarena/config\n```\n\n---\n\n**注意**: 这个Reddit环境包含成千上万的预填充数据，完全模拟真实的Reddit使用场景，非常适合进行复杂的Web自动化任务测试。"
  },
  {
    "path": "src/mcp_services/postgres/__init__.py",
    "content": "\"\"\"\nPostgreSQL MCP Service for MCPMark\n===================================\n\nThis module provides PostgreSQL database integration for MCPMark evaluation.\n\"\"\"\n\nfrom .postgres_login_helper import PostgresLoginHelper\nfrom .postgres_state_manager import PostgresStateManager\nfrom .postgres_task_manager import PostgresTaskManager, PostgresTask\n\n__all__ = [\n    \"PostgresLoginHelper\",\n    \"PostgresStateManager\",\n    \"PostgresTaskManager\",\n    \"PostgresTask\",\n]\n"
  },
  {
    "path": "src/mcp_services/postgres/postgres_login_helper.py",
    "content": "\"\"\"\nPostgreSQL Login Helper for MCPMark\n====================================\n\nHandles PostgreSQL authentication and connection validation.\n\"\"\"\n\nimport json\nimport psycopg2\nfrom pathlib import Path\nfrom typing import Optional, Dict, Any\n\nfrom src.base.login_helper import BaseLoginHelper\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass PostgresLoginHelper(BaseLoginHelper):\n    \"\"\"Handles PostgreSQL authentication and connection validation.\"\"\"\n\n    def __init__(\n        self,\n        host: str = \"localhost\",\n        port: int = 5432,\n        database: str = \"postgres\",\n        username: str = \"postgres\",\n        password: str = None,\n        state_path: Optional[Path] = None,\n    ):\n        \"\"\"Initialize PostgreSQL login helper.\n\n        Args:\n            host: Database host\n            port: Database port\n            database: Database name\n            username: Database username\n            password: Database password\n            state_path: Path to save connection state\n        \"\"\"\n        super().__init__()\n        self.host = host\n        self.port = port\n        self.database = database\n        self.username = username\n        self.password = password\n        self.state_path = state_path or Path.home() / \".mcpbench\" / \"postgres_auth.json\"\n\n        # Ensure state directory exists\n        self.state_path.parent.mkdir(parents=True, exist_ok=True)\n\n    def login(self, **kwargs) -> bool:\n        \"\"\"Test PostgreSQL connection and save state.\n\n        Returns:\n            bool: True if connection successful\n        \"\"\"\n        try:\n            # Test connection\n            conn = psycopg2.connect(\n                host=self.host,\n                port=self.port,\n                database=self.database,\n                user=self.username,\n                password=self.password,\n                connect_timeout=10,\n            )\n\n      
      # Execute test query\n            with conn.cursor() as cur:\n                cur.execute(\"SELECT version()\")\n                version = cur.fetchone()[0]\n                logger.info(f\"PostgreSQL connection successful: {version}\")\n\n                # Check permissions\n                cur.execute(\n                    \"\"\"\n                    SELECT has_database_privilege(%s, 'CREATE')\n                \"\"\",\n                    (self.database,),\n                )\n                can_create = cur.fetchone()[0]\n\n                if not can_create:\n                    logger.warning(\"User does not have CREATE privilege on database\")\n\n            conn.close()\n\n            # Save connection state\n            self._save_connection_state(\n                {\n                    \"host\": self.host,\n                    \"port\": self.port,\n                    \"database\": self.database,\n                    \"username\": self.username,\n                    \"version\": version,\n                    \"can_create\": can_create,\n                    \"authenticated_at\": self._get_current_timestamp(),\n                }\n            )\n\n            return True\n\n        except psycopg2.Error as e:\n            logger.error(f\"PostgreSQL connection failed: {e}\")\n            return False\n        except Exception as e:\n            logger.error(f\"Unexpected error during PostgreSQL login: {e}\")\n            return False\n\n    def _save_connection_state(self, state: Dict[str, Any]):\n        \"\"\"Save connection state to file.\"\"\"\n        try:\n            # Don't save password\n            safe_state = {k: v for k, v in state.items() if k != \"password\"}\n\n            with open(self.state_path, \"w\") as f:\n                json.dump(safe_state, f, indent=2)\n\n            # Set restrictive permissions\n            self.state_path.chmod(0o600)\n            logger.info(f\"Connection state saved to: {self.state_path}\")\n\n        
except Exception as e:\n            logger.error(f\"Failed to save connection state: {e}\")\n\n    def _get_current_timestamp(self) -> str:\n        \"\"\"Get current timestamp in ISO format.\"\"\"\n        from datetime import datetime, timezone\n\n        return datetime.now(timezone.utc).isoformat()\n\n    def is_connected(self) -> bool:\n        \"\"\"Check if we can connect to PostgreSQL.\"\"\"\n        return self.login()\n\n    def get_connection_params(self) -> Dict[str, Any]:\n        \"\"\"Get connection parameters (without password).\"\"\"\n        return {\n            \"host\": self.host,\n            \"port\": self.port,\n            \"database\": self.database,\n            \"user\": self.username,\n        }\n"
  },
  {
    "path": "src/mcp_services/postgres/postgres_state_manager.py",
    "content": "\"\"\"\nPostgreSQL State Manager for MCPMark\n=====================================\n\nManages database state for PostgreSQL tasks including schema setup,\ntest data creation, and cleanup.\n\"\"\"\n\nimport os\nimport subprocess\nimport sys\nimport psycopg2\nfrom psycopg2 import sql\nfrom pathlib import Path\nfrom typing import Optional, Dict, Any, List\n\nfrom src.base.state_manager import BaseStateManager, InitialStateInfo\nfrom src.base.task_manager import BaseTask\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass PostgresStateManager(BaseStateManager):\n    \"\"\"Manages PostgreSQL database state for task evaluation.\"\"\"\n\n    def __init__(\n        self,\n        host: str = \"localhost\",\n        port: int = 5432,\n        database: str = \"postgres\",\n        username: str = \"postgres\",\n        password: str = None,\n    ):\n        \"\"\"Initialize PostgreSQL state manager.\n\n        Args:\n            host: Database host\n            port: Database port\n            database: Main database name\n            username: Database username\n            password: Database password\n            template_db: Template database for initial states\n        \"\"\"\n        super().__init__(service_name=\"postgres\")\n\n        self.host = host\n        self.port = port\n        self.database = database\n        self.username = username\n        self.password = password\n\n        # Connection parameters\n        self.conn_params = {\n            \"host\": host,\n            \"port\": port,\n            \"user\": username,\n            \"password\": password,\n        }\n\n        # Track created databases for cleanup\n        self.created_databases: List[str] = []\n\n        # Track current task database for agent configuration\n        self._current_task_database: Optional[str] = None\n\n        # Validate connection on initialization\n        try:\n            self._test_connection()\n            
logger.info(\"PostgreSQL state manager initialized successfully\")\n            self._setup_database()\n        except Exception as e:\n            raise RuntimeError(f\"PostgreSQL initialization failed: {e}\")\n\n    def _test_connection(self):\n        \"\"\"Test database connection.\"\"\"\n        conn = psycopg2.connect(**self.conn_params, database=\"postgres\")\n        conn.close()\n    \n    def _setup_database(self):\n        \"\"\"Setup all required databases by downloading and restoring from backup.\"\"\"\n        databases = ['employees', 'chinook', 'dvdrental', 'sports', 'lego']\n        \n        for db_name in databases:\n            if not self._database_exists(db_name):\n                logger.info(f\"Setting up {db_name} database...\")\n                \n                # Path to backup file\n                backup_dir = Path(__file__).parent.parent.parent.parent / \"postgres_state\"\n                backup_file = backup_dir / f\"{db_name}.backup\"\n                \n                # Download backup if not exists\n                if not backup_file.exists():\n                    backup_dir.mkdir(parents=True, exist_ok=True)\n                    logger.info(f\"Downloading {db_name} backup...\")\n                    try:\n                        import urllib.request\n                        urllib.request.urlretrieve(\n                            f'https://storage.mcpmark.ai/postgres/{db_name}.backup',\n                            str(backup_file)\n                        )\n                        logger.info(f\"{db_name} backup downloaded\")\n                    except Exception as e:\n                        logger.warning(f\"Failed to download {db_name} backup: {e}\")\n                        continue\n                \n                # Create database\n                try:\n                    self._create_empty_database(db_name)\n                    logger.info(f\"Created {db_name} database\")\n                except Exception as e:\n        
            logger.warning(f\"Failed to create {db_name} database: {e}\")\n                    continue\n                \n                # Restore from backup\n                env = os.environ.copy()\n                env['PGPASSWORD'] = self.password\n                \n                try:\n                    result = subprocess.run([\n                        'pg_restore',\n                        '-h', str(self.host),\n                        '-p', str(self.port),\n                        '-U', self.username,\n                        '-d', db_name,\n                        '-v',\n                        str(backup_file)\n                    ], env=env, capture_output=True, text=True)\n                    \n                    if result.returncode != 0:\n                        logger.warning(f\"pg_restore had errors for {db_name}: {result.stderr}\")\n                    else:\n                        logger.info(f\"{db_name} database restored successfully\")\n                except Exception as e:\n                    logger.warning(f\"Failed to restore {db_name} database: {e}\")\n            else:\n                logger.debug(f\"{db_name} database already exists\")\n\n    def _setup_database(self):\n        \"\"\"Setup all required databases by downloading and restoring from backup.\"\"\"\n        databases = ['employees', 'chinook', 'dvdrental', 'sports', 'lego']\n\n        for db_name in databases:\n            if not self._database_exists(db_name):\n                logger.info(f\"Setting up {db_name} database...\")\n\n                # Path to backup file\n                backup_dir = Path(__file__).parent.parent.parent.parent / \"postgres_state\"\n                backup_file = backup_dir / f\"{db_name}.backup\"\n\n                # Download backup if not exists\n                if not backup_file.exists():\n                    backup_dir.mkdir(parents=True, exist_ok=True)\n                    logger.info(f\"Downloading {db_name} backup...\")\n             
       try:\n                        import urllib.request\n                        urllib.request.urlretrieve(\n                            f'https://storage.mcpmark.ai/postgres/{db_name}.backup',\n                            str(backup_file)\n                        )\n                        logger.info(f\"{db_name} backup downloaded\")\n                    except Exception as e:\n                        logger.warning(f\"Failed to download {db_name} backup: {e}\")\n                        continue\n\n                # Create database\n                try:\n                    self._create_empty_database(db_name)\n                    logger.info(f\"Created {db_name} database\")\n                except Exception as e:\n                    logger.warning(f\"Failed to create {db_name} database: {e}\")\n                    continue\n\n                # Restore from backup\n                env = os.environ.copy()\n                env['PGPASSWORD'] = self.password\n\n                try:\n                    result = subprocess.run([\n                        'pg_restore',\n                        '-h', str(self.host),\n                        '-p', str(self.port),\n                        '-U', self.username,\n                        '-d', db_name,\n                        '-v',\n                        str(backup_file)\n                    ], env=env, capture_output=True, text=True)\n\n                    if result.returncode != 0 and \"ERROR\" in result.stderr:\n                        logger.warning(f\"pg_restore had errors for {db_name}: {result.stderr}\")\n                    else:\n                        logger.info(f\"{db_name} database restored successfully\")\n                except Exception as e:\n                    logger.warning(f\"Failed to restore {db_name} database: {e}\")\n            else:\n                logger.debug(f\"{db_name} database already exists\")\n\n    def _create_initial_state(self, task: BaseTask) -> Optional[InitialStateInfo]:\n     
   \"\"\"Create initial database state for a task.\"\"\"\n        try:\n            # Generate unique database name\n            db_name = f\"mcpmark_{task.category_id}_{task.task_id}_{self._get_timestamp()}\"\n\n            # Create database from template if exists, otherwise empty\n            if self._database_exists(task.category_id):\n                self._create_database_from_template(db_name, task.category_id)\n                logger.info(\n                    f\"| Created database '{db_name}' from template '{task.category_id}'\"\n                )\n            else:\n                self._create_empty_database(db_name)\n                logger.info(f\"| Created empty database '{db_name}'\")\n                # Run prepare_environment.py if it exists\n                self._run_prepare_environment(db_name, task)\n                logger.info(f\"| Prepared environment for database '{db_name}'\")\n\n            # Track for cleanup\n            self.created_databases.append(db_name)\n            self.track_resource(\"database\", db_name, {\"task\": task.name})\n\n\n            return InitialStateInfo(\n                state_id=db_name,\n                state_url=f\"postgresql://{self.username}@{self.host}:{self.port}/{db_name}\",\n                metadata={\n                    \"database\": db_name,\n                    \"category\": task.category_id,\n                    \"task_id\": task.task_id,\n                },\n            )\n\n        except Exception as e:\n            logger.error(f\"Failed to create initial state for {task.name}: {e}\")\n            return None\n\n    def _store_initial_state_info(\n        self, task: BaseTask, state_info: InitialStateInfo\n    ) -> None:\n        \"\"\"Store database info in task object.\"\"\"\n        if hasattr(task, \"__dict__\"):\n            task.database_name = state_info.state_id\n            task.database_url = state_info.state_url\n            # Store current task database for agent configuration\n           
 self._current_task_database = state_info.state_id\n\n    def _cleanup_task_initial_state(self, task: BaseTask) -> bool:\n        \"\"\"Clean up task database.\"\"\"\n        if hasattr(task, \"database_name\") and task.database_name:\n            try:\n                self._drop_database(task.database_name)\n                logger.info(f\"| Dropped database: {task.database_name}\")\n\n                # Remove from tracking\n                self.created_databases = [\n                    db for db in self.created_databases if db != task.database_name\n                ]\n                # Clear current task database\n                if self._current_task_database == task.database_name:\n                    self._current_task_database = None\n                return True\n            except Exception as e:\n                logger.error(f\"Failed to drop database {task.database_name}: {e}\")\n                return False\n        return True\n\n    def _cleanup_single_resource(self, resource: Dict[str, Any]) -> bool:\n        \"\"\"Clean up a single PostgreSQL resource.\"\"\"\n        if resource[\"type\"] == \"database\":\n            try:\n                self._drop_database(resource[\"id\"])\n                logger.info(f\"| Dropped database: {resource['id']}\")\n                return True\n            except Exception as e:\n                logger.error(f\"| Failed to drop database {resource['id']}: {e}\")\n                return False\n        return False\n\n    def _database_exists(self, db_name: str) -> bool:\n        \"\"\"Check if database exists.\"\"\"\n        conn = psycopg2.connect(**self.conn_params, database=\"postgres\")\n        try:\n            with conn.cursor() as cur:\n                cur.execute(\"SELECT 1 FROM pg_database WHERE datname = %s\", (db_name,))\n                return cur.fetchone() is not None\n        finally:\n            conn.close()\n\n    def _create_database_from_template(self, new_db: str, template_db: str):\n        
\"\"\"Create database from template.\"\"\"\n        conn = psycopg2.connect(**self.conn_params, database=\"postgres\")\n        conn.autocommit = True\n        try:\n            with conn.cursor() as cur:\n                cur.execute(\n                    sql.SQL(\"\"\"\n                    SELECT pg_terminate_backend(pid)\n                    FROM pg_stat_activity\n                    WHERE datname = %s AND pid <> pg_backend_pid()\n                \"\"\"),\n                    (template_db,),\n                )\n                cur.execute(\n                    sql.SQL(\"CREATE DATABASE {} WITH TEMPLATE {}\").format(\n                        sql.Identifier(new_db), sql.Identifier(template_db)\n                    )\n                )\n        finally:\n            conn.close()\n\n    def _create_empty_database(self, db_name: str):\n        \"\"\"Create empty database.\"\"\"\n        conn = psycopg2.connect(**self.conn_params, database=\"postgres\")\n        conn.autocommit = True\n        try:\n            with conn.cursor() as cur:\n                cur.execute(\n                    sql.SQL(\"CREATE DATABASE {}\").format(sql.Identifier(db_name))\n                )\n        finally:\n            conn.close()\n\n    def _drop_database(self, db_name: str):\n        \"\"\"Drop database.\"\"\"\n        conn = psycopg2.connect(**self.conn_params, database=\"postgres\")\n        conn.autocommit = True\n        try:\n            with conn.cursor() as cur:\n                # Terminate connections\n                cur.execute(\n                    sql.SQL(\"\"\"\n                    SELECT pg_terminate_backend(pid)\n                    FROM pg_stat_activity\n                    WHERE datname = %s AND pid <> pg_backend_pid()\n                \"\"\"),\n                    (db_name,),\n                )\n\n                # Drop database\n                cur.execute(\n                    sql.SQL(\"DROP DATABASE IF EXISTS {}\").format(\n                        
sql.Identifier(db_name)\n                    )\n                )\n        finally:\n            conn.close()\n\n    def _run_prepare_environment(self, db_name: str, task: BaseTask):\n        \"\"\"Run prepare_environment.py script if it exists in the task directory.\"\"\"\n        # Find the task directory containing prepare_environment.py\n        task_dir = task.task_instruction_path.parent\n        prepare_script = task_dir / \"prepare_environment.py\"\n\n        if not prepare_script.exists():\n            logger.debug(f\"No prepare_environment.py found for task {task.name}\")\n            return\n\n        logger.info(f\"| Running prepare_environment.py for task {task.name}\")\n\n        # Set up environment variables for the script\n        env = os.environ.copy()\n        env.update({\n            \"POSTGRES_HOST\": str(self.host),\n            \"POSTGRES_PORT\": str(self.port),\n            \"POSTGRES_DATABASE\": db_name,\n            \"POSTGRES_USERNAME\": self.username,\n            \"POSTGRES_PASSWORD\": self.password or \"\",\n        })\n\n        try:\n            # Run the prepare_environment.py script\n            result = subprocess.run(\n                [sys.executable, str(prepare_script)],\n                cwd=str(task_dir),  # Run from task directory to access data/ folder\n                env=env,\n                capture_output=True,\n                text=True,\n                timeout=300,  # 5 minute timeout\n            )\n\n            if result.returncode == 0:\n                logger.info(f\"| ✓ Environment preparation completed for {task.name}\")\n                if result.stdout.strip():\n                    logger.debug(f\"| prepare_environment.py output: {result.stdout}\")\n            else:\n                logger.error(f\"| ❌ Environment preparation failed for {task.name}\")\n                logger.error(f\"| Error output: {result.stderr}\")\n                raise RuntimeError(f\"prepare_environment.py failed with exit code 
{result.returncode}\")\n\n        except subprocess.TimeoutExpired:\n            logger.error(f\"❌ Environment preparation timed out for {task.name}\")\n            raise RuntimeError(\"prepare_environment.py execution timed out\")\n        except Exception as e:\n            logger.error(f\"❌ Failed to run prepare_environment.py for {task.name}: {e}\")\n            raise\n\n    def _setup_task_specific_data(self, db_name: str, task: BaseTask):\n        \"\"\"Set up task-specific schema and data.\"\"\"\n        conn = psycopg2.connect(**self.conn_params, database=db_name)\n        try:\n            with conn.cursor() as cur:\n                if task.category_id == \"basic_queries\":\n                    self._setup_basic_queries_data(cur)\n                elif task.category_id == \"data_manipulation\":\n                    self._setup_data_manipulation_data(cur)\n                elif task.category_id == \"table_operations\":\n                    self._setup_table_operations_data(cur)\n                # Add more categories as needed\n\n            conn.commit()\n        except Exception as e:\n            conn.rollback()\n            logger.error(f\"Failed to setup task data: {e}\")\n            raise\n        finally:\n            conn.close()\n\n    def _setup_basic_queries_data(self, cursor):\n        \"\"\"Set up data for basic query tasks.\"\"\"\n        cursor.execute(\"\"\"\n            CREATE TABLE employees (\n                id SERIAL PRIMARY KEY,\n                name VARCHAR(100) NOT NULL,\n                department VARCHAR(50),\n                salary DECIMAL(10, 2),\n                hire_date DATE\n            );\n\n            INSERT INTO employees (name, department, salary, hire_date) VALUES\n            ('John Doe', 'Engineering', 75000.00, '2020-01-15'),\n            ('Jane Smith', 'Marketing', 65000.00, '2019-03-22'),\n            ('Bob Johnson', 'Engineering', 80000.00, '2018-07-01'),\n            ('Alice Brown', 'HR', 55000.00, '2021-02-10');\n 
       \"\"\")\n\n    def _setup_data_manipulation_data(self, cursor):\n        \"\"\"Set up data for data manipulation tasks.\"\"\"\n        cursor.execute(\"\"\"\n            CREATE TABLE products (\n                id SERIAL PRIMARY KEY,\n                name VARCHAR(100) NOT NULL,\n                category VARCHAR(50),\n                price DECIMAL(10, 2),\n                stock INTEGER DEFAULT 0\n            );\n\n            CREATE TABLE orders (\n                id SERIAL PRIMARY KEY,\n                product_id INTEGER REFERENCES products(id),\n                quantity INTEGER NOT NULL,\n                order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n            );\n        \"\"\")\n\n    def _setup_table_operations_data(self, cursor):\n        \"\"\"Set up for table operation tasks.\"\"\"\n        # Start with minimal schema that tasks will modify\n        cursor.execute(\"\"\"\n            CREATE TABLE test_table (\n                id SERIAL PRIMARY KEY,\n                data VARCHAR(255)\n            );\n        \"\"\")\n\n    def _get_timestamp(self) -> str:\n        \"\"\"Get timestamp for unique naming.\"\"\"\n        from datetime import datetime\n\n        return datetime.now().strftime(\"%Y%m%d%H%M%S\")\n\n    def get_service_config_for_agent(self) -> dict:\n        \"\"\"Get configuration for agent execution.\"\"\"\n        config = {\n            \"host\": self.host,\n            \"port\": self.port,\n            \"username\": self.username,\n            \"password\": self.password,\n        }\n\n        # If there's a current task database, include it\n        if hasattr(self, \"_current_task_database\") and self._current_task_database:\n            config[\"current_database\"] = self._current_task_database\n            config[\"database_url\"] = (\n                f\"postgresql://{self.username}:{self.password}@{self.host}:{self.port}/{self._current_task_database}\"\n            )\n        else:\n            # Fallback to default database\n    
        config[\"database\"] = self.database\n            config[\"database_url\"] = (\n                f\"postgresql://{self.username}:{self.password}@{self.host}:{self.port}/{self.database}\"\n            )\n\n        return config\n"
  },
  {
    "path": "src/mcp_services/postgres/postgres_task_manager.py",
    "content": "\"\"\"\nPostgreSQL Task Manager for MCPMark\n====================================\n\nManages PostgreSQL task discovery, execution, and verification.\n\"\"\"\n\nimport os\nimport subprocess\nimport sys\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom src.base.task_manager import BaseTask, BaseTaskManager\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\n@dataclass\nclass PostgresTask(BaseTask):\n    \"\"\"PostgreSQL-specific task with database information.\"\"\"\n\n    task_name: str = \"\"\n    database_name: Optional[str] = None\n    database_url: Optional[str] = None\n    expected_queries: Optional[List[str]] = None\n    expected_tables: Optional[List[str]] = None\n\n\nclass PostgresTaskManager(BaseTaskManager):\n    \"\"\"Manages PostgreSQL tasks for MCPMark evaluation.\"\"\"\n\n    def __init__(self, tasks_root: Path = None, task_suite: str = \"standard\"):\n        \"\"\"Initialize PostgreSQL task manager.\n\n        Args:\n            tasks_root: Path to tasks directory\n            task_suite: Logical task suite (e.g., 'standard', 'easy')\n        \"\"\"\n        if tasks_root is None:\n            tasks_root = Path(__file__).resolve().parents[3] / \"tasks\"\n\n        super().__init__(\n            tasks_root,\n            mcp_service=\"postgres\",\n            task_class=PostgresTask,\n            task_organization=\"file\",  # PostgreSQL uses file-based tasks\n            task_suite=task_suite,\n        )\n\n    def _create_task_from_files(\n        self, category_id: str, task_files_info: Dict[str, Any]\n    ) -> Optional[PostgresTask]:\n        \"\"\"Instantiate a `PostgresTask` from the dictionary returned by `_find_task_files`.\"\"\"\n        import json\n        \n        # Check for meta.json\n        meta_path = task_files_info[\"instruction_path\"].parent / \"meta.json\"\n        final_category_id = category_id\n        task_id = 
task_files_info[\"task_id\"]\n        \n        if meta_path.exists():\n            try:\n                with open(meta_path, 'r') as f:\n                    meta_data = json.load(f)\n                    # Use values from meta.json if available\n                    final_category_id = meta_data.get(\"category_id\", category_id)\n                    task_id = meta_data.get(\"task_id\", task_id)\n            except Exception as e:\n                logger.warning(f\"Failed to load meta.json from {meta_path}: {e}\")\n\n        return PostgresTask(\n            task_instruction_path=task_files_info[\"instruction_path\"],\n            task_verification_path=task_files_info[\"verification_path\"],\n            service=\"postgres\",\n            category_id=final_category_id,\n            task_id=task_id,\n            task_name=task_files_info[\"task_id\"],\n        )\n\n    def _get_verification_command(self, task: PostgresTask) -> List[str]:\n        \"\"\"Get verification command with database info.\"\"\"\n        cmd = [sys.executable, str(task.task_verification_path)]\n\n        # Pass database name as argument if available\n        if task.database_name:\n            cmd.append(task.database_name)\n\n        return cmd\n\n    def run_verification(self, task: BaseTask) -> subprocess.CompletedProcess:\n        \"\"\"Run verification with PostgreSQL environment.\"\"\"\n        env = os.environ.copy()\n\n        # Pass database connection info to verification script\n        if hasattr(task, \"database_name\") and task.database_name:\n            env[\"POSTGRES_DATABASE\"] = task.database_name\n\n        if hasattr(task, \"database_url\") and task.database_url:\n            env[\"DATABASE_URL\"] = task.database_url\n\n        return subprocess.run(\n            self._get_verification_command(task),\n            capture_output=True,\n            text=True,\n            timeout=300,\n            env=env,\n        )\n\n    def _format_task_instruction(self, 
base_instruction: str) -> str:\n        \"\"\"Add PostgreSQL-specific instructions.\"\"\"\n        return (\n            base_instruction\n            + \"\\n\\nNote: Use PostgreSQL MCP tools to complete this task. The database connection is already configured.\"\n        )\n"
  },
  {
    "path": "src/mcp_services/supabase/__init__.py",
    "content": "\"\"\"Supabase MCP service integration for MCPMark.\"\"\"\n\nfrom .supabase_login_helper import SupabaseLoginHelper\nfrom .supabase_state_manager import SupabaseStateManager\nfrom .supabase_task_manager import SupabaseTaskManager\n\n__all__ = [\n    \"SupabaseLoginHelper\",\n    \"SupabaseStateManager\",\n    \"SupabaseTaskManager\",\n]\n"
  },
  {
    "path": "src/mcp_services/supabase/supabase_login_helper.py",
    "content": "\"\"\"\nSupabase Login Helper for MCPMark\n===================================\n\nHandles configuration and validation for Supabase MCP service.\n\"\"\"\n\nimport os\nfrom typing import Dict, Any, Optional\n\nfrom src.base.login_helper import BaseLoginHelper\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass SupabaseLoginHelper(BaseLoginHelper):\n    \"\"\"Login helper for Supabase MCP service.\n\n    Validates PostgREST API URL and API key configuration.\n    \"\"\"\n\n    def __init__(self):\n        super().__init__(\"supabase\")\n\n    def prepare_credentials(self) -> Dict[str, Any]:\n        \"\"\"Prepare credentials for Supabase/PostgREST connection.\n\n        Returns:\n            Dictionary containing api_url, api_key, and postgres connection details\n        \"\"\"\n        # Get PostgREST API configuration (from Supabase CLI)\n        api_url = os.getenv(\"SUPABASE_API_URL\", \"http://localhost:54321\")\n        api_key = os.getenv(\"SUPABASE_API_KEY\")\n\n        # Get PostgreSQL connection details (Supabase CLI defaults)\n        postgres_host = os.getenv(\"SUPABASE_DB_HOST\", \"localhost\")\n        postgres_port = int(os.getenv(\"SUPABASE_DB_PORT\", \"54322\"))\n        postgres_user = os.getenv(\"SUPABASE_DB_USER\", \"postgres\")\n        postgres_password = os.getenv(\"SUPABASE_DB_PASSWORD\", \"postgres\")\n        postgres_database = os.getenv(\"SUPABASE_DB_NAME\", \"postgres\")\n\n        if not api_key:\n            logger.warning(\n                \"SUPABASE_API_KEY not set.\\n\"\n                \"Run 'supabase status' to get your anon or service_role key.\\n\"\n                \"Set SUPABASE_API_KEY in your .mcp_env file.\"\n            )\n            # Try to get it from supabase status\n            api_key = self._get_key_from_supabase_status()\n\n        return {\n            \"api_url\": api_url,\n            \"api_key\": api_key or \"\",\n            \"postgres_host\": postgres_host,\n          
  \"postgres_port\": postgres_port,\n            \"postgres_user\": postgres_user,\n            \"postgres_password\": postgres_password,\n            \"postgres_database\": postgres_database,\n        }\n\n    def _get_key_from_supabase_status(self) -> Optional[str]:\n        \"\"\"Try to get anon key from supabase status command.\n\n        Returns:\n            Anon key if found, None otherwise\n        \"\"\"\n        import subprocess\n\n        try:\n            result = subprocess.run(\n                [\"supabase\", \"status\"],\n                capture_output=True,\n                text=True,\n                timeout=10,\n            )\n\n            if result.returncode == 0:\n                # Parse output for anon key\n                for line in result.stdout.split('\\n'):\n                    if 'anon key:' in line.lower():\n                        # Extract the key after the colon\n                        key = line.split(':', 1)[1].strip()\n                        logger.info(\"Found anon key from 'supabase status'\")\n                        return key\n\n        except (subprocess.SubprocessError, FileNotFoundError):\n            logger.debug(\"Could not run 'supabase status' to get anon key\")\n\n        return None\n\n    def test_credentials(self, credentials: Dict[str, Any]) -> bool:\n        \"\"\"Test if Supabase credentials are valid.\n\n        Args:\n            credentials: Dictionary with api_url, api_key, and postgres connection details\n\n        Returns:\n            True if credentials are valid\n        \"\"\"\n        import requests\n        import psycopg2\n\n        api_url = credentials[\"api_url\"]\n        api_key = credentials.get(\"api_key\", \"\")\n\n        # Test PostgreSQL connection\n        try:\n            conn_params = {\n                \"host\": credentials[\"postgres_host\"],\n                \"port\": credentials[\"postgres_port\"],\n                \"user\": credentials[\"postgres_user\"],\n                
\"password\": credentials[\"postgres_password\"],\n                \"database\": credentials[\"postgres_database\"],\n            }\n            conn = psycopg2.connect(**conn_params)\n            conn.close()\n            logger.info(\"✓ PostgreSQL connection successful\")\n        except Exception as e:\n            logger.error(f\"✗ PostgreSQL connection failed: {e}\")\n            return False\n\n        # Test PostgREST API connection (optional - may not be running yet)\n        try:\n            headers = {}\n            if api_key:\n                headers[\"apikey\"] = api_key\n                headers[\"Authorization\"] = f\"Bearer {api_key}\"\n\n            response = requests.get(api_url, headers=headers, timeout=5)\n\n            # Any response (including 404, 401) means the API is reachable\n            logger.info(f\"✓ PostgREST API reachable at {api_url} (status: {response.status_code})\")\n            return True\n\n        except requests.exceptions.ConnectionError:\n            logger.warning(\n                f\"⚠ PostgREST API not reachable at {api_url}.\\n\"\n                \"Make sure PostgREST is running (e.g., docker run -p 3000:3000 postgrest/postgrest)\\n\"\n                \"or use a cloud Supabase instance URL.\"\n            )\n            # Still return True as PostgreSQL connection works\n            return True\n        except Exception as e:\n            logger.warning(f\"⚠ PostgREST API test failed: {e}\")\n            # Still return True as PostgreSQL connection works\n            return True\n\n    def format_credentials_info(self, credentials: Dict[str, Any]) -> str:\n        \"\"\"Format credentials info for display.\n\n        Args:\n            credentials: Dictionary with connection details\n\n        Returns:\n            Formatted string describing the credentials\n        \"\"\"\n        api_url = credentials[\"api_url\"]\n        has_api_key = bool(credentials.get(\"api_key\"))\n        postgres_host = 
credentials[\"postgres_host\"]\n        postgres_db = credentials[\"postgres_database\"]\n\n        return (\n            f\"Supabase Configuration:\\n\"\n            f\"  API URL: {api_url}\\n\"\n            f\"  API Key: {'✓ Configured' if has_api_key else '✗ Not set'}\\n\"\n            f\"  PostgreSQL: {postgres_host}/{postgres_db}\"\n        )\n"
  },
  {
    "path": "src/mcp_services/supabase/supabase_state_manager.py",
    "content": "\"\"\"\nSupabase State Manager for MCPMark\n====================================\n\nManages database state for Supabase tasks using the same PostgreSQL backend\nas Insforge, but accessed via PostgREST/Supabase MCP server.\n\"\"\"\n\nimport os\nimport sys\nimport subprocess\nimport psycopg2\nfrom psycopg2 import sql\nfrom pathlib import Path\nfrom typing import Optional, Dict, Any, List\n\nfrom src.base.state_manager import BaseStateManager, InitialStateInfo\nfrom src.base.task_manager import BaseTask\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\nclass SupabaseStateManager(BaseStateManager):\n    \"\"\"Manages Supabase/PostgREST database state for task evaluation.\n\n    Uses the same PostgreSQL database as Insforge but exposes it via\n    PostgREST API for the Supabase MCP server to access.\n    \"\"\"\n\n    def __init__(\n        self,\n        api_url: str,\n        api_key: str,\n        postgres_host: str = \"localhost\",\n        postgres_port: int = 54322,  # Supabase CLI default port\n        postgres_user: str = \"postgres\",\n        postgres_password: str = \"postgres\",\n        postgres_database: str = \"postgres\",  # Supabase CLI default database\n    ):\n        \"\"\"Initialize Supabase state manager.\n\n        Args:\n            api_url: PostgREST API URL from Supabase CLI (default: http://localhost:54321)\n            api_key: API key from Supabase CLI (anon or service_role key)\n            postgres_host: PostgreSQL host for direct database operations\n            postgres_port: PostgreSQL port (Supabase CLI uses 54322)\n            postgres_user: PostgreSQL username\n            postgres_password: PostgreSQL password\n            postgres_database: Main PostgreSQL database name\n        \"\"\"\n        super().__init__(service_name=\"supabase\")\n\n        self.api_url = api_url.rstrip('/')\n        self.api_key = api_key\n\n        # PostgreSQL connection for state management (Supabase CLI 
instance)\n        self.postgres_host = postgres_host\n        self.postgres_port = postgres_port\n        self.postgres_user = postgres_user\n        self.postgres_password = postgres_password\n        self.postgres_database = postgres_database\n\n        # Track current task context for agent configuration\n        self._current_task_context: Optional[Dict[str, Any]] = None\n\n        # Validate connection on initialization\n        try:\n            self._test_connection()\n            logger.info(\"Supabase state manager initialized successfully\")\n        except Exception as e:\n            raise RuntimeError(f\"Supabase initialization failed: {e}\")\n\n        # Store baseline tables (system tables that exist before any tasks run)\n        self._baseline_tables = set(\n            (t['schema'], t['name']) for t in self._get_all_tables()\n        )\n        logger.debug(f\"Stored baseline: {len(self._baseline_tables)} tables\")\n\n    def _test_connection(self):\n        \"\"\"Test PostgreSQL connection.\"\"\"\n        try:\n            conn_params = {\n                \"host\": self.postgres_host,\n                \"port\": self.postgres_port,\n                \"user\": self.postgres_user,\n                \"password\": self.postgres_password,\n                \"database\": self.postgres_database,\n            }\n            conn = psycopg2.connect(**conn_params)\n            conn.close()\n            logger.debug(\"PostgreSQL connection test successful\")\n        except Exception as e:\n            raise RuntimeError(f\"Cannot connect to PostgreSQL: {e}\")\n\n    def _create_initial_state(self, task: BaseTask) -> Optional[InitialStateInfo]:\n        \"\"\"Create initial backend state for a task.\n\n        Restores from backup which may place tables in public or task-specific schema.\n\n        Args:\n            task: Task for which to create initial state\n\n        Returns:\n            InitialStateInfo object or None if creation failed\n        
\"\"\"\n        try:\n            # Generate unique state ID for this task run\n            state_id = f\"{task.category_id}_{task.task_id}_{self._get_timestamp()}\"\n            schema_name = task.category_id\n\n            logger.info(f\"| Creating initial state for Supabase task: {task.name}\")\n\n            # Drop schema first (cleanup from previous runs)\n            self._drop_schema(schema_name)\n\n            # Get list of existing tables before restore (to track what we create)\n            tables_before = self._get_all_tables()\n            logger.info(f\"| Tables before restore: {len(tables_before)}\")\n\n            # Note: Don't create schema here - pg_restore will create it from the backup\n\n            # Restore from backup if backup exists (may create tables in public or task schema)\n            if self._restore_from_backup(schema_name):\n                logger.info(f\"| ✓ Restored '{schema_name}' from backup\")\n            else:\n                logger.info(f\"| ○ No backup found for '{schema_name}'\")\n                # Run prepare_environment.py if it exists\n                task_prepared = self._run_prepare_environment(task)\n                if not task_prepared:\n                    logger.debug(f\"| No prepare_environment.py found for task {task.name}\")\n\n            # Get list of tables after restore (to track what we need to clean up)\n            tables_after = self._get_all_tables()\n\n            # Track ALL new tables created by the restore (compare before/after)\n            tables_before_set = {(t['schema'], t['name']) for t in tables_before}\n            created_tables = [\n                t for t in tables_after\n                if (t['schema'], t['name']) not in tables_before_set\n            ]\n\n            logger.info(f\"| Tracked {len(created_tables)} new tables for cleanup\")\n            for t in created_tables:\n                logger.debug(f\"|   - {t['schema']}.{t['name']}\")\n\n            # Track the task context 
including created tables\n            context = {\n                \"state_id\": state_id,\n                \"category_id\": task.category_id,\n                \"task_id\": task.task_id,\n                \"task_name\": task.name,\n                \"schema\": schema_name,\n                \"created_tables\": created_tables,\n            }\n\n            return InitialStateInfo(\n                state_id=state_id,\n                state_url=self.api_url,\n                metadata=context,\n            )\n\n        except Exception as e:\n            logger.error(f\"Failed to create initial state for {task.name}: {e}\")\n            return None\n\n    def _store_initial_state_info(\n        self, task: BaseTask, state_info: InitialStateInfo\n    ) -> None:\n        \"\"\"Store backend info in task object for agent access.\"\"\"\n        if hasattr(task, \"__dict__\"):\n            task.api_url = self.api_url\n            task.api_key = self.api_key\n            task.state_id = state_info.state_id\n\n            # Store current task context for agent configuration\n            self._current_task_context = state_info.metadata\n\n    def _cleanup_task_initial_state(self, task: BaseTask) -> bool:\n        \"\"\"Clean up task-specific resources.\n\n        Drops ALL tables created during task (both setup and agent-created)\n        by comparing against baseline.\n\n        Args:\n            task: Task whose initial state should be cleaned up\n\n        Returns:\n            True if cleanup successful\n        \"\"\"\n        try:\n            logger.info(f\"| Cleaning up initial state for task: {task.name}\")\n\n            if self._current_task_context:\n                schema_name = self._current_task_context.get(\"schema\")\n\n                # Get ALL current tables\n                all_current_tables = self._get_all_tables()\n\n                # Find tables to drop: anything not in baseline\n                tables_to_drop = [\n                    t for t in 
all_current_tables\n                    if (t['schema'], t['name']) not in self._baseline_tables\n                ]\n\n                logger.info(f\"| Found {len(tables_to_drop)} tables to clean up (setup + agent-created)\")\n\n                # Drop individual tables\n                for table_info in tables_to_drop:\n                    try:\n                        self._drop_table(table_info[\"schema\"], table_info[\"name\"])\n                        logger.debug(f\"| ✓ Dropped table: {table_info['schema']}.{table_info['name']}\")\n                    except Exception as e:\n                        logger.warning(f\"| Failed to drop table {table_info}: {e}\")\n\n                # Drop the task schema (may be empty if all tables were in public)\n                if schema_name:\n                    try:\n                        self._drop_schema(schema_name)\n                        logger.info(f\"| ✓ Dropped schema: {schema_name}\")\n                    except Exception as e:\n                        logger.warning(f\"| Failed to drop schema {schema_name}: {e}\")\n\n                # Clear task context\n                if self._current_task_context.get(\"task_name\") == task.name:\n                    self._current_task_context = None\n\n            logger.info(f\"| ✓ Initial state cleanup completed for {task.name}\")\n            return True\n\n        except Exception as e:\n            logger.error(f\"Failed to cleanup task initial state for {task.name}: {e}\")\n            return False\n\n    def _cleanup_single_resource(self, resource: Dict[str, Any]) -> bool:\n        \"\"\"Clean up a single tracked resource.\n\n        Args:\n            resource: Resource dictionary with type, id, and metadata\n\n        Returns:\n            True if cleanup successful\n        \"\"\"\n        resource_type = resource[\"type\"]\n        resource_id = resource[\"id\"]\n\n        logger.debug(f\"| Cleanup for {resource_type} {resource_id} (handled by task scripts)\")\n    
    return True\n\n    def _run_prepare_environment(self, task: BaseTask) -> bool:\n        \"\"\"Run prepare_environment.py script if it exists in the task directory.\n\n        The script should use database operations to set up required state.\n\n        Args:\n            task: Task for which to prepare environment\n\n        Returns:\n            True if script ran successfully, False if script doesn't exist\n        \"\"\"\n        task_dir = task.task_instruction_path.parent\n        prepare_script = task_dir / \"prepare_environment.py\"\n\n        if not prepare_script.exists():\n            logger.debug(f\"No prepare_environment.py found for task {task.name}\")\n            return False\n\n        logger.info(f\"| Running prepare_environment.py for task {task.name}\")\n\n        # Set up environment variables for the script\n        env = os.environ.copy()\n        env.update({\n            \"SUPABASE_API_URL\": self.api_url,\n            \"SUPABASE_API_KEY\": self.api_key,\n            \"POSTGRES_HOST\": self.postgres_host,\n            \"POSTGRES_PORT\": str(self.postgres_port),\n            \"POSTGRES_DATABASE\": self.postgres_database,\n            \"POSTGRES_USERNAME\": self.postgres_user,\n            \"POSTGRES_PASSWORD\": self.postgres_password,\n        })\n\n        try:\n            # Run the prepare_environment.py script\n            result = subprocess.run(\n                [sys.executable, str(prepare_script)],\n                cwd=str(task_dir),  # Run from task directory\n                env=env,\n                capture_output=True,\n                text=True,\n                timeout=300,  # 5 minute timeout\n            )\n\n            if result.returncode == 0:\n                logger.info(f\"| ✓ Environment preparation completed for {task.name}\")\n                if result.stdout.strip():\n                    logger.debug(f\"| prepare_environment.py output: {result.stdout}\")\n                return True\n            else:\n          
      logger.error(f\"| ✗ Environment preparation failed for {task.name}\")\n                logger.error(f\"| Error output: {result.stderr}\")\n                raise RuntimeError(f\"prepare_environment.py failed with exit code {result.returncode}\")\n\n        except subprocess.TimeoutExpired:\n            logger.error(f\"✗ Environment preparation timed out for {task.name}\")\n            raise RuntimeError(\"prepare_environment.py execution timed out\")\n        except Exception as e:\n            logger.error(f\"✗ Failed to run prepare_environment.py for {task.name}: {e}\")\n            raise\n\n    def _get_timestamp(self) -> str:\n        \"\"\"Get timestamp for unique naming.\"\"\"\n        from datetime import datetime\n        return datetime.now().strftime(\"%Y%m%d%H%M%S\")\n\n    def _drop_schema(self, schema_name: str) -> None:\n        \"\"\"Drop schema and all its contents.\"\"\"\n        conn_params = {\n            \"host\": self.postgres_host,\n            \"port\": self.postgres_port,\n            \"user\": self.postgres_user,\n            \"password\": self.postgres_password,\n            \"database\": self.postgres_database,\n        }\n\n        conn = psycopg2.connect(**conn_params)\n        conn.autocommit = True\n        try:\n            with conn.cursor() as cur:\n                cur.execute(\n                    sql.SQL(\"DROP SCHEMA IF EXISTS {} CASCADE\").format(\n                        sql.Identifier(schema_name)\n                    )\n                )\n                logger.debug(f\"| Dropped schema: {schema_name}\")\n        finally:\n            conn.close()\n\n    def _create_schema(self, schema_name: str) -> None:\n        \"\"\"Create empty schema.\"\"\"\n        conn_params = {\n            \"host\": self.postgres_host,\n            \"port\": self.postgres_port,\n            \"user\": self.postgres_user,\n            \"password\": self.postgres_password,\n            \"database\": self.postgres_database,\n        }\n\n        
conn = psycopg2.connect(**conn_params)\n        conn.autocommit = True\n        try:\n            with conn.cursor() as cur:\n                cur.execute(\n                    sql.SQL(\"CREATE SCHEMA {}\").format(sql.Identifier(schema_name))\n                )\n                logger.debug(f\"| Created schema: {schema_name}\")\n        finally:\n            conn.close()\n\n    def _get_all_tables(self) -> List[Dict[str, str]]:\n        \"\"\"Get list of all user tables.\n\n        Returns:\n            List of dicts with 'schema' and 'name' keys\n        \"\"\"\n        conn_params = {\n            \"host\": self.postgres_host,\n            \"port\": self.postgres_port,\n            \"user\": self.postgres_user,\n            \"password\": self.postgres_password,\n            \"database\": self.postgres_database,\n        }\n\n        conn = psycopg2.connect(**conn_params)\n        try:\n            with conn.cursor() as cur:\n                cur.execute(\"\"\"\n                    SELECT table_schema, table_name\n                    FROM information_schema.tables\n                    WHERE table_type = 'BASE TABLE'\n                    AND table_schema NOT IN ('information_schema', 'pg_catalog')\n                    AND table_schema NOT LIKE 'pg_%'\n                    AND table_name NOT LIKE '\\\\_%'\n                    ORDER BY table_schema, table_name\n                \"\"\")\n                rows = cur.fetchall()\n                return [{\"schema\": row[0], \"name\": row[1]} for row in rows]\n        finally:\n            conn.close()\n\n    def _drop_table(self, schema_name: str, table_name: str) -> None:\n        \"\"\"Drop a specific table or materialized view.\"\"\"\n        conn_params = {\n            \"host\": self.postgres_host,\n            \"port\": self.postgres_port,\n            \"user\": self.postgres_user,\n            \"password\": self.postgres_password,\n            \"database\": self.postgres_database,\n        }\n\n        conn = 
psycopg2.connect(**conn_params)\n        conn.autocommit = True\n        try:\n            with conn.cursor() as cur:\n                # Try dropping as table first\n                cur.execute(\n                    sql.SQL(\"DROP TABLE IF EXISTS {}.{} CASCADE\").format(\n                        sql.Identifier(schema_name),\n                        sql.Identifier(table_name)\n                    )\n                )\n                # Also try dropping as materialized view (in case agent created one)\n                cur.execute(\n                    sql.SQL(\"DROP MATERIALIZED VIEW IF EXISTS {}.{} CASCADE\").format(\n                        sql.Identifier(schema_name),\n                        sql.Identifier(table_name)\n                    )\n                )\n                logger.debug(f\"| Dropped table/view: {schema_name}.{table_name}\")\n        finally:\n            conn.close()\n\n    def _restore_from_backup(self, category_name: str) -> bool:\n        \"\"\"Restore from backup file.\n\n        Tables may be restored into public schema or category-specific schema\n        depending on how the backup was created.\n\n        Args:\n            category_name: Name of category (e.g., 'employees', 'chinook', 'lego')\n\n        Returns:\n            True if backup was restored, False if no backup exists\n        \"\"\"\n        # Path to backup file (same as used by Insforge/Postgres)\n        backup_dir = Path(__file__).parent.parent.parent.parent / \"postgres_state\"\n        backup_file = backup_dir / f\"{category_name}.backup\"\n\n        logger.debug(f\"| Looking for backup at: {backup_file}\")\n\n        if not backup_file.exists():\n            logger.info(f\"| ○ No backup file found: {backup_file}\")\n            return False\n\n        logger.info(f\"| Restoring {category_name} from backup...\")\n\n        # Set up environment for pg_restore\n        env = os.environ.copy()\n        env[\"PGPASSWORD\"] = self.postgres_password\n\n        try:\n        
    # Restore backup\n            result = subprocess.run(\n                [\n                    \"pg_restore\",\n                    \"-h\", self.postgres_host,\n                    \"-p\", str(self.postgres_port),\n                    \"-U\", self.postgres_user,\n                    \"-d\", self.postgres_database,\n                    \"-v\",\n                    str(backup_file),\n                ],\n                env=env,\n                capture_output=True,\n                text=True,\n                timeout=120,  # 2 minute timeout\n            )\n\n            if result.returncode != 0 and \"ERROR\" in result.stderr:\n                logger.warning(f\"| pg_restore had errors for {category_name}: {result.stderr}\")\n                return False\n\n            logger.info(f\"| ✓ {category_name} restored successfully\")\n            return True\n\n        except subprocess.TimeoutExpired:\n            logger.error(f\"| ✗ Restore timed out for {category_name}\")\n            return False\n        except Exception as e:\n            logger.error(f\"| ✗ Failed to restore {category_name}: {e}\")\n            return False\n\n    def get_service_config_for_agent(self) -> dict:\n        \"\"\"Get configuration for agent execution.\n\n        This configuration is passed to the agent/MCP server so it can\n        connect to the Supabase/PostgREST endpoint.\n\n        Returns:\n            Dictionary containing API URL and API key\n        \"\"\"\n        config = {\n            \"api_url\": self.api_url,\n            \"api_key\": self.api_key,\n            \"schema\": \"public\",  # Default schema for PostgREST\n        }\n\n        # Include current task context if available\n        if self._current_task_context:\n            config[\"task_context\"] = self._current_task_context\n            # If task uses a specific schema, include it\n            if self._current_task_context.get(\"schema\"):\n                config[\"schema\"] = 
self._current_task_context[\"schema\"]\n\n        return config\n\n    def set_verification_environment(self, messages_path: str = None) -> None:\n        \"\"\"Set environment variables needed for verification scripts.\n\n        Args:\n            messages_path: Optional path to messages.json file for verification\n        \"\"\"\n        os.environ[\"SUPABASE_API_URL\"] = self.api_url\n        os.environ[\"SUPABASE_API_KEY\"] = self.api_key\n\n        # Set PostgreSQL connection details for direct database verification\n        os.environ[\"POSTGRES_HOST\"] = self.postgres_host\n        os.environ[\"POSTGRES_PORT\"] = str(self.postgres_port)\n        os.environ[\"POSTGRES_DATABASE\"] = self.postgres_database\n        os.environ[\"POSTGRES_USERNAME\"] = self.postgres_user\n        os.environ[\"POSTGRES_PASSWORD\"] = self.postgres_password\n\n        if messages_path:\n            os.environ[\"MCP_MESSAGES\"] = str(messages_path)\n\n        logger.debug(\"Verification environment variables set for Supabase (including direct postgres access)\")\n"
  },
  {
    "path": "src/mcp_services/supabase/supabase_task_manager.py",
    "content": "\"\"\"\nSupabase Task Manager for MCPMark\n===================================\n\nManages Supabase task discovery, execution, and verification.\nReuses Postgres tasks but accesses them via PostgREST/Supabase MCP.\n\"\"\"\n\nimport os\nimport subprocess\nimport sys\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom src.base.task_manager import BaseTask, BaseTaskManager\nfrom src.logger import get_logger\n\nlogger = get_logger(__name__)\n\n\n@dataclass\nclass SupabaseTask(BaseTask):\n    \"\"\"Supabase-specific task with API information.\"\"\"\n\n    task_name: str = \"\"\n    api_url: Optional[str] = None\n    api_key: Optional[str] = None\n\n\nclass SupabaseTaskManager(BaseTaskManager):\n    \"\"\"Manages Supabase tasks for MCPMark evaluation.\n\n    Uses the same task structure as Postgres tasks but accessed via\n    PostgREST/Supabase MCP server.\n    \"\"\"\n\n    def __init__(self, tasks_root: Path = None):\n        \"\"\"Initialize Supabase task manager.\n\n        Args:\n            tasks_root: Path to tasks directory\n        \"\"\"\n        if tasks_root is None:\n            tasks_root = Path(__file__).resolve().parents[3] / \"tasks\"\n\n        super().__init__(\n            tasks_root,\n            mcp_service=\"supabase\",\n            task_class=SupabaseTask,\n            task_organization=\"file\",  # Supabase uses file-based tasks (like Postgres)\n        )\n\n    def _create_task_from_files(\n        self, category_id: str, task_files_info: Dict[str, Any]\n    ) -> Optional[SupabaseTask]:\n        \"\"\"Instantiate a `SupabaseTask` from the dictionary returned by `_find_task_files`.\"\"\"\n        import json\n\n        # Check for meta.json\n        meta_path = task_files_info[\"instruction_path\"].parent / \"meta.json\"\n        final_category_id = category_id\n        task_id = task_files_info[\"task_id\"]\n\n        if meta_path.exists():\n            try:\n          
      with open(meta_path, 'r') as f:\n                    meta_data = json.load(f)\n                    # Use values from meta.json if available\n                    final_category_id = meta_data.get(\"category_id\", category_id)\n                    task_id = meta_data.get(\"task_id\", task_id)\n            except Exception as e:\n                logger.warning(f\"Failed to load meta.json from {meta_path}: {e}\")\n\n        return SupabaseTask(\n            task_instruction_path=task_files_info[\"instruction_path\"],\n            task_verification_path=task_files_info[\"verification_path\"],\n            service=\"supabase\",\n            category_id=final_category_id,\n            task_id=task_id,\n            task_name=task_files_info[\"task_id\"],\n        )\n\n    def _get_verification_command(self, task: SupabaseTask) -> List[str]:\n        \"\"\"Get verification command with Supabase API info.\"\"\"\n        cmd = [sys.executable, str(task.task_verification_path)]\n        return cmd\n\n    def run_verification(self, task: BaseTask) -> subprocess.CompletedProcess:\n        \"\"\"Run verification with Supabase environment.\"\"\"\n        env = os.environ.copy()\n\n        # Pass Supabase connection info to verification script\n        if hasattr(task, \"api_url\") and task.api_url:\n            env[\"SUPABASE_API_URL\"] = task.api_url\n\n        if hasattr(task, \"api_key\") and task.api_key:\n            env[\"SUPABASE_API_KEY\"] = task.api_key\n\n        return subprocess.run(\n            self._get_verification_command(task),\n            capture_output=True,\n            text=True,\n            timeout=300,\n            env=env,\n        )\n\n    def _format_task_instruction(self, base_instruction: str) -> str:\n        \"\"\"Add Supabase-specific instructions.\"\"\"\n        return (\n            base_instruction\n            + \"\\n\\nNote: Use Supabase MCP tools (PostgREST) to complete this task. 
The API connection is already configured.\"\n        )\n"
  },
  {
    "path": "src/model_config.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nModel Configuration for MCPMark\n================================\n\nThis module provides configuration management for different LLM models,\nautomatically detecting the required API keys and base URLs based on the model name.\n\"\"\"\n\nimport os\nfrom typing import Dict, List\n\nfrom src.logger import get_logger\n\n# Initialize logger\nlogger = get_logger(__name__)\n\n\nclass ModelConfig:\n    \"\"\"\n    Configuration container for a specific model.\n    It loads the necessary API key and base URL from environment variables.\n    \"\"\"\n\n    # Model configuration mapping\n    MODEL_CONFIGS = {\n        # OpenAI models\n        \"gpt-4o\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENAI_API_KEY\",\n            \"litellm_input_model_name\": \"openai/gpt-4o\",\n        },\n        \"gpt-4.1\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENAI_API_KEY\",\n            \"litellm_input_model_name\": \"openai/gpt-4.1\",\n        },\n        \"gpt-4.1-mini\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENAI_API_KEY\",\n            \"litellm_input_model_name\": \"openai/gpt-4.1-mini\",\n        },\n        \"gpt-4.1-nano\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENAI_API_KEY\",\n            \"litellm_input_model_name\": \"openai/gpt-4.1-nano\",\n        },\n        \"gpt-5.2\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENAI_API_KEY\",\n            \"litellm_input_model_name\": \"openai/gpt-5.2\",\n        },\n        \"gpt-5\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENAI_API_KEY\",\n            \"litellm_input_model_name\": \"openai/gpt-5\",\n        },\n        \"gpt-5-mini\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENAI_API_KEY\",\n            \"litellm_input_model_name\": 
\"openai/gpt-5-mini\",\n        },\n        \"gpt-5-nano\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENAI_API_KEY\",\n            \"litellm_input_model_name\": \"openai/gpt-5-nano\",\n        },\n        \"o3\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENAI_API_KEY\",\n            \"litellm_input_model_name\": \"openai/o3\",\n        },\n        \"o4-mini\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENAI_API_KEY\",\n            \"litellm_input_model_name\": \"openai/o4-mini\",\n        },\n        \"gpt-oss-120b\": {\n            \"provider\": \"openai\",\n            \"api_key_var\": \"OPENROUTER_API_KEY\",\n            \"litellm_input_model_name\": \"openrouter/openai/gpt-oss-120b\",\n        },\n        # DeepSeek models\n        \"deepseek-v3.2-instruct\": {\n            \"provider\": \"deepseek\",\n            \"api_key_var\": \"DEEPSEEK_API_KEY\",\n            \"litellm_input_model_name\": \"deepseek/deepseek-chat\",\n        },\n        \"deepseek-v3.2-thinking\": {\n            \"provider\": \"deepseek\",\n            \"api_key_var\": \"DEEPSEEK_API_KEY\",\n            \"litellm_input_model_name\": \"deepseek/deepseek-reasoner\",\n        },\n        # Anthropic models\n        \"claude-3.7-sonnet\": {\n            \"provider\": \"anthropic\",\n            \"api_key_var\": \"ANTHROPIC_API_KEY\",\n            \"litellm_input_model_name\": \"anthropic/claude-3-7-sonnet-20250219\",\n        },\n        \"claude-sonnet-4\": {\n            \"provider\": \"anthropic\",\n            \"api_key_var\": \"ANTHROPIC_API_KEY\",\n            \"litellm_input_model_name\": \"anthropic/claude-sonnet-4-20250514\",\n        },\n        \"claude-sonnet-4.5\": {\n            \"provider\": \"anthropic\",\n            \"api_key_var\": \"ANTHROPIC_API_KEY\",\n            \"litellm_input_model_name\": \"anthropic/claude-sonnet-4-5-20250929\",\n        },\n        \"claude-opus-4\": 
{\n            \"provider\": \"anthropic\",\n            \"api_key_var\": \"ANTHROPIC_API_KEY\",\n            \"litellm_input_model_name\": \"anthropic/claude-opus-4-20250514\",\n        },\n        \"claude-opus-4.1\": {\n            \"provider\": \"anthropic\",\n            \"api_key_var\": \"ANTHROPIC_API_KEY\",\n            \"litellm_input_model_name\": \"anthropic/claude-opus-4-1-20250805\",\n        },\n        \"claude-opus-4.5\": {\n            \"provider\": \"anthropic\",\n            \"api_key_var\": \"ANTHROPIC_API_KEY\",\n            \"litellm_input_model_name\": \"anthropic/claude-opus-4-5-20251101\",\n        },\n        # Google models\n        \"gemini-2.5-pro\": {\n            \"provider\": \"google\",\n            \"api_key_var\": \"GEMINI_API_KEY\",\n            \"litellm_input_model_name\": \"gemini/gemini-2.5-pro\",\n        },\n        \"gemini-2.5-flash\": {\n            \"provider\": \"google\",\n            \"api_key_var\": \"GEMINI_API_KEY\",\n            \"litellm_input_model_name\": \"gemini/gemini-2.5-flash\",\n        },\n        \"gemini-3-pro\": {\n            \"provider\": \"google\",\n            \"api_key_var\": \"GEMINI_API_KEY\",\n            \"litellm_input_model_name\": \"gemini/gemini-3-pro-preview\",\n        },\n        # Moonshot models\n        \"kimi-k2-0711\": {\n            \"provider\": \"moonshot\",\n            \"api_key_var\": \"MOONSHOT_API_KEY\",\n            \"litellm_input_model_name\": \"moonshot/kimi-k2-0711-preview\",\n        },\n        \"kimi-k2-0905\": {\n            \"provider\": \"moonshot\",\n            \"api_key_var\": \"MOONSHOT_API_KEY\",\n            \"litellm_input_model_name\": \"moonshot/kimi-k2-0905-preview\",\n        },\n        \"kimi-k2-thinking\": {\n            \"provider\": \"moonshot\",\n            \"api_key_var\": \"OPENROUTER_API_KEY\",\n            \"litellm_input_model_name\": \"openrouter/moonshotai/kimi-k2-thinking\",\n        },\n        # Grok models\n        \"grok-4\": {\n  
          \"provider\": \"xai\",\n            \"api_key_var\": \"GROK_API_KEY\",\n            \"litellm_input_model_name\": \"xai/grok-4-0709\",\n        },\n        \"grok-code-fast-1\": {\n            \"provider\": \"xai\",\n            \"api_key_var\": \"GROK_API_KEY\",\n            \"litellm_input_model_name\": \"xai/grok-code-fast-1\",\n        },\n        # Qwen models\n        \"qwen-3-coder-plus\": {\n            \"provider\": \"qwen\",\n            \"api_key_var\": \"DASHSCOPE_API_KEY\",\n            \"litellm_input_model_name\": \"dashscope/qwen3-coder-plus\",\n        },\n        \"qwen-3-max\": {\n            \"provider\": \"qwen\",\n            \"api_key_var\": \"DASHSCOPE_API_KEY\",\n            \"litellm_input_model_name\": \"dashscope/qwen3-max-preview\",\n        },\n        # Zhipu\n        \"glm-4.5\": {\n            \"provider\": \"zhipu\",\n            \"api_key_var\": \"OPENROUTER_API_KEY\",\n            \"litellm_input_model_name\": \"openrouter/z-ai/glm-4.5\",\n        }\n    }\n\n    def __init__(self, model_name: str):\n        \"\"\"\n        Initializes the model configuration.\n\n        Args:\n            model_name: The name of the model (e.g., 'gpt-4o', 'deepseek-chat').\n\n        Raises:\n            ValueError: If the model is not supported or environment variables are missing.\n        \"\"\"\n        self.short_model_name = model_name\n        model_info = self._get_model_info(model_name)\n\n        # Load API key, base URL and LiteLLM model name from environment variables\n        if \"base_url_var\" in model_info:\n            self.base_url = os.getenv(model_info[\"base_url_var\"])\n        else:\n            self.base_url = None\n        \n        self.api_key = os.getenv(model_info[\"api_key_var\"])\n        if not self.api_key:\n            raise ValueError(\n                f\"Missing required environment variable: {model_info['api_key_var']}\"\n            )\n\n        self.litellm_input_model_name = 
model_info.get(\"litellm_input_model_name\", model_name)\n\n    def _get_model_info(self, model_name: str) -> Dict[str, str]:\n        \"\"\"\n        Retrieves the configuration details for a given model name.\n        For models missing from MODEL_CONFIGS, logs a warning and falls back to a\n        default OpenAI entry that names only OPENAI_API_KEY; the entry defines no\n        base_url_var, so __init__ leaves base_url as None.\n        \"\"\"\n        if model_name not in self.MODEL_CONFIGS:\n            logger.warning(\n                f\"Model '{model_name}' not in supported list. Using default OpenAI configuration.\"\n            )\n            # Return default configuration for unsupported models\n            return {\n                \"provider\": \"openai\",\n                \"api_key_var\": \"OPENAI_API_KEY\",\n                \"litellm_input_model_name\": model_name,\n            }\n        return self.MODEL_CONFIGS[model_name]\n\n    @classmethod\n    def get_supported_models(cls) -> List[str]:\n        \"\"\"Returns a list of all supported model names.\"\"\"\n        return list(cls.MODEL_CONFIGS.keys())\n\n\ndef main():\n    \"\"\"Example usage of the ModelConfig class.\"\"\"\n    logger.info(\"Supported models: %s\", ModelConfig.get_supported_models())\n\n    try:\n        # Example: Create a model config for DeepSeek\n        model_config = ModelConfig(\"deepseek-chat\")\n        logger.info(\"✅ DeepSeek model config created successfully.\")\n        logger.info(\"Short model name: %s\", model_config.short_model_name)\n        logger.info(\"API key loaded: %s\", bool(model_config.api_key))\n\n    except ValueError as e:\n        logger.error(\"⚠️  Configuration error: %s\", e)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "src/results_reporter.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nResults Reporter for MCPMark Evaluation Pipeline\n================================================\n\nThis module provides utilities for saving evaluation results in a structured format.\n\"\"\"\n\nimport json\nfrom dataclasses import dataclass\nfrom datetime import datetime\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom src.logger import get_logger\n\n# Initialize logger\nlogger = get_logger(__name__)\n\n\n@dataclass\nclass TaskResult:\n    \"\"\"\n    Represents the result of a single task evaluation.\n\n    Attributes:\n        task_name: The full name of the task (e.g., \"category_id__task_id\").\n        success: Whether the task completed successfully.\n        category_id: The task category ID.\n        task_id: The task identifier (number or slug).\n        error_message: Error message from agent execution if it failed.\n        verification_error: Error message from verification if it failed.\n        verification_output: Captured stdout from verification script.\n        model_output: Agent conversation trajectory (messages).\n        token_usage: Token usage statistics.\n        turn_count: Number of turns taken during task execution.\n        agent_execution_time: Time for Step 2 (agent execution) in seconds.\n        task_execution_time: Total time for Steps 1-4 in seconds.\n    \"\"\"\n\n    task_name: str\n    success: bool\n    category_id: Optional[str] = None\n    task_id: Optional[str] = None\n    error_message: Optional[str] = None  # Agent execution error\n    verification_error: Optional[str] = None  # Verification error (separate from agent error)\n    verification_output: Optional[str] = None  # Verification stdout/stderr\n    model_output: Optional[Any] = None  # Agent conversation trajectory\n    token_usage: Optional[Dict[str, int]] = None  # Token usage statistics\n    turn_count: Optional[int] = None  # Number of turns taken during task execution\n    
agent_execution_time: float = 0.0  # Time for Step 2 (agent execution) in seconds\n    task_execution_time: float = 0.0  # Total time for Steps 1-4 in seconds\n\n    @property\n    def status(self) -> str:\n        \"\"\"Returns the status of the task as 'PASS' or 'FAIL'.\"\"\"\n        return \"PASS\" if self.success else \"FAIL\"\n\n\n@dataclass\nclass EvaluationReport:\n    \"\"\"Represents a complete evaluation report for a model.\"\"\"\n\n    model_name: str\n    model_config: Dict[str, Any]\n    total_tasks: int\n    successful_tasks: int\n    failed_tasks: int\n    task_results: List[TaskResult]\n    tasks_filter: Optional[str] = None\n\n    @property\n    def success_rate(self) -> float:\n        \"\"\"Calculates the overall success rate as a percentage.\"\"\"\n        if self.total_tasks == 0:\n            return 0.0\n        return (self.successful_tasks / self.total_tasks) * 100\n\n    @property\n    def total_input_tokens(self) -> int:\n        \"\"\"Calculate total input tokens across all tasks.\"\"\"\n        total = 0\n        for result in self.task_results:\n            if result.token_usage:\n                total += (result.token_usage.get(\"input_tokens\") or 0)\n        return total\n\n    @property\n    def total_output_tokens(self) -> int:\n        \"\"\"Calculate total output tokens across all tasks.\"\"\"\n        total = 0\n        for result in self.task_results:\n            if result.token_usage:\n                total += (result.token_usage.get(\"output_tokens\") or 0)\n        return total\n\n    @property\n    def total_tokens(self) -> int:\n        \"\"\"Calculate total tokens across all tasks.\"\"\"\n        total = 0\n        for result in self.task_results:\n            if result.token_usage:\n                total += (result.token_usage.get(\"total_tokens\") or 0)\n        return total\n    \n    @property\n    def total_reasoning_tokens(self) -> int:\n        \"\"\"Calculate total reasoning tokens across all tasks.\"\"\"\n      
  total = 0\n        for result in self.task_results:\n            if result.token_usage:\n                total += (result.token_usage.get(\"reasoning_tokens\") or 0)\n        return total\n\n    @property\n    def avg_input_tokens(self) -> float:\n        \"\"\"Calculate average input tokens per task.\"\"\"\n        if self.total_tasks == 0:\n            return 0.0\n        return self.total_input_tokens / self.total_tasks\n\n    @property\n    def avg_output_tokens(self) -> float:\n        \"\"\"Calculate average output tokens per task.\"\"\"\n        if self.total_tasks == 0:\n            return 0.0\n        return self.total_output_tokens / self.total_tasks\n\n    @property\n    def avg_total_tokens(self) -> float:\n        \"\"\"Calculate average total tokens per task.\"\"\"\n        if self.total_tasks == 0:\n            return 0.0\n        return self.total_tokens / self.total_tasks\n    \n    @property\n    def avg_reasoning_tokens(self) -> float:\n        \"\"\"Calculate average reasoning tokens per task.\"\"\"\n        if self.total_tasks == 0:\n            return 0.0\n        return self.total_reasoning_tokens / self.total_tasks\n\n    @property\n    def total_task_execution_time(self) -> float:\n        \"\"\"Calculates the total task execution time from sum of all task execution times.\"\"\"\n        # Use sum of individual task execution times instead of pipeline wall clock time\n        # This ensures resume functionality shows correct total time\n        return sum(task.task_execution_time for task in self.task_results)\n    \n    @property\n    def total_agent_execution_time(self) -> float:\n        \"\"\"Calculates the total agent execution time (Step 2) across all tasks.\"\"\"\n        return sum(task.agent_execution_time for task in self.task_results)\n\n    def get_category_stats(self) -> Dict[str, Dict[str, Any]]:\n        \"\"\"\n        Calculates and returns success statistics grouped by task category.\n        \"\"\"\n        
category_stats = {}\n\n        for result in self.task_results:\n            category = result.category_id or \"Uncategorized\"\n            if category not in category_stats:\n                category_stats[category] = {\n                    \"total\": 0,\n                    \"successful\": 0,\n                    \"failed\": 0,\n                    \"success_rate\": 0.0,\n                    \"avg_execution_time\": 0.0,\n                    \"avg_agent_execution_time\": 0.0,\n                    \"total_input_tokens\": 0,\n                    \"total_output_tokens\": 0,\n                    \"total_tokens\": 0,\n                    \"total_reasoning_tokens\": 0,\n                    \"avg_input_tokens\": 0.0,\n                    \"avg_output_tokens\": 0.0,\n                    \"avg_total_tokens\": 0.0,\n                    \"avg_reasoning_tokens\": 0.0,\n                    \"total_turns\": 0,\n                    \"avg_turns\": 0.0,\n                }\n\n            category_stats[category][\"total\"] += 1\n            if result.success:\n                category_stats[category][\"successful\"] += 1\n            else:\n                category_stats[category][\"failed\"] += 1\n\n            # Add token and turn usage\n            if result.token_usage:\n                category_stats[category][\"total_input_tokens\"] += (\n                    result.token_usage.get(\"input_tokens\") or 0\n                )\n                category_stats[category][\"total_output_tokens\"] += (\n                    result.token_usage.get(\"output_tokens\") or 0\n                )\n                category_stats[category][\"total_tokens\"] += (\n                    result.token_usage.get(\"total_tokens\") or 0\n                )\n                category_stats[category][\"total_reasoning_tokens\"] += result.token_usage.get(\n                    \"reasoning_tokens\", 0\n                ) or 0\n\n            # Accumulate turns\n            if result.turn_count is not None:\n      
          category_stats[category][\"total_turns\"] += result.turn_count\n\n        # Calculate derived metrics like success rate and average time\n        for category, stats in category_stats.items():\n            if stats[\"total\"] > 0:\n                stats[\"success_rate\"] = (stats[\"successful\"] / stats[\"total\"]) * 100\n                category_results = [\n                    r\n                    for r in self.task_results\n                    if (r.category_id or \"Uncategorized\") == category\n                ]\n                total_time = sum(r.task_execution_time for r in category_results)\n                stats[\"avg_execution_time\"] = total_time / len(category_results)\n                \n                # Add agent execution time stats\n                total_agent_time = sum(r.agent_execution_time for r in category_results)\n                stats[\"avg_agent_execution_time\"] = total_agent_time / len(category_results)\n\n                # Calculate average tokens and turns\n                stats[\"avg_input_tokens\"] = stats[\"total_input_tokens\"] / stats[\"total\"]\n                stats[\"avg_output_tokens\"] = (\n                    stats[\"total_output_tokens\"] / stats[\"total\"]\n                )\n                stats[\"avg_total_tokens\"] = stats[\"total_tokens\"] / stats[\"total\"]\n                stats[\"avg_reasoning_tokens\"] = stats[\"total_reasoning_tokens\"] / stats[\"total\"]\n\n                stats[\"avg_turns\"] = (\n                    stats[\"total_turns\"] / stats[\"total\"] if stats[\"total\"] > 0 else 0\n                )\n\n        return category_stats\n\n\nclass ResultsReporter:\n    \"\"\"Handles saving evaluation results in structured formats.\"\"\"\n\n    def __init__(self):\n        \"\"\"Initialize the results reporter.\"\"\"\n        pass\n\n    def save_messages_json(self, messages: Any, output_path: Path) -> Path:\n        \"\"\"Saves the conversation messages/trajectory as messages.json.\"\"\"\n        
output_path.parent.mkdir(parents=True, exist_ok=True)\n        with output_path.open(\"w\", encoding=\"utf-8\") as f:\n            json.dump(messages, f, indent=2, ensure_ascii=False)\n        return output_path\n\n    def save_meta_json(\n        self,\n        task_result: TaskResult,\n        model_config: Dict[str, Any],\n        start_time: datetime,\n        end_time: datetime,\n        output_path: Path,\n    ) -> Path:\n        \"\"\"Saves task metadata (excluding messages) as meta.json.\"\"\"\n        output_path.parent.mkdir(parents=True, exist_ok=True)\n\n        meta_data = {\n            \"task_name\": task_result.task_name,\n            \"model_name\": model_config.get(\"model_name\", \"unknown\"),\n            \"litellm_run_model_name\": model_config.get(\"litellm_run_model_name\"),\n            \"reasoning_effort\": model_config.get(\"reasoning_effort\"),\n            \"mcp\": model_config.get(\"mcp_service\", \"unknown\"),\n            \"timeout\": model_config.get(\"timeout\", 300),\n            \"time\": {\"start\": start_time.isoformat(), \"end\": end_time.isoformat()},\n            \"agent_execution_time\": task_result.agent_execution_time,\n            \"task_execution_time\": task_result.task_execution_time,\n            \"execution_result\": {\n                \"success\": task_result.success,\n                \"error_message\": task_result.error_message,\n                \"verification_error\": task_result.verification_error,\n                \"verification_output\": task_result.verification_output,\n            },\n            \"token_usage\": task_result.token_usage or {},\n            \"turn_count\": task_result.turn_count,\n        }\n\n        with output_path.open(\"w\", encoding=\"utf-8\") as f:\n            json.dump(meta_data, f, indent=2, ensure_ascii=False)\n        return output_path\n\n    def save_model_summary(self, report: EvaluationReport, output_path: Path) -> Path:\n        \"\"\"Saves a concise model-level 
summary.\"\"\"\n        output_path.parent.mkdir(parents=True, exist_ok=True)\n\n        category_stats = report.get_category_stats()\n\n        # Aggregate turn counts using category_stats\n        total_turns = sum(stats[\"total_turns\"] for stats in category_stats.values())\n        avg_turns = total_turns / report.total_tasks if report.total_tasks > 0 else 0\n\n        summary = {\n            \"model_name\": report.model_name,\n            \"model_config\": report.model_config,\n            \"total_tasks\": report.total_tasks,\n            \"successful_tasks\": report.successful_tasks,\n            \"failed_tasks\": report.failed_tasks,\n            \"success_rate\": round(report.success_rate, 2),\n            \"total_task_execution_time\": report.total_task_execution_time,\n            \"average_task_execution_time\": report.total_task_execution_time / report.total_tasks\n            if report.total_tasks > 0\n            else 0,\n            \"total_agent_execution_time\": report.total_agent_execution_time,\n            \"average_agent_execution_time\": report.total_agent_execution_time / report.total_tasks\n            if report.total_tasks > 0\n            else 0,\n            \"token_usage\": {\n                \"total_input_tokens\": report.total_input_tokens,\n                \"total_output_tokens\": report.total_output_tokens,\n                \"total_tokens\": report.total_tokens,\n                \"total_reasoning_tokens\": report.total_reasoning_tokens,\n                \"avg_input_tokens\": round(report.avg_input_tokens, 2),\n                \"avg_output_tokens\": round(report.avg_output_tokens, 2),\n                \"avg_total_tokens\": round(report.avg_total_tokens, 2),\n                \"avg_reasoning_tokens\": round(report.avg_reasoning_tokens, 2),\n            },\n            \"turn_usage\": {\n                \"total_turns\": total_turns,\n                \"avg_turns\": round(avg_turns, 2),\n            },\n            \"category_breakdown\": 
{\n                category: {\n                    \"total\": stats[\"total\"],\n                    \"success_rate\": round(stats[\"success_rate\"], 2),\n                    \"avg_time\": round(stats[\"avg_execution_time\"], 2),\n                    \"token_usage\": {\n                        \"total_input\": stats[\"total_input_tokens\"],\n                        \"total_output\": stats[\"total_output_tokens\"],\n                        \"total\": stats[\"total_tokens\"],\n                        \"total_reasoning\": stats[\"total_reasoning_tokens\"],\n                        \"avg_input\": round(stats[\"avg_input_tokens\"], 2),\n                        \"avg_output\": round(stats[\"avg_output_tokens\"], 2),\n                        \"avg_total\": round(stats[\"avg_total_tokens\"], 2),\n                        \"avg_reasoning\": round(stats[\"avg_reasoning_tokens\"], 2),\n                    },\n                    \"turn_usage\": {\n                        \"total_turns\": stats[\"total_turns\"],\n                        \"avg_turns\": round(stats[\"avg_turns\"], 2),\n                    },\n                }\n                for category, stats in category_stats.items()\n            },\n        }\n\n        with output_path.open(\"w\", encoding=\"utf-8\") as f:\n            json.dump(summary, f, indent=2, ensure_ascii=False)\n        return output_path\n"
  },
  {
    "path": "src/services.py",
    "content": "\"\"\"\nService Definitions for MCPMark\n================================\n\nSingle source of truth for all MCP service configurations.\nAdding a new service only requires modifying this file.\n\nNote: Environment variables are already loaded from .mcp_env when the app starts,\nso we can reference them directly via the config system.\n\nMCP server creation is now handled entirely within src.agent.MCPAgent; therefore,\nthe legacy \"mcp_server\" and \"eval_config\" entries in each service definition are\ndeprecated and set to None for backward-compatibility.\n\"\"\"\n\n# Service definitions\nSERVICES = {\n    \"notion\": {\n        \"config_schema\": {\n            \"source_api_key\": {\n                \"env_var\": \"SOURCE_NOTION_API_KEY\",\n                \"required\": True,\n                \"description\": \"Notion API key for source hub with templates\",\n            },\n            \"eval_api_key\": {\n                \"env_var\": \"EVAL_NOTION_API_KEY\",\n                \"required\": True,\n                \"description\": \"Notion API key for evaluation hub\",\n            },\n            \"source_parent_page_title\": {\n                \"env_var\": \"SOURCE_PARENT_PAGE_TITLE\",\n                \"default\": \"MCPMark Source Hub\",\n                \"required\": False,\n                \"description\": \"Title of the source hub page that contains all initial states\",\n            },\n            \"eval_parent_page_title\": {\n                \"env_var\": \"EVAL_PARENT_PAGE_TITLE\",\n                \"required\": True,\n                \"description\": \"Title of the parent page in evaluation workspace\",\n            },\n            \"playwright_headless\": {\n                \"env_var\": \"PLAYWRIGHT_HEADLESS\",\n                \"default\": True,\n                \"required\": False,\n                \"description\": \"Run browser in headless mode\",\n                \"transform\": \"bool\",  # Will be handled by GenericConfigSchema\n    
        },\n            \"playwright_browser\": {\n                \"env_var\": \"PLAYWRIGHT_BROWSER\",\n                \"default\": \"firefox\",\n                \"required\": False,\n                \"description\": \"Browser to use for Playwright\",\n                \"validator\": \"in:chromium,firefox,webkit\",  # Simple validator syntax\n            },\n        },\n        \"components\": {\n            \"task_manager\": \"src.mcp_services.notion.notion_task_manager.NotionTaskManager\",\n            \"state_manager\": \"src.mcp_services.notion.notion_state_manager.NotionStateManager\",\n            \"login_helper\": \"src.mcp_services.notion.notion_login_helper.NotionLoginHelper\",\n        },\n        \"config_mapping\": {\n            # Maps config schema keys to class constructor parameters\n            \"state_manager\": {\n                \"source_notion_key\": \"source_api_key\",\n                \"eval_notion_key\": \"eval_api_key\",\n                \"headless\": \"playwright_headless\",\n                \"browser\": \"playwright_browser\",\n                \"source_parent_page_title\": \"source_parent_page_title\",\n                \"eval_parent_page_title\": \"eval_parent_page_title\",\n            },\n            \"login_helper\": {\n                \"headless\": \"playwright_headless\",\n                \"browser\": \"playwright_browser\",\n            },\n        },\n        # MCP server is now instantiated dynamically in MCPAgent; kept for backward\n        # compatibility but set to None to indicate deprecation.\n        \"mcp_server\": None,\n        \"eval_config\": None,\n    },\n    \"github\": {\n        \"config_schema\": {\n            \"github_tokens\": {\n                \"env_var\": \"GITHUB_TOKENS\",\n                \"required\": True,\n                \"description\": \"GitHub personal access token(s) - comma-separated for round-robin\",\n                \"transform\": \"list\",  # Will split by comma\n            },\n            # 
Evaluation organisation / user that hosts ephemeral test repositories\n            \"eval_org\": {\n                \"env_var\": \"GITHUB_EVAL_ORG\",\n                \"default\": \"mcpleague-eval\",\n                \"required\": False,\n                \"description\": \"Evaluation organisation or user for creating temporary test repositories\",\n            },\n            # (source_org removed – template repos now imported from local files)\n        },\n        \"components\": {\n            \"task_manager\": \"src.mcp_services.github.github_task_manager.GitHubTaskManager\",\n            \"state_manager\": \"src.mcp_services.github.github_state_manager.GitHubStateManager\",\n            \"login_helper\": \"src.mcp_services.github.github_login_helper.GitHubLoginHelper\",\n        },\n        \"config_mapping\": {\n            \"state_manager\": {\n                \"github_token\": \"github_tokens\",\n                \"eval_org\": \"eval_org\",\n            },\n            \"login_helper\": {\n                # Login helper needs a single token, we'll use the first one\n                \"token\": \"github_tokens\",\n            },\n        },\n        \"mcp_server\": None,\n        \"eval_config\": None,\n    },\n    \"filesystem\": {\n        \"config_schema\": {\n            \"test_root\": {\n                \"env_var\": \"FILESYSTEM_TEST_ROOT\",\n                \"default\": None,\n                \"required\": False,\n                \"description\": \"Root directory for filesystem tests\",\n                \"transform\": \"path\",  # Convert to Path object\n            },\n            \"cleanup_on_exit\": {\n                \"env_var\": \"FILESYSTEM_CLEANUP\",\n                \"default\": True,\n                \"required\": False,\n                \"description\": \"Clean up test directories after tasks\",\n                \"transform\": \"bool\",\n            },\n        },\n        \"components\": {\n            \"task_manager\": 
\"src.mcp_services.filesystem.filesystem_task_manager.FilesystemTaskManager\",\n            \"state_manager\": \"src.mcp_services.filesystem.filesystem_state_manager.FilesystemStateManager\",\n            \"login_helper\": \"src.mcp_services.filesystem.filesystem_login_helper.FilesystemLoginHelper\",\n        },\n        \"config_mapping\": {\n            \"state_manager\": {\n                \"test_root\": \"test_root\",\n                \"cleanup_on_exit\": \"cleanup_on_exit\",\n            }\n        },\n        \"mcp_server\": None,\n        \"eval_config\": None,\n    },\n    \"playwright\": {\n        \"config_schema\": {\n            \"browser\": {\n                \"env_var\": \"PLAYWRIGHT_BROWSER\",\n                \"default\": \"chromium\",\n                \"required\": False,\n                \"description\": \"Browser to use (chromium, firefox, webkit)\",\n                \"validator\": \"in:chromium,firefox,webkit\",\n            },\n            \"headless\": {\n                \"env_var\": \"PLAYWRIGHT_HEADLESS\",\n                \"default\": True,\n                \"required\": False,\n                \"description\": \"Run browser in headless mode\",\n                \"transform\": \"bool\",\n            },\n            \"network_origins\": {\n                \"env_var\": \"PLAYWRIGHT_NETWORK_ORIGINS\",\n                \"default\": \"*\",\n                \"required\": False,\n                \"description\": \"Allowed network origins (comma-separated or *)\",\n            },\n            \"user_profile\": {\n                \"env_var\": \"PLAYWRIGHT_USER_PROFILE\",\n                \"default\": \"isolated\",\n                \"required\": False,\n                \"description\": \"User profile type (isolated or persistent)\",\n                \"validator\": \"in:isolated,persistent\",\n            },\n            \"viewport_width\": {\n                \"env_var\": \"PLAYWRIGHT_VIEWPORT_WIDTH\",\n                \"default\": 1280,\n              
  \"required\": False,\n                \"description\": \"Browser viewport width\",\n                \"transform\": \"int\",\n            },\n            \"viewport_height\": {\n                \"env_var\": \"PLAYWRIGHT_VIEWPORT_HEIGHT\",\n                \"default\": 720,\n                \"required\": False,\n                \"description\": \"Browser viewport height\",\n                \"transform\": \"int\",\n            },\n        },\n        \"components\": {\n            \"task_manager\": \"src.mcp_services.playwright.playwright_task_manager.PlaywrightTaskManager\",\n            \"state_manager\": \"src.mcp_services.playwright.playwright_state_manager.PlaywrightStateManager\",\n            \"login_helper\": \"src.mcp_services.playwright.playwright_login_helper.PlaywrightLoginHelper\",\n        },\n        \"config_mapping\": {\n            \"state_manager\": {\n                \"browser\": \"browser\",\n                \"headless\": \"headless\",\n                \"network_origins\": \"network_origins\",\n                \"user_profile\": \"user_profile\",\n                \"viewport_width\": \"viewport_width\",\n                \"viewport_height\": \"viewport_height\",\n            },\n            \"login_helper\": {\n                \"browser\": \"browser\",\n                \"headless\": \"headless\",\n            },\n        },\n        \"mcp_server\": None,\n        \"eval_config\": None,\n    },\n    \"postgres\": {\n        \"config_schema\": {\n            \"host\": {\n                \"env_var\": \"POSTGRES_HOST\",\n                \"default\": \"localhost\",\n                \"required\": False,\n                \"description\": \"PostgreSQL server host\",\n            },\n            \"port\": {\n                \"env_var\": \"POSTGRES_PORT\",\n                \"default\": 5432,\n                \"required\": False,\n                \"description\": \"PostgreSQL server port\",\n                \"transform\": \"int\",\n                
\"validator\": \"port\",  # Validates port range 1-65535\n            },\n            \"database\": {\n                \"env_var\": \"POSTGRES_DATABASE\",\n                \"default\": \"postgres\",\n                \"required\": False,\n                \"description\": \"PostgreSQL database name\",\n            },\n            \"username\": {\n                \"env_var\": \"POSTGRES_USERNAME\",\n                \"default\": \"postgres\",\n                \"required\": False,\n                \"description\": \"PostgreSQL username\",\n            },\n            \"password\": {\n                \"env_var\": \"POSTGRES_PASSWORD\",\n                \"default\": \"password\",\n                \"required\": False,\n                \"description\": \"PostgreSQL password\",\n            },\n        },\n        \"components\": {\n            \"task_manager\": \"src.mcp_services.postgres.postgres_task_manager.PostgresTaskManager\",\n            \"state_manager\": \"src.mcp_services.postgres.postgres_state_manager.PostgresStateManager\",\n            \"login_helper\": \"src.mcp_services.postgres.postgres_login_helper.PostgresLoginHelper\",\n        },\n        \"config_mapping\": {\n            \"state_manager\": {\n                \"host\": \"host\",\n                \"port\": \"port\",\n                \"database\": \"database\",\n                \"username\": \"username\",\n                \"password\": \"password\",\n            },\n            \"login_helper\": {\n                \"host\": \"host\",\n                \"port\": \"port\",\n                \"database\": \"database\",\n                \"username\": \"username\",\n                \"password\": \"password\",\n            },\n        },\n        \"mcp_server\": None,\n        \"eval_config\": None,\n    },\n    \"insforge\": {\n        \"config_schema\": {\n            \"api_key\": {\n                \"env_var\": \"INSFORGE_API_KEY\",\n                \"required\": True,\n                \"description\": 
\"Insforge backend API key for authentication\",\n            },\n            \"backend_url\": {\n                \"env_var\": \"INSFORGE_BACKEND_URL\",\n                \"required\": True,\n                \"description\": \"Insforge backend URL (e.g., https://your-app.insforge.app)\",\n            },\n        },\n        \"components\": {\n            \"task_manager\": \"src.mcp_services.insforge.insforge_task_manager.InsforgeTaskManager\",\n            \"state_manager\": \"src.mcp_services.insforge.insforge_state_manager.InsforgeStateManager\",\n            \"login_helper\": \"src.mcp_services.insforge.insforge_login_helper.InsforgeLoginHelper\",\n        },\n        \"config_mapping\": {\n            \"state_manager\": {\n                \"api_key\": \"api_key\",\n                \"backend_url\": \"backend_url\",\n            },\n            \"login_helper\": {\n                \"api_key\": \"api_key\",\n                \"backend_url\": \"backend_url\",\n            },\n        },\n        \"mcp_server\": None,\n        \"eval_config\": None,\n    },\n    \"supabase\": {\n        \"config_schema\": {\n            \"api_url\": {\n                \"env_var\": \"SUPABASE_API_URL\",\n                \"required\": False,\n                \"description\": \"Supabase PostgREST API URL (default: http://localhost:54321 from CLI)\",\n                \"default\": \"http://localhost:54321\",\n            },\n            \"api_key\": {\n                \"env_var\": \"SUPABASE_API_KEY\",\n                \"required\": False,\n                \"description\": \"Supabase API key (anon or service_role key from 'supabase status')\",\n            },\n            \"postgres_host\": {\n                \"env_var\": \"SUPABASE_DB_HOST\",\n                \"required\": False,\n                \"description\": \"PostgreSQL host for Supabase CLI instance\",\n                \"default\": \"localhost\",\n            },\n            \"postgres_port\": {\n                \"env_var\": 
\"SUPABASE_DB_PORT\",\n                \"required\": False,\n                \"description\": \"PostgreSQL port for Supabase CLI instance (default: 54322)\",\n                \"default\": 54322,\n            },\n            \"postgres_user\": {\n                \"env_var\": \"SUPABASE_DB_USER\",\n                \"required\": False,\n                \"description\": \"PostgreSQL username\",\n                \"default\": \"postgres\",\n            },\n            \"postgres_password\": {\n                \"env_var\": \"SUPABASE_DB_PASSWORD\",\n                \"required\": False,\n                \"description\": \"PostgreSQL password\",\n                \"default\": \"postgres\",\n            },\n            \"postgres_database\": {\n                \"env_var\": \"SUPABASE_DB_NAME\",\n                \"required\": False,\n                \"description\": \"PostgreSQL database name\",\n                \"default\": \"postgres\",\n            },\n        },\n        \"components\": {\n            \"task_manager\": \"src.mcp_services.supabase.supabase_task_manager.SupabaseTaskManager\",\n            \"state_manager\": \"src.mcp_services.supabase.supabase_state_manager.SupabaseStateManager\",\n            \"login_helper\": \"src.mcp_services.supabase.supabase_login_helper.SupabaseLoginHelper\",\n        },\n        \"config_mapping\": {\n            \"state_manager\": {\n                \"api_url\": \"api_url\",\n                \"api_key\": \"api_key\",\n                \"postgres_host\": \"postgres_host\",\n                \"postgres_port\": \"postgres_port\",\n                \"postgres_user\": \"postgres_user\",\n                \"postgres_password\": \"postgres_password\",\n                \"postgres_database\": \"postgres_database\",\n            },\n            \"login_helper\": {},\n        },\n        \"mcp_server\": None,\n        \"eval_config\": None,\n    },\n    \"playwright_webarena\": {\n        \"config_schema\": {\n            \"browser\": {\n          
      \"env_var\": \"PLAYWRIGHT_BROWSER\",\n                \"default\": \"chromium\",\n                \"required\": False,\n                \"description\": \"Browser to use (chromium, firefox, webkit)\",\n                \"validator\": \"in:chromium,firefox,webkit\",\n            },\n            \"headless\": {\n                \"env_var\": \"PLAYWRIGHT_HEADLESS\",\n                \"default\": True,\n                \"required\": False,\n                \"description\": \"Run browser in headless mode\",\n                \"transform\": \"bool\",\n            },\n            \"network_origins\": {\n                \"env_var\": \"PLAYWRIGHT_NETWORK_ORIGINS\",\n                \"default\": \"*\",\n                \"required\": False,\n                \"description\": \"Allowed network origins (comma-separated or *)\",\n            },\n            \"user_profile\": {\n                \"env_var\": \"PLAYWRIGHT_USER_PROFILE\",\n                \"default\": \"isolated\",\n                \"required\": False,\n                \"description\": \"User profile type (isolated or persistent)\",\n                \"validator\": \"in:isolated,persistent\",\n            },\n            \"viewport_width\": {\n                \"env_var\": \"PLAYWRIGHT_VIEWPORT_WIDTH\",\n                \"default\": 1280,\n                \"required\": False,\n                \"description\": \"Browser viewport width\",\n                \"transform\": \"int\",\n            },\n            \"viewport_height\": {\n                \"env_var\": \"PLAYWRIGHT_VIEWPORT_HEIGHT\",\n                \"default\": 720,\n                \"required\": False,\n                \"description\": \"Browser viewport height\",\n                \"transform\": \"int\",\n            },\n            \"skip_cleanup\": {\n                \"env_var\": \"PLAYWRIGHT_WEBARENA_SKIP_CLEANUP\",\n                \"default\": False,\n                \"required\": False,\n                \"description\": \"Skip Docker container cleanup 
for debugging\",\n                \"transform\": \"bool\",\n            },\n        },\n        \"components\": {\n            \"task_manager\": \"src.mcp_services.playwright_webarena.playwright_task_manager.PlaywrightTaskManager\",\n            \"state_manager\": \"src.mcp_services.playwright_webarena.playwright_state_manager.PlaywrightStateManager\",\n            \"login_helper\": \"src.mcp_services.playwright_webarena.playwright_login_helper.PlaywrightLoginHelper\",\n        },\n        \"config_mapping\": {\n            \"state_manager\": {\n                \"browser\": \"browser\",\n                \"headless\": \"headless\",\n                \"network_origins\": \"network_origins\",\n                \"user_profile\": \"user_profile\",\n                \"viewport_width\": \"viewport_width\",\n                \"viewport_height\": \"viewport_height\",\n                \"skip_cleanup\": \"skip_cleanup\",\n            },\n            \"login_helper\": {\n                \"browser\": \"browser\",\n                \"headless\": \"headless\",\n            },\n            \"task_manager\": {},\n        },\n        \"mcp_server\": None,\n        \"eval_config\": None,\n    },\n}\n\n\ndef get_service_definition(service_name: str) -> dict:\n    \"\"\"Get MCP service definition by name.\"\"\"\n    if service_name not in SERVICES:\n        raise ValueError(f\"Unknown MCP service: {service_name}\")\n    return SERVICES[service_name]\n\n\ndef get_supported_mcp_services() -> list:\n    \"\"\"Get list of implemented MCP services.\"\"\"\n    return [\n        name\n        for name, config in SERVICES.items()\n        if config[\"components\"][\"task_manager\"] is not None\n    ]\n"
  },
  {
    "path": "tasks/__init__.py",
    "content": ""
  },
  {
    "path": "tasks/filesystem/easy/.gitkeep",
    "content": ""
  },
  {
    "path": "tasks/filesystem/easy/file_context/file_splitting/description.md",
    "content": "# File Splitting Task\n\n## 📋 Task Description\n\nYou need to split a large text file into multiple smaller files with similar character counts. The task involves creating a new directory and splitting the content into exactly 3 files.\n\n## 🎯 Task Objectives\n\n1. **Create a new directory** named `split` in the test directory\n2. **Split the file** `large_file.txt` into exactly 3 files with **similar** character counts (maximum character difference of 100 between any two files)\n3. **Name the files** as `split_01.txt`, `split_02.txt`, `split_03.txt` in the `split` directory\n"
  },
  {
    "path": "tasks/filesystem/easy/file_context/file_splitting/meta.json",
    "content": "{\n  \"task_id\": \"file_splitting\",\n  \"task_name\": \"File Splitting\",\n  \"category_id\": \"file_context\",\n  \"category_name\": \"File Context\",\n  \"description\": \"Split large_file.txt into three nearly equal chunks stored as split_01.txt-split_03.txt inside a new split directory.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"content transformation\",\n    \"file automation\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_context/\\n    \\u251c\\u2500\\u2500 file_01.txt\\n    \\u251c\\u2500\\u2500 file_02.txt\\n    \\u251c\\u2500\\u2500 file_03.txt\\n    \\u251c\\u2500\\u2500 file_04.txt\\n    \\u251c\\u2500\\u2500 file_05.txt\\n    \\u251c\\u2500\\u2500 file_06.txt\\n    \\u251c\\u2500\\u2500 file_07.txt\\n    \\u251c\\u2500\\u2500 file_08.txt\\n    \\u251c\\u2500\\u2500 file_09.txt\\n    \\u251c\\u2500\\u2500 file_10.txt\\n    \\u251c\\u2500\\u2500 file_11.txt\\n    \\u251c\\u2500\\u2500 file_12.txt\\n    \\u251c\\u2500\\u2500 file_13.txt\\n    \\u251c\\u2500\\u2500 file_14.txt\\n    \\u251c\\u2500\\u2500 file_15.txt\\n    \\u251c\\u2500\\u2500 file_16.txt\\n    \\u251c\\u2500\\u2500 file_17.txt\\n    \\u251c\\u2500\\u2500 file_18.txt\\n    \\u251c\\u2500\\u2500 file_19.txt\\n    \\u251c\\u2500\\u2500 file_20.txt\\n    \\u2514\\u2500\\u2500 large_file.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_context.zip\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/filesystem/easy/file_context/file_splitting/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for File Splitting Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_split_directory_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the split directory exists.\"\"\"\n    split_dir = test_dir / \"split\"\n    \n    if not split_dir.exists():\n        print(\"❌ Directory 'split' not found\")\n        return False\n    \n    if not split_dir.is_dir():\n        print(\"❌ 'split' exists but is not a directory\")\n        return False\n    \n    print(\"✅ Split directory found\")\n    return True\n\ndef verify_all_split_files_exist(test_dir: Path) -> bool:\n    \"\"\"Verify that all 3 split files exist with correct names.\"\"\"\n    split_dir = test_dir / \"split\"\n    \n    expected_files = [f\"split_{i:02d}.txt\" for i in range(1, 4)]\n    missing_files = []\n    \n    for filename in expected_files:\n        file_path = split_dir / filename\n        if not file_path.exists():\n            missing_files.append(filename)\n    \n    if missing_files:\n        print(f\"❌ Missing files: {missing_files}\")\n        return False\n    \n    print(\"✅ All 3 split files exist with correct names\")\n    return True\n\ndef verify_similar_file_lengths(test_dir: Path) -> bool:\n    \"\"\"Verify that all split files have similar character counts (within 30 characters difference).\"\"\"\n    split_dir = test_dir / \"split\"\n    \n    file_lengths = []\n    for i in range(1, 4):\n        filename = f\"split_{i:02d}.txt\"\n        file_path = split_dir / filename\n        \n        try:\n            content = file_path.read_text()\n            file_lengths.append(len(content))\n      
  except Exception as e:\n            print(f\"❌ Error reading {filename}: {e}\")\n            return False\n    \n    # Check if all lengths are within 30 characters of each other\n    min_length = min(file_lengths)\n    max_length = max(file_lengths)\n    length_difference = max_length - min_length\n    \n    if length_difference > 100:\n        print(f\"❌ File lengths differ by more than 30 characters: {length_difference}\")\n        print(f\"   Min length: {min_length}, Max length: {max_length}\")\n        print(f\"   All lengths: {file_lengths}\")\n        return False\n    \n    print(f\"✅ All files have similar lengths (difference: {length_difference} characters)\")\n    print(f\"   Min: {min_length}, Max: {max_length}\")\n    return True\n\ndef verify_content_integrity(test_dir: Path) -> bool:\n    \"\"\"Verify that concatenated split files equal the original file.\"\"\"\n    split_dir = test_dir / \"split\"\n    original_file = test_dir / \"large_file.txt\"\n    \n    # Read original content\n    try:\n        original_content = original_file.read_text()\n    except Exception as e:\n        print(f\"❌ Error reading original file: {e}\")\n        return False\n    \n    # Concatenate all split files\n    concatenated_content = \"\"\n    for i in range(1, 4):\n        filename = f\"split_{i:02d}.txt\"\n        file_path = split_dir / filename\n        \n        try:\n            content = file_path.read_text()\n            concatenated_content += content\n        except Exception as e:\n            print(f\"❌ Error reading {filename}: {e}\")\n            return False\n    \n    # Compare content\n    if concatenated_content != original_content:\n        print(\"❌ Concatenated content does not match original file\")\n        print(f\"   Original length: {len(original_content)}\")\n        print(f\"   Concatenated length: {len(concatenated_content)}\")\n        return False\n    \n    print(\"✅ Concatenated content matches original file exactly\")\n    return 
True\n\ndef verify_no_extra_files(test_dir: Path) -> bool:\n    \"\"\"Verify that no extra files exist in the split directory.\"\"\"\n    split_dir = test_dir / \"split\"\n    \n    expected_files = {f\"split_{i:02d}.txt\" for i in range(1, 4)}\n    actual_files = {f.name for f in split_dir.iterdir() if f.is_file()}\n    \n    extra_files = actual_files - expected_files\n    if extra_files:\n        print(f\"❌ Extra files found in split directory: {extra_files}\")\n        return False\n    \n    print(\"✅ No extra files in split directory\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying File Splitting Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Split Directory Exists\", verify_split_directory_exists),\n        (\"All Split Files Exist\", verify_all_split_files_exist),\n        (\"Similar File Lengths\", verify_similar_file_lengths),\n        (\"Content Integrity\", verify_content_integrity),\n        (\"No Extra Files\", verify_no_extra_files),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ File splitting task completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/easy/file_context/pattern_matching/description.md",
    "content": "# File Filtering Task: Find Files with Common Substring\n\n## 📋 Task Description\n\nYour task is to find all files that contain a substring of 30 or more characters that also appears in `large_file.txt`. **You are not allowed to use Python code.**\n\n## 🎯 Task Objectives\n\n1. **Read the reference file** `large_file.txt` to understand its content\n2. **Examine each file** from file_01.txt to file_20.txt\n3. **Find files** that contain a substring of 30 or more characters that matches a substring in `large_file.txt`\n4. **Create a file `answer.txt`** and write the results to it with the following format:\n   - One line per matching file\n   - Format: `filename.txt`\n   - Do not add anything other than `filename.txt`.\n"
  },
  {
    "path": "tasks/filesystem/easy/file_context/pattern_matching/meta.json",
    "content": "{\n  \"task_id\": \"pattern_matching\",\n  \"task_name\": \"Pattern Matching\",\n  \"category_id\": \"file_context\",\n  \"category_name\": \"File Context\",\n  \"description\": \"Scan file_01.txt through file_20.txt for any 30+ character substring that also appears in large_file.txt and list each matching filename in answer.txt.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"pattern analysis\",\n    \"search and filtering\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_context/\\n    \\u251c\\u2500\\u2500 file_01.txt\\n    \\u251c\\u2500\\u2500 file_02.txt\\n    \\u251c\\u2500\\u2500 file_03.txt\\n    \\u251c\\u2500\\u2500 file_04.txt\\n    \\u251c\\u2500\\u2500 file_05.txt\\n    \\u251c\\u2500\\u2500 file_06.txt\\n    \\u251c\\u2500\\u2500 file_07.txt\\n    \\u251c\\u2500\\u2500 file_08.txt\\n    \\u251c\\u2500\\u2500 file_09.txt\\n    \\u251c\\u2500\\u2500 file_10.txt\\n    \\u251c\\u2500\\u2500 file_11.txt\\n    \\u251c\\u2500\\u2500 file_12.txt\\n    \\u251c\\u2500\\u2500 file_13.txt\\n    \\u251c\\u2500\\u2500 file_14.txt\\n    \\u251c\\u2500\\u2500 file_15.txt\\n    \\u251c\\u2500\\u2500 file_16.txt\\n    \\u251c\\u2500\\u2500 file_17.txt\\n    \\u251c\\u2500\\u2500 file_18.txt\\n    \\u251c\\u2500\\u2500 file_19.txt\\n    \\u251c\\u2500\\u2500 file_20.txt\\n    \\u2514\\u2500\\u2500 large_file.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_context.zip\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/filesystem/easy/file_context/pattern_matching/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for File Filtering Task: Find Files with Common Substring\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_answer_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer.txt file exists.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    if not answer_file.exists():\n        print(\"❌ File 'answer.txt' not found\")\n        return False\n    \n    print(\"✅ Answer file found\")\n    return True\n\ndef verify_answer_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer file has the correct format.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # If file is empty, that's acceptable (no matches found)\n        if not content:\n            print(\"✅ Answer file is empty (no matches found)\")\n            return True\n        \n        lines = content.split('\\n')\n        \n        for i, line in enumerate(lines, 1):\n            line = line.strip()\n            if not line:\n                continue\n                \n            # Check format: just filename.txt\n            if not line.endswith('.txt') or not line.startswith('file_'):\n                print(f\"❌ Line {i} has incorrect format: {line}\")\n                print(\"   Expected format: filename.txt\")\n                return False\n        \n        print(\"✅ Answer format is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading answer file: {e}\")\n        return False\n\ndef find_30_plus_char_matches(test_dir: Path) -> set:\n    \"\"\"Find 
all files that have 30+ character substring matches with large_file.txt.\"\"\"\n    large_file = test_dir / \"large_file.txt\"\n    if not large_file.exists():\n        print(\"❌ large_file.txt not found\")\n        return set()\n    \n    large_content = large_file.read_text()\n    matching_files = set()\n    \n    # Check each file from file_01.txt to file_20.txt\n    for i in range(1, 21):\n        filename = f\"file_{i:02d}.txt\"\n        file_path = test_dir / filename\n        \n        if not file_path.exists():\n            continue\n            \n        file_content = file_path.read_text()\n        \n        # Check if there's a substring of 30+ characters that matches\n        has_match = False\n        for start_pos in range(len(file_content)):\n            for end_pos in range(start_pos + 30, len(file_content) + 1):\n                substring = file_content[start_pos:end_pos]\n                if substring in large_content:\n                    has_match = True\n                    break\n            if has_match:\n                break\n        \n        if has_match:\n            matching_files.add(filename)\n    \n    return matching_files\n\ndef verify_matches_are_correct(test_dir: Path) -> bool:\n    \"\"\"Verify that the files listed in answer.txt actually have 30+ character matches.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # If no content, check if there should actually be no matches\n        if not content:\n            expected_matches = find_30_plus_char_matches(test_dir)\n            if expected_matches:\n                print(\"❌ Answer file is empty but matches should exist\")\n                for filename in expected_matches:\n                    print(f\"   Expected: {filename}\")\n                return False\n            else:\n                print(\"✅ No matches found (correct)\")\n                return True\n        \n        # Parse 
answer file\n        answer_files = set()\n        lines = content.split('\\n')\n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n            answer_files.add(line)\n        \n        # Get expected matches\n        expected_matches = find_30_plus_char_matches(test_dir)\n        \n        # Check if all answer files actually have matches\n        for filename in answer_files:\n            if filename not in expected_matches:\n                print(f\"❌ File {filename} listed in answer but has no valid 30+ character match\")\n                return False\n        \n        # Check if all expected matches are in answer\n        for filename in expected_matches:\n            if filename not in answer_files:\n                print(f\"❌ Missing match for {filename} in answer file\")\n                return False\n        \n        print(\"✅ All matches are correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying matches: {e}\")\n        return False\n\ndef verify_files_exist(test_dir: Path) -> bool:\n    \"\"\"Verify that all files mentioned in answer.txt actually exist.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        if not content:\n            return True  # No files to verify\n        \n        lines = content.split('\\n')\n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n                \n            file_path = test_dir / line\n            \n            if not file_path.exists():\n                print(f\"❌ File mentioned in answer does not exist: {line}\")\n                return False\n        \n        print(\"✅ All files mentioned in answer exist\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file existence: {e}\")\n        return False\n\ndef 
main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying File Filtering Task: Find Files with Common Substring...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Answer File Exists\", verify_answer_file_exists),\n        (\"Answer Format\", verify_answer_format),\n        (\"Files Exist\", verify_files_exist),\n        (\"Matches are Correct\", verify_matches_are_correct),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ File filtering task completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/easy/file_context/uppercase/description.md",
    "content": "# File Context Task: Convert Files to Uppercase\n\n## 📋 Task Description\n\nYou need to process 5 text files (file_01.txt to file_05.txt) and convert their content to uppercase format.\n\n## 🎯 Task Objectives\n\n1. **Create an uppercase directory** in the test environment root\n2. **Convert each file** from file_01.txt to file_05.txt to uppercase\n3. **Save converted files** in the uppercase/ directory with the same names\n"
  },
  {
    "path": "tasks/filesystem/easy/file_context/uppercase/meta.json",
    "content": "{\n  \"task_id\": \"uppercase\",\n  \"task_name\": \"Uppercase\",\n  \"category_id\": \"file_context\",\n  \"category_name\": \"File Context\",\n  \"description\": \"Copy file_01.txt-file_05.txt into an uppercase/ folder and convert the contents of every file to uppercase text.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"content transformation\",\n    \"batch processing\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_context/\\n    \\u251c\\u2500\\u2500 file_01.txt\\n    \\u251c\\u2500\\u2500 file_02.txt\\n    \\u251c\\u2500\\u2500 file_03.txt\\n    \\u251c\\u2500\\u2500 file_04.txt\\n    \\u251c\\u2500\\u2500 file_05.txt\\n    \\u251c\\u2500\\u2500 file_06.txt\\n    \\u251c\\u2500\\u2500 file_07.txt\\n    \\u251c\\u2500\\u2500 file_08.txt\\n    \\u251c\\u2500\\u2500 file_09.txt\\n    \\u251c\\u2500\\u2500 file_10.txt\\n    \\u251c\\u2500\\u2500 file_11.txt\\n    \\u251c\\u2500\\u2500 file_12.txt\\n    \\u251c\\u2500\\u2500 file_13.txt\\n    \\u251c\\u2500\\u2500 file_14.txt\\n    \\u251c\\u2500\\u2500 file_15.txt\\n    \\u251c\\u2500\\u2500 file_16.txt\\n    \\u251c\\u2500\\u2500 file_17.txt\\n    \\u251c\\u2500\\u2500 file_18.txt\\n    \\u251c\\u2500\\u2500 file_19.txt\\n    \\u251c\\u2500\\u2500 file_20.txt\\n    \\u2514\\u2500\\u2500 large_file.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_context.zip\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/filesystem/easy/file_context/uppercase/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for File Context Task: Convert Files to Uppercase\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\nimport re\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_uppercase_directory_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the uppercase directory exists.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n    \n    if not uppercase_dir.exists():\n        print(\"❌ Directory 'uppercase' not found\")\n        return False\n    \n    if not uppercase_dir.is_dir():\n        print(\"❌ 'uppercase' exists but is not a directory\")\n        return False\n    \n    print(\"✅ Uppercase directory found\")\n    return True\n\ndef verify_uppercase_files_exist(test_dir: Path) -> bool:\n    \"\"\"Verify that all 5 uppercase files exist.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n    \n    for i in range(1, 6):\n        filename = f\"file_{i:02d}.txt\"\n        file_path = uppercase_dir / filename\n        \n        if not file_path.exists():\n            print(f\"❌ File '{filename}' not found in uppercase directory\")\n            return False\n    \n    print(\"✅ All 5 uppercase files found\")\n    return True\n\ndef verify_uppercase_content(test_dir: Path) -> bool:\n    \"\"\"Verify that uppercase files contain the correct uppercase content.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n    \n    for i in range(1, 6):\n        filename = f\"file_{i:02d}.txt\"\n        original_file = test_dir / filename\n        uppercase_file = uppercase_dir / filename\n        \n        if not original_file.exists():\n            print(f\"❌ Original file '{filename}' not found\")\n            return False\n        \n        
try:\n            original_content = original_file.read_text()\n            uppercase_content = uppercase_file.read_text()\n            \n            # Check if uppercase content is the uppercase version of original\n            expected_uppercase = original_content.upper()\n            \n            if uppercase_content != expected_uppercase:\n                print(f\"❌ File '{filename}' content is not properly converted to uppercase\")\n                return False\n                \n        except Exception as e:\n            print(f\"❌ Error reading file '{filename}': {e}\")\n            return False\n    \n    print(\"✅ All uppercase files contain correct uppercase content\")\n    return True\n\ndef verify_answer_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer.txt file exists in the uppercase directory.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n    answer_file = uppercase_dir / \"answer.txt\"\n    \n    if not answer_file.exists():\n        print(\"❌ File 'answer.txt' not found in uppercase directory\")\n        return False\n    \n    print(\"✅ Answer file found in uppercase directory\")\n    return True\n\ndef verify_answer_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer file has the correct format.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n    answer_file = uppercase_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        if not content:\n            print(\"❌ Answer file is empty\")\n            return False\n        \n        lines = content.split('\\n')\n        \n        # Check if we have exactly 10 lines\n        if len(lines) != 10:\n            print(f\"❌ Answer file has {len(lines)} lines, expected 10\")\n            return False\n        \n        for i, line in enumerate(lines, 1):\n            line = line.strip()\n            if not line:\n                print(f\"❌ Line {i} is empty\")\n                return False\n            \n   
         # Check format: filename:word_count\n            if ':' not in line:\n                print(f\"❌ Line {i} has incorrect format: {line}\")\n                print(\"   Expected format: filename:word_count\")\n                return False\n            \n            parts = line.split(':', 1)\n            if len(parts) != 2:\n                print(f\"❌ Line {i} has incorrect format: {line}\")\n                print(\"   Expected format: filename:word_count\")\n                return False\n            \n            filename, word_count_str = parts\n            \n            # Check filename format\n            if not filename.endswith('.txt') or not filename.startswith('file_'):\n                print(f\"❌ Line {i} has invalid filename: {filename}\")\n                return False\n            \n            # Check word count format (should be integer)\n            try:\n                word_count = int(word_count_str)\n                if word_count <= 0:\n                    print(f\"❌ Line {i} has invalid word count: {word_count_str}\")\n                    return False\n            except ValueError:\n                print(f\"❌ Line {i} has non-integer word count: {word_count_str}\")\n                return False\n        \n        print(\"✅ Answer format is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading answer file: {e}\")\n        return False\n\ndef count_words_in_file(file_path: Path) -> int:\n    \"\"\"Count words in a file.\"\"\"\n    try:\n        content = file_path.read_text()\n        # Split by whitespace and filter out empty strings\n        words = [word for word in content.split() if word.strip()]\n        return len(words)\n    except Exception as e:\n        print(f\"❌ Error reading file {file_path}: {e}\")\n        return 0\n\ndef verify_word_counts_are_correct(test_dir: Path) -> bool:\n    \"\"\"Verify that the word counts in answer.txt are correct.\"\"\"\n    uppercase_dir = test_dir 
/ \"uppercase\"\n    answer_file = uppercase_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        lines = content.split('\\n')\n        \n        # Expected word counts based on answer.md\n        expected_counts = [22, 22, 22, 22, 18, 22, 22, 22, 18, 20]\n        \n        # Create a set of expected file entries for easier checking\n        expected_entries = set()\n        for i in range(1, 11):\n            filename = f\"file_{i:02d}.txt\"\n            expected_count = expected_counts[i - 1]\n            if i == 6:  # Special case for file_06.txt: can be 21 or 22\n                expected_entries.add(f\"{filename}:21\")\n                expected_entries.add(f\"{filename}:22\")\n            else:\n                expected_entries.add(f\"{filename}:{expected_count}\")\n        \n        # Check each line in the answer file\n        found_entries = set()\n        for line in lines:\n            line = line.strip()\n            if line in expected_entries:\n                found_entries.add(line)\n            else:\n                print(f\"❌ Invalid entry: {line}\")\n                return False\n        \n        # Check if we found all expected entries\n        if len(found_entries) != 10:\n            print(f\"❌ Found {len(found_entries)} entries, expected 10\")\n            missing = expected_entries - found_entries\n            if missing:\n                print(f\"   Missing entries: {missing}\")\n            return False\n        \n        print(\"✅ All word counts are correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying word counts: {e}\")\n        return False\n\ndef verify_all_files_are_included(test_dir: Path) -> bool:\n    \"\"\"Verify that all 10 files are included in the answer.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n    answer_file = uppercase_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        
lines = content.split('\\n')\n        \n        # Check that all 10 files are present\n        found_files = set()\n        for line in lines:\n            parts = line.split(':', 1)\n            filename = parts[0]\n            found_files.add(filename)\n        \n        expected_files = {f\"file_{i:02d}.txt\" for i in range(1, 11)}\n        \n        if found_files != expected_files:\n            missing = expected_files - found_files\n            extra = found_files - expected_files\n            if missing:\n                print(f\"❌ Missing files in answer: {missing}\")\n            if extra:\n                print(f\"❌ Extra files in answer: {extra}\")\n            return False\n        \n        print(\"✅ All 10 files are included in answer\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file inclusion: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"🔍 Verifying Uppercase in: {test_dir}\")\n        print()\n        \n        # Run all verification checks\n        checks = [\n            (\"Uppercase directory exists\", verify_uppercase_directory_exists),\n            (\"Uppercase files exist\", verify_uppercase_files_exist),\n            (\"Uppercase content is correct\", verify_uppercase_content),\n        ]\n        \n        all_passed = True\n        for check_name, check_func in checks:\n            print(f\"📋 {check_name}...\")\n            if not check_func(test_dir):\n                all_passed = False\n            print()\n        \n        if all_passed:\n            print(\"🎉 All verification checks passed!\")\n            sys.exit(0)\n        else:\n            print(\"❌ Some verification checks failed!\")\n            sys.exit(1)\n            \n    except Exception as e:\n        print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    
main()"
  },
  {
    "path": "tasks/filesystem/easy/file_property/largest_rename/description.md",
    "content": "# Largest File Rename Task\n\n## 📋 Task Description\n\nRename the largest `.jpg` file in the test directory to `largest.jpg` based on file size.\n\n## 🎯 Task Objectives\n\n1. **Find all `.jpg` files** in the test directory\n2. **Determine which `.jpg` file is the largest** by file size\n3. **Rename the largest `.jpg` file to `largest.jpg`**\n"
  },
  {
    "path": "tasks/filesystem/easy/file_property/largest_rename/meta.json",
    "content": "{\n  \"task_id\": \"largest_rename\",\n  \"task_name\": \"Largest File Rename\",\n  \"category_id\": \"file_property\",\n  \"category_name\": \"File Property\",\n  \"description\": \"Identify the largest .jpg in the workspace and rename it to largest.jpg while leaving the other files untouched.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"file organization\",\n    \"attribute inspection\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_property/\\n    \\u251c\\u2500\\u2500 bear.jpg\\n    \\u251c\\u2500\\u2500 bridge.jpg\\n    \\u251c\\u2500\\u2500 bus.MOV\\n    \\u251c\\u2500\\u2500 random_file_1.txt\\n    \\u251c\\u2500\\u2500 random_file_2.txt\\n    \\u251c\\u2500\\u2500 random_file_3.txt\\n    \\u251c\\u2500\\u2500 road.MOV\\n    \\u2514\\u2500\\u2500 sg.jpg\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_property.zip\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/filesystem/easy/file_property/largest_rename/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Largest File Rename Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_sg_jpg_not_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that sg.jpg does not exist.\"\"\"\n    sg_file = test_dir / \"sg.jpg\"\n    \n    if sg_file.exists():\n        print(\"❌ sg.jpg still exists (should be renamed)\")\n        return False\n    \n    print(\"✅ sg.jpg does not exist\")\n    return True\n\ndef verify_largest_jpg_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that largest.jpg exists.\"\"\"\n    largest_file = test_dir / \"largest.jpg\"\n    \n    if not largest_file.exists():\n        print(\"❌ largest.jpg does not exist\")\n        return False\n    \n    print(\"✅ largest.jpg exists\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"🔍 Verifying largest file rename in: {test_dir}\")\n        \n        # Run all verification checks\n        checks = [\n            (\"sg.jpg does not exist\", verify_sg_jpg_not_exists),\n            (\"largest.jpg exists\", verify_largest_jpg_exists)\n        ]\n        \n        all_passed = True\n        for check_name, check_func in checks:\n            print(f\"\\n📋 Checking: {check_name}\")\n            if not check_func(test_dir):\n                all_passed = False\n        \n        if all_passed:\n            print(\"\\n🎉 All verification checks passed!\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Some verification checks failed!\")\n            sys.exit(1)\n            \n    except Exception as e:\n        
print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/easy/file_property/txt_merging/description.md",
    "content": "# Text File Merging Task\n\n## 📋 Task Description\n\nMerge all `.txt` files in the test directory into a single file called `merge.txt`. The merged file should contain the content from all `.txt` files.\n\n## 🎯 Task Objectives\n\n1. **Read all `.txt` files** in the test directory\n2. **Create a new file** called `merge.txt` in the test directory\n3. **Write the content** from all `.txt` files into `merge.txt`\n4. **The order** of content doesn't matter - as long as all content from all `.txt` files is present in `merge.txt`\n"
  },
  {
    "path": "tasks/filesystem/easy/file_property/txt_merging/meta.json",
    "content": "{\n  \"task_id\": \"txt_merging\",\n  \"task_name\": \"Text File Merging\",\n  \"category_id\": \"file_property\",\n  \"category_name\": \"File Property\",\n  \"description\": \"Combine the contents of every .txt file into a single merge.txt file so the archive has one consolidated view.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"content consolidation\",\n    \"file automation\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_property/\\n    \\u251c\\u2500\\u2500 bear.jpg\\n    \\u251c\\u2500\\u2500 bridge.jpg\\n    \\u251c\\u2500\\u2500 bus.MOV\\n    \\u251c\\u2500\\u2500 random_file_1.txt\\n    \\u251c\\u2500\\u2500 random_file_2.txt\\n    \\u251c\\u2500\\u2500 random_file_3.txt\\n    \\u251c\\u2500\\u2500 road.MOV\\n    \\u2514\\u2500\\u2500 sg.jpg\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_property.zip\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/filesystem/easy/file_property/txt_merging/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Text File Merging Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef get_expected_contents():\n    \"\"\"Return the expected content from each .txt file.\"\"\"\n    return [\n        \"O rErmZ4tDgzMNoxn1oNfQhT1TRpy9w0tQPGTcrsaoMFrrgt9bY5mgBxO6q8c8lZywXxEEBWW4i6Jh9NbAtYtRKvkzB4bshGIMzn2G1 rDTpKJj\",\n        \"DmRrDFFaIl1mPubzSJJaN4aMeZyBHqVxZe5tpztHQ9zSe6b69Hnl7coqeNJXHXU2EnaDnyhYxZSWHPn3IWLsLGWrx7py8d37Z8blMnh7VDUH7hAMamhLRO8lfUVV1roM8a0njnW9evXRq5AoNTt8Tv7kQ5LmLe6Z66MZwtjckRAXmOB4x3AYbbxLULYZAxitW1KNG1yTaDOYZQhtKdZkX1XqytzBl9dRXI4gk91ZlVHLOiujwUa89EVsdjayKeCc21gCJMXvbhDSOGAs6dXZEHuaHQnnBdM19X3TwPgfDONyhlc pjwoQ45D56UQVWxwNIJUTgwS1vctYOx4XFpMgf3PRQ7zZdfhIuPBFdQwnQvYUeQbWa5gnyMO9FVSU0vm9uccbJQvkcEAJzMkEh9i7z6EEixtbwVedlTGWL2XBwjenRdf2qsOgvJo8Dyuvf35ieCFMG7wR7200rs GJZ5bRdx4R2gGOWVMi3MOBrqcw3KhbcpJtdQoKMALEjBMrY7VYKtAZNI6LoXX OOTJZ3x3usHRJY0gMtKhh6OJ 37aknvBwNYJ0IRWYWaeJ8LBwJyO6ZV3ZJ0palISQvGaHEZ0olHnK2iNCTxqxvF8J7EdIdIPYssl5f0XgPl6\",\n        \"aFCzXJbJq02zlCKnyarJnPUiwVIuUrQci3fZvGD53F5fUsKDUlEwO5 ANJ2VgBnJ5cuBJzjILcM9AxTvyNZ5NPIHjSCo5O20K\"\n    ]\n\ndef verify_merge_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that merge.txt exists in the test directory.\"\"\"\n    merge_file = test_dir / \"merge.txt\"\n    \n    if not merge_file.exists():\n        print(\"❌ merge.txt not found\")\n        return False\n    \n    if not merge_file.is_file():\n        print(\"❌ merge.txt exists but is not a file\")\n        return False\n    \n    print(\"✅ merge.txt exists\")\n    return True\n\ndef verify_merge_file_contents(test_dir: Path) -> bool:\n    \"\"\"Verify that 
merge.txt contains all expected content strings.\"\"\"\n    merge_file = test_dir / \"merge.txt\"\n    expected_contents = get_expected_contents()\n    \n    try:\n        with open(merge_file, 'r', encoding='utf-8') as f:\n            merge_content = f.read()\n    except Exception as e:\n        print(f\"❌ Failed to read merge.txt: {e}\")\n        return False\n    \n    # Check that each expected content string is present in the merged file\n    missing_contents = []\n    for content in expected_contents:\n        if content not in merge_content:\n            missing_contents.append(content[:50] + \"...\" if len(content) > 50 else content)\n    \n    if missing_contents:\n        print(f\"❌ Missing content in merge.txt:\")\n        for content in missing_contents:\n            print(f\"   - {content}\")\n        return False\n    \n    print(\"✅ merge.txt contains all expected content\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"🔍 Verifying text file merging in: {test_dir}\")\n        \n        # Run all verification checks\n        checks = [\n            (\"Merge file existence\", verify_merge_file_exists),\n            (\"Merge file contents\", verify_merge_file_contents)\n        ]\n        \n        all_passed = True\n        for check_name, check_func in checks:\n            print(f\"\\n📋 Checking: {check_name}\")\n            if not check_func(test_dir):\n                all_passed = False\n        \n        if all_passed:\n            print(\"\\n🎉 All verification checks passed!\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Some verification checks failed!\")\n            sys.exit(1)\n            \n    except Exception as e:\n        print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/filesystem/easy/folder_structure/structure_analysis/description.md",
    "content": "# Directory Structure Analysis Task\n\nYou need to recursively traverse the entire folder structure under the main directory and count the total number of `.py` files in the entire directory (including all subdirectories).\n\nWrite the answer (just a single number) in a file named `structure_analysis.txt` in the main directory (at the same level as the `complex_structure` folder).\n\nYou should not change or delete any existing files.\n\nDo not try to use Python code.\n"
  },
  {
    "path": "tasks/filesystem/easy/folder_structure/structure_analysis/meta.json",
    "content": "{\n  \"task_id\": \"structure_analysis\",\n  \"task_name\": \"Structure Analysis\",\n  \"category_id\": \"folder_structure\",\n  \"category_name\": \"Folder Structure\",\n  \"description\": \"Recursively inspect the complex_structure tree, count all .py files, and save the total as the only line of structure_analysis.txt.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data extraction\",\n    \"filesystem traversal\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"folder_structure/\\n    \\u2514\\u2500\\u2500 complex_structure/\\n            \\u251c\\u2500\\u2500 deeply/\\n            \\u2502       \\u2514\\u2500\\u2500 nested/\\n            \\u2502               \\u2514\\u2500\\u2500 folder/\\n            \\u2502                       \\u2514\\u2500\\u2500 structure/\\n            \\u251c\\u2500\\u2500 empty_folder/\\n            \\u251c\\u2500\\u2500 folder_lxkHt_0_1/\\n            \\u2502       \\u2514\\u2500\\u2500 file_PeLzC_0.txt\\n            \\u251c\\u2500\\u2500 folder_QdTAj_0_2/\\n            \\u2502       \\u251c\\u2500\\u2500 folder_eXccj_1_0/\\n            \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_Mqlwh_2_1/\\n            \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_cKxcP_3_3/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_BPTMK_4_1/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_RHtBP_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_QNqjq_4_0/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_gRwPE_5_1/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 
file_jVlpp_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_vJuHz_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_XdXYJ_5_0/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_KvkKi_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_gGxLG_2.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_Hzkxo_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_XRjeh_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_vIBIt_4_2/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_kRDNS_5_0/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_wFSjJ_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_NyBSO_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_EOCNf_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_gmrXA_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_NcruA_3_1/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_bLWDj_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_WAftR_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_qCDFI_3_2/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 
file_eSMOJ_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_oxADy_2.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_RTbbc_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_QVHUU_3_0/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_FEPTK_4_1/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_GHoMC_5_1/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_rAMYd_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_iBDUY_5_0/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_IJCaw_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_VRXgp_5_2/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_hkUmS_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_nqLAf_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_XflmA_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_FlPoK_4_3/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_hSVNm_5_3/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_klnbn_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_iZuEl_5_0/\\n            \\u2502       
\\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_LqAmy_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_LcURj_5_2/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_RgwOS_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_ZHnYb_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_tuZQJ_5_1/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_LHuIx_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_asJnB_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_EzLdu_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_ndhsJ_4_0/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_CUSXK_5_0/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_DpiuM_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_pSqeG_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_pstmE_5_1/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_YwdJt_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_StlsP_5_2/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502  
     \\u251c\\u2500\\u2500 file_kriBJ_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_XCEdm_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_ToDjh_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_xbIVx_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_PJBok_4_4/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_mzxaf_5_0/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_ILBzj_2.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_MTGMm_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_zBDqz_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_sULMj_5_1/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_BHziw_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_sIjiu_2.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_VqNkB_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_vypSi_5_3/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_kZbIm_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 
file_sOBtE_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_ZLGHy_5_2/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_azaFF_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_nAFRe_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_mIkQU_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_sGPxd_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_VTbEG_4_2/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_HtYLg_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_JXjMd_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_tPccB_2.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_BuOSw_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_TpoqE_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 folder_wTvun_3_4/\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_GyhyE_1.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_POsla_2.txt\\n            \\u2502       \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_tSsvk_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_irNju_0.txt\\n            \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_jYBRm_1.txt\\n            \\u2502       
\\u2502       \\u251c\\u2500\\u2500 folder_YlJLI_2_0/\\n            \\u2502       \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_FpFSL_0.txt\\n            \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_cFgBr_2.txt\\n            \\u2502       \\u2502       \\u251c\\u2500\\u2500 file_lKEWN_1.txt\\n            \\u2502       \\u2502       \\u2514\\u2500\\u2500 file_ZEWFP_0.txt\\n            \\u2502       \\u2514\\u2500\\u2500 file_ayUCH_0.txt\\n            \\u251c\\u2500\\u2500 folder_xtgyi_0_0/\\n            \\u2502       \\u2514\\u2500\\u2500 file_BvSOB_0.txt\\n            \\u251c\\u2500\\u2500 mixed_content/\\n            \\u2502       \\u2514\\u2500\\u2500 images_and_text/\\n            \\u2502               \\u2514\\u2500\\u2500 notes.txt\\n            \\u251c\\u2500\\u2500 project/\\n            \\u2502       \\u251c\\u2500\\u2500 docs/\\n            \\u2502       \\u2502       \\u2514\\u2500\\u2500 archive/\\n            \\u2502       \\u2502               \\u2514\\u2500\\u2500 2023/\\n            \\u2502       \\u2502                       \\u2514\\u2500\\u2500 reports/\\n            \\u2502       \\u2502                               \\u251c\\u2500\\u2500 report_0.txt\\n            \\u2502       \\u2502                               \\u251c\\u2500\\u2500 report_1.txt\\n            \\u2502       \\u2502                               \\u2514\\u2500\\u2500 report_2.txt\\n            \\u2502       \\u2514\\u2500\\u2500 src/\\n            \\u2502               \\u2514\\u2500\\u2500 main/\\n            \\u2502                       \\u2514\\u2500\\u2500 resources/\\n            \\u2514\\u2500\\u2500 m.py\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/folder_structure.zip\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/filesystem/easy/folder_structure/structure_analysis/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Directory Structure Analysis Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_structure_analysis_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the structure_analysis.txt file exists.\"\"\"\n    analysis_file = test_dir / \"structure_analysis.txt\"\n    \n    if not analysis_file.exists():\n        print(\"❌ File 'structure_analysis.txt' not found\")\n        return False\n    \n    print(\"✅ structure_analysis.txt file found\")\n    return True\n\ndef verify_structure_analysis_content(test_dir: Path) -> bool:\n    \"\"\"Verify that the structure_analysis.txt file contains the correct count.\"\"\"\n    analysis_file = test_dir / \"structure_analysis.txt\"\n    \n    try:\n        content = analysis_file.read_text().strip()\n        \n        if not content:\n            print(\"❌ structure_analysis.txt file is empty\")\n            return False\n        \n        # The expected answer is 1\n        expected_count = 1\n        \n        # Check if content is exactly \"1\"\n        if content != str(expected_count):\n            print(f\"❌ Expected '{expected_count}', but found: '{content}'\")\n            return False\n        \n        print(f\"✅ Python file count is correct: {content}\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading structure_analysis.txt file: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"🔍 Verifying Directory Structure Analysis Task in: {test_dir}\")\n        print()\n        \n   
     # Define verification steps\n        verification_steps = [\n            (\"Structure Analysis File Exists\", verify_structure_analysis_file_exists),\n            (\"Python File Count is Correct\", verify_structure_analysis_content),\n        ]\n        \n        # Run all verification steps\n        all_passed = True\n        for step_name, verify_func in verification_steps:\n            print(f\"📋 {step_name}...\")\n            if not verify_func(test_dir):\n                all_passed = False\n            print()\n        \n        # Final result\n        if all_passed:\n            print(\"🎉 All verification checks passed!\")\n            sys.exit(0)\n        else:\n            print(\"❌ Some verification checks failed!\")\n            sys.exit(1)\n            \n    except Exception as e:\n        print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/filesystem/easy/legal_document/file_reorganize/description.md",
    "content": "# Legal Document File Reorganization Task\n\n**Overview**\n\nThe folder \"legal_files/\" contains multiple versions of the Stock Purchase Agreement (Preferred_Stock_Purchase_Agreement_v0.txt through Preferred_Stock_Purchase_Agreement_v10.txt).\n\n## Task\n\nYour task is to:\n\n1. Identify the final version of the document among the different versions\n2. Create a folder named `final_version` inside the `legal_files/` directory\n3. Create an **empty file** with the same name as the final version in the newly created `final_version/` folder\n4. Keep the original file in its original location\n\nNote: Due to the large file size, you only need to create an empty file (not copy the content). The filename should remain unchanged in the `final_version/` folder.\n"
  },
  {
    "path": "tasks/filesystem/easy/legal_document/file_reorganize/meta.json",
    "content": "{\n  \"task_id\": \"file_reorganize\",\n  \"task_name\": \"File Reorganize\",\n  \"category_id\": \"legal_document\",\n  \"category_name\": \"Legal Document\",\n  \"description\": \"Determine the final Stock Purchase Agreement version and create an empty copy of that filename inside legal_files/final_version/.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"file organization\",\n    \"version management\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"legal_document/\\n    \\u2514\\u2500\\u2500 legal_files/\\n            \\u251c\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v0.txt\\n            \\u251c\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v1.txt\\n            \\u251c\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v2.txt\\n            \\u251c\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v3.txt\\n            \\u251c\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v4.txt\\n            \\u251c\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v5.txt\\n            \\u251c\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v6.txt\\n            \\u251c\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v7.txt\\n            \\u251c\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v8.txt\\n            \\u251c\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v9.txt\\n            \\u2514\\u2500\\u2500 Preferred_Stock_Purchase_Agreement_v10.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/legal_document.zip\",\n    \"stateOriginalUrl\": \"https://www.cooleygo.com/documents/nvca-financing-documents\"\n  }\n}\n"
  },
  {
    "path": "tasks/filesystem/easy/legal_document/file_reorganize/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Legal Document File Reorganization Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_final_version_folder_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the final_version folder exists in legal_files.\"\"\"\n    final_version_dir = test_dir / \"legal_files\" / \"final_version\"\n    \n    if not final_version_dir.exists():\n        print(\"❌ Folder 'legal_files/final_version' not found\")\n        return False\n    \n    if not final_version_dir.is_dir():\n        print(\"❌ 'legal_files/final_version' exists but is not a directory\")\n        return False\n    \n    print(\"✅ Folder 'legal_files/final_version' found\")\n    return True\n\ndef verify_target_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that Preferred_Stock_Purchase_Agreement_v10.txt exists in final_version folder.\"\"\"\n    target_file = test_dir / \"legal_files\" / \"final_version\" / \"Preferred_Stock_Purchase_Agreement_v10.txt\"\n    \n    if not target_file.exists():\n        print(\"❌ File 'legal_files/final_version/Preferred_Stock_Purchase_Agreement_v10.txt' not found\")\n        return False\n    \n    if not target_file.is_file():\n        print(\"❌ 'Preferred_Stock_Purchase_Agreement_v10.txt' exists but is not a file\")\n        return False\n    \n    print(\"✅ Target file 'Preferred_Stock_Purchase_Agreement_v10.txt' found in final_version folder\")\n    return True\n\ndef verify_original_file_preserved(test_dir: Path) -> bool:\n    \"\"\"Verify that the original v10 file is still in place.\"\"\"\n    original_file = test_dir / \"legal_files\" / 
\"Preferred_Stock_Purchase_Agreement_v10.txt\"\n    \n    if not original_file.exists():\n        print(\"❌ Original file 'Preferred_Stock_Purchase_Agreement_v10.txt' was removed\")\n        return False\n    \n    print(\"✅ Original file 'Preferred_Stock_Purchase_Agreement_v10.txt' preserved\")\n    return True\n\ndef verify_only_v10_in_final_version(test_dir: Path) -> bool:\n    \"\"\"Verify that final_version folder contains only v10 file.\"\"\"\n    final_version_dir = test_dir / \"legal_files\" / \"final_version\"\n    \n    # Get all files in final_version folder\n    files = list(final_version_dir.iterdir())\n    \n    # Filter out directories, keep only files\n    files_only = [f for f in files if f.is_file()]\n    \n    if len(files_only) != 1:\n        print(f\"❌ final_version folder should contain exactly 1 file, but found {len(files_only)}\")\n        for f in files_only:\n            print(f\"   - {f.name}\")\n        return False\n    \n    # Check if the only file is v10\n    if files_only[0].name != \"Preferred_Stock_Purchase_Agreement_v10.txt\":\n        print(f\"❌ final_version folder contains wrong file: {files_only[0].name}\")\n        print(\"   Expected: Preferred_Stock_Purchase_Agreement_v10.txt\")\n        return False\n    \n    print(\"✅ final_version folder contains only Preferred_Stock_Purchase_Agreement_v10.txt\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Legal Document File Reorganization Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Final Version Folder Exists\", verify_final_version_folder_exists),\n        (\"Target File Exists\", verify_target_file_exists),\n        (\"Only V10 in Final Version\", verify_only_v10_in_final_version),\n        (\"Original File Preserved\", verify_original_file_preserved),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, 
verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Legal document file reorganization completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/filesystem/easy/papers/papers_counting/description.md",
    "content": "# File Context Task: Count HTML Files\n\n## 📋 Task Description\n\nYou need to count the number of HTML files in the given directory and write the count to a file.\n\n## 🎯 Task Objectives\n\n1. **Count HTML files** in the given directory\n2. **Create a file** named `count.txt` in the same directory\n3. **Write the count** (just the number) to `count.txt`\n\n## 📝 Expected Output\n\n- File `count.txt` containing only the number of HTML files found\n"
  },
  {
    "path": "tasks/filesystem/easy/papers/papers_counting/meta.json",
    "content": "{\n  \"task_id\": \"papers_counting\",\n  \"task_name\": \"Papers Counting\",\n  \"category_id\": \"papers\",\n  \"category_name\": \"Papers\",\n  \"description\": \"Count how many .html papers live in the directory and write just that number into count.txt.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data extraction\",\n    \"reporting\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"papers/\\n    \\u251c\\u2500\\u2500 1707.06347.html\\n    \\u251c\\u2500\\u2500 2105.04165.html\\n    \\u251c\\u2500\\u2500 2201.11903.html\\n    \\u251c\\u2500\\u2500 2303.08774.html\\n    \\u251c\\u2500\\u2500 2306.08640.html\\n    \\u251c\\u2500\\u2500 2310.02255.html\\n    \\u251c\\u2500\\u2500 2310.08446.html\\n    \\u251c\\u2500\\u2500 2312.00849.html\\n    \\u251c\\u2500\\u2500 2312.07533.html\\n    \\u251c\\u2500\\u2500 2312.11805.html\\n    \\u251c\\u2500\\u2500 2402.00253.html\\n    \\u251c\\u2500\\u2500 2402.03300.html\\n    \\u251c\\u2500\\u2500 2403.05530.html\\n    \\u251c\\u2500\\u2500 2404.13046.html\\n    \\u251c\\u2500\\u2500 2404.14367.html\\n    \\u251c\\u2500\\u2500 2404.14396.html\\n    \\u251c\\u2500\\u2500 2405.09818.html\\n    \\u251c\\u2500\\u2500 2405.13911.html\\n    \\u251c\\u2500\\u2500 2405.16473.html\\n    \\u251c\\u2500\\u2500 2405.16640.html\\n    \\u251c\\u2500\\u2500 2406.08478.html\\n    \\u251c\\u2500\\u2500 2406.16852.html\\n    \\u251c\\u2500\\u2500 2406.17294.html\\n    \\u251c\\u2500\\u2500 2407.01284.html\\n    \\u251c\\u2500\\u2500 2407.01509.html\\n    \\u251c\\u2500\\u2500 2407.21783.html\\n    \\u251c\\u2500\\u2500 2408.03326.html\\n    \\u251c\\u2500\\u2500 2408.12528.html\\n    \\u251c\\u2500\\u2500 2409.19256.html\\n    \\u251c\\u2500\\u2500 2410.05993.html\\n    \\u251c\\u2500\\u2500 2410.06166.html\\n    \\u251c\\u2500\\u2500 2410.10563.html\\n    \\u251c\\u2500\\u2500 
2410.13848.html\\n    \\u251c\\u2500\\u2500 2410.17885.html\\n    \\u251c\\u2500\\u2500 2410.21276.html\\n    \\u251c\\u2500\\u2500 2411.07975.html\\n    \\u251c\\u2500\\u2500 2411.10442.html\\n    \\u251c\\u2500\\u2500 2411.11930.html\\n    \\u251c\\u2500\\u2500 2411.14432.html\\n    \\u251c\\u2500\\u2500 2412.05271.html\\n    \\u251c\\u2500\\u2500 2412.08443.html\\n    \\u251c\\u2500\\u2500 2412.10302.html\\n    \\u251c\\u2500\\u2500 2412.15115.html\\n    \\u251c\\u2500\\u2500 2412.16720.html\\n    \\u251c\\u2500\\u2500 2412.17256.html\\n    \\u251c\\u2500\\u2500 2412.18319.html\\n    \\u251c\\u2500\\u2500 2412.20631.html\\n    \\u251c\\u2500\\u2500 2501.04686.html\\n    \\u251c\\u2500\\u2500 2501.06186.html\\n    \\u251c\\u2500\\u2500 2501.12599.html\\n    \\u251c\\u2500\\u2500 2501.12948.html\\n    \\u251c\\u2500\\u2500 2501.17811.html\\n    \\u251c\\u2500\\u2500 2502.01456.html\\n    \\u251c\\u2500\\u2500 2502.09621.html\\n    \\u251c\\u2500\\u2500 2502.10391.html\\n    \\u251c\\u2500\\u2500 2502.13923.html\\n    \\u251c\\u2500\\u2500 2503.01785.html\\n    \\u251c\\u2500\\u2500 2503.06520.html\\n    \\u251c\\u2500\\u2500 2503.06749.html\\n    \\u251c\\u2500\\u2500 2503.07065.html\\n    \\u251c\\u2500\\u2500 2503.07365.html\\n    \\u251c\\u2500\\u2500 2503.07536.html\\n    \\u251c\\u2500\\u2500 2503.10291.html\\n    \\u251c\\u2500\\u2500 2503.10615.html\\n    \\u251c\\u2500\\u2500 2503.12937.html\\n    \\u251c\\u2500\\u2500 2503.13939.html\\n    \\u251c\\u2500\\u2500 2503.14476.html\\n    \\u251c\\u2500\\u2500 2503.17352.html\\n    \\u251c\\u2500\\u2500 2503.18892.html\\n    \\u251c\\u2500\\u2500 2503.19786.html\\n    \\u251c\\u2500\\u2500 2503.20783.html\\n    \\u251c\\u2500\\u2500 2503.21620.html\\n    \\u251c\\u2500\\u2500 2503.21776.html\\n    \\u251c\\u2500\\u2500 2503.22679.html\\n    \\u251c\\u2500\\u2500 2504.02587.html\\n    \\u251c\\u2500\\u2500 2504.05599.html\\n    \\u251c\\u2500\\u2500 2504.07491.html\\n    \\u251c\\u2500\\u2500 2504.07934.html\\n  
  \\u251c\\u2500\\u2500 2504.07954.html\\n    \\u251c\\u2500\\u2500 2504.11455.html\\n    \\u251c\\u2500\\u2500 2504.14945.html\\n    \\u251c\\u2500\\u2500 2504.16656.html\\n    \\u251c\\u2500\\u2500 2505.00703.html\\n    \\u2514\\u2500\\u2500 arxiv_2025.bib\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/papers.zip\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/filesystem/easy/papers/papers_counting/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Paper Counting Task: Count HTML Files\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_count_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the count.txt file exists.\"\"\"\n    count_file = test_dir / \"count.txt\"\n    \n    if not count_file.exists():\n        print(\"❌ File 'count.txt' not found\")\n        return False\n    \n    print(\"✅ count.txt file found\")\n    return True\n\ndef verify_count_content(test_dir: Path) -> bool:\n    \"\"\"Verify that count.txt contains the correct number (83).\"\"\"\n    count_file = test_dir / \"count.txt\"\n    \n    try:\n        content = count_file.read_text().strip()\n        \n        # Check if content is exactly \"83\"\n        if content == \"83\":\n            print(\"✅ count.txt contains the correct number: 83\")\n            return True\n        else:\n            print(f\"❌ count.txt contains '{content}' but expected '83'\")\n            return False\n        \n    except Exception as e:\n        print(f\"❌ Error reading count.txt: {e}\")\n        return False\n\ndef verify_actual_html_count(test_dir: Path) -> bool:\n    \"\"\"Verify that there are actually 83 HTML files in the directory.\"\"\"\n    html_files = list(test_dir.glob(\"*.html\"))\n    count = len(html_files)\n    \n    if count == 83:\n        print(f\"✅ Verified: There are exactly {count} HTML files in the directory\")\n        return True\n    else:\n        print(f\"⚠️  Found {count} HTML files in the directory (expected 83)\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = 
get_test_directory()\n        print(f\"🔍 Verifying HTML file count in: {test_dir}\")\n        \n        # Define verification steps\n        verification_steps = [\n            (\"Count File Exists\", verify_count_file_exists),\n            (\"Count Content\", verify_count_content),\n            (\"Actual HTML Count\", verify_actual_html_count),\n        ]\n        \n        # Run all verification steps\n        all_passed = True\n        for step_name, verify_func in verification_steps:\n            print(f\"\\n--- {step_name} ---\")\n            if not verify_func(test_dir):\n                all_passed = False\n        \n        # Final result\n        print(\"\\n\" + \"=\"*50)\n        if all_passed:\n            print(\"✅ HTML file count is correct!\")\n            print(\"🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"❌ Task verification: FAIL\")\n            sys.exit(1)\n            \n    except Exception as e:\n        print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/filesystem/easy/student_database/duplicate_name/description.md",
    "content": "Please help me identify any duplicate name from the list of all the 150 students. Do not use python code. You only need to find **any one** duplicate name. Then generate a `namesake.txt` file to record the result in the following format, with only three lines. Note: when recording the name, replace underscores with spaces.\n\nname: xxx\ncount: xxx\nids: xxx, xxx, ...\n"
  },
  {
    "path": "tasks/filesystem/easy/student_database/duplicate_name/meta.json",
    "content": "{\n  \"task_id\": \"duplicate_name\",\n  \"task_name\": \"Duplicate Name\",\n  \"category_id\": \"student_database\",\n  \"category_name\": \"Student Database\",\n  \"description\": \"Search the 150 student folders for any repeated full name and document the name, count, and ids in namesake.txt.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"pattern analysis\",\n    \"data validation\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"student_database/\\n    \\u251c\\u2500\\u2500 20101250_Patricia_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20101701_Isabella_Davis/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20102572_Michael_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20104233_Robert_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20104498_Sarah_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20104653_Sophia_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20104675_Michael_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20104846_Christopher_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       
\\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20107487_Mia_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20108742_Sarah_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20109144_Emma_Thomas/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20109803_Oliver_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20111634_Isabella_Thomas/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20112439_Christopher_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20113368_William_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20113603_Robert_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20114397_Isabella_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20114869_Ethan_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20115252_Mason_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 
recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20115632_Elizabeth_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20115753_Charlotte_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20115924_Michael_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20116232_Olivia_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20119528_Thomas_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20122427_Karen_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20122977_Evelyn_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20123376_Joseph_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20125451_Barbara_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20126203_Barbara_Davis/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20126394_Olivia_Williams/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    
\\u251c\\u2500\\u2500 20126471_Ethan_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20127423_John_Williams/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20128249_Oliver_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20128879_Christopher_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20129898_Jessica_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20131271_Olivia_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20131518_Sophia_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20132026_Isabella_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20132370_James_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20132669_Noah_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20133527_Mason_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20133697_Isabella_Smith/\\n    
\\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20135821_Thomas_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20136681_Benjamin_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20136890_Benjamin_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20137514_Lucas_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20139234_Harper_Martinez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20139637_Noah_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20139647_Patricia_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20141421_Linda_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20142085_William_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20142383_Amelia_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20143406_Susan_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 
basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20143830_James_Garcia/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20146035_Christopher_Garcia/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20146277_William_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20146279_Christopher_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20147301_James_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20147789_James_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20148681_John_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20148778_Susan_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20149712_Jessica_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20151012_Harper_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20153174_Benjamin_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502      
 \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20153412_Charlotte_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20153606_James_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20153687_Richard_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20154518_John_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20154710_Benjamin_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20156469_Jennifer_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20156522_Jennifer_Martinez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20156851_Noah_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20157943_Harper_Williams/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20158266_Sophia_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20158294_Sophia_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 
recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20158819_Sarah_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20159113_John_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20159695_James_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20161279_William_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20162253_Mason_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20162542_Mia_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20163356_Ava_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20164515_Patricia_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20164801_Noah_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20165511_Mary_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20166436_Christopher_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    
\\u251c\\u2500\\u2500 20166487_Barbara_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20166564_Ava_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20166998_Ava_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20168311_Lucas_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20168491_Karen_Martinez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20169515_Thomas_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20171050_Christopher_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20171406_Mary_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20171613_Ethan_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20172106_Isabella_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20173259_Michael_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 
20173492_Richard_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20173501_Mary_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20173517_Susan_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20174207_Richard_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20174369_Mary_Garcia/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20175314_William_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20176169_Lucas_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20176947_Noah_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20177389_James_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20178687_Isabella_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20179461_William_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20179690_Linda_Thomas/\\n    \\u2502       
\\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20181056_Sarah_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20182020_Patricia_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20182390_Ethan_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20183149_David_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20183219_Charlotte_Williams/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20184489_Jessica_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20186154_Charlotte_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20186510_James_Thomas/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20187107_David_Martinez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20187144_Mary_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20187892_Christopher_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 
basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20187921_Mary_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20187967_Sarah_Davis/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20188937_James_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20189123_Mary_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20189192_Olivia_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20189268_Emma_Williams/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20189854_William_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20191265_Joseph_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20192725_Robert_Martinez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20194054_Michael_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20194160_Benjamin_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 
recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20194164_Sarah_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20194525_John_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20195164_Jennifer_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20195982_David_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20196776_William_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20196896_Olivia_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20196961_Joseph_Thomas/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20196998_Ethan_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20198548_Evelyn_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20199036_Benjamin_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20199583_Mary_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 
20199735_Mason_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20199872_Sophia_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20199980_James_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20201385_John_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20201800_John_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20202548_Robert_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20203855_Mia_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u2514\\u2500\\u2500 20204611_Sarah_Wilson/\\n            \\u251c\\u2500\\u2500 basic_info.txt\\n            \\u2514\\u2500\\u2500 recommendation_letter.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/student_database.zip\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/filesystem/easy/student_database/duplicate_name/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Student Database Task: Find Duplicate Names\nSimplified version that only checks against expected results without folder validation\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_namesake_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the namesake.txt file exists.\"\"\"\n    namesake_file = test_dir / \"namesake.txt\"\n    \n    if not namesake_file.exists():\n        print(\"❌ File 'namesake.txt' not found\")\n        return False\n    \n    print(\"✅ Namesake file found\")\n    return True\n\ndef parse_namesake_file(test_dir: Path) -> dict:\n    \"\"\"Parse the namesake.txt file and return structured data.\"\"\"\n    namesake_file = test_dir / \"namesake.txt\"\n    \n    try:\n        content = namesake_file.read_text()\n        lines = content.strip().split('\\n')\n        \n        namesakes = {}\n        current_line = 0\n        \n        while current_line < len(lines):\n            # Skip blank lines\n            if not lines[current_line].strip():\n                current_line += 1\n                continue\n            \n            # Check if we have enough lines for a complete group\n            if current_line + 2 >= len(lines):\n                print(f\"❌ Incomplete group at line {current_line + 1}\")\n                return {}\n            \n            # Parse group\n            name_line = lines[current_line].strip()\n            count_line = lines[current_line + 1].strip()\n            ids_line = lines[current_line + 2].strip()\n            \n            # Extract name\n            if not name_line.startswith(\"name: \"):\n                
print(f\"❌ Invalid name line format at line {current_line + 1}: {name_line}\")\n                return {}\n            name = name_line.replace(\"name: \", \"\").strip()\n            \n            # Extract count\n            if not count_line.startswith(\"count: \"):\n                print(f\"❌ Invalid count line format at line {current_line + 2}: {count_line}\")\n                return {}\n            count_str = count_line.replace(\"count: \", \"\").strip()\n            try:\n                count = int(count_str)\n            except ValueError:\n                print(f\"❌ Invalid count format: {count_str}\")\n                return {}\n            \n            # Extract IDs\n            if not ids_line.startswith(\"ids: \"):\n                print(f\"❌ Invalid ids line format at line {current_line + 3}: {ids_line}\")\n                return {}\n            ids_str = ids_line.replace(\"ids: \", \"\").strip()\n            ids = [id.strip() for id in ids_str.split(\",\")]\n            \n            namesakes[name] = {\n                'count': count,\n                'ids': ids\n            }\n            \n            current_line += 4  # Skip to next group (after blank line)\n        \n        return namesakes\n        \n    except Exception as e:\n        print(f\"❌ Error parsing namesake file: {e}\")\n        return {}\n\ndef verify_against_expected_results(namesakes: dict) -> bool:\n    \"\"\"Verify that exactly 1 duplicate name is found and it is correct.\"\"\"\n    \n    # Expected duplicate names from answer.md (hardcoded)\n    expected_duplicates = {\n        'Isabella Smith': ['20132026', '20133697'],\n        'Ava Lopez': ['20166564', '20166998'],\n        'James Moore': ['20159695', '20188937'],\n        'William Taylor': ['20175314', '20189854'],\n        'Ethan Wilson': ['20182390', '20196998'],\n        'Christopher Taylor': ['20128879', '20187892'],\n        'William Anderson': ['20142085', '20146277'],\n        'James Anderson': ['20147789', 
'20153606'],\n        'Olivia Jones': ['20189192', '20196896'],\n        'Mason Johnson': ['20115252', '20199735'],\n        'Benjamin Jackson': ['20153174', '20194160'],\n        'John Taylor': ['20194525', '20201385'],\n        'Susan Anderson': ['20148778', '20173517'],\n        'Christopher Moore': ['20112439', '20146279'],\n        'Sarah Wilson': ['20158819', '20204611'],\n        'Sarah Brown': ['20104498', '20108742']\n    }\n    \n    # Check if exactly 1 duplicate name is found\n    if len(namesakes) != 1:\n        print(f\"❌ Expected exactly 1 duplicate name, but found {len(namesakes)}\")\n        return False\n    \n    print(f\"✅ Found exactly 1 duplicate name (as required)\")\n    \n    # Check if the namesake in the file is actually a correct duplicate\n    for name, data in namesakes.items():\n        if name not in expected_duplicates:\n            print(f\"❌ '{name}' is not a duplicate name (not in expected list)\")\n            return False\n        \n        expected_ids = set(expected_duplicates[name])\n        stated_ids = set(data['ids'])\n        \n        if expected_ids != stated_ids:\n            print(f\"❌ ID mismatch for '{name}':\")\n            print(f\"   Expected: {sorted(expected_ids)}\")\n            print(f\"   Stated: {sorted(stated_ids)}\")\n            return False\n        \n        # Verify count matches\n        if data['count'] != 2:\n            print(f\"❌ Count mismatch for '{name}': expected 2, got {data['count']}\")\n            return False\n    \n    print(\"✅ The identified duplicate name is correct\")\n    print(\"✅ All student IDs match expected results\")\n    print(\"✅ Count is correct (2 for the duplicate name)\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Student Database Task: Find Duplicate Names...\")\n    \n    # Check if namesake file exists\n    print(\"\\n--- File Existence Check ---\")\n    if not 
verify_namesake_file_exists(test_dir):\n        print(\"\\n❌ Basic verification failed, cannot proceed with content verification\")\n        sys.exit(1)\n    \n    # Parse the file and run content verification\n    print(\"\\n--- Content Verification ---\")\n    namesakes = parse_namesake_file(test_dir)\n    \n    if not namesakes:\n        print(\"❌ Failed to parse namesake file\")\n        sys.exit(1)\n    \n    # Verify against expected results\n    print(\"\\n--- Results Verification ---\")\n    if not verify_against_expected_results(namesakes):\n        print(\"\\n❌ Task verification: FAIL\")\n        sys.exit(1)\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    print(\"✅ Namesake identification completed correctly!\")\n    print(f\"🎉 Found 1 duplicate name (exactly 1 required)\")\n    print(\"🎉 Task verification: PASS\")\n    sys.exit(0)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/easy/student_database/recommender_name/description.md",
    "content": "Please find the recommendation letter for Patricia Jones and identify who wrote it. Generate a `recommender.txt` file with only the author's name.\n"
  },
  {
    "path": "tasks/filesystem/easy/student_database/recommender_name/meta.json",
    "content": "{\n  \"task_id\": \"recommender_name\",\n  \"task_name\": \"Recommender Name\",\n  \"category_id\": \"student_database\",\n  \"category_name\": \"Student Database\",\n  \"description\": \"Read Patricia Jones's recommendation letter to capture who signed it and store only that name in recommender.txt.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data extraction\",\n    \"document search\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"student_database/\\n    \\u251c\\u2500\\u2500 20101250_Patricia_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20101701_Isabella_Davis/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20102572_Michael_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20104233_Robert_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20104498_Sarah_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20104653_Sophia_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20104675_Michael_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20104846_Christopher_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       
\\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20107487_Mia_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20108742_Sarah_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20109144_Emma_Thomas/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20109803_Oliver_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20111634_Isabella_Thomas/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20112439_Christopher_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20113368_William_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20113603_Robert_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20114397_Isabella_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20114869_Ethan_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20115252_Mason_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 
recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20115632_Elizabeth_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20115753_Charlotte_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20115924_Michael_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20116232_Olivia_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20119528_Thomas_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20122427_Karen_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20122977_Evelyn_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20123376_Joseph_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20125451_Barbara_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20126203_Barbara_Davis/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20126394_Olivia_Williams/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    
\\u251c\\u2500\\u2500 20126471_Ethan_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20127423_John_Williams/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20128249_Oliver_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20128879_Christopher_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20129898_Jessica_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20131271_Olivia_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20131518_Sophia_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20132026_Isabella_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20132370_James_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20132669_Noah_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20133527_Mason_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20133697_Isabella_Smith/\\n    
\\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20135821_Thomas_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20136681_Benjamin_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20136890_Benjamin_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20137514_Lucas_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20139234_Harper_Martinez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20139637_Noah_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20139647_Patricia_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20141421_Linda_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20142085_William_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20142383_Amelia_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20143406_Susan_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 
basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20143830_James_Garcia/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20146035_Christopher_Garcia/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20146277_William_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20146279_Christopher_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20147301_James_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20147789_James_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20148681_John_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20148778_Susan_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20149712_Jessica_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20151012_Harper_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20153174_Benjamin_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502      
 \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20153412_Charlotte_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20153606_James_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20153687_Richard_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20154518_John_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20154710_Benjamin_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20156469_Jennifer_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20156522_Jennifer_Martinez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20156851_Noah_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20157943_Harper_Williams/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20158266_Sophia_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20158294_Sophia_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 
recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20158819_Sarah_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20159113_John_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20159695_James_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20161279_William_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20162253_Mason_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20162542_Mia_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20163356_Ava_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20164515_Patricia_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20164801_Noah_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20165511_Mary_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20166436_Christopher_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    
\\u251c\\u2500\\u2500 20166487_Barbara_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20166564_Ava_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20166998_Ava_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20168311_Lucas_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20168491_Karen_Martinez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20169515_Thomas_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20171050_Christopher_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20171406_Mary_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20171613_Ethan_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20172106_Isabella_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20173259_Michael_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 
20173492_Richard_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20173501_Mary_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20173517_Susan_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20174207_Richard_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20174369_Mary_Garcia/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20175314_William_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20176169_Lucas_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20176947_Noah_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20177389_James_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20178687_Isabella_Anderson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20179461_William_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20179690_Linda_Thomas/\\n    \\u2502       
\\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20181056_Sarah_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20182020_Patricia_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20182390_Ethan_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20183149_David_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20183219_Charlotte_Williams/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20184489_Jessica_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20186154_Charlotte_Smith/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20186510_James_Thomas/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20187107_David_Martinez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20187144_Mary_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20187892_Christopher_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 
basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20187921_Mary_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20187967_Sarah_Davis/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20188937_James_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20189123_Mary_Martin/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20189192_Olivia_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20189268_Emma_Williams/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20189854_William_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20191265_Joseph_Lopez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20192725_Robert_Martinez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20194054_Michael_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20194160_Benjamin_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 
recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20194164_Sarah_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20194525_John_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20195164_Jennifer_Gonzalez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20195982_David_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20196776_William_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20196896_Olivia_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20196961_Joseph_Thomas/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20196998_Ethan_Wilson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20198548_Evelyn_Moore/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20199036_Benjamin_Hernandez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20199583_Mary_Brown/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 
20199735_Mason_Johnson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20199872_Sophia_Jackson/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20199980_James_Rodriguez/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20201385_John_Taylor/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20201800_John_Jones/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20202548_Robert_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u251c\\u2500\\u2500 20203855_Mia_Miller/\\n    \\u2502       \\u251c\\u2500\\u2500 basic_info.txt\\n    \\u2502       \\u2514\\u2500\\u2500 recommendation_letter.txt\\n    \\u2514\\u2500\\u2500 20204611_Sarah_Wilson/\\n            \\u251c\\u2500\\u2500 basic_info.txt\\n            \\u2514\\u2500\\u2500 recommendation_letter.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/student_database.zip\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/filesystem/easy/student_database/recommender_name/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Student Database Task: Find Recommender Name\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_recommender_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the recommender.txt file exists.\"\"\"\n    recommender_file = test_dir / \"recommender.txt\"\n    \n    if not recommender_file.exists():\n        print(\"❌ File 'recommender.txt' not found\")\n        return False\n    \n    print(\"✅ Recommender file found\")\n    return True\n\ndef verify_recommender_content(test_dir: Path) -> bool:\n    \"\"\"Verify that the recommender.txt file contains 'Brown'.\"\"\"\n    recommender_file = test_dir / \"recommender.txt\"\n    \n    try:\n        content = recommender_file.read_text()\n        \n        if \"Brown\" in content:\n            print(\"✅ Recommender name 'Brown' found in file\")\n            return True\n        else:\n            print(\"❌ Recommender name 'Brown' not found in file\")\n            print(f\"   File content: {content.strip()}\")\n            return False\n        \n    except Exception as e:\n        print(f\"❌ Error reading recommender file: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Student Database Task: Find Recommender Name...\")\n    \n    # Check if recommender file exists\n    print(\"\\n--- File Existence Check ---\")\n    if not verify_recommender_file_exists(test_dir):\n        print(\"\\n❌ Basic verification failed, cannot proceed with content verification\")\n        sys.exit(1)\n    \n    # Verify content\n    
print(\"\\n--- Content Verification ---\")\n    if not verify_recommender_content(test_dir):\n        print(\"\\n❌ Task verification: FAIL\")\n        sys.exit(1)\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    print(\"✅ Recommender identification completed correctly!\")\n    print(\"🎉 Task verification: PASS\")\n    sys.exit(0)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/filesystem/standard/desktop/music_report/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### 1. Data Loading\n\n- Read and extract song information from `jay_chou/`\n- Read and extract song information from `jj_lin/`\n\n### 2. Popularity Score Calculation\n\nFor each songs, calculate popularity scores using this formula (keep 3 decimal places):\n\n```\npopularity_score = (rating × 0.4) + (play_count_normalized × 0.4) + (year_factor × 0.2)\n\nWhere:\n- rating: song rating (1-5 scale)\n- play_count_normalized: play_count / 250 (0-1 scale)\n- year_factor: (2025 - release_year) / 25 (recency bonus)\n```\n\n### 3. Generate Analysis Report\n\nCreate a file named `music_analysis_report.txt`\n\n in the `music/` folder with the following exact format:\n\n**Lines 1-20**: Each line contains one song in format `songname:popularity_score`\n\n- Sort songs by popularity_score in descending order (highest first)\n- Use exact song names as they appear in the source files\n- Include all 20 songs from both artists\n\n**Lines 21-25**: Top 5 song names only (one per line)\n\n- List the top 5 songs by popularity_score\n- No scores, just song names\n- One song name per line\n\n**Important**: The file must contain exactly 25 lines with no additional content, headers, or formatting.\n"
  },
  {
    "path": "tasks/filesystem/standard/desktop/music_report/meta.json",
    "content": "{\n  \"task_id\": \"music_report\",\n  \"task_name\": \"Music Report\",\n  \"category_id\": \"desktop\",\n  \"category_name\": \"Desktop\",\n  \"description\": \"Search and analyze desktop music files to generate a scored recommendation list using specified computation rules and criteria.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"desktop/\\n    ├── exp_logs/\\n    │       ├── aug/\\n    │       │       └── augmentation_log.txt\\n    │       ├── project_1/\\n    │       │       ├── data.csv\\n    │       │       ├── model.py\\n    │       │       └── README.md\\n    │       ├── project_2/\\n    │       │       ├── analysis_report.md\\n    │       │       └── data_analysis.py\\n    │       ├── sep/\\n    │       │       └── september_summary.csv\\n    │       ├── exp_record.md\\n    │       ├── experiment_summary.md\\n    │       └── results_record.csv\\n    ├── learning/\\n    │       ├── 2024/\\n    │       │       └── learning_progress.csv\\n    │       ├── 2025/\\n    │       │       └── learning_roadmap.md\\n    │       ├── activities/\\n    │       │       └── study_notes.py\\n    │       ├── research/\\n    │       │       └── research_topics.md\\n    │       ├── schedule/\\n    │       │       └── weekly_schedule.csv\\n    │       └── learning_goals.md\\n    ├── music/\\n    │       ├── beni/\\n    │       │       └── playlist_manager.py\\n    │       ├── jay_chou/\\n    │       │       └── favorite_songs.csv\\n    │       ├── jj_lin/\\n    │       │       └── top_songs.txt\\n    │       └── music_collection.md\\n    ├── old_homebrew/\\n    │       ├── 2023-09-23_22/\\n    │       │       ├── opt/\\n    │       │       └── Users/\\n    │       └── 2023-09-23_23/\\n    │               ├── opt/\\n    │     
          └── Users/\\n    ├── play/\\n    │       ├── game_plan/\\n    │       │       └── gaming_schedule.md\\n    │       ├── hongkong_tour/\\n    │       │       └── travel_itinerary.csv\\n    │       ├── kit&shoes_collection/\\n    │       │       └── inventory.py\\n    │       └── others/\\n    │               └── entertainment_planner.md\\n    └── travel_plan/\\n            ├── travel_bucket_list.md\\n            └── travel_calculator.py\\n\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/desktop.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/desktop/music_report/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Desktop 2 Music Report Task: Music Collection Analysis\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\n# Hardcoded expected data from answer.json\nEXPECTED_SONGS = [\n    {\"song_name\": \"晴天\", \"popularity_score\": 2.576},\n    {\"song_name\": \"七里香\", \"popularity_score\": 2.488},\n    {\"song_name\": \"江南\", \"popularity_score\": 2.488},\n    {\"song_name\": \"夜曲\", \"popularity_score\": 2.448},\n    {\"song_name\": \"一千年以后\", \"popularity_score\": 2.44},\n    {\"song_name\": \"稻香\", \"popularity_score\": 2.376},\n    {\"song_name\": \"青花瓷\", \"popularity_score\": 2.336},\n    {\"song_name\": \"不为谁而作的歌\", \"popularity_score\": 2.32},\n    {\"song_name\": \"学不会\", \"popularity_score\": 2.304},\n    {\"song_name\": \"小酒窝\", \"popularity_score\": 2.264},\n    {\"song_name\": \"可惜没如果\", \"popularity_score\": 2.248},\n    {\"song_name\": \"修炼爱情\", \"popularity_score\": 2.24},\n    {\"song_name\": \"背对背拥抱\", \"popularity_score\": 2.24},\n    {\"song_name\": \"爱笑的眼睛\", \"popularity_score\": 2.232},\n    {\"song_name\": \"她说\", \"popularity_score\": 2.216},\n    {\"song_name\": \"简单爱\", \"popularity_score\": 1.952},\n    {\"song_name\": \"龙卷风\", \"popularity_score\": 1.936},\n    {\"song_name\": \"双截棍\", \"popularity_score\": 1.92},\n    {\"song_name\": \"可爱女人\", \"popularity_score\": 1.912},\n    {\"song_name\": \"星晴\", \"popularity_score\": 1.896}\n]\n\nEXPECTED_TOP_5 = [\"晴天\", \"七里香\", \"江南\", \"夜曲\", \"一千年以后\"]\n\ndef verify_report_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the music_analysis_report.txt file exists.\"\"\"\n    report_file = test_dir / \"music\" / 
\"music_analysis_report.txt\"\n    \n    if not report_file.exists():\n        print(\"❌ 'music_analysis_report.txt' file not found in music/ folder\")\n        return False\n    \n    if not report_file.is_file():\n        print(\"❌ 'music_analysis_report.txt' exists but is not a file\")\n        return False\n    \n    print(\"✅ 'music_analysis_report.txt' file exists\")\n    return True\n\ndef verify_file_content_structure(test_dir: Path) -> bool:\n    \"\"\"Verify that the file has exactly 25 lines.\"\"\"\n    report_file = test_dir / \"music\" / \"music_analysis_report.txt\"\n    \n    try:\n        content = report_file.read_text(encoding='utf-8')\n        lines = content.strip().split('\\n')\n        \n        if len(lines) != 25:\n            print(f\"❌ File should have exactly 25 lines, but has {len(lines)}\")\n            return False\n        \n        print(\"✅ File has exactly 25 lines\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading file content: {e}\")\n        return False\n\ndef verify_song_ranking_format(test_dir: Path) -> bool:\n    \"\"\"Verify that lines 1-20 contain songs with scores in correct format.\"\"\"\n    report_file = test_dir / \"music\" / \"music_analysis_report.txt\"\n    \n    try:\n        content = report_file.read_text(encoding='utf-8')\n        lines = content.strip().split('\\n')\n        \n        # Check lines 1-20 (index 0-19)\n        for i in range(20):\n            line = lines[i].strip()\n            if not line:\n                print(f\"❌ Line {i+1} is empty\")\n                return False\n            \n            # Check format: songname:popularity_score\n            if ':' not in line:\n                print(f\"❌ Line {i+1} missing colon separator: '{line}'\")\n                return False\n            \n            parts = line.split(':', 1)\n            if len(parts) != 2:\n                print(f\"❌ Line {i+1} has incorrect format: '{line}'\")\n                
return False\n            \n            song_name, score_str = parts\n            \n            if not song_name.strip():\n                print(f\"❌ Line {i+1} has empty song name: '{line}'\")\n                return False\n            \n            try:\n                score = float(score_str.strip())\n                if score < 0 or score > 5:\n                    print(f\"❌ Line {i+1} has invalid score range: {score}\")\n                    return False\n            except ValueError:\n                print(f\"❌ Line {i+1} has invalid score format: '{score_str}'\")\n                return False\n        \n        print(\"✅ Lines 1-20 have correct song:score format\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking song ranking format: {e}\")\n        return False\n\ndef verify_song_ranking_order_with_tolerance(test_dir: Path) -> bool:\n    \"\"\"Verify that songs are ranked by popularity score in descending order, allowing equal scores to be swapped.\"\"\"\n    report_file = test_dir / \"music\" / \"music_analysis_report.txt\"\n    \n    try:\n        content = report_file.read_text(encoding='utf-8')\n        lines = content.strip().split('\\n')\n        \n        scores = []\n        for i in range(20):\n            line = lines[i].strip()\n            parts = line.split(':', 1)\n            score = float(parts[1].strip())\n            scores.append(score)\n        \n        # Check if scores are in descending order, allowing equal scores to be adjacent\n        for i in range(1, len(scores)):\n            if scores[i] > scores[i-1]:\n                print(f\"❌ Scores not in descending order: {scores[i-1]} < {scores[i]} at line {i+1}\")\n                return False\n        \n        print(\"✅ Songs are ranked by popularity score in descending order (allowing equal scores)\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking song ranking order: {e}\")\n        return 
False\n\ndef verify_song_names_match_expected(test_dir: Path) -> bool:\n    \"\"\"Verify that all expected song names are present in the ranking.\"\"\"\n    report_file = test_dir / \"music\" / \"music_analysis_report.txt\"\n    \n    try:\n        content = report_file.read_text(encoding='utf-8')\n        lines = content.strip().split('\\n')\n        \n        found_songs = []\n        for i in range(20):\n            line = lines[i].strip()\n            song_name = line.split(':', 1)[0].strip()\n            found_songs.append(song_name)\n        \n        # Check if all expected songs are present\n        missing_songs = []\n        for expected_song in EXPECTED_SONGS:\n            if expected_song[\"song_name\"] not in found_songs:\n                missing_songs.append(expected_song[\"song_name\"])\n        \n        if missing_songs:\n            print(f\"❌ Missing expected songs: {missing_songs}\")\n            return False\n        \n        print(\"✅ All expected song names are present\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking song names: {e}\")\n        return False\n\ndef verify_popularity_scores_match_expected(test_dir: Path) -> bool:\n    \"\"\"Verify that popularity scores match the expected values.\"\"\"\n    report_file = test_dir / \"music\" / \"music_analysis_report.txt\"\n    \n    try:\n        content = report_file.read_text(encoding='utf-8')\n        lines = content.strip().split('\\n')\n        \n        score_errors = []\n        for i in range(20):\n            line = lines[i].strip()\n            parts = line.split(':', 1)\n            song_name = parts[0].strip()\n            actual_score = float(parts[1].strip())\n            \n            # Find expected score for this song\n            expected_score = None\n            for expected_song in EXPECTED_SONGS:\n                if expected_song[\"song_name\"] == song_name:\n                    expected_score = 
expected_song[\"popularity_score\"]\n                    break\n            \n            if expected_score is not None:\n                # Allow small floating point precision differences\n                if abs(actual_score - expected_score) > 0.001:\n                    score_errors.append(f\"{song_name}: expected {expected_score}, got {actual_score}\")\n        \n        if score_errors:\n            print(f\"❌ Score mismatches: {score_errors}\")\n            return False\n        \n        print(\"✅ All popularity scores match expected values\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking popularity scores: {e}\")\n        return False\n\ndef verify_top_5_songs(test_dir: Path) -> bool:\n    \"\"\"Verify that lines 21-25 contain the top 5 song names, allowing equal scores to be in different order.\"\"\"\n    report_file = test_dir / \"music\" / \"music_analysis_report.txt\"\n    \n    try:\n        content = report_file.read_text(encoding='utf-8')\n        lines = content.strip().split('\\n')\n        \n        # Check lines 21-25 (index 20-24)\n        found_top_5 = []\n        for i in range(5):\n            line_num = i + 21\n            line = lines[i + 20].strip()  # Index 20-24 for lines 21-25\n            \n            if not line:\n                print(f\"❌ Line {line_num} is empty\")\n                return False\n            \n            if ':' in line:\n                print(f\"❌ Line {line_num} should not contain colon: '{line}'\")\n                return False\n            \n            found_top_5.append(line)\n        \n        # Check if all expected top 5 songs are present (order doesn't matter for equal scores)\n        missing_songs = []\n        for expected_song in EXPECTED_TOP_5:\n            if expected_song not in found_top_5:\n                missing_songs.append(expected_song)\n        \n        if missing_songs:\n            print(f\"❌ Missing expected top 5 songs: 
{missing_songs}\")\n            return False\n        \n        # Check if the order is valid (allowing equal scores to be swapped)\n        # Since 七里香 and 江南 both have score 2.488, they can be in either order\n        valid_orders = [\n            [\"晴天\", \"七里香\", \"江南\", \"夜曲\", \"一千年以后\"],  # Original order\n            [\"晴天\", \"江南\", \"七里香\", \"夜曲\", \"一千年以后\"],  # Swapped 七里香 and 江南\n        ]\n        \n        order_valid = False\n        for valid_order in valid_orders:\n            if found_top_5 == valid_order:\n                order_valid = True\n                break\n        \n        if not order_valid:\n            print(f\"❌ Top 5 songs order is invalid. Found: {found_top_5}\")\n            print(f\"Expected one of: {valid_orders}\")\n            return False\n        \n        print(\"✅ Lines 21-25 contain correct top 5 song names in valid order\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking top 5 songs: {e}\")\n        return False\n\ndef verify_no_extra_content(test_dir: Path) -> bool:\n    \"\"\"Verify that the file contains no extra content beyond the 25 lines.\"\"\"\n    report_file = test_dir / \"music\" / \"music_analysis_report.txt\"\n    \n    try:\n        content = report_file.read_text(encoding='utf-8')\n        lines = content.strip().split('\\n')\n        \n        if len(lines) != 25:\n            print(f\"❌ File should have exactly 25 lines, but has {len(lines)}\")\n            return False\n        \n        print(\"✅ File contains exactly 25 lines with no extra content\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking for extra content: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Desktop 2 Music Report Task: Music Collection Analysis...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Report 
File Exists\", verify_report_file_exists),\n        (\"File Content Structure\", verify_file_content_structure),\n        (\"Song Ranking Format\", verify_song_ranking_format),\n        (\"Song Ranking Order\", verify_song_ranking_order_with_tolerance),\n        (\"Song Names Match Expected\", verify_song_names_match_expected),\n        (\"Popularity Scores Match Expected\", verify_popularity_scores_match_expected),\n        (\"Top 5 Songs\", verify_top_5_songs),\n        (\"No Extra Content\", verify_no_extra_content),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Music collection analysis completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/desktop/project_management/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n1. **Create the main directory structure** in `desktop_2`:\n\n   - Create a new directory in main directory called `organized_projects`\n   - Inside `organized_projects`, create 3 main subdirectories: `experiments`, `learning`, and `personal`\n   - Inside `experiments`, create 2 subdirectories: `ml_projects` and `data_analysis`\n   - Inside `learning`, create 2 subdirectories: `progress_tracking` and `resources`\n   - Inside `personal`, create 2 subdirectories: `entertainment` and `collections`\n2. **Move all the Python files** to `experiments/ml_projects/`:\n3. **Move all the CSV files** to `experiments/data_analysis/`:\n4. **Only Move learning-related markdown files** to `learning/resources/`:\n5. **Only Move entertainment planning-related markdown files** to `personal/entertainment/`:\n6. **Only Move music collection-related markdown files** to `personal/collections/`:\n7. **step 4.5.6 should move all the markdown files.**\n8. **Create a project structure documentation file**:\n\n   - Create `project_structure.md` in the `organized_projects` directory\n   - Document the new organization with exact file counts for each subdirectory\n   - Include a summary of what types of files are in each directory\n"
  },
  {
    "path": "tasks/filesystem/standard/desktop/project_management/meta.json",
    "content": "{\n  \"task_id\": \"project_management\",\n  \"task_name\": \"Project Management\",\n  \"category_id\": \"desktop\",\n  \"category_name\": \"Desktop\",\n  \"description\": \"Reorganize scattered desktop files into a structured project directory system based on content type, purpose, and file format analysis.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"file organization\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"desktop/\\n    ├── exp_logs/\\n    │       ├── aug/\\n    │       │       └── augmentation_log.txt\\n    │       ├── project_1/\\n    │       │       ├── data.csv\\n    │       │       ├── model.py\\n    │       │       └── README.md\\n    │       ├── project_2/\\n    │       │       ├── analysis_report.md\\n    │       │       └── data_analysis.py\\n    │       ├── sep/\\n    │       │       └── september_summary.csv\\n    │       ├── exp_record.md\\n    │       ├── experiment_summary.md\\n    │       └── results_record.csv\\n    ├── learning/\\n    │       ├── 2024/\\n    │       │       └── learning_progress.csv\\n    │       ├── 2025/\\n    │       │       └── learning_roadmap.md\\n    │       ├── activities/\\n    │       │       └── study_notes.py\\n    │       ├── research/\\n    │       │       └── research_topics.md\\n    │       ├── schedule/\\n    │       │       └── weekly_schedule.csv\\n    │       └── learning_goals.md\\n    ├── music/\\n    │       ├── beni/\\n    │       │       └── playlist_manager.py\\n    │       ├── jay_chou/\\n    │       │       └── favorite_songs.csv\\n    │       ├── jj_lin/\\n    │       │       └── top_songs.txt\\n    │       └── music_collection.md\\n    ├── old_homebrew/\\n    │       ├── 2023-09-23_22/\\n    │       │       ├── opt/\\n    │       │       └── Users/\\n    │       └── 2023-09-23_23/\\n    │               ├── opt/\\n    │          
     └── Users/\\n    ├── play/\\n    │       ├── game_plan/\\n    │       │       └── gaming_schedule.md\\n    │       ├── hongkong_tour/\\n    │       │       └── travel_itinerary.csv\\n    │       ├── kit&shoes_collection/\\n    │       │       └── inventory.py\\n    │       └── others/\\n    │               └── entertainment_planner.md\\n    └── travel_plan/\\n            ├── travel_bucket_list.md\\n            └── travel_calculator.py\\n\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/desktop.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/desktop/project_management/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Desktop 2 Project Management Task: File Reorganization\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_organized_projects_directory_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the organized_projects directory exists.\"\"\"\n    organized_dir = test_dir / \"organized_projects\"\n    \n    if not organized_dir.exists():\n        print(\"❌ 'organized_projects' directory not found\")\n        return False\n    \n    if not organized_dir.is_dir():\n        print(\"❌ 'organized_projects' exists but is not a directory\")\n        return False\n    \n    print(\"✅ 'organized_projects' directory exists\")\n    return True\n\ndef verify_directory_structure(test_dir: Path) -> bool:\n    \"\"\"Verify that all required subdirectories exist.\"\"\"\n    organized_dir = test_dir / \"organized_projects\"\n    \n    required_dirs = [\n        \"experiments\",\n        \"experiments/ml_projects\",\n        \"experiments/data_analysis\",\n        \"learning\",\n        \"learning/progress_tracking\",\n        \"learning/resources\",\n        \"personal\",\n        \"personal/entertainment\",\n        \"personal/collections\"\n    ]\n    \n    missing_dirs = []\n    for dir_path in required_dirs:\n        full_path = organized_dir / dir_path\n        if not full_path.exists():\n            missing_dirs.append(dir_path)\n        elif not full_path.is_dir():\n            missing_dirs.append(f\"{dir_path} (not a directory)\")\n    \n    if missing_dirs:\n        print(f\"❌ Missing or invalid directories: {missing_dirs}\")\n        return False\n    \n    print(\"✅ All required 
directory structure created correctly\")\n    return True\n\ndef verify_python_files_in_ml_projects(test_dir: Path) -> bool:\n    \"\"\"Verify that all Python files are moved to experiments/ml_projects.\"\"\"\n    organized_dir = test_dir / \"organized_projects\"\n    ml_projects_dir = organized_dir / \"experiments\" / \"ml_projects\"\n    \n    expected_python_files = [\n        \"study_notes.py\",\n        \"model.py\",\n        \"data_analysis.py\",\n        \"travel_calculator.py\",\n        \"inventory.py\",\n        \"playlist_manager.py\"\n    ]\n    \n    missing_files = []\n    for filename in expected_python_files:\n        file_path = ml_projects_dir / filename\n        if not file_path.exists():\n            missing_files.append(filename)\n    \n    if missing_files:\n        print(f\"❌ Missing Python files in ml_projects: {missing_files}\")\n        return False\n    \n    print(\"✅ All Python files moved to experiments/ml_projects\")\n    return True\n\ndef verify_csv_files_in_data_analysis(test_dir: Path) -> bool:\n    \"\"\"Verify that all CSV files are moved to experiments/data_analysis.\"\"\"\n    organized_dir = test_dir / \"organized_projects\"\n    data_analysis_dir = organized_dir / \"experiments\" / \"data_analysis\"\n    \n    expected_csv_files = [\n        \"learning_progress.csv\",\n        \"weekly_schedule.csv\",\n        \"results_record.csv\",\n        \"september_summary.csv\",\n        \"data.csv\",\n        \"favorite_songs.csv\",\n        \"travel_itinerary.csv\"\n    ]\n    \n    missing_files = []\n    for filename in expected_csv_files:\n        file_path = data_analysis_dir / filename\n        if not file_path.exists():\n            missing_files.append(filename)\n    \n    if missing_files:\n        print(f\"❌ Missing CSV files in data_analysis: {missing_files}\")\n        return False\n    \n    print(\"✅ All CSV files moved to experiments/data_analysis\")\n    return True\n\ndef 
verify_learning_md_files_in_resources(test_dir: Path) -> bool:\n    \"\"\"Verify that learning-related markdown files are moved to learning/resources.\"\"\"\n    organized_dir = test_dir / \"organized_projects\"\n    resources_dir = organized_dir / \"learning\" / \"resources\"\n    \n    expected_learning_files = [\n        \"learning_roadmap.md\",\n        \"research_topics.md\",\n        \"experiment_summary.md\",\n        \"exp_record.md\",\n        \"README.md\",\n        \"analysis_report.md\",\n        \"learning_goals.md\"\n    ]\n    \n    missing_files = []\n    for filename in expected_learning_files:\n        file_path = resources_dir / filename\n        if not file_path.exists():\n            missing_files.append(filename)\n    \n    if missing_files:\n        print(f\"❌ Missing learning markdown files in resources: {missing_files}\")\n        return False\n    \n    print(\"✅ All learning markdown files moved to learning/resources\")\n    return True\n\ndef verify_entertainment_md_files_in_entertainment(test_dir: Path) -> bool:\n    \"\"\"Verify that entertainment planning markdown files are moved to personal/entertainment.\"\"\"\n    organized_dir = test_dir / \"organized_projects\"\n    entertainment_dir = organized_dir / \"personal\" / \"entertainment\"\n    \n    expected_entertainment_files = [\n        \"gaming_schedule.md\",\n        \"entertainment_planner.md\",\n        \"travel_bucket_list.md\"\n    ]\n    \n    missing_files = []\n    for filename in expected_entertainment_files:\n        file_path = entertainment_dir / filename\n        if not file_path.exists():\n            missing_files.append(filename)\n    \n    if missing_files:\n        print(f\"❌ Missing entertainment markdown files in entertainment: {missing_files}\")\n        return False\n    \n    print(\"✅ All entertainment markdown files moved to personal/entertainment\")\n    return True\n\ndef verify_music_md_files_in_collections(test_dir: Path) -> bool:\n    \"\"\"Verify 
that music collection markdown files are moved to personal/collections.\"\"\"\n    organized_dir = test_dir / \"organized_projects\"\n    collections_dir = organized_dir / \"personal\" / \"collections\"\n    \n    expected_music_files = [\n        \"music_collection.md\"\n    ]\n    \n    missing_files = []\n    for filename in expected_music_files:\n        file_path = collections_dir / filename\n        if not file_path.exists():\n            missing_files.append(filename)\n    \n    if missing_files:\n        print(f\"❌ Missing music collection markdown files in collections: {filename}\")\n        return False\n    \n    print(\"✅ All music collection markdown files moved to personal/collections\")\n    return True\n\ndef verify_progress_tracking_empty(test_dir: Path) -> bool:\n    \"\"\"Verify that progress_tracking directory is empty.\"\"\"\n    organized_dir = test_dir / \"organized_projects\"\n    progress_dir = organized_dir / \"learning\" / \"progress_tracking\"\n    \n    files_in_progress = list(progress_dir.iterdir())\n    if files_in_progress:\n        print(f\"❌ progress_tracking directory should be empty, but contains: {[f.name for f in files_in_progress]}\")\n        return False\n    \n    print(\"✅ progress_tracking directory is correctly empty\")\n    return True\n\ndef verify_project_structure_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that project_structure.md file exists.\"\"\"\n    organized_dir = test_dir / \"organized_projects\"\n    structure_file = organized_dir / \"project_structure.md\"\n    \n    if not structure_file.exists():\n        print(\"❌ 'project_structure.md' file not found\")\n        return False\n    \n    if not structure_file.is_file():\n        print(\"❌ 'project_structure.md' exists but is not a file\")\n        return False\n    \n    print(\"✅ 'project_structure.md' file exists\")\n    return True\n\ndef verify_file_counts(test_dir: Path) -> bool:\n    \"\"\"Verify that each directory has the correct 
number of files.\"\"\"\n    organized_dir = test_dir / \"organized_projects\"\n    \n    expected_counts = {\n        \"experiments/ml_projects\": 6,      # 6 Python files\n        \"experiments/data_analysis\": 7,    # 7 CSV files\n        \"learning/resources\": 7,           # 7 learning markdown files\n        \"learning/progress_tracking\": 0,   # 0 files (empty)\n        \"personal/entertainment\": 3,       # 3 entertainment markdown files\n        \"personal/collections\": 1          # 1 music collection markdown file\n    }\n    \n    incorrect_counts = []\n    for dir_path, expected_count in expected_counts.items():\n        full_path = organized_dir / dir_path\n        actual_count = len([f for f in full_path.iterdir() if f.is_file()])\n        \n        if actual_count != expected_count:\n            incorrect_counts.append(f\"{dir_path}: expected {expected_count}, got {actual_count}\")\n    \n    if incorrect_counts:\n        print(f\"❌ Incorrect file counts: {incorrect_counts}\")\n        return False\n    \n    print(\"✅ All directories have correct file counts\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Desktop 2 Project Management Task: File Reorganization...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Organized Projects Directory Exists\", verify_organized_projects_directory_exists),\n        (\"Directory Structure\", verify_directory_structure),\n        (\"Python Files in ML Projects\", verify_python_files_in_ml_projects),\n        (\"CSV Files in Data Analysis\", verify_csv_files_in_data_analysis),\n        (\"Learning Markdown Files in Resources\", verify_learning_md_files_in_resources),\n        (\"Entertainment Markdown Files in Entertainment\", verify_entertainment_md_files_in_entertainment),\n        (\"Music Collection Files in Collections\", verify_music_md_files_in_collections),\n        (\"Progress 
Tracking Empty\", verify_progress_tracking_empty),\n        (\"Project Structure File Exists\", verify_project_structure_file_exists),\n        (\"File Counts\", verify_file_counts),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Desktop 2 project reorganization completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/desktop/timeline_extraction/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\nRead all the files under current path, extract every time/plan information that clearly indicates 2024, and integrate them into a list and create a file in main directory called `timeline.txt`. Write the timeline in the file in the following format.\n\n### Rules\n- If a task only shows month without day, use the 1st day of that month\n- If a task only shows year without month and day, skip it.\n- If a file shows multiple tasks on the same date, count only once per date\n\n### Output Format\n- Each line format: `file_path:time`\n    - `file_path`: The file path where this time information appears (**relative to the current path**)\n    - `time`: Specific time, if it's a time period, write the start time (YYYY-MM-DD)\n\n### Sorting Requirements\n- Sort by chronological order\n"
  },
  {
    "path": "tasks/filesystem/standard/desktop/timeline_extraction/meta.json",
    "content": "{\n  \"task_id\": \"timeline_extraction\",\n  \"task_name\": \"Timeline Extraction\",\n  \"category_id\": \"desktop\",\n  \"category_name\": \"Desktop\",\n  \"description\": \"Extract temporal event information from various desktop files and compile a comprehensive chronological timeline of activities and milestones.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"desktop/\\n    ├── exp_logs/\\n    │       ├── aug/\\n    │       │       └── augmentation_log.txt\\n    │       ├── project_1/\\n    │       │       ├── data.csv\\n    │       │       ├── model.py\\n    │       │       └── README.md\\n    │       ├── project_2/\\n    │       │       ├── analysis_report.md\\n    │       │       └── data_analysis.py\\n    │       ├── sep/\\n    │       │       └── september_summary.csv\\n    │       ├── exp_record.md\\n    │       ├── experiment_summary.md\\n    │       └── results_record.csv\\n    ├── learning/\\n    │       ├── 2024/\\n    │       │       └── learning_progress.csv\\n    │       ├── 2025/\\n    │       │       └── learning_roadmap.md\\n    │       ├── activities/\\n    │       │       └── study_notes.py\\n    │       ├── research/\\n    │       │       └── research_topics.md\\n    │       ├── schedule/\\n    │       │       └── weekly_schedule.csv\\n    │       └── learning_goals.md\\n    ├── music/\\n    │       ├── beni/\\n    │       │       └── playlist_manager.py\\n    │       ├── jay_chou/\\n    │       │       └── favorite_songs.csv\\n    │       ├── jj_lin/\\n    │       │       └── top_songs.txt\\n    │       └── music_collection.md\\n    ├── old_homebrew/\\n    │       ├── 2023-09-23_22/\\n    │       │       ├── opt/\\n    │       │       └── Users/\\n    │       └── 2023-09-23_23/\\n    │       
        ├── opt/\\n    │               └── Users/\\n    ├── play/\\n    │       ├── game_plan/\\n    │       │       └── gaming_schedule.md\\n    │       ├── hongkong_tour/\\n    │       │       └── travel_itinerary.csv\\n    │       ├── kit&shoes_collection/\\n    │       │       └── inventory.py\\n    │       └── others/\\n    │               └── entertainment_planner.md\\n    └── travel_plan/\\n            ├── travel_bucket_list.md\\n            └── travel_calculator.py\\n\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/desktop.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/desktop/timeline_extraction/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Desktop 2 Timeline Extraction Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\nimport re\nfrom datetime import datetime\nfrom typing import List, Tuple, Set\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_timeline_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the timeline.txt file exists in the main directory.\"\"\"\n    timeline_file = test_dir / \"timeline.txt\"\n    \n    if not timeline_file.exists():\n        print(\"❌ 'timeline.txt' file not found in main directory\")\n        return False\n    \n    if not timeline_file.is_file():\n        print(\"❌ 'timeline.txt' exists but is not a file\")\n        return False\n    \n    print(\"✅ 'timeline.txt' file exists in main directory\")\n    return True\n\ndef verify_timeline_file_readable(test_dir: Path) -> bool:\n    \"\"\"Verify that the timeline.txt file is readable.\"\"\"\n    timeline_file = test_dir / \"timeline.txt\"\n    \n    try:\n        content = timeline_file.read_text(encoding='utf-8')\n        if not content.strip():\n            print(\"❌ 'timeline.txt' file is empty\")\n            return False\n        \n        print(\"✅ 'timeline.txt' file is readable\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading 'timeline.txt' file: {e}\")\n        return False\n\ndef verify_line_count(test_dir: Path) -> bool:\n    \"\"\"Verify that the timeline.txt file has exactly 43 lines.\"\"\"\n    timeline_file = test_dir / \"timeline.txt\"\n    \n    try:\n        content = timeline_file.read_text(encoding='utf-8')\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        \n 
       if len(lines) != 43:\n            print(f\"❌ Expected 43 lines, but found {len(lines)} lines\")\n            return False\n        \n        print(f\"✅ File contains exactly {len(lines)} lines\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking line count: {e}\")\n        return False\n\ndef verify_line_format(test_dir: Path) -> bool:\n    \"\"\"Verify that each line contains both file path and date time information.\"\"\"\n    timeline_file = test_dir / \"timeline.txt\"\n    \n    try:\n        content = timeline_file.read_text(encoding='utf-8')\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        \n        # More flexible pattern: just check if line contains both path-like content and date-like content\n        date_pattern = r'\\d{4}-\\d{2}-\\d{2}'  # YYYY-MM-DD format\n        \n        invalid_lines = []\n        for i, line in enumerate(lines, 1):\n            # Check if line contains a date\n            if not re.search(date_pattern, line):\n                invalid_lines.append(f\"Line {i}: '{line}' (no valid date found)\")\n                continue\n            \n            # Check if line contains path-like content (contains '/' or '.' and not just a date)\n            # More flexible: look for path anywhere in the line, not just at the beginning\n            path_found = False\n            \n            # Split line into words and look for path-like content\n            words = line.split()\n            for word in words:\n                # Check if word looks like a file path (contains '/' or '.' and not just a date)\n                if ('/' in word or '.' 
in word) and not re.match(r'^\\d{4}-\\d{2}-\\d{2}$', word.strip()):\n                    path_found = True\n                    break\n            \n            # Also check if line contains path-like content with colon separator\n            if ':' in line:\n                parts = line.split(':')\n                for part in parts:\n                    if ('/' in part or '.' in part) and not re.match(r'^\\d{4}-\\d{2}-\\d{2}$', part.strip()):\n                        path_found = True\n                        break\n            \n            if not path_found:\n                invalid_lines.append(f\"Line {i}: '{line}' (no valid path found)\")\n                continue\n        \n        if invalid_lines:\n            print(f\"❌ Invalid line format found: {invalid_lines[:5]}...\")\n            return False\n        \n        print(\"✅ All lines contain both file path and date time information\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking line format: {e}\")\n        return False\n\ndef verify_date_format(test_dir: Path) -> bool:\n    \"\"\"Verify that all dates are in valid YYYY-MM-DD format.\"\"\"\n    timeline_file = test_dir / \"timeline.txt\"\n    \n    try:\n        content = timeline_file.read_text(encoding='utf-8')\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        \n        invalid_dates = []\n        for i, line in enumerate(lines, 1):\n            try:\n                # Find date pattern in the line (more flexible)\n                date_match = re.search(r'\\d{4}-\\d{2}-\\d{2}', line)\n                if not date_match:\n                    invalid_dates.append(f\"Line {i}: '{line}' (no date found)\")\n                    continue\n                \n                date_part = date_match.group()\n                datetime.strptime(date_part, '%Y-%m-%d')\n            except (IndexError, ValueError) as e:\n                invalid_dates.append(f\"Line {i}: '{line}' 
(invalid date: {e})\")\n        \n        if invalid_dates:\n            print(f\"❌ Invalid date format found: {invalid_dates[:5]}...\")\n            return False\n        \n        print(\"✅ All dates are in valid YYYY-MM-DD format\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking date format: {e}\")\n        return False\n\ndef verify_chronological_order(test_dir: Path) -> bool:\n    \"\"\"Verify that dates are in chronological order.\"\"\"\n    timeline_file = test_dir / \"timeline.txt\"\n    \n    try:\n        content = timeline_file.read_text(encoding='utf-8')\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        \n        dates = []\n        for line in lines:\n            # Find date pattern in the line (more flexible)\n            date_match = re.search(r'\\d{4}-\\d{2}-\\d{2}', line)\n            if date_match:\n                date_obj = datetime.strptime(date_match.group(), '%Y-%m-%d')\n                dates.append(date_obj)\n        \n        # Check if dates are in ascending order\n        for i in range(1, len(dates)):\n            if dates[i] < dates[i-1]:\n                print(f\"❌ Date order violation: {dates[i-1].strftime('%Y-%m-%d')} comes after {dates[i].strftime('%Y-%m-%d')}\")\n                return False\n        \n        print(\"✅ All dates are in chronological order\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking chronological order: {e}\")\n        return False\n\ndef verify_expected_entries(test_dir: Path) -> bool:\n    \"\"\"Verify that all expected entries from answer.txt are present.\"\"\"\n    timeline_file = test_dir / \"timeline.txt\"\n    \n    try:\n        content = timeline_file.read_text(encoding='utf-8')\n        actual_lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        \n        # Expected entries from answer.txt\n        expected_entries = {\n            
\"exp_logs/project_2/analysis_report.md:2024-01-01\",\n            \"learning/2024/learning_progress.csv:2024-01-01\",\n            \"exp_logs/experiment_summary.md:2024-01-05\",\n            \"play/kit&shoes_collection/inventory.py:2024-01-05\",\n            \"exp_logs/experiment_summary.md:2024-01-10\",\n            \"play/kit&shoes_collection/inventory.py:2024-01-10\",\n            \"exp_logs/aug/augmentation_log.txt:2024-01-15\",\n            \"exp_logs/experiment_summary.md:2024-01-15\",\n            \"play/kit&shoes_collection/inventory.py:2024-01-15\",\n            \"learning/2024/learning_progress.csv:2024-02-01\",\n            \"learning/2024/learning_progress.csv:2024-03-01\",\n            \"play/hongkong_tour/travel_itinerary.csv:2024-03-15\",\n            \"travel_plan/travel_calculator.py:2024-03-15\",\n            \"play/hongkong_tour/travel_itinerary.csv:2024-03-16\",\n            \"play/hongkong_tour/travel_itinerary.csv:2024-03-17\",\n            \"play/hongkong_tour/travel_itinerary.csv:2024-03-18\",\n            \"play/hongkong_tour/travel_itinerary.csv:2024-03-19\",\n            \"play/hongkong_tour/travel_itinerary.csv:2024-03-20\",\n            \"travel_plan/travel_bucket_list.md:2024-04-01\",\n            \"learning/2024/learning_progress.csv:2024-04-01\",\n            \"learning/2024/learning_progress.csv:2024-05-01\",\n            \"travel_plan/travel_bucket_list.md:2024-06-01\",\n            \"learning/2024/learning_progress.csv:2024-06-01\",\n            \"learning/2024/learning_progress.csv:2024-07-01\",\n            \"exp_logs/exp_record.md:2024-08-01\",\n            \"exp_logs/results_record.csv:2024-08-01\",\n            \"travel_plan/travel_bucket_list.md:2024-08-01\",\n            \"learning/2024/learning_progress.csv:2024-08-01\",\n            \"exp_logs/results_record.csv:2024-08-02\",\n            \"exp_logs/results_record.csv:2024-08-03\",\n            \"exp_logs/results_record.csv:2024-08-04\",\n            
\"exp_logs/exp_record.md:2024-09-01\",\n            \"exp_logs/sep/september_summary.csv:2024-09-01\",\n            \"learning/2024/learning_progress.csv:2024-09-01\",\n            \"exp_logs/sep/september_summary.csv:2024-09-05\",\n            \"exp_logs/sep/september_summary.csv:2024-09-10\",\n            \"exp_logs/sep/september_summary.csv:2024-09-15\",\n            \"exp_logs/sep/september_summary.csv:2024-09-20\",\n            \"exp_logs/sep/september_summary.csv:2024-09-25\",\n            \"exp_logs/sep/september_summary.csv:2024-09-30\",\n            \"learning/2024/learning_progress.csv:2024-10-01\",\n            \"learning/2024/learning_progress.csv:2024-11-01\",\n            \"learning/2024/learning_progress.csv:2024-12-01\"\n        }\n        \n        # Check if each expected entry is found in actual lines (more flexible matching)\n        missing_entries = []\n        for expected in expected_entries:\n            expected_path, expected_date = expected.split(':')\n            found = False\n            \n            for actual_line in actual_lines:\n                # Check if line contains both the expected path and date\n                # More flexible: path can be anywhere in the line, not just at the beginning\n                if expected_path in actual_line and expected_date in actual_line:\n                    found = True\n                    break\n            \n            if not found:\n                missing_entries.append(expected)\n        \n        # Check for extra entries (lines that don't match any expected pattern)\n        extra_entries = []\n        for actual_line in actual_lines:\n            # Extract date from actual line\n            date_match = re.search(r'\\d{4}-\\d{2}-\\d{2}', actual_line)\n            if not date_match:\n                continue\n                \n            actual_date = date_match.group()\n            \n            # Try to extract file path from the line\n            actual_path = None\n            
words = actual_line.split()\n            for word in words:\n                if ('/' in word or '.' in word) and not re.match(r'^\\d{4}-\\d{2}-\\d{2}$', word.strip()):\n                    actual_path = word\n                    break\n            \n            if not actual_path:\n                continue\n            \n            # Find if this line matches any expected entry\n            found_expected = False\n            for expected in expected_entries:\n                expected_path, expected_date = expected.split(':')\n                if expected_path in actual_path and expected_date == actual_date:\n                    found_expected = True\n                    break\n            \n            if not found_expected:\n                extra_entries.append(actual_line)\n        \n        if missing_entries:\n            print(f\"❌ Missing {len(missing_entries)} expected entries\")\n            print(f\"   Examples: {missing_entries[:3]}\")\n            return False\n        \n        if extra_entries:\n            print(f\"❌ Found {len(extra_entries)} unexpected entries\")\n            print(f\"   Examples: {extra_entries[:3]}\")\n            return False\n        \n        print(\"✅ All expected entries are present, no extra entries\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking expected entries: {e}\")\n        return False\n\ndef verify_no_duplicates(test_dir: Path) -> bool:\n    \"\"\"Verify that there are no duplicate entries.\"\"\"\n    timeline_file = test_dir / \"timeline.txt\"\n    \n    try:\n        content = timeline_file.read_text(encoding='utf-8')\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        \n        if len(lines) != len(set(lines)):\n            print(\"❌ Duplicate entries found in timeline.txt\")\n            return False\n        \n        print(\"✅ No duplicate entries found\")\n        return True\n        \n    except Exception as e:\n        
print(f\"❌ Error checking for duplicates: {e}\")\n        return False\n\ndef verify_file_paths_exist(test_dir: Path) -> bool:\n    \"\"\"Verify that all file paths mentioned in timeline.txt actually exist.\"\"\"\n    timeline_file = test_dir / \"timeline.txt\"\n    \n    try:\n        content = timeline_file.read_text(encoding='utf-8')\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        \n        missing_files = []\n        for line in lines:\n            # Try to extract file path from the line (more flexible)\n            file_path_found = False\n            \n            # Method 1: Split by colon and check each part\n            if ':' in line:\n                parts = line.split(':')\n                for part in parts:\n                    part = part.strip()\n                    if part and ('/' in part or '.' in part) and not re.match(r'^\\d{4}-\\d{2}-\\d{2}$', part):\n                        # This looks like a file path\n                        full_path = test_dir / part\n                        if not full_path.exists():\n                            missing_files.append(part)\n                        file_path_found = True\n                        break\n            \n            # Method 2: Split into words and look for path-like content\n            if not file_path_found:\n                words = line.split()\n                for word in words:\n                    word = word.strip()\n                    if ('/' in word or '.' 
in word) and not re.match(r'^\\d{4}-\\d{2}-\\d{2}$', word):\n                        # This looks like a file path\n                        full_path = test_dir / word\n                        if not full_path.exists():\n                            missing_files.append(word)\n                        file_path_found = True\n                        break\n            \n            # Method 3: Look for path pattern in the entire line\n            if not file_path_found:\n                # Use regex to find path-like patterns\n                path_pattern = r'[a-zA-Z0-9_\\-\\.\\/]+/[a-zA-Z0-9_\\-\\.\\/]+'\n                path_matches = re.findall(path_pattern, line)\n                for match in path_matches:\n                    if '.' in match or '/' in match:\n                        full_path = test_dir / match\n                        if not full_path.exists():\n                            missing_files.append(match)\n                        file_path_found = True\n                        break\n        \n        if missing_files:\n            print(f\"❌ {len(missing_files)} referenced files do not exist\")\n            print(f\"   Examples: {missing_files[:3]}\")\n            return False\n        \n        print(\"✅ All referenced file paths exist\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking file paths: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Desktop Timeline Extraction Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Timeline File Exists\", verify_timeline_file_exists),\n        (\"File is Readable\", verify_timeline_file_readable),\n        (\"Correct Line Count\", verify_line_count),\n        (\"Line Format\", verify_line_format),\n        (\"Date Format\", verify_date_format),\n        (\"Chronological Order\", verify_chronological_order),\n        
(\"Expected Entries\", verify_expected_entries),\n        (\"No Duplicates\", verify_no_duplicates),\n        (\"File Paths Exist\", verify_file_paths_exist),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Desktop 2 Timeline Extraction completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/desktop_template/budget_computation/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nYou need to analyze all the files in the desktop environment to calculate personal life expenses and create a budget summary.\n\n### Task Objectives\n\n1. **Locate and analyze all files** in the desktop environment\n2. **Extract personal life expenses** from the files (such as salary, food, living material, tax, expenses on the internet, ...) (exclude expenses in project/work)\n3. **Create a file named `total_budget.txt`** in the main directory\n4. **Format each expense entry** as `file_path;price` (one per line)\n5. **Add total sum** as the last line, rounded to 2 decimal places\n\n### Output Format\n\nThe `total_budget.txt` file should contain:\n\n- One expense per line in format: `file_path;price`\n- File path should be the relative path from the main directory\n- Price should be rounded to 2 decimal places\n- Last line should be the total sum\n- No additional text or explanations\n\n### Important Notes\n\n- Only include personal life expenses (not in project/work)\n- Use the cheapest available price when multiple options exist for one thing\n- The total should match the sum of all individual expenses\n- Hint: If a file contains 1 item for personal consumption, it means that all the entry in entire file is for personal consumption\n"
  },
  {
    "path": "tasks/filesystem/standard/desktop_template/budget_computation/meta.json",
    "content": "{\n  \"task_id\": \"budget_computation\",\n  \"task_name\": \"Budget Computation\",\n  \"category_id\": \"desktop_template\",\n  \"category_name\": \"Desktop Template\",\n  \"description\": \"Analyze personal expense data extracted from desktop files to create a detailed budget summary report for financial review.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-14\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"desktop_template/\\n    ├── Archives/\\n    │       ├── backup_contacts.csv\\n    │       └── tax_documents_2022.csv\\n    ├── Desktop/\\n    │       └── contacts.csv\\n    ├── Documents/\\n    │       ├── Personal/\\n    │       │       └── tax_info_2023.csv\\n    │       ├── Projects/\\n    │       │       └── budget_tracker.csv\\n    │       ├── Work/\\n    │       │       ├── client_list.csv\\n    │       │       └── timesheet.csv\\n    │       ├── budget.csv\\n    │       └── important_dates.csv\\n    ├── Downloads/\\n    │       ├── expenses.csv\\n    │       ├── fitness_log.csv\\n    │       └── price_comparisons.csv\\n    ├── Temp/\\n    │       └── test_data.csv\\n    ├── book_list.txt\\n    ├── bookmark_export.txt\\n    ├── calculations.txt\\n    ├── correspondence_2023.txt\\n    ├── draft_letter.txt\\n    ├── emergency_contacts.txt\\n    ├── example.txt\\n    └── experiment_results.txt\\n\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/desktop_template.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/desktop_template/budget_computation/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Budget Computation Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\nfrom collections import Counter\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_total_budget_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the total_budget.txt file exists.\"\"\"\n    budget_file = test_dir / \"total_budget.txt\"\n    \n    if not budget_file.exists():\n        print(\"❌ File 'total_budget.txt' not found\")\n        return False\n    \n    print(\"✅ total_budget.txt file found\")\n    return True\n\ndef verify_file_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the total_budget.txt file has proper format.\"\"\"\n    budget_file = test_dir / \"total_budget.txt\"\n    \n    try:\n        content = budget_file.read_text()\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        \n        if len(lines) < 2:\n            print(\"❌ File must contain at least 2 lines (expenses + total)\")\n            return False\n        \n        # Check that all lines except the last follow the format file_path;price\n        for i, line in enumerate(lines[:-1]):\n            if ';' not in line:\n                print(f\"❌ Line {i+1} does not contain ';' separator: {line}\")\n                return False\n            \n            parts = line.split(';')\n            if len(parts) != 2:\n                print(f\"❌ Line {i+1} does not have exactly 2 parts: {line}\")\n                return False\n            \n            # Check if second part is a valid number\n            try:\n                float(parts[1])\n            except ValueError:\n                print(f\"❌ Line {i+1} price is not a 
valid number: {parts[1]}\")\n                return False\n        \n        # Check if last line is a valid number (total)\n        try:\n            float(lines[-1])\n        except ValueError:\n            print(f\"❌ Last line is not a valid number: {lines[-1]}\")\n            return False\n        \n        print(\"✅ File format is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading or parsing file: {e}\")\n        return False\n\ndef verify_expense_entries(test_dir: Path) -> bool:\n    \"\"\"Verify that all 15 required expense entries are present.\"\"\"\n    budget_file = test_dir / \"total_budget.txt\"\n    \n    try:\n        content = budget_file.read_text()\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        \n        # Should have 16 lines total (15 expenses + 1 total)\n        if len(lines) != 16:\n            print(f\"❌ Expected 16 lines (15 expenses + 1 total), found {len(lines)}\")\n            return False\n        \n        # Check that we have exactly 15 expense entries\n        expense_lines = lines[:-1]  # All lines except the last\n        \n        if len(expense_lines) != 15:\n            print(f\"❌ Expected 15 expense entries, found {len(expense_lines)}\")\n            return False\n        \n        print(\"✅ File contains exactly 15 expense entries\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking expense entries: {e}\")\n        return False\n\ndef verify_file_paths_and_counts(test_dir: Path) -> bool:\n    \"\"\"Verify that all required file paths are present with correct counts.\"\"\"\n    budget_file = test_dir / \"total_budget.txt\"\n    \n    try:\n        content = budget_file.read_text()\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        expense_lines = lines[:-1]  # All lines except the last\n        \n        # Extract file paths from expense lines\n        
file_paths = []\n        for line in expense_lines:\n            file_path = line.split(';')[0]\n            file_paths.append(file_path)\n        \n        # Count occurrences of each path\n        path_counts = Counter(file_paths)\n        \n        # Expected file paths and their counts based on answer.txt\n        expected_paths = {\n            'Archives/tax_documents_2022.csv': 3,\n            'Documents/Personal/tax_info_2023.csv': 3,\n            'Documents/budget.csv': 3,\n            'Downloads/expenses.csv': 3,\n            'Downloads/price_comparisons.csv': 3\n        }\n        \n        # Helper function to check if a path contains the expected path\n        def path_matches_expected(actual_path: str, expected_path: str) -> bool:\n            \"\"\"Check if actual path contains the expected path (allowing for prefixes like './')\"\"\"\n            # Remove common prefixes like './', '../', etc.\n            normalized_actual = actual_path\n            while normalized_actual.startswith('./') or normalized_actual.startswith('../'):\n                normalized_actual = normalized_actual[2:] if normalized_actual.startswith('./') else normalized_actual[3:]\n            \n            # Check if the normalized path contains the expected path\n            return expected_path in normalized_actual or normalized_actual == expected_path\n        \n        # Check if all expected paths are present with correct counts\n        for expected_path, expected_count in expected_paths.items():\n            # Find matching actual paths\n            matching_paths = []\n            for actual_path in path_counts.keys():\n                if path_matches_expected(actual_path, expected_path):\n                    matching_paths.append(actual_path)\n            \n            if not matching_paths:\n                print(f\"❌ Missing expected file path: {expected_path}\")\n                return False\n            \n            # Sum up the counts from all matching paths\n     
       total_count = sum(path_counts[path] for path in matching_paths)\n            if total_count != expected_count:\n                print(f\"❌ Path {expected_path} has wrong count: expected {expected_count}, found {total_count}\")\n                print(f\"   Matching paths: {matching_paths}\")\n                return False\n        \n        # Check if there are any completely unexpected paths (not matching any expected path)\n        all_matching_paths = set()\n        for expected_path in expected_paths.keys():\n            for actual_path in path_counts.keys():\n                if path_matches_expected(actual_path, expected_path):\n                    all_matching_paths.add(actual_path)\n        \n        unexpected_paths = set(path_counts.keys()) - all_matching_paths\n        if unexpected_paths:\n            print(f\"❌ Unexpected file paths found: {unexpected_paths}\")\n            return False\n        \n        print(\"✅ All expected file paths are present with correct counts\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking file paths: {e}\")\n        return False\n\ndef verify_individual_prices(test_dir: Path) -> bool:\n    \"\"\"Verify that all individual prices match the expected values.\"\"\"\n    budget_file = test_dir / \"total_budget.txt\"\n    \n    try:\n        content = budget_file.read_text()\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        expense_lines = lines[:-1]  # All lines except the last\n        \n        # Expected prices based on answer.txt\n        expected_expenses = [\n            ('Archives/tax_documents_2022.csv', 42000.00),\n            ('Archives/tax_documents_2022.csv', 1800.00),\n            ('Archives/tax_documents_2022.csv', 950.00),\n            ('Documents/Personal/tax_info_2023.csv', 45000.00),\n            ('Documents/Personal/tax_info_2023.csv', 2500.00),\n            ('Documents/Personal/tax_info_2023.csv', 1200.00),\n          
  ('Documents/budget.csv', 250.00),\n            ('Documents/budget.csv', 180.00),\n            ('Documents/budget.csv', 120.00),\n            ('Downloads/expenses.csv', 45.99),\n            ('Downloads/expenses.csv', 99.00),\n            ('Downloads/expenses.csv', 234.50),\n            ('Downloads/price_comparisons.csv', 879.99),\n            ('Downloads/price_comparisons.csv', 289.99),\n            ('Downloads/price_comparisons.csv', 74.99)\n        ]\n        \n        # Helper function to check if a path contains the expected path\n        def path_matches_expected(actual_path: str, expected_path: str) -> bool:\n            \"\"\"Check if actual path contains the expected path (allowing for prefixes like './')\"\"\"\n            # Remove common prefixes like './', '../', etc.\n            normalized_actual = actual_path\n            while normalized_actual.startswith('./') or normalized_actual.startswith('../'):\n                normalized_actual = normalized_actual[2:] if normalized_actual.startswith('./') else normalized_actual[3:]\n            \n            # Check if the normalized path contains the expected path\n            return expected_path in normalized_actual or normalized_actual == expected_path\n        \n        # Parse actual expenses\n        actual_expenses = []\n        for line in expense_lines:\n            parts = line.split(';')\n            file_path = parts[0]\n            price = float(parts[1])\n            actual_expenses.append((file_path, price))\n        \n        # Create a counter for expected expenses to handle duplicates\n        expected_expenses_counter = Counter(expected_expenses)\n        actual_expenses_counter = Counter(actual_expenses)\n        \n        # Check if all expected expenses are present with correct counts\n        for expected_expense, expected_count in expected_expenses_counter.items():\n            expected_path, expected_price = expected_expense\n            \n            # Find matching actual 
expenses\n            matching_expenses = []\n            for actual_expense, actual_count in actual_expenses_counter.items():\n                actual_path, actual_price = actual_expense\n                if path_matches_expected(actual_path, expected_path) and abs(actual_price - expected_price) < 0.01:\n                    matching_expenses.append(actual_expense)\n            \n            if not matching_expenses:\n                print(f\"❌ Missing expected expense: {expected_expense}\")\n                return False\n            \n            # Sum up the counts from all matching expenses\n            total_count = sum(actual_expenses_counter[expense] for expense in matching_expenses)\n            if total_count != expected_count:\n                print(f\"❌ Expense {expected_expense} has wrong count: expected {expected_count}, found {total_count}\")\n                print(f\"   Matching expenses: {matching_expenses}\")\n                return False\n        \n        # Check if there are any completely unexpected expenses (not matching any expected expense)\n        all_matching_expenses = set()\n        for expected_expense in expected_expenses_counter.keys():\n            expected_path, expected_price = expected_expense\n            for actual_expense in actual_expenses_counter.keys():\n                actual_path, actual_price = actual_expense\n                if path_matches_expected(actual_path, expected_path) and abs(actual_price - expected_price) < 0.01:\n                    all_matching_expenses.add(actual_expense)\n        \n        unexpected_expenses = set(actual_expenses_counter.keys()) - all_matching_expenses\n        if unexpected_expenses:\n            print(f\"❌ Unexpected expenses found: {unexpected_expenses}\")\n            return False\n        \n        print(\"✅ All individual prices match expected values\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking individual prices: {e}\")\n        
return False\n\ndef verify_total_price(test_dir: Path) -> bool:\n    \"\"\"Verify that the total price is correct.\"\"\"\n    budget_file = test_dir / \"total_budget.txt\"\n    \n    try:\n        content = budget_file.read_text()\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        \n        # Get the total from the last line\n        total_line = lines[-1]\n        try:\n            actual_total = float(total_line)\n        except ValueError:\n            print(f\"❌ Last line is not a valid number: {total_line}\")\n            return False\n        \n        # Expected total based on answer.txt\n        expected_total = 95624.46\n        \n        if abs(actual_total - expected_total) > 0.01:  # Allow small floating point differences\n            print(f\"❌ Expected total {expected_total}, found {actual_total}\")\n            return False\n        \n        print(\"✅ Total price is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking total price: {e}\")\n        return False\n\ndef verify_total_calculation(test_dir: Path) -> bool:\n    \"\"\"Verify that the total matches the sum of individual expenses.\"\"\"\n    budget_file = test_dir / \"total_budget.txt\"\n    \n    try:\n        content = budget_file.read_text()\n        lines = [line.strip() for line in content.split('\\n') if line.strip()]\n        expense_lines = lines[:-1]  # All lines except the last\n        \n        # Calculate sum of individual expenses\n        calculated_total = 0.0\n        for line in expense_lines:\n            price = float(line.split(';')[1])\n            calculated_total += price\n        \n        # Get the stated total from the last line\n        stated_total = float(lines[-1])\n        \n        # Check if they match (allow small floating point differences)\n        if abs(calculated_total - stated_total) > 0.01:\n            print(f\"❌ Total calculation mismatch: calculated 
{calculated_total:.2f}, stated {stated_total:.2f}\")\n            return False\n        \n        print(\"✅ Total calculation is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying total calculation: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Budget Computation Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Total Budget File Exists\", verify_total_budget_file_exists),\n        (\"File Format\", verify_file_format),\n        (\"Expense Entries Count\", verify_expense_entries),\n        (\"File Paths and Counts\", verify_file_paths_and_counts),\n        (\"Individual Prices\", verify_individual_prices),\n        (\"Total Price\", verify_total_price),\n        (\"Total Calculation\", verify_total_calculation),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Budget computation task completed successfully!\")\n        print(\"🎉 All verification steps passed\")\n        print(\"📊 Summary:\")\n        print(\"   - 15 expense entries found\")\n        print(\"   - 5 different file paths covered\")\n        print(\"   - All individual prices correct\")\n        print(\"   - Total price: $95,624.46\")\n        print(\"   - Calculation verified\")\n        sys.exit(0)\n    else:\n        print(\"❌ Budget computation task verification: FAIL\")\n        print(\"Please check the errors above and ensure all requirements are met\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/desktop_template/contact_information/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nYour task is to compile all contact information from all the files into a single CSV table. You need to extract all people's contact information and organize it systematically.\n\n### Task Objectives\n\n1. **Scan all files** in the directory\n2. **Extract contact information** for all individuals and organizations found\n3. **Create a CSV file** named `contact_info.csv` in the main directory\n4. **Structure the CSV** with the following columns:\n   - First column: Name (required)\n   - Second column: Email (required)\n   - Third column: Phone (required)\n   - Additional columns: Any other contact information types found\n5. **Consolidate information** by merging the same types of information across entries into single columns\n6. **Leave cells blank** if specific information is not available for a person/organization\n7. Each entry from different files should be processed and listed separately, without any secondary processing.\n\n### Expected Output\n\n- **File name**: `contact_info.csv`\n- **Format**: CSV with headers and data rows\n\n### Reasoning Task\n\nAfter creating the contact_info.csv file, analyze the data to answer:\n**What is Charlie Davis's job/profession?**\n\nHint: focus on the contact information in contact_info.csv.\n\nWrite your answer in a file named `answer.txt` in the main directory.\n\n### Important Notes\n\n- Do not modify any existing files\n- Only create the two new files: `contact_info.csv` and `answer.txt`\n"
  },
  {
    "path": "tasks/filesystem/standard/desktop_template/contact_information/meta.json",
    "content": "{\n  \"task_id\": \"contact_information\",\n  \"task_name\": \"Contact Information\",\n  \"category_id\": \"desktop_template\",\n  \"category_name\": \"Desktop Template\",\n  \"description\": \"Extract contact details from various file formats on desktop and perform reasoning analysis on the collected relationship data.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-14\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"cross-referencing\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"desktop_template/\\n    ├── Archives/\\n    │       ├── backup_contacts.csv\\n    │       └── tax_documents_2022.csv\\n    ├── Desktop/\\n    │       └── contacts.csv\\n    ├── Documents/\\n    │       ├── Personal/\\n    │       │       └── tax_info_2023.csv\\n    │       ├── Projects/\\n    │       │       └── budget_tracker.csv\\n    │       ├── Work/\\n    │       │       ├── client_list.csv\\n    │       │       └── timesheet.csv\\n    │       ├── budget.csv\\n    │       └── important_dates.csv\\n    ├── Downloads/\\n    │       ├── expenses.csv\\n    │       ├── fitness_log.csv\\n    │       └── price_comparisons.csv\\n    ├── Temp/\\n    │       └── test_data.csv\\n    ├── book_list.txt\\n    ├── bookmark_export.txt\\n    ├── calculations.txt\\n    ├── correspondence_2023.txt\\n    ├── draft_letter.txt\\n    ├── emergency_contacts.txt\\n    ├── example.txt\\n    └── experiment_results.txt\\n\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/desktop_template.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/desktop_template/contact_information/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Contact Information Compilation Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport csv\nimport os\nimport re\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_contact_info_csv_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the contact_info.csv file exists in the main directory.\"\"\"\n    contact_file = test_dir / \"contact_info.csv\"\n    \n    if not contact_file.exists():\n        print(\"❌ File 'contact_info.csv' not found in main directory\")\n        return False\n    \n    print(\"✅ contact_info.csv file found\")\n    return True\n\ndef verify_answer_txt_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer.txt file exists in the main directory.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    if not answer_file.exists():\n        print(\"❌ File 'answer.txt' not found in main directory\")\n        return False\n    \n    print(\"✅ answer.txt file found\")\n    return True\n\ndef verify_csv_structure(test_dir: Path) -> bool:\n    \"\"\"Verify that the CSV file has the correct structure.\"\"\"\n    contact_file = test_dir / \"contact_info.csv\"\n    \n    try:\n        with open(contact_file, 'r', encoding='utf-8') as f:\n            reader = csv.reader(f)\n            rows = list(reader)\n            \n        if len(rows) < 2:  # Need at least header + 1 data row\n            print(\"❌ CSV file has insufficient rows\")\n            return False\n        \n        headers = rows[0]\n        if not headers:\n            print(\"❌ CSV file has no headers\")\n            return False\n        \n        # Check that Name is the first column\n        if headers[0].lower() != 'name':\n      
      print(\"❌ First column is not 'Name'\")\n            return False\n        \n        # Check that Email and Phone are present (order may vary)\n        header_lower = [h.lower() for h in headers]\n        if 'email' not in header_lower:\n            print(\"❌ 'Email' column not found\")\n            return False\n        \n        if 'phone' not in header_lower:\n            print(\"❌ 'Phone' column not found\")\n            return False\n        \n        print(\"✅ CSV structure is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading CSV file: {e}\")\n        return False\n\ndef verify_csv_content_accuracy(test_dir: Path) -> bool:\n    \"\"\"Verify that the CSV content contains all required data, regardless of row order or extra entries.\"\"\"\n    contact_file = test_dir / \"contact_info.csv\"\n    \n    try:\n        with open(contact_file, 'r', encoding='utf-8') as f:\n            reader = csv.DictReader(f)\n            rows = list(reader)\n        \n        # Expected data from answer.csv (hardcoded as required)\n        expected_data = [\n            {\"Name\": \"John Smith\", \"Email\": \"john@email.com\", \"Phone\": \"555-0101\", \"Status\": \"\", \"Industry\": \"\"},\n            {\"Name\": \"Jane Doe\", \"Email\": \"jane@email.com\", \"Phone\": \"555-0102\", \"Status\": \"\", \"Industry\": \"\"},\n            {\"Name\": \"Bob Johnson\", \"Email\": \"bob@email.com\", \"Phone\": \"555-0103\", \"Status\": \"\", \"Industry\": \"\"},\n            {\"Name\": \"Alice Brown\", \"Email\": \"alice@email.com\", \"Phone\": \"555-0201\", \"Status\": \"Inactive\", \"Industry\": \"\"},\n            {\"Name\": \"Charlie Davis\", \"Email\": \"charlie@email.com\", \"Phone\": \"555-0202\", \"Status\": \"Active\", \"Industry\": \"\"},\n            {\"Name\": \"David Wilson\", \"Email\": \"david@email.com\", \"Phone\": \"555-0203\", \"Status\": \"Inactive\", \"Industry\": \"\"},\n            {\"Name\": \"Acme Corp\", 
\"Email\": \"acme@corp.com\", \"Phone\": \"\", \"Status\": \"\", \"Industry\": \"Technology\"},\n            {\"Name\": \"Global Inc\", \"Email\": \"global@inc.com\", \"Phone\": \"\", \"Status\": \"\", \"Industry\": \"Finance\"},\n            {\"Name\": \"Local Business\", \"Email\": \"local@biz.com\", \"Phone\": \"\", \"Status\": \"\", \"Industry\": \"Retail\"},\n            {\"Name\": \"Spouse\", \"Email\": \"\", \"Phone\": \"+1-555-0124\", \"Status\": \"\", \"Industry\": \"\"},\n            {\"Name\": \"Parent\", \"Email\": \"\", \"Phone\": \"+1-555-0125\", \"Status\": \"\", \"Industry\": \"\"},\n            {\"Name\": \"Sibling\", \"Email\": \"\", \"Phone\": \"+1-555-0126\", \"Status\": \"\", \"Industry\": \"\"},\n            {\"Name\": \"Primary Doctor\", \"Email\": \"\", \"Phone\": \"+1-555-0201\", \"Status\": \"\", \"Industry\": \"\"},\n            {\"Name\": \"Dentist\", \"Email\": \"\", \"Phone\": \"+1-555-0202\", \"Status\": \"\", \"Industry\": \"\"},\n            {\"Name\": \"Pharmacy\", \"Email\": \"\", \"Phone\": \"+1-555-0203\", \"Status\": \"\", \"Industry\": \"\"}\n        ]\n        \n        # Convert expected data to a dictionary for easier lookup\n        # We'll use Name as the key since it should be unique\n        expected_dict = {}\n        for entry in expected_data:\n            expected_dict[entry[\"Name\"]] = entry\n        \n        # Check each row for accuracy, regardless of order\n        # Allow extra entries and mixed content\n        found_entries = set()\n        extra_entries = []\n        \n        for i, row in enumerate(rows):\n            row_name = row.get('Name', '')\n            if not row_name:\n                # Skip rows without names (they're not valid entries)\n                continue\n            \n            if row_name in expected_dict:\n                # This is one of our expected entries\n                if row_name in found_entries:\n                    print(f\"❌ Duplicate name found: '{row_name}'\")\n      
              return False\n                \n                found_entries.add(row_name)\n                expected = expected_dict[row_name]\n                \n                # Check all columns for this entry\n                for key, expected_value in expected.items():\n                    if key in row:\n                        actual_value = row[key] if row[key] else \"\"\n                        if actual_value != expected_value:\n                            print(f\"❌ Entry '{row_name}', column '{key}': expected '{expected_value}', got '{actual_value}'\")\n                            return False\n                    else:\n                        print(f\"❌ Entry '{row_name}' missing column '{key}'\")\n                        return False\n            else:\n                # This is an extra entry - record it for informational purposes\n                extra_entries.append(row_name)\n        \n        # Verify all expected entries were found\n        if len(found_entries) != len(expected_data):\n            missing = set(expected_dict.keys()) - found_entries\n            print(f\"❌ Missing entries: {missing}\")\n            return False\n        \n        # Report extra entries if any\n        if extra_entries:\n            print(f\"ℹ️  Found {len(extra_entries)} extra entries: {extra_entries}\")\n        \n        print(f\"✅ CSV content accuracy verified: found all {len(expected_data)} required entries (plus {len(extra_entries)} extra entries)\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying CSV content: {e}\")\n        return False\n\ndef verify_csv_data_completeness(test_dir: Path) -> bool:\n    \"\"\"Verify that all required data is present and no entries are missing.\"\"\"\n    contact_file = test_dir / \"contact_info.csv\"\n    \n    try:\n        with open(contact_file, 'r', encoding='utf-8') as f:\n            reader = csv.DictReader(f)\n            rows = list(reader)\n        \n        # Check 
that all expected names are present\n        expected_names = [\n            \"John Smith\", \"Jane Doe\", \"Bob Johnson\", \"Alice Brown\", \n            \"Charlie Davis\", \"David Wilson\", \"Acme Corp\", \"Global Inc\", \n            \"Local Business\", \"Spouse\", \"Parent\", \"Sibling\", \n            \"Primary Doctor\", \"Dentist\", \"Pharmacy\"\n        ]\n        \n        actual_names = [row.get('Name', '') for row in rows if row.get('Name')]\n        \n        missing_names = set(expected_names) - set(actual_names)\n        if missing_names:\n            print(f\"❌ Missing names: {missing_names}\")\n            return False\n        \n        extra_names = set(actual_names) - set(expected_names)\n        if extra_names:\n            print(f\"⚠️  Extra names found: {extra_names}\")\n            # This is a warning, not an error\n        \n        print(\"✅ CSV data completeness verified\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking data completeness: {e}\")\n        return False\n\ndef verify_answer_content(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer.txt contains the correct answer about Charlie Davis.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip().lower()\n        \n        # The answer should contain \"dentist\" (as per answer.txt)\n        if \"dentist\" in content:\n            print(\"✅ Answer about Charlie Davis's job is correct\")\n            return True\n        else:\n            print(f\"❌ Answer does not contain 'dentist'. 
Found: '{content}'\")\n            return False\n        \n    except Exception as e:\n        print(f\"❌ Error reading answer.txt: {e}\")\n        return False\n\ndef verify_file_locations(test_dir: Path) -> bool:\n    \"\"\"Verify that files are in the correct locations.\"\"\"\n    contact_file = test_dir / \"contact_info.csv\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    # Check that files are in the main directory, not in subdirectories\n    if contact_file.parent != test_dir:\n        print(f\"❌ contact_info.csv is not in main directory: {contact_file}\")\n        return False\n    \n    if answer_file.parent != test_dir:\n        print(f\"❌ answer.txt is not in main directory: {answer_file}\")\n        return False\n    \n    print(\"✅ Files are in correct locations\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Contact Information Compilation Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Contact Info CSV Exists\", verify_contact_info_csv_exists),\n        (\"Answer TXT Exists\", verify_answer_txt_exists),\n        (\"Files in Correct Locations\", verify_file_locations),\n        (\"CSV Structure\", verify_csv_structure),\n        (\"CSV Content Accuracy (Flexible)\", verify_csv_content_accuracy),\n        (\"CSV Data Completeness\", verify_csv_data_completeness),\n        (\"Answer Content\", verify_answer_content),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Contact Information Compilation Task completed successfully!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task 
verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/desktop_template/file_arrangement/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nYou are tasked with organizing files on an AI researcher's desktop into a structured folder system. You need to create specific folders and move files to their designated locations according to the provided organization scheme.\n\n### Task Objectives\n\n1. **Create the following folder structure** in the main directory:\n\n   - `work/` - for work, research and projects related files\n   - `life/` - for personal life related files\n   - `archives/` - for archived files or files with past dates in its file names\n   - `temp/` - for temporary files, drafts\n   - `others/` - for files that cannot be classified elsewhere\n\n### Important Notes\n\n- All files must be moved from their current locations to the specified folders\n- The `others/` folder is for files that don't fit the other categories\n- Do not modify the contents of any files, only move them to the correct locations\n- If you are not sure about which folder it should belongs to, you can read the context in the files before making decisions\n- **Do not change files' name**\n"
  },
  {
    "path": "tasks/filesystem/standard/desktop_template/file_arrangement/meta.json",
    "content": "{\n  \"task_id\": \"file_arrangement\",\n  \"task_name\": \"File Arrangement\",\n  \"category_id\": \"desktop_template\",\n  \"category_name\": \"Desktop Template\",\n  \"description\": \"Classify and organize desktop files into appropriate categories following specified classification rules and naming convention standards.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-14\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"file organization\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"desktop_template/\\n    ├── Archives/\\n    │       ├── backup_contacts.csv\\n    │       └── tax_documents_2022.csv\\n    ├── Desktop/\\n    │       └── contacts.csv\\n    ├── Documents/\\n    │       ├── Personal/\\n    │       │       └── tax_info_2023.csv\\n    │       ├── Projects/\\n    │       │       └── budget_tracker.csv\\n    │       ├── Work/\\n    │       │       ├── client_list.csv\\n    │       │       └── timesheet.csv\\n    │       ├── budget.csv\\n    │       └── important_dates.csv\\n    ├── Downloads/\\n    │       ├── expenses.csv\\n    │       ├── fitness_log.csv\\n    │       └── price_comparisons.csv\\n    ├── Temp/\\n    │       └── test_data.csv\\n    ├── book_list.txt\\n    ├── bookmark_export.txt\\n    ├── calculations.txt\\n    ├── correspondence_2023.txt\\n    ├── draft_letter.txt\\n    ├── emergency_contacts.txt\\n    ├── example.txt\\n    └── experiment_results.txt\\n\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/desktop_template.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/desktop_template/file_arrangement/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Desktop File Organization Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_folder_structure(test_dir: Path) -> bool:\n    \"\"\"Verify that all required folders exist.\"\"\"\n    required_folders = [\"work\", \"life\", \"archives\", \"temp\", \"others\"]\n    missing_folders = []\n    \n    for folder in required_folders:\n        folder_path = test_dir / folder\n        if not folder_path.exists() or not folder_path.is_dir():\n            missing_folders.append(folder)\n    \n    if missing_folders:\n        print(f\"❌ Missing required folders: {missing_folders}\")\n        return False\n    \n    print(\"✅ All required folders exist\")\n    return True\n\ndef verify_work_folder_files(test_dir: Path) -> bool:\n    \"\"\"Verify that work folder contains the required files.\"\"\"\n    work_dir = test_dir / \"work\"\n    required_files = [\n        \"client_list.csv\",\n        \"timesheet.csv\", \n        \"experiment_results.txt\",\n        \"budget_tracker.csv\",\n        \"expenses.csv\"\n    ]\n    \n    missing_files = []\n    for file_name in required_files:\n        file_path = work_dir / file_name\n        if not file_path.exists():\n            missing_files.append(file_name)\n    \n    if missing_files:\n        print(f\"❌ Missing required files in work/ folder: {missing_files}\")\n        return False\n    \n    # Count total files in work folder for info\n    total_files = len([f for f in work_dir.iterdir() if f.is_file()])\n    print(f\"✅ All required files found in work/ folder (total: {total_files} files)\")\n    return True\n\ndef 
verify_life_folder_files(test_dir: Path) -> bool:\n    \"\"\"Verify that life folder contains the required files.\"\"\"\n    life_dir = test_dir / \"life\"\n    required_files = [\n        \"contacts.csv\",\n        \"budget.csv\",\n        \"fitness_log.csv\",\n        \"price_comparisons.csv\",\n        \"book_list.txt\",\n        \"bookmark_export.txt\",\n        \"emergency_contacts.txt\"\n    ]\n    \n    missing_files = []\n    for file_name in required_files:\n        file_path = life_dir / file_name\n        if not file_path.exists():\n            missing_files.append(file_name)\n    \n    if missing_files:\n        print(f\"❌ Missing required files in life/ folder: {missing_files}\")\n        return False\n    \n    # Count total files in life folder for info\n    total_files = len([f for f in life_dir.iterdir() if f.is_file()])\n    print(f\"✅ All required files found in life/ folder (total: {total_files} files)\")\n    return True\n\ndef verify_archives_folder_files(test_dir: Path) -> bool:\n    \"\"\"Verify that archives folder contains the required files.\"\"\"\n    archives_dir = test_dir / \"archives\"\n    required_files = [\n        \"backup_contacts.csv\",\n        \"tax_documents_2022.csv\",\n        \"correspondence_2023.txt\",\n        \"tax_info_2023.csv\"\n    ]\n    \n    missing_files = []\n    for file_name in required_files:\n        file_path = archives_dir / file_name\n        if not file_path.exists():\n            missing_files.append(file_name)\n    \n    if missing_files:\n        print(f\"❌ Missing required files in archives/ folder: {missing_files}\")\n        return False\n    \n    # Count total files in archives folder for info\n    total_files = len([f for f in archives_dir.iterdir() if f.is_file()])\n    print(f\"✅ All required files found in archives/ folder (total: {total_files} files)\")\n    return True\n\ndef verify_temp_folder_files(test_dir: Path) -> bool:\n    \"\"\"Verify that temp folder contains the required 
files.\"\"\"\n    temp_dir = test_dir / \"temp\"\n    required_files = [\n        \"test_data.csv\",\n        \"draft_letter.txt\"\n    ]\n    \n    missing_files = []\n    for file_name in required_files:\n        file_path = temp_dir / file_name\n        if not file_path.exists():\n            missing_files.append(file_name)\n    \n    if missing_files:\n        print(f\"❌ Missing required files in temp/ folder: {missing_files}\")\n        return False\n    \n    # Count total files in temp folder for info\n    total_files = len([f for f in temp_dir.iterdir() if f.is_file()])\n    print(f\"✅ All required files found in temp/ folder (total: {total_files} files)\")\n    return True\n\ndef verify_others_folder_files(test_dir: Path) -> bool:\n    \"\"\"Verify that others folder exists and can contain any files.\"\"\"\n    others_dir = test_dir / \"others\"\n    \n    if not others_dir.exists() or not others_dir.is_dir():\n        print(\"❌ others/ folder not found\")\n        return False\n    \n    # Count files in others folder for info\n    total_files = len([f for f in others_dir.iterdir() if f.is_file()])\n    print(f\"✅ others/ folder exists (contains {total_files} files)\")\n    return True\n\ndef verify_required_files_in_correct_folders(test_dir: Path) -> bool:\n    \"\"\"Verify that all 18 required files are in their correct designated folders.\"\"\"\n    # Define the mapping of required files to their correct folders\n    required_file_mapping = {\n        \"work\": [\n            \"client_list.csv\",\n            \"timesheet.csv\", \n            \"experiment_results.txt\",\n            \"budget_tracker.csv\",\n            \"expenses.csv\",\n        ],\n        \"life\": [\n            \"contacts.csv\",\n            \"budget.csv\",\n            \"fitness_log.csv\",\n            \"price_comparisons.csv\",\n            \"book_list.txt\",\n            \"bookmark_export.txt\",\n            \"emergency_contacts.txt\"\n        ],\n        \"archives\": [\n        
    \"backup_contacts.csv\",\n            \"tax_documents_2022.csv\",\n            \"correspondence_2023.txt\",\n            \"tax_info_2023.csv\"\n        ],\n        \"temp\": [\n            \"test_data.csv\",\n            \"draft_letter.txt\"\n        ]\n    }\n    \n    missing_files = []\n    \n    # Check each required file is in its correct folder\n    for folder, files in required_file_mapping.items():\n        folder_path = test_dir / folder\n        for file_name in files:\n            file_path = folder_path / file_name\n            if not file_path.exists():\n                missing_files.append(f\"{folder}/{file_name}\")\n    \n    if missing_files:\n        print(f\"❌ Missing required files: {missing_files}\")\n        return False\n    \n    print(\"✅ All 18 required files are in their correct designated folders\")\n    return True\n\ndef verify_no_duplicate_required_files(test_dir: Path) -> bool:\n    \"\"\"Verify that the 18 required files are not duplicated across folders.\"\"\"\n    required_files = [\n        \"client_list.csv\", \"timesheet.csv\", \"experiment_results.txt\", \"budget_tracker.csv\",\n        \"contacts.csv\", \"budget.csv\", \"expenses.csv\", \"fitness_log.csv\",\n        \"price_comparisons.csv\", \"book_list.txt\", \"bookmark_export.txt\", \"emergency_contacts.txt\",\n        \"backup_contacts.csv\", \"tax_documents_2022.csv\", \"correspondence_2023.txt\", \"tax_info_2023.csv\",\n        \"test_data.csv\", \"draft_letter.txt\"\n    ]\n    \n    # Check for duplicates of required files\n    file_locations = {}\n    duplicates = []\n    \n    for folder in [\"work\", \"life\", \"archives\", \"temp\", \"others\"]:\n        folder_path = test_dir / folder\n        if folder_path.exists() and folder_path.is_dir():\n            for file_path in folder_path.iterdir():\n                if file_path.is_file() and file_path.name in required_files:\n                    if file_path.name in file_locations:\n                        
duplicates.append(f\"{file_path.name} (in {file_locations[file_path.name]} and {folder}/)\")\n                    else:\n                        file_locations[file_path.name] = f\"{folder}/\"\n    \n    if duplicates:\n        print(f\"❌ Duplicate required files found: {duplicates}\")\n        return False\n    \n    print(\"✅ No duplicate required files found\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Desktop File Organization Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Folder Structure\", verify_folder_structure),\n        (\"Required Files in Work Folder\", verify_work_folder_files),\n        (\"Required Files in Life Folder\", verify_life_folder_files),\n        (\"Required Files in Archives Folder\", verify_archives_folder_files),\n        (\"Required Files in Temp Folder\", verify_temp_folder_files),\n        (\"Others Folder Exists\", verify_others_folder_files),\n        (\"All Required Files in Correct Folders\", verify_required_files_in_correct_folders),\n        (\"No Duplicate Required Files\", verify_no_duplicate_required_files),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Desktop file organization task completed successfully!\")\n        print(\"🎉 All 18 required files are correctly placed in their designated folders\")\n        print(\"📊 Summary:\")\n        print(\"   - work/ folder: 5 required files\")\n        print(\"   - life/ folder: 7 required files\") \n        print(\"   - archives/ folder: 4 required files\")\n        print(\"   - temp/ folder: 2 required files\")\n        print(\"   - others/ folder: can 
contain any files\")\n        print(\"   - Total required files: 18\")\n        print(\"   - Note: Other files can be placed in any folder\")\n        sys.exit(0)\n    else:\n        print(\"❌ Desktop file organization task verification: FAIL\")\n        print(\"Please check the errors above and ensure all 18 required files are in their correct locations\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/filesystem/standard/file_context/duplicates_searching/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nYou are given a directory containing multiple text files. Some files have identical content and need to be organized. Your task is to identify all files with duplicate content and move them to a newly created 'duplicates' directory.\n\n### Task Objectives\n\n1. **Scan all text files** in the test directory to identify groups with identical content\n2. **Create a 'duplicates' directory** in the test directory root\n3. **Move all duplicate files** into the 'duplicates' directory\n4. **Leave unique files** in their original location\n\n### Expected Output\n\nAfter completing the task, the directory structure should be:\n\n- `duplicates/` directory containing all files with duplicate content\n- Original directory containing only files with unique content\n"
  },
  {
    "path": "tasks/filesystem/standard/file_context/duplicates_searching/meta.json",
    "content": "{\n  \"task_id\": \"duplicates_searching\",\n  \"task_name\": \"Duplicates Searching\",\n  \"category_id\": \"file_context\",\n  \"category_name\": \"File Context\",\n  \"description\": \"Scan directory to identify files with identical content, then organize all duplicate files into a separate dedicated directory for cleanup.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-06\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"pattern analysis\",\n    \"file organization\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_context/\\n    ├── file_01.txt\\n    ├── file_02.txt\\n    ├── file_03.txt\\n    ├── file_04.txt\\n    ├── file_05.txt\\n    ├── file_06.txt\\n    ├── file_07.txt\\n    ├── file_08.txt\\n    ├── file_09.txt\\n    ├── file_10.txt\\n    ├── file_11.txt\\n    ├── file_12.txt\\n    ├── file_13.txt\\n    ├── file_14.txt\\n    ├── file_15.txt\\n    ├── file_16.txt\\n    ├── file_17.txt\\n    ├── file_18.txt\\n    ├── file_19.txt\\n    ├── file_20.txt\\n    └── large_file.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_context.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/file_context/duplicates_searching/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for File Duplicates Detection and Organization Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\nimport hashlib\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef calculate_file_hash(file_path: Path) -> str:\n    \"\"\"Calculate MD5 hash of file content.\"\"\"\n    try:\n        with open(file_path, 'rb') as f:\n            return hashlib.md5(f.read()).hexdigest()\n    except Exception as e:\n        print(f\"❌ Error reading file {file_path}: {e}\")\n        return None\n\ndef verify_duplicates_directory_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the duplicates directory exists.\"\"\"\n    duplicates_dir = test_dir / \"duplicates\"\n    \n    if not duplicates_dir.exists():\n        print(\"❌ 'duplicates' directory not found\")\n        return False\n    \n    if not duplicates_dir.is_dir():\n        print(\"❌ 'duplicates' exists but is not a directory\")\n        return False\n    \n    print(\"✅ 'duplicates' directory exists\")\n    return True\n\ndef get_expected_duplicate_groups():\n    \"\"\"Return the expected duplicate file groups based on content analysis.\"\"\"\n    # Based on the answer.md and content analysis\n    return {\n        # Group 1: file_01.txt, file_02.txt (identical content)\n        \"group1\": [\"file_01.txt\", \"file_02.txt\"],\n        # Group 2: file_03.txt, file_04.txt (identical content)\n        \"group2\": [\"file_03.txt\", \"file_04.txt\"],\n        # Group 3: file_07.txt, file_08.txt (identical content)\n        \"group3\": [\"file_07.txt\", \"file_08.txt\"],\n        # Group 4: file_10.txt, file_11.txt (identical content)\n        \"group4\": [\"file_10.txt\", \"file_11.txt\"],\n     
   # Group 5: file_13.txt, file_14.txt (identical content)\n        \"group5\": [\"file_13.txt\", \"file_14.txt\"],\n        # Group 6: file_15.txt, file_16.txt (identical content)\n        \"group6\": [\"file_15.txt\", \"file_16.txt\"],\n        # Group 7: file_18.txt, file_19.txt (identical content)\n        \"group7\": [\"file_18.txt\", \"file_19.txt\"]\n    }\n\ndef get_expected_unique_files():\n    \"\"\"Return the expected unique files that should remain in original location.\"\"\"\n    return [\n        \"file_05.txt\", \"file_06.txt\", \"file_09.txt\", \n        \"file_12.txt\", \"file_17.txt\", \"file_20.txt\"\n    ]\n\ndef verify_duplicate_files_moved(test_dir: Path) -> bool:\n    \"\"\"Verify that all duplicate files have been moved to the duplicates directory.\"\"\"\n    duplicates_dir = test_dir / \"duplicates\"\n    expected_duplicate_groups = get_expected_duplicate_groups()\n    \n    # Check that all expected duplicate files are in the duplicates directory\n    missing_files = []\n    for group_name, files in expected_duplicate_groups.items():\n        for filename in files:\n            file_path = duplicates_dir / filename\n            if not file_path.exists():\n                missing_files.append(filename)\n    \n    if missing_files:\n        print(f\"❌ Missing duplicate files in 'duplicates' directory: {missing_files}\")\n        return False\n    \n    print(\"✅ All expected duplicate files are in the 'duplicates' directory\")\n    return True\n\ndef verify_unique_files_remain(test_dir: Path) -> bool:\n    \"\"\"Verify that unique files remain in the original location.\"\"\"\n    expected_unique_files = get_expected_unique_files()\n    \n    missing_files = []\n    for filename in expected_unique_files:\n        file_path = test_dir / filename\n        if not file_path.exists():\n            missing_files.append(filename)\n    \n    if missing_files:\n        print(f\"❌ Missing unique files in original location: {missing_files}\")\n        
return False\n    \n    print(\"✅ All expected unique files remain in the original location\")\n    return True\n\ndef verify_no_duplicate_files_in_original(test_dir: Path) -> bool:\n    \"\"\"Verify that no duplicate files remain in the original location.\"\"\"\n    expected_duplicate_groups = get_expected_duplicate_groups()\n    \n    remaining_duplicates = []\n    for group_name, files in expected_duplicate_groups.items():\n        for filename in files:\n            file_path = test_dir / filename\n            if file_path.exists():\n                remaining_duplicates.append(filename)\n    \n    if remaining_duplicates:\n        print(f\"❌ Duplicate files still exist in original location: {remaining_duplicates}\")\n        return False\n    \n    print(\"✅ No duplicate files remain in the original location\")\n    return True\n\ndef verify_content_integrity(test_dir: Path) -> bool:\n    \"\"\"Verify that file content integrity is maintained after moving.\"\"\"\n    duplicates_dir = test_dir / \"duplicates\"\n    expected_duplicate_groups = get_expected_duplicate_groups()\n    \n    # Check that files in each duplicate group have identical content\n    for group_name, files in expected_duplicate_groups.items():\n        if len(files) < 2:\n            continue\n            \n        # Calculate hash of the first file in the group\n        first_file = duplicates_dir / files[0]\n        if not first_file.exists():\n            print(f\"❌ First file of group {group_name} not found: {files[0]}\")\n            return False\n        \n        first_hash = calculate_file_hash(first_file)\n        if first_hash is None:\n            return False\n        \n        # Check that all other files in the group have the same hash\n        for filename in files[1:]:\n            file_path = duplicates_dir / filename\n            if not file_path.exists():\n                print(f\"❌ File in group {group_name} not found: {filename}\")\n                return False\n          
  \n            file_hash = calculate_file_hash(file_path)\n            if file_hash is None:\n                return False\n            \n            if file_hash != first_hash:\n                print(f\"❌ Files in group {group_name} have different content: {files[0]} vs {filename}\")\n                return False\n    \n    print(\"✅ Content integrity verified - duplicate files have identical content\")\n    return True\n\ndef verify_total_file_count(test_dir: Path) -> bool:\n    \"\"\"Verify that the duplicates directory contains exactly 14 files.\"\"\"\n    duplicates_dir = test_dir / \"duplicates\"\n    \n    # Count files in original location (excluding the duplicates directory itself)\n    original_files = [f for f in test_dir.iterdir() if f.is_file()]\n    \n    # Count files in duplicates directory\n    duplicate_files = [f for f in duplicates_dir.iterdir() if f.is_file()]\n    \n    # Expected: 14 files in duplicates directory\n    expected_duplicates = 14\n    actual_duplicates = len(duplicate_files)\n    \n    if actual_duplicates != expected_duplicates:\n        print(f\"❌ Wrong number of files in duplicates directory. 
Expected: {expected_duplicates}, Actual: {actual_duplicates}\")\n        return False\n    \n    print(f\"✅ Duplicates directory has correct number of files: {actual_duplicates}\")\n    return True\n\n\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying File Duplicates Detection and Organization Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Duplicates Directory Exists\", verify_duplicates_directory_exists),\n        (\"Duplicate Files Moved\", verify_duplicate_files_moved),\n        (\"Unique Files Remain\", verify_unique_files_remain),\n        (\"No Duplicates in Original\", verify_no_duplicate_files_in_original),\n        (\"Content Integrity\", verify_content_integrity),\n        (\"Duplicates Count\", verify_total_file_count),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ File duplicates detection and organization completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/file_context/file_merging/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nYou are given a directory containing multiple text files of varying sizes. Your task is to identify the 10 smallest .txt files, merge their content in alphabetical order, and create a consolidated file called \"merged_content.txt\" with proper formatting.\n\n### Task Objectives\n\n1. **Identify the 10 smallest .txt files** in the test directory\n2. **Sort the selected files alphabetically** by filename\n3. **Merge the content** of these files into a single file\n4. **Add file headers** (file name) before each file's content\n"
  },
  {
    "path": "tasks/filesystem/standard/file_context/file_merging/meta.json",
    "content": "{\n  \"task_id\": \"file_merging\",\n  \"task_name\": \"File Merging\",\n  \"category_id\": \"file_context\",\n  \"category_name\": \"File Context\",\n  \"description\": \"Identify the 10 smallest text files in the directory, then merge their content in alphabetical order into a single consolidated file.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-07\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content transformation\",\n    \"file organization\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_context/\\n    ├── file_01.txt\\n    ├── file_02.txt\\n    ├── file_03.txt\\n    ├── file_04.txt\\n    ├── file_05.txt\\n    ├── file_06.txt\\n    ├── file_07.txt\\n    ├── file_08.txt\\n    ├── file_09.txt\\n    ├── file_10.txt\\n    ├── file_11.txt\\n    ├── file_12.txt\\n    ├── file_13.txt\\n    ├── file_14.txt\\n    ├── file_15.txt\\n    ├── file_16.txt\\n    ├── file_17.txt\\n    ├── file_18.txt\\n    ├── file_19.txt\\n    ├── file_20.txt\\n    └── large_file.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_context.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/file_context/file_merging/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for File Merging Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef get_expected_files() -> list:\n    \"\"\"Get the expected 10 files in alphabetical order.\"\"\"\n    # The 10 smallest files (excluding file_12.txt) in alphabetical order\n    expected_files = [\n        \"file_10.txt\",\n        \"file_11.txt\", \n        \"file_13.txt\",\n        \"file_14.txt\",\n        \"file_15.txt\",\n        \"file_16.txt\",\n        \"file_17.txt\",\n        \"file_18.txt\",\n        \"file_19.txt\",\n        \"file_20.txt\"\n    ]\n    return expected_files\n\ndef verify_merged_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the merged_content.txt file exists.\"\"\"\n    merged_file = test_dir / \"merged_content.txt\"\n    \n    if not merged_file.exists():\n        print(\"❌ File 'merged_content.txt' not found\")\n        return False\n    \n    print(\"✅ Merged content file found\")\n    return True\n\n\n\ndef verify_correct_files_selected(test_dir: Path) -> bool:\n    \"\"\"Verify that the correct 10 files were selected and included.\"\"\"\n    expected_files = get_expected_files()\n    merged_file = test_dir / \"merged_content.txt\"\n    \n    try:\n        content = merged_file.read_text()\n        \n        # Check if all expected files are present\n        for expected_file in expected_files:\n            if expected_file not in content:\n                print(f\"❌ Expected file '{expected_file}' not found in merged content\")\n                return False\n        \n        # Check if file_12.txt is NOT present (should be excluded)\n        if \"file_12.txt\" in 
content:\n            print(\"❌ file_12.txt should be excluded but was found in merged content\")\n            return False\n        \n        print(\"✅ Correct files selected and included\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file selection: {e}\")\n        return False\n\ndef verify_alphabetical_order(test_dir: Path) -> bool:\n    \"\"\"Verify that files are in alphabetical order.\"\"\"\n    expected_files = get_expected_files()\n    merged_file = test_dir / \"merged_content.txt\"\n    \n    try:\n        content = merged_file.read_text()\n        lines = content.split('\\n')\n        \n        # Extract filenames from the content (lines that contain .txt)\n        found_files = []\n        for line in lines:\n            line = line.strip()\n            # Check if this line contains any of the expected filenames\n            for expected_file in expected_files:\n                if expected_file in line:\n                    found_files.append(expected_file)\n                    break\n        \n        # Check if files are in alphabetical order\n        if found_files != expected_files:\n            print(f\"❌ Files not in correct alphabetical order\")\n            print(f\"   Expected: {expected_files}\")\n            print(f\"   Found: {found_files}\")\n            return False\n        \n        print(\"✅ Files are in correct alphabetical order\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying alphabetical order: {e}\")\n        return False\n\ndef verify_file_content_integrity(test_dir: Path) -> bool:\n    \"\"\"Verify that the content of each file is preserved correctly.\"\"\"\n    expected_files = get_expected_files()\n    merged_file = test_dir / \"merged_content.txt\"\n    \n    try:\n        content = merged_file.read_text()\n        lines = content.split('\\n')\n        \n        for expected_file in expected_files:\n            # Get the 
original file content\n            original_file = test_dir / expected_file\n            original_content = original_file.read_text().strip()\n            \n            # Find the line index where this file's header appears\n            header_line_index = -1\n            for i, line in enumerate(lines):\n                if expected_file in line:\n                    header_line_index = i\n                    break\n            \n            if header_line_index == -1:\n                print(f\"❌ Could not find header for {expected_file}\")\n                return False\n            \n            # Find the next header line or end of file\n            next_header_index = len(lines)\n            for i in range(header_line_index + 1, len(lines)):\n                for other_file in expected_files:\n                    if other_file != expected_file and other_file in lines[i]:\n                        next_header_index = i\n                        break\n                if next_header_index != len(lines):\n                    break\n            \n            # Extract content lines (from header + 1 to next header)\n            content_lines = lines[header_line_index + 1:next_header_index]\n            merged_content = '\\n'.join(content_lines).strip()\n            \n            if merged_content != original_content:\n                print(f\"❌ Content mismatch for {expected_file}\")\n                print(f\"   Expected: {original_content}\")\n                print(f\"   Found: {merged_content}\")\n                return False\n        \n        print(\"✅ All file contents preserved correctly\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying content integrity: {e}\")\n        return False\n\ndef verify_filename_headers(test_dir: Path) -> bool:\n    \"\"\"Verify that each file section starts with the correct filename header.\"\"\"\n    expected_files = get_expected_files()\n    merged_file = test_dir / 
\"merged_content.txt\"\n    \n    try:\n        content = merged_file.read_text()\n        \n        for expected_file in expected_files:\n            # Check if the filename appears anywhere in the content (as part of a line)\n            if expected_file not in content:\n                print(f\"❌ Filename header '{expected_file}' not found\")\n                return False\n        \n        print(\"✅ All filename headers present and correctly formatted\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying filename headers: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying File Merging Task...\")\n    \n    # Show expected files for debugging\n    expected_files = get_expected_files()\n    print(f\"📋 Expected files (10 smallest, excluding file_12.txt): {expected_files}\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Merged File Exists\", verify_merged_file_exists),\n        (\"Correct Files Selected\", verify_correct_files_selected),\n        (\"Alphabetical Order\", verify_alphabetical_order),\n        (\"Filename Headers\", verify_filename_headers),\n        (\"Content Integrity\", verify_file_content_integrity),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ File merging task completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/file_context/file_splitting/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nYou need to split a large text file into multiple smaller files with equal character counts. The task involves creating a new directory and splitting the content into exactly 10 files.\n\n### Task Objectives\n\n1. **Create a new directory** named `split` in the test directory\n2. **Split the file** `large_file.txt` into exactly 10 files with equal character counts\n3. **Name the files** as `split_01.txt`, `split_02.txt`, ..., `split_10.txt` in the `split` directory\n"
  },
  {
    "path": "tasks/filesystem/standard/file_context/file_splitting/meta.json",
    "content": "{\n  \"task_id\": \"file_splitting\",\n  \"task_name\": \"File Splitting\",\n  \"category_id\": \"file_context\",\n  \"category_name\": \"File Context\",\n  \"description\": \"Split a large text file into multiple equal-length segments for easier processing, distribution, and parallel handling of content.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-08\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content transformation\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_context/\\n    ├── file_01.txt\\n    ├── file_02.txt\\n    ├── file_03.txt\\n    ├── file_04.txt\\n    ├── file_05.txt\\n    ├── file_06.txt\\n    ├── file_07.txt\\n    ├── file_08.txt\\n    ├── file_09.txt\\n    ├── file_10.txt\\n    ├── file_11.txt\\n    ├── file_12.txt\\n    ├── file_13.txt\\n    ├── file_14.txt\\n    ├── file_15.txt\\n    ├── file_16.txt\\n    ├── file_17.txt\\n    ├── file_18.txt\\n    ├── file_19.txt\\n    ├── file_20.txt\\n    └── large_file.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_context.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/file_context/file_splitting/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for File Splitting Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_split_directory_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the split directory exists.\"\"\"\n    split_dir = test_dir / \"split\"\n    \n    if not split_dir.exists():\n        print(\"❌ Directory 'split' not found\")\n        return False\n    \n    if not split_dir.is_dir():\n        print(\"❌ 'split' exists but is not a directory\")\n        return False\n    \n    print(\"✅ Split directory found\")\n    return True\n\ndef verify_all_split_files_exist(test_dir: Path) -> bool:\n    \"\"\"Verify that all 10 split files exist with correct names.\"\"\"\n    split_dir = test_dir / \"split\"\n    \n    expected_files = [f\"split_{i:02d}.txt\" for i in range(1, 11)]\n    missing_files = []\n    \n    for filename in expected_files:\n        file_path = split_dir / filename\n        if not file_path.exists():\n            missing_files.append(filename)\n    \n    if missing_files:\n        print(f\"❌ Missing files: {missing_files}\")\n        return False\n    \n    print(\"✅ All 10 split files exist with correct names\")\n    return True\n\ndef verify_equal_file_lengths(test_dir: Path) -> bool:\n    \"\"\"Verify that all split files have equal character counts.\"\"\"\n    split_dir = test_dir / \"split\"\n    \n    file_lengths = []\n    for i in range(1, 11):\n        filename = f\"split_{i:02d}.txt\"\n        file_path = split_dir / filename\n        \n        try:\n            content = file_path.read_text()\n            file_lengths.append(len(content))\n        except Exception as e:\n        
    print(f\"❌ Error reading {filename}: {e}\")\n            return False\n    \n    # Check if all lengths are equal\n    if len(set(file_lengths)) != 1:\n        print(f\"❌ File lengths are not equal: {file_lengths}\")\n        return False\n    \n    print(f\"✅ All files have equal length: {file_lengths[0]} characters\")\n    return True\n\ndef verify_content_integrity(test_dir: Path) -> bool:\n    \"\"\"Verify that concatenated split files equal the original file.\"\"\"\n    split_dir = test_dir / \"split\"\n    original_file = test_dir / \"large_file.txt\"\n    \n    # Read original content\n    try:\n        original_content = original_file.read_text()\n    except Exception as e:\n        print(f\"❌ Error reading original file: {e}\")\n        return False\n    \n    # Concatenate all split files\n    concatenated_content = \"\"\n    for i in range(1, 11):\n        filename = f\"split_{i:02d}.txt\"\n        file_path = split_dir / filename\n        \n        try:\n            content = file_path.read_text()\n            concatenated_content += content\n        except Exception as e:\n            print(f\"❌ Error reading {filename}: {e}\")\n            return False\n    \n    # Compare content\n    if concatenated_content != original_content:\n        print(\"❌ Concatenated content does not match original file\")\n        print(f\"   Original length: {len(original_content)}\")\n        print(f\"   Concatenated length: {len(concatenated_content)}\")\n        return False\n    \n    print(\"✅ Concatenated content matches original file exactly\")\n    return True\n\ndef verify_no_extra_files(test_dir: Path) -> bool:\n    \"\"\"Verify that no extra files exist in the split directory.\"\"\"\n    split_dir = test_dir / \"split\"\n    \n    expected_files = {f\"split_{i:02d}.txt\" for i in range(1, 11)}\n    actual_files = {f.name for f in split_dir.iterdir() if f.is_file()}\n    \n    extra_files = actual_files - expected_files\n    if extra_files:\n        
print(f\"❌ Extra files found in split directory: {extra_files}\")\n        return False\n    \n    print(\"✅ No extra files in split directory\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying File Splitting Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Split Directory Exists\", verify_split_directory_exists),\n        (\"All Split Files Exist\", verify_all_split_files_exist),\n        (\"Equal File Lengths\", verify_equal_file_lengths),\n        (\"Content Integrity\", verify_content_integrity),\n        (\"No Extra Files\", verify_no_extra_files),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ File splitting task completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/file_context/pattern_matching/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nYour task is to find all files that contain a substring of 30 or more characters that also appears in `large_file.txt`. **You are not allowed to use Python code.**\n\n### Task Objectives\n\n1. **Read the reference file** `large_file.txt` to understand its content\n2. **Examine each file** from file_01.txt to file_20.txt\n3. **Find files** that contain a substring of 30 or more characters that matches a substring in `large_file.txt`\n4. **Create a file `answer.txt`** and write the results to it with the following format:\n   - One line per matching file\n   - Format: `filename.txt,start_position`\n   - Where start_position is the character position (1-indexed) of the matching substring in `large_file.txt`\n   - Do not add anything other than `filename.txt,start_position`.\n"
  },
  {
    "path": "tasks/filesystem/standard/file_context/pattern_matching/meta.json",
    "content": "{\n  \"task_id\": \"pattern_matching\",\n  \"task_name\": \"Pattern Matching\",\n  \"category_id\": \"file_context\",\n  \"category_name\": \"File Context\",\n  \"description\": \"Search multiple files for shared character sequences and precisely locate all matching pattern occurrences within the target files.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-06\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"pattern analysis\",\n    \"cross-referencing\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_context/\\n    ├── file_01.txt\\n    ├── file_02.txt\\n    ├── file_03.txt\\n    ├── file_04.txt\\n    ├── file_05.txt\\n    ├── file_06.txt\\n    ├── file_07.txt\\n    ├── file_08.txt\\n    ├── file_09.txt\\n    ├── file_10.txt\\n    ├── file_11.txt\\n    ├── file_12.txt\\n    ├── file_13.txt\\n    ├── file_14.txt\\n    ├── file_15.txt\\n    ├── file_16.txt\\n    ├── file_17.txt\\n    ├── file_18.txt\\n    ├── file_19.txt\\n    ├── file_20.txt\\n    └── large_file.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_context.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/file_context/pattern_matching/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for File Filtering Task: Find Files with Common Substring\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\nimport re\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_answer_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer.txt file exists.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    if not answer_file.exists():\n        print(\"❌ File 'answer.txt' not found\")\n        return False\n    \n    print(\"✅ Answer file found\")\n    return True\n\ndef verify_answer_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer file has the correct format.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # If file is empty, that's acceptable (no matches found)\n        if not content:\n            print(\"✅ Answer file is empty (no matches found)\")\n            return True\n        \n        lines = content.split('\\n')\n        \n        for i, line in enumerate(lines, 1):\n            line = line.strip()\n            if not line:\n                continue\n                \n            # Check format: filename.txt,start_position\n            parts = line.split(',')\n            if len(parts) != 2:\n                print(f\"❌ Line {i} has incorrect format: {line}\")\n                print(\"   Expected format: filename.txt,start_position\")\n                return False\n            \n            filename, start_pos = parts\n            \n            # Check filename format\n            if not filename.endswith('.txt') or not filename.startswith('file_'):\n                print(f\"❌ Line {i} has 
invalid filename: {filename}\")\n                return False\n            \n            # Check position format (should be integer)\n            try:\n                start_int = int(start_pos)\n                if start_int <= 0:\n                    print(f\"❌ Line {i} has invalid position: {start_pos}\")\n                    return False\n            except ValueError:\n                print(f\"❌ Line {i} has non-integer position: {start_pos}\")\n                return False\n        \n        print(\"✅ Answer format is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading answer file: {e}\")\n        return False\n\ndef find_30_plus_char_matches(test_dir: Path) -> dict:\n    \"\"\"Find all matches with 30 or more characters between files and large_file.txt.\"\"\"\n    large_file = test_dir / \"large_file.txt\"\n    if not large_file.exists():\n        print(\"❌ large_file.txt not found\")\n        return {}\n    \n    large_content = large_file.read_text()\n    matches = {}\n    \n    # Check each file from file_01.txt to file_20.txt\n    for i in range(1, 21):\n        filename = f\"file_{i:02d}.txt\"\n        file_path = test_dir / filename\n        \n        if not file_path.exists():\n            continue\n            \n        file_content = file_path.read_text()\n        \n        # Find the longest matching substring (30+ characters)\n        longest_match = \"\"\n        longest_match_start = -1\n        \n        # Check all possible substrings in the file\n        for start_pos in range(len(file_content)):\n            for end_pos in range(start_pos + 30, len(file_content) + 1):  # At least 30 characters\n                substring = file_content[start_pos:end_pos]\n                \n                # Check if this substring exists in large_file.txt\n                if substring in large_content:\n                    if len(substring) > len(longest_match):\n                        longest_match = 
substring\n                        # Find the position in large_file.txt where this substring starts\n                        large_start_pos = large_content.find(substring)\n                        longest_match_start = large_start_pos + 1  # 1-indexed\n        \n        # If we found a match of 30+ characters, record it\n        if longest_match and len(longest_match) >= 30:\n            matches[filename] = longest_match_start\n    \n    return matches\n\ndef verify_matches_are_correct(test_dir: Path) -> bool:\n    \"\"\"Verify that the matches found in answer.txt are actually correct.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # If no content, check if there should actually be no matches\n        if not content:\n            expected_matches = find_30_plus_char_matches(test_dir)\n            if expected_matches:\n                print(\"❌ Answer file is empty but matches should exist\")\n                for filename, start_pos in expected_matches.items():\n                    print(f\"   Expected: {filename},{start_pos}\")\n                return False\n            else:\n                print(\"✅ No matches found (correct)\")\n                return True\n        \n        # Parse answer file\n        answer_matches = {}\n        lines = content.split('\\n')\n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n            filename, start_pos = line.split(',')\n            answer_matches[filename] = int(start_pos)\n        \n        # Get expected matches\n        expected_matches = find_30_plus_char_matches(test_dir)\n        \n        # Check if all answer matches are correct\n        for filename, start_pos in answer_matches.items():\n            if filename not in expected_matches:\n                print(f\"❌ File {filename} listed in answer but has no valid 30+ character match\")\n                return 
False\n            \n            expected_start = expected_matches[filename]\n            if start_pos != expected_start:\n                print(f\"❌ Incorrect match position for {filename}\")\n                print(f\"   Expected: {expected_start}\")\n                print(f\"   Found: {start_pos}\")\n                return False\n        \n        # Check if all expected matches are in answer\n        for filename in expected_matches:\n            if filename not in answer_matches:\n                print(f\"❌ Missing match for {filename} in answer file\")\n                return False\n        \n        print(\"✅ All matches are correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying matches: {e}\")\n        return False\n\ndef verify_match_length_is_30_plus(test_dir: Path) -> bool:\n    \"\"\"Verify that all matches are at least 30 characters long.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        if not content:\n            return True  # No matches to verify\n        \n        large_file = test_dir / \"large_file.txt\"\n        large_content = large_file.read_text()\n        \n        lines = content.split('\\n')\n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n                \n            filename, start_pos = line.split(',')\n            start_int = int(start_pos)\n            \n            # Get the file content to check the match\n            file_path = test_dir / filename\n            file_content = file_path.read_text()\n            \n            # Find the longest matching substring starting from the given position\n            longest_match = \"\"\n            for end_pos in range(start_int + 30 - 1, len(large_content) + 1):  # At least 30 characters\n                substring = large_content[start_int - 1:end_pos]  # Convert to 0-indexed\n            
    if substring in file_content:\n                    longest_match = substring\n                else:\n                    break\n            \n            if len(longest_match) < 30:\n                print(f\"❌ Match in {filename} is {len(longest_match)} characters, less than 30\")\n                print(f\"   Starting position: {start_int}\")\n                return False\n        \n        print(\"✅ All matches are at least 30 characters long\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying match lengths: {e}\")\n        return False\n\ndef verify_files_exist(test_dir: Path) -> bool:\n    \"\"\"Verify that all files mentioned in answer.txt actually exist.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        if not content:\n            return True  # No files to verify\n        \n        lines = content.split('\\n')\n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n                \n            filename = line.split(',')[0]\n            file_path = test_dir / filename\n            \n            if not file_path.exists():\n                print(f\"❌ File mentioned in answer does not exist: {filename}\")\n                return False\n        \n        print(\"✅ All files mentioned in answer exist\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file existence: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Pattern Matching Task: Find Files with Common Substring...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Answer File Exists\", verify_answer_file_exists),\n        (\"Answer Format\", verify_answer_format),\n        (\"Files Exist\", verify_files_exist),\n        (\"Match Length 
is 30+\", verify_match_length_is_30_plus),\n        (\"Matches are Correct\", verify_matches_are_correct),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ File filtering task completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/file_context/uppercase/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nYou need to process 10 text files (file_01.txt to file_10.txt) and convert their content to uppercase format.\n\n### Task Objectives\n\n1. **Create an uppercase directory** in the test environment root\n2. **Convert each file** from file_01.txt to file_10.txt to uppercase\n3. **Save converted files** in the uppercase/ directory with the same names\n4. **Count words** in each original file (file_01.txt to file_10.txt)\n5. **Create answer.txt** with word counts in the specified format.\n\n### Specified Format of answer.txt\n\nCreate a file named `answer.txt` in uppercase/\n\n**Requirements:**\n\n- Each line should follow the format: `<filename>:<word_count>`\n- Include all 10 files: file_01.txt, file_02.txt, ..., file_10.txt\n- Use the exact filename format (file_01.txt, file_02.txt, etc.)\n- One entry per line\n"
  },
  {
    "path": "tasks/filesystem/standard/file_context/uppercase/meta.json",
    "content": "{\n  \"task_id\": \"uppercase\",\n  \"task_name\": \"Uppercase\",\n  \"category_id\": \"file_context\",\n  \"category_name\": \"File Context\",\n  \"description\": \"Convert the content of 10 specified files to uppercase and report the word count of each original file.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-19\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content transformation\",\n    \"data extraction\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_context/\\n    ├── file_01.txt\\n    ├── file_02.txt\\n    ├── file_03.txt\\n    ├── file_04.txt\\n    ├── file_05.txt\\n    ├── file_06.txt\\n    ├── file_07.txt\\n    ├── file_08.txt\\n    ├── file_09.txt\\n    ├── file_10.txt\\n    ├── file_11.txt\\n    ├── file_12.txt\\n    ├── file_13.txt\\n    ├── file_14.txt\\n    ├── file_15.txt\\n    ├── file_16.txt\\n    ├── file_17.txt\\n    ├── file_18.txt\\n    ├── file_19.txt\\n    ├── file_20.txt\\n    └── large_file.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_context.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/file_context/uppercase/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for File Context Task: Convert Files to Uppercase\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\nimport re\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_uppercase_directory_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the uppercase directory exists.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n\n    if not uppercase_dir.exists():\n        print(\"| ❌ Directory 'uppercase' not found\")\n        return False\n\n    if not uppercase_dir.is_dir():\n        print(\"| ❌ 'uppercase' exists but is not a directory\")\n        return False\n\n    print(\"| ✓ Uppercase directory found\")\n    return True\n\ndef verify_uppercase_files_exist(test_dir: Path) -> bool:\n    \"\"\"Verify that all 10 uppercase files exist.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n\n    for i in range(1, 11):\n        filename = f\"file_{i:02d}.txt\"\n        file_path = uppercase_dir / filename\n\n        if not file_path.exists():\n            print(f\"| ❌ File '{filename}' not found in uppercase directory\")\n            return False\n\n    print(\"| ✓ All 10 uppercase files found\")\n    return True\n\ndef verify_uppercase_content(test_dir: Path) -> bool:\n    \"\"\"Verify that uppercase files contain the correct uppercase content.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n\n    for i in range(1, 11):\n        filename = f\"file_{i:02d}.txt\"\n        original_file = test_dir / filename\n        uppercase_file = uppercase_dir / filename\n\n        if not original_file.exists():\n            print(f\"| ❌ Original file '{filename}' not found\")\n            return False\n\n        try:\n            
original_content = original_file.read_text()\n            uppercase_content = uppercase_file.read_text()\n\n            # Check if uppercase content is the uppercase version of original\n            expected_uppercase = original_content.upper()\n\n            if uppercase_content != expected_uppercase:\n                print(f\"| ❌ File '{filename}' content is not properly converted to uppercase\")\n                return False\n\n        except Exception as e:\n            print(f\"| ❌ Error reading file '{filename}': {e}\")\n            return False\n\n    print(\"| ✓ All uppercase files contain correct uppercase content\")\n    return True\n\ndef verify_answer_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer.txt file exists in the uppercase directory.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n    answer_file = uppercase_dir / \"answer.txt\"\n\n    if not answer_file.exists():\n        print(\"| ❌ File 'answer.txt' not found in uppercase directory\")\n        return False\n\n    print(\"| ✓ Answer file found in uppercase directory\")\n    return True\n\ndef verify_answer_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer file has the correct format.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n    answer_file = uppercase_dir / \"answer.txt\"\n\n    try:\n        content = answer_file.read_text().strip()\n\n        if not content:\n            print(\"| ❌ Answer file is empty\")\n            return False\n\n        lines = content.split('\\n')\n\n        # Check if we have exactly 10 lines\n        if len(lines) != 10:\n            print(f\"| ❌ Answer file has {len(lines)} lines, expected 10\")\n            return False\n\n        for i, line in enumerate(lines, 1):\n            line = line.strip()\n            if not line:\n                print(f\"| ❌ Line {i} is empty\")\n                return False\n\n            # Check format: filename:word_count\n            if ':' not in line:\n                
print(f\"| ❌ Line {i} has incorrect format: {line}\")\n                print(\"   Expected format: filename:word_count\")\n                return False\n\n            parts = line.split(':', 1)\n            if len(parts) != 2:\n                print(f\"| ❌ Line {i} has incorrect format: {line}\")\n                print(\"   Expected format: filename:word_count\")\n                return False\n\n            filename, word_count_str = parts\n\n            # Check filename format\n            if not filename.endswith('.txt') or not filename.startswith('file_'):\n                print(f\"| ❌ Line {i} has invalid filename: {filename}\")\n                return False\n\n            # Check word count format (should be integer)\n            try:\n                word_count = int(word_count_str)\n                if word_count <= 0:\n                    print(f\"| ❌ Line {i} has invalid word count: {word_count_str}\")\n                    return False\n            except ValueError:\n                print(f\"| ❌ Line {i} has non-integer word count: {word_count_str}\")\n                return False\n\n        print(\"| ✓ Answer format is correct\")\n        return True\n\n    except Exception as e:\n        print(f\"| ❌ Error reading answer file: {e}\")\n        return False\n\ndef count_words_in_file(file_path: Path) -> int:\n    \"\"\"Count words in a file.\"\"\"\n    try:\n        content = file_path.read_text()\n        # Split by whitespace and filter out empty strings\n        words = [word for word in content.split() if word.strip()]\n        return len(words)\n    except Exception as e:\n        print(f\"| ❌ Error reading file {file_path}: {e}\")\n        return 0\n\ndef verify_word_counts_are_correct(test_dir: Path) -> bool:\n    \"\"\"Verify that the word counts in answer.txt are correct.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n    answer_file = uppercase_dir / \"answer.txt\"\n\n    try:\n        content = answer_file.read_text().strip()\n        
lines = content.split('\\n')\n\n        # Expected word counts based on answer.md\n        expected_counts = [22, 22, 22, 22, 18, 22, 22, 22, 18, 20]\n\n        # Create a set of expected file entries for easier checking\n        expected_entries = set()\n        for i in range(1, 11):\n            filename = f\"file_{i:02d}.txt\"\n            expected_count = expected_counts[i - 1]\n            if i == 6:  # Special case for file_06.txt: can be 21 or 22\n                expected_entries.add(f\"{filename}:21\")\n                expected_entries.add(f\"{filename}:22\")\n            else:\n                expected_entries.add(f\"{filename}:{expected_count}\")\n\n        # Check each line in the answer file\n        found_entries = set()\n        for line in lines:\n            line = line.strip()\n            if line in expected_entries:\n                found_entries.add(line)\n            else:\n                print(f\"| ❌ Invalid entry: {line}\")\n                return False\n\n        # Check if we found all expected entries\n        if len(found_entries) != 10:\n            print(f\"| ❌ Found {len(found_entries)} entries, expected 10\")\n            missing = expected_entries - found_entries\n            if missing:\n                print(f\"   Missing entries: {missing}\")\n            return False\n\n        print(\"| ✓ All word counts are correct\")\n        return True\n\n    except Exception as e:\n        print(f\"| ❌ Error verifying word counts: {e}\")\n        return False\n\ndef verify_all_files_are_included(test_dir: Path) -> bool:\n    \"\"\"Verify that all 10 files are included in the answer.\"\"\"\n    uppercase_dir = test_dir / \"uppercase\"\n    answer_file = uppercase_dir / \"answer.txt\"\n\n    try:\n        content = answer_file.read_text().strip()\n        lines = content.split('\\n')\n\n        # Check that all 10 files are present\n        found_files = set()\n        for line in lines:\n            parts = line.split(':', 1)\n            
filename = parts[0]\n            found_files.add(filename)\n\n        expected_files = {f\"file_{i:02d}.txt\" for i in range(1, 11)}\n\n        if found_files != expected_files:\n            missing = expected_files - found_files\n            extra = found_files - expected_files\n            if missing:\n                print(f\"| ❌ Missing files in answer: {missing}\")\n            if extra:\n                print(f\"| ❌ Extra files in answer: {extra}\")\n            return False\n\n        print(\"| ✓ All 10 files are included in answer\")\n        return True\n\n    except Exception as e:\n        print(f\"| ❌ Error verifying file inclusion: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"| 🔍 Verifying Uppercase in: {test_dir}\")\n        print('|')\n\n        # Run all verification checks\n        checks = [\n            (\"Uppercase directory exists\", verify_uppercase_directory_exists),\n            (\"Uppercase files exist\", verify_uppercase_files_exist),\n            (\"Uppercase content is correct\", verify_uppercase_content),\n            (\"Answer file exists in uppercase directory\", verify_answer_file_exists),\n            (\"Answer format is correct\", verify_answer_format),\n            (\"All files are included\", verify_all_files_are_included),\n            (\"Word counts are correct\", verify_word_counts_are_correct),\n        ]\n\n        all_passed = True\n        for check_name, check_func in checks:\n            print(f\"| Checking {check_name}...\")\n            if not check_func(test_dir):\n                all_passed = False\n            print('|')\n\n        if all_passed:\n            print(\"| 🎉 All verification checks passed!\")\n            sys.exit(0)\n        else:\n            print(\"| ❌ Some verification checks failed!\")\n            sys.exit(1)\n\n    except Exception as e:\n        print(f\"| ❌ Verification failed with 
error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/filesystem/standard/file_property/size_classification/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nClassify all files in the test directory into three categories based on their file size. Create three subdirectories and move files accordingly.\n\n### Task Objectives\n\n1. **Create three directories** in the test directory:\n\n   - `small_files/` - for files smaller than 300 bytes\n   - `medium_files/` - for files between 300-700 bytes (inclusive)\n   - `large_files/` - for files larger than 700 bytes\n2. **Move all files** from the test directory into the appropriate subdirectory based on their size\n3. **Handle all file types** - classify all files regardless of their extension (.txt, .jpg, .MOV, etc.)\n"
  },
  {
    "path": "tasks/filesystem/standard/file_property/size_classification/meta.json",
    "content": "{\n  \"task_id\": \"size_classification\",\n  \"task_name\": \"Size Classification\",\n  \"category_id\": \"file_property\",\n  \"category_name\": \"File Property\",\n  \"description\": \"Classify all files in the folder by size into distinct categories (small/medium/large) and generate a comprehensive summary report with statistics.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-07\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"file organization\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_property/\\n    ├── bear.jpg\\n    ├── bridge.jpg\\n    ├── bus.MOV\\n    ├── random_file_1.txt\\n    ├── random_file_2.txt\\n    ├── random_file_3.txt\\n    ├── road.MOV\\n    └── sg.jpg\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_property.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/file_property/size_classification/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for File Classification Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef get_expected_classification():\n    \"\"\"Return the expected file classification based on answer.md.\"\"\"\n    return {\n        \"small_files\": [\"random_file_1.txt\", \"random_file_3.txt\"],\n        \"medium_files\": [\"random_file_2.txt\"],\n        \"large_files\": [\"bear.jpg\", \"sg.jpg\", \"road.MOV\", \"bus.MOV\", \"bridge.jpg\"]\n    }\n\ndef verify_directories_exist(test_dir: Path) -> bool:\n    \"\"\"Verify that all three required directories exist.\"\"\"\n    required_dirs = [\"small_files\", \"medium_files\", \"large_files\"]\n    \n    for dir_name in required_dirs:\n        dir_path = test_dir / dir_name\n        if not dir_path.exists():\n            print(f\"❌ Directory '{dir_name}' not found\")\n            return False\n        if not dir_path.is_dir():\n            print(f\"❌ '{dir_name}' exists but is not a directory\")\n            return False\n    \n    print(\"✅ All required directories exist\")\n    return True\n\ndef verify_file_classification(test_dir: Path) -> bool:\n    \"\"\"Verify that files are correctly classified into the right directories.\"\"\"\n    expected_classification = get_expected_classification()\n    \n    for dir_name, expected_files in expected_classification.items():\n        dir_path = test_dir / dir_name\n        \n        # Check that all expected files are in the directory\n        missing_files = []\n        for filename in expected_files:\n            file_path = dir_path / filename\n            if not file_path.exists():\n                
missing_files.append(filename)\n        \n        if missing_files:\n            print(f\"❌ Missing files in '{dir_name}': {missing_files}\")\n            return False\n        \n        # Check that no unexpected files are in the directory (ignore .DS_Store and similar system files)\n        actual_files = [f.name for f in dir_path.iterdir() if f.is_file()]\n        # Filter out system files that are commonly present\n        system_files = ['.DS_Store', 'Thumbs.db', '.DS_Store?', '._.DS_Store']\n        unexpected_files = [f for f in actual_files if f not in expected_files and f not in system_files]\n        \n        if unexpected_files:\n            print(f\"❌ Unexpected files in '{dir_name}': {unexpected_files}\")\n            return False\n    \n    print(\"✅ All files are correctly classified\")\n    return True\n\ndef verify_no_files_in_root(test_dir: Path) -> bool:\n    \"\"\"Verify that no files remain in the root test directory.\"\"\"\n    root_files = [f for f in test_dir.iterdir() if f.is_file()]\n    \n    # Filter out system files that are commonly present\n    system_files = ['.DS_Store', 'Thumbs.db', '.DS_Store?', '._.DS_Store']\n    non_system_files = [f for f in root_files if f.name not in system_files]\n    \n    if non_system_files:\n        print(f\"❌ Files still present in root directory: {[f.name for f in non_system_files]}\")\n        return False\n    \n    print(\"✅ No files remain in root directory\")\n    return True\n\ndef verify_file_sizes(test_dir: Path) -> bool:\n    \"\"\"Verify that files are actually in the correct size categories.\"\"\"\n    size_ranges = {\n        \"small_files\": (0, 299),  # < 300 bytes\n        \"medium_files\": (300, 700),  # 300-700 bytes (inclusive)\n        \"large_files\": (701, float('inf'))  # > 700 bytes\n    }\n    \n    for dir_name, (min_size, max_size) in size_ranges.items():\n        dir_path = test_dir / dir_name\n        \n        for file_path in dir_path.iterdir():\n            if 
file_path.is_file():\n                file_size = file_path.stat().st_size\n                \n                if dir_name == \"small_files\" and file_size >= 300:\n                    print(f\"❌ File {file_path.name} in small_files but size is {file_size} bytes\")\n                    return False\n                elif dir_name == \"medium_files\" and (file_size < 300 or file_size > 700):\n                    print(f\"❌ File {file_path.name} in medium_files but size is {file_size} bytes\")\n                    return False\n                elif dir_name == \"large_files\" and file_size <= 700:\n                    print(f\"❌ File {file_path.name} in large_files but size is {file_size} bytes\")\n                    return False\n    \n    print(\"✅ All files are in correct size categories\")\n    return True\n\ndef verify_total_file_count(test_dir: Path) -> bool:\n    \"\"\"Verify that all original files are accounted for.\"\"\"\n    expected_classification = get_expected_classification()\n    total_expected = sum(len(files) for files in expected_classification.values())\n    \n    total_actual = 0\n    for dir_name in [\"small_files\", \"medium_files\", \"large_files\"]:\n        dir_path = test_dir / dir_name\n        if dir_path.exists():\n            # Count only non-system files\n            system_files = ['.DS_Store', 'Thumbs.db', '.DS_Store?', '._.DS_Store']\n            files_in_dir = [f for f in dir_path.iterdir() if f.is_file() and f.name not in system_files]\n            total_actual += len(files_in_dir)\n    \n    if total_actual != total_expected:\n        print(f\"❌ Expected {total_expected} files total, found {total_actual}\")\n        return False\n    \n    print(f\"✅ Total file count is correct: {total_actual}\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"🔍 Verifying file classification in: {test_dir}\")\n        \n        # Run all verification 
checks\n        checks = [\n            (\"Directory existence\", verify_directories_exist),\n            (\"File classification\", verify_file_classification),\n            (\"No files in root\", verify_no_files_in_root),\n            (\"File size validation\", verify_file_sizes),\n            (\"Total file count\", verify_total_file_count)\n        ]\n        \n        all_passed = True\n        for check_name, check_func in checks:\n            print(f\"\\n📋 Checking: {check_name}\")\n            if not check_func(test_dir):\n                all_passed = False\n        \n        if all_passed:\n            print(\"\\n🎉 All verification checks passed!\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Some verification checks failed!\")\n            sys.exit(1)\n            \n    except Exception as e:\n        print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/file_property/time_classification/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nAnalyze the creation time (ctime) of all files in the test directory and organize them into a hierarchical directory structure based on their creation dates.\n\n### Task Objectives\n\n1. **Read metadata** of all files in the test directory\n2. **Analyze creation times** (ctime) of all files (excluding .DS_Store)\n3. **Create directory structure** organized by month/day based on creation time\n4. **Move files** to appropriate directories\n5. **Create metadata analysis files** in each directory\n\n### Expected Output\n\n#### Directory Structure\n\nCreate directories in the format: `MM/DD/` where:\n\n- MM = month (two digits, e.g., 01, 02)\n- DD = day (two digits, e.g., 07, 09, 11, 26)\n\n#### Metadata Analysis Files\n\nCreate a file named `metadata_analyse.txt` in each directory containing exactly two lines:\n\n- **Line 1**: Filename of the oldest file and its creation time (excluding .DS_Store)\n- **Line 2**: Filename of the latest file and its creation time (excluding .DS_Store)\n"
  },
  {
    "path": "tasks/filesystem/standard/file_property/time_classification/meta.json",
    "content": "{\n  \"task_id\": \"time_classification\",\n  \"task_name\": \"Time Classification\",\n  \"category_id\": \"file_property\",\n  \"category_name\": \"File Property\",\n  \"description\": \"Organize files based on creation timestamps into temporal categories and create a detailed time-based classification report with groupings.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-07\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"file organization\",\n    \"data extraction\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"file_property/\\n    ├── bear.jpg\\n    ├── bridge.jpg\\n    ├── bus.MOV\\n    ├── random_file_1.txt\\n    ├── random_file_2.txt\\n    ├── random_file_3.txt\\n    ├── road.MOV\\n    └── sg.jpg\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/file_property.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/file_property/time_classification/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for File Organization by Creation Time Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\nfrom datetime import datetime\nimport re\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef get_month_mapping():\n    \"\"\"Return mapping for both numeric and alphabetic month representations.\"\"\"\n    return {\n        \"07\": [\"07\", \"7\", \"jul\", \"Jul\", \"JUL\"],\n        \"08\": [\"08\", \"8\", \"aug\", \"Aug\", \"AUG\"]\n    }\n\ndef get_day_mapping():\n    \"\"\"Return mapping for day representations.\"\"\"\n    return {\n        \"09\": [\"09\", \"9\"],\n        \"25\": [\"25\"],\n        \"26\": [\"26\"],\n        \"06\": [\"06\", \"6\"]\n    }\n\ndef get_expected_directory_structure():\n    \"\"\"Return the expected directory structure based on answer.md.\"\"\"\n    return {\n        \"07\": {\n            \"09\": [\"sg.jpg\"],\n            \"25\": [\"bus.MOV\"],\n            \"26\": [\"road.MOV\"]\n        },\n        \"08\": {\n            \"06\": [\"bear.jpg\", \"bridge.jpg\", \"random_file_1.txt\", \"random_file_2.txt\", \"random_file_3.txt\"]\n        }\n    }\n\ndef find_month_directory(test_dir: Path, expected_month: str) -> Path:\n    \"\"\"Find the actual month directory, handling both numeric and alphabetic representations.\"\"\"\n    month_mapping = get_month_mapping()\n    valid_month_names = month_mapping.get(expected_month, [expected_month])\n    \n    for month_name in valid_month_names:\n        month_dir = test_dir / month_name\n        if month_dir.exists() and month_dir.is_dir():\n            return month_dir\n    \n    return None\n\ndef find_day_directory(month_dir: Path, expected_day: str) -> Path:\n    
\"\"\"Find the actual day directory, handling both numeric representations.\"\"\"\n    day_mapping = get_day_mapping()\n    valid_day_names = day_mapping.get(expected_day, [expected_day])\n    \n    for day_name in valid_day_names:\n        day_dir = month_dir / day_name\n        if day_dir.exists() and day_dir.is_dir():\n            return day_dir\n    \n    return None\n\ndef verify_directory_structure(test_dir: Path) -> bool:\n    \"\"\"Verify that the correct directory structure exists.\"\"\"\n    expected_structure = get_expected_directory_structure()\n    \n    for expected_month, days in expected_structure.items():\n        month_dir = find_month_directory(test_dir, expected_month)\n        if month_dir is None:\n            valid_names = get_month_mapping().get(expected_month, [expected_month])\n            print(f\"❌ Month directory not found. Expected one of: {valid_names}\")\n            return False\n        \n        for day, expected_files in days.items():\n            day_dir = find_day_directory(month_dir, day)\n            if day_dir is None:\n                valid_day_names = get_day_mapping().get(day, [day])\n                print(f\"❌ Day directory '{month_dir.name}/{day}' not found. 
Expected one of: {valid_day_names}\")\n                return False\n            if not day_dir.is_dir():\n                print(f\"❌ '{month_dir.name}/{day_dir.name}' exists but is not a directory\")\n                return False\n    \n    print(\"✅ Directory structure is correct\")\n    return True\n\ndef verify_files_in_directories(test_dir: Path) -> bool:\n    \"\"\"Verify that files are in the correct directories.\"\"\"\n    expected_structure = get_expected_directory_structure()\n    \n    for expected_month, days in expected_structure.items():\n        month_dir = find_month_directory(test_dir, expected_month)\n        if month_dir is None:\n            continue  # Already handled in verify_directory_structure\n        \n        for day, expected_files in days.items():\n            day_dir = find_day_directory(month_dir, day)\n            if day_dir is None:\n                continue  # Already handled in verify_directory_structure\n            \n            # Check that all expected files are in the directory\n            missing_files = []\n            for filename in expected_files:\n                file_path = day_dir / filename\n                if not file_path.exists():\n                    missing_files.append(filename)\n            \n            if missing_files:\n                print(f\"❌ Missing files in '{month_dir.name}/{day_dir.name}': {missing_files}\")\n                return False\n            \n            # Check that no unexpected files are in the directory (ignore .DS_Store and metadata_analyse.txt)\n            actual_files = [f.name for f in day_dir.iterdir() if f.is_file()]\n            system_files = ['.DS_Store', 'Thumbs.db', '.DS_Store?', '._.DS_Store', 'metadata_analyse.txt']\n            unexpected_files = [f for f in actual_files if f not in expected_files and f not in system_files]\n            \n            if unexpected_files:\n                print(f\"❌ Unexpected files in '{month_dir.name}/{day_dir.name}': 
{unexpected_files}\")\n                return False\n    \n    print(\"✅ All files are in correct directories\")\n    return True\n\ndef verify_metadata_analysis_files(test_dir: Path) -> bool:\n    \"\"\"Verify that metadata_analyse.txt files exist and have correct content.\"\"\"\n    expected_structure = get_expected_directory_structure()\n    \n    for expected_month, days in expected_structure.items():\n        month_dir = find_month_directory(test_dir, expected_month)\n        if month_dir is None:\n            continue  # Already handled in verify_directory_structure\n        \n        for day, expected_files in days.items():\n            day_dir = find_day_directory(month_dir, day)\n            if day_dir is None:\n                continue  # Already handled in verify_directory_structure\n            \n            metadata_file = day_dir / \"metadata_analyse.txt\"\n            \n            if not metadata_file.exists():\n                print(f\"❌ metadata_analyse.txt not found in '{month_dir.name}/{day_dir.name}'\")\n                return False\n            \n            try:\n                content = metadata_file.read_text().strip()\n                lines = content.split('\\n')\n                \n                # Check that there are exactly 2 lines\n                if len(lines) != 2:\n                    print(f\"❌ metadata_analyse.txt in '{month_dir.name}/{day_dir.name}' has {len(lines)} lines, expected 2\")\n                    return False\n                \n                # Check each line - more flexible verification\n                for line_num, line in enumerate(lines, 1):\n                    line_lower = line.lower()\n                    \n                    # Check filename based on expected_month and day\n                    expected_filename = None\n                    if expected_month == \"07\" and day == \"09\":\n                        expected_filename = \"sg.jpg\"\n                    elif expected_month == \"07\" and day == 
\"25\":\n                        expected_filename = \"bus.mov\"\n                    elif expected_month == \"07\" and day == \"26\":\n                        expected_filename = \"road.mov\"\n                    elif expected_month == \"08\" and day == \"06\":\n                        # For 08/06, check if it's one of the expected files\n                        if line_num == 1:  # First line should be bear.jpg\n                            expected_filename = \"bear.jpg\"\n                        else:  # Second line should be one of the random files\n                            expected_filenames = [\"random_file_1.txt\", \"random_file_2.txt\", \"random_file_3.txt\"]\n                            if not any(filename in line_lower for filename in expected_filenames):\n                                print(f\"❌ Line {line_num} in '{month_dir.name}/{day_dir.name}' should contain one of {expected_filenames}: {line}\")\n                                return False\n                            continue  # Skip other checks for this line\n                    \n                    if expected_filename and expected_filename not in line_lower:\n                        print(f\"❌ Line {line_num} in '{month_dir.name}/{day_dir.name}' should contain '{expected_filename}': {line}\")\n                        return False\n                    \n                    # Check month letters\n                    month_letters = None\n                    if expected_month == \"07\":\n                        month_letters = [\"jul\", \"7\"]\n                    elif expected_month == \"08\":\n                        month_letters = [\"aug\", \"8\"]\n                    \n                    if month_letters and not any(letter in line_lower for letter in month_letters):\n                        print(f\"❌ Line {line_num} in '{month_dir.name}/{day_dir.name}' should contain month letters: {line}\")\n                        return False\n                    \n                    # Check year 
(2025)\n                    if \"2025\" not in line_lower:\n                        print(f\"❌ Line {line_num} in '{month_dir.name}/{day_dir.name}' should contain '2025': {line}\")\n                        return False\n                    \n                    # Check day number - support both formats\n                    valid_day_names = get_day_mapping().get(day, [day])\n                    if not any(day_name in line_lower for day_name in valid_day_names):\n                        print(f\"❌ Line {line_num} in '{month_dir.name}/{day_dir.name}' should contain day '{day}' (or {valid_day_names}): {line}\")\n                        return False\n                \n            except Exception as e:\n                print(f\"❌ Error reading metadata_analyse.txt in '{month_dir.name}/{day_dir.name}': {e}\")\n                return False\n    \n    print(\"✅ All metadata_analyse.txt files are correct\")\n    return True\n\ndef verify_no_files_in_root(test_dir: Path) -> bool:\n    \"\"\"Verify that no files remain in the root test directory.\"\"\"\n    root_files = [f for f in test_dir.iterdir() if f.is_file()]\n    \n    # Filter out system files that are commonly present\n    system_files = ['.DS_Store', 'Thumbs.db', '.DS_Store?', '._.DS_Store']\n    non_system_files = [f for f in root_files if f.name not in system_files]\n    \n    if non_system_files:\n        print(f\"❌ Files still present in root directory: {[f.name for f in non_system_files]}\")\n        return False\n    \n    print(\"✅ No files remain in root directory\")\n    return True\n\ndef verify_total_file_count(test_dir: Path) -> bool:\n    \"\"\"Verify that all original files are accounted for.\"\"\"\n    expected_structure = get_expected_directory_structure()\n    total_expected = sum(len(files) for days in expected_structure.values() for files in days.values())\n    \n    total_actual = 0\n    for expected_month, days in expected_structure.items():\n        month_dir = find_month_directory(test_dir, 
expected_month)\n        if month_dir is None:\n            continue\n        for day in days:\n            day_dir = find_day_directory(month_dir, day)\n            if day_dir and day_dir.exists():\n                # Count only non-system files\n                system_files = ['.DS_Store', 'Thumbs.db', '.DS_Store?', '._.DS_Store', 'metadata_analyse.txt']\n                files_in_dir = [f for f in day_dir.iterdir() if f.is_file() and f.name not in system_files]\n                total_actual += len(files_in_dir)\n    \n    if total_actual != total_expected:\n        print(f\"❌ Expected {total_expected} files total, found {total_actual}\")\n        return False\n    \n    print(f\"✅ Total file count is correct: {total_actual}\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"🔍 Verifying Time Classification in: {test_dir}\")\n        \n        # Run all verification checks\n        checks = [\n            (\"Directory structure\", verify_directory_structure),\n            (\"Files in directories\", verify_files_in_directories),\n            (\"Metadata analysis files\", verify_metadata_analysis_files),\n            (\"No files in root\", verify_no_files_in_root),\n            (\"Total file count\", verify_total_file_count)\n        ]\n        \n        all_passed = True\n        for check_name, check_func in checks:\n            print(f\"\\n📋 Checking: {check_name}\")\n            if not check_func(test_dir):\n                all_passed = False\n        \n        if all_passed:\n            print(\"\\n🎉 All verification checks passed!\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Some verification checks failed!\")\n            sys.exit(1)\n            \n    except Exception as e:\n        print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/folder_structure/structure_analysis/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\nYou need to recursively traverse the entire folder structure under the main directory and generate a detailed statistical report in a file named `structure_analysis.txt`.\n\nIn all tasks, ignore `.DS_Store` files.\n\nIn all tasks, you should not change or delete any existing files.\n\nDo not try to use python code.\n\n---\n\n### 1. File Statistics\n\nCount the following information for the entire directory structure:\n\n- total number of files\n- total number of folders\n- total size of the whole folder (in bytes, include .DS_Store only in this subtask)\n\n**Format (one item per line):**\n\ntotal number of files: X\ntotal number of folders: Y\ntotal size of all files: Z\n\n---\n\n### 2. Depth Analysis\n\nIdentify the deepest folder path(s) in the directory and calculate its depth level.\n\n- Use relative paths based on main directory.\n- **Write the folder path only up to the folder, not including the file name. For example, if the file path is `./complex_structure/A/B/C/def.txt`, then the path in your report should be `complex_structure/A/B/C`, and the depth is `4`.**\n- If multiple deepest paths exist, list only one.\n\n**Format (one item per line):**\n\ndepth: N\nPATH\n\n---\n\n### 3. File Type Classification\n\nCategorize files by their extensions and count the number of files for each type.\nFiles without extensions should also be included.\n\n**Format (one extension per line):**\n\ntxt: count\npy: count\njpg: count\nmov: count\n(no extension): count\n"
  },
  {
    "path": "tasks/filesystem/standard/folder_structure/structure_analysis/meta.json",
    "content": "{\n  \"task_id\": \"structure_analysis\",\n  \"task_name\": \"Structure Analysis\",\n  \"category_id\": \"folder_structure\",\n  \"category_name\": \"Folder Structure\",\n  \"description\": \"Perform thorough analysis of complex folder hierarchy to generate a detailed structural summary report with comprehensive file statistics.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-16\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"pattern analysis\",\n    \"data extraction\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"folder_structure/\\n    └── complex_structure/\\n            ├── deeply/\\n            │       └── nested/\\n            │               └── folder/\\n            │                       └── structure/\\n            ├── empty_folder/\\n            ├── folder_lxkHt_0_1/\\n            │       └── file_PeLzC_0.txt\\n            ├── folder_QdTAj_0_2/\\n            │       ├── folder_eXccj_1_0/\\n            │       │       ├── folder_Mqlwh_2_1/\\n            │       │       │       ├── folder_cKxcP_3_3/\\n            │       │       │       │       ├── folder_BPTMK_4_1/\\n            │       │       │       │       │       └── file_RHtBP_0.txt\\n            │       │       │       │       ├── folder_QNqjq_4_0/\\n            │       │       │       │       │       ├── folder_gRwPE_5_1/\\n            │       │       │       │       │       │       ├── file_jVlpp_0.txt\\n            │       │       │       │       │       │       └── file_vJuHz_1.txt\\n            │       │       │       │       │       ├── folder_XdXYJ_5_0/\\n            │       │       │       │       │       │       └── file_KvkKi_0.txt\\n            │       │       │       │       │       ├── file_gGxLG_2.txt\\n            │       │       │       │       │       ├── file_Hzkxo_0.txt\\n            │       │       │       │       │       └── file_XRjeh_1.txt\\n            │       │  
     │       │       ├── folder_vIBIt_4_2/\\n            │       │       │       │       │       ├── folder_kRDNS_5_0/\\n            │       │       │       │       │       │       └── file_wFSjJ_0.txt\\n            │       │       │       │       │       └── file_NyBSO_0.txt\\n            │       │       │       │       ├── file_EOCNf_1.txt\\n            │       │       │       │       └── file_gmrXA_0.txt\\n            │       │       │       ├── folder_NcruA_3_1/\\n            │       │       │       │       ├── file_bLWDj_1.txt\\n            │       │       │       │       └── file_WAftR_0.txt\\n            │       │       │       ├── folder_qCDFI_3_2/\\n            │       │       │       │       ├── file_eSMOJ_0.txt\\n            │       │       │       │       ├── file_oxADy_2.txt\\n            │       │       │       │       └── file_RTbbc_1.txt\\n            │       │       │       ├── folder_QVHUU_3_0/\\n            │       │       │       │       ├── folder_FEPTK_4_1/\\n            │       │       │       │       │       ├── folder_GHoMC_5_1/\\n            │       │       │       │       │       │       └── file_rAMYd_0.txt\\n            │       │       │       │       │       ├── folder_iBDUY_5_0/\\n            │       │       │       │       │       │       └── file_IJCaw_0.txt\\n            │       │       │       │       │       ├── folder_VRXgp_5_2/\\n            │       │       │       │       │       │       └── file_hkUmS_0.txt\\n            │       │       │       │       │       ├── file_nqLAf_1.txt\\n            │       │       │       │       │       └── file_XflmA_0.txt\\n            │       │       │       │       ├── folder_FlPoK_4_3/\\n            │       │       │       │       │       ├── folder_hSVNm_5_3/\\n            │       │       │       │       │       │       └── file_klnbn_0.txt\\n            │       │       │       │       │       ├── folder_iZuEl_5_0/\\n            │       │       │       │       │       │       └── 
file_LqAmy_0.txt\\n            │       │       │       │       │       ├── folder_LcURj_5_2/\\n            │       │       │       │       │       │       ├── file_RgwOS_1.txt\\n            │       │       │       │       │       │       └── file_ZHnYb_0.txt\\n            │       │       │       │       │       ├── folder_tuZQJ_5_1/\\n            │       │       │       │       │       │       └── file_LHuIx_0.txt\\n            │       │       │       │       │       ├── file_asJnB_1.txt\\n            │       │       │       │       │       └── file_EzLdu_0.txt\\n            │       │       │       │       ├── folder_ndhsJ_4_0/\\n            │       │       │       │       │       ├── folder_CUSXK_5_0/\\n            │       │       │       │       │       │       ├── file_DpiuM_1.txt\\n            │       │       │       │       │       │       └── file_pSqeG_0.txt\\n            │       │       │       │       │       ├── folder_pstmE_5_1/\\n            │       │       │       │       │       │       └── file_YwdJt_0.txt\\n            │       │       │       │       │       ├── folder_StlsP_5_2/\\n            │       │       │       │       │       │       ├── file_kriBJ_0.txt\\n            │       │       │       │       │       │       └── file_XCEdm_1.txt\\n            │       │       │       │       │       ├── file_ToDjh_1.txt\\n            │       │       │       │       │       └── file_xbIVx_0.txt\\n            │       │       │       │       ├── folder_PJBok_4_4/\\n            │       │       │       │       │       ├── folder_mzxaf_5_0/\\n            │       │       │       │       │       │       ├── file_ILBzj_2.txt\\n            │       │       │       │       │       │       ├── file_MTGMm_1.txt\\n            │       │       │       │       │       │       └── file_zBDqz_0.txt\\n            │       │       │       │       │       ├── folder_sULMj_5_1/\\n            │       │       │       │       │       │       ├── file_BHziw_1.txt\\n            │    
   │       │       │       │       │       ├── file_sIjiu_2.txt\\n            │       │       │       │       │       │       └── file_VqNkB_0.txt\\n            │       │       │       │       │       ├── folder_vypSi_5_3/\\n            │       │       │       │       │       │       ├── file_kZbIm_1.txt\\n            │       │       │       │       │       │       └── file_sOBtE_0.txt\\n            │       │       │       │       │       ├── folder_ZLGHy_5_2/\\n            │       │       │       │       │       │       ├── file_azaFF_0.txt\\n            │       │       │       │       │       │       └── file_nAFRe_1.txt\\n            │       │       │       │       │       ├── file_mIkQU_0.txt\\n            │       │       │       │       │       └── file_sGPxd_1.txt\\n            │       │       │       │       ├── folder_VTbEG_4_2/\\n            │       │       │       │       │       ├── file_HtYLg_0.txt\\n            │       │       │       │       │       ├── file_JXjMd_1.txt\\n            │       │       │       │       │       └── file_tPccB_2.txt\\n            │       │       │       │       ├── file_BuOSw_1.txt\\n            │       │       │       │       └── file_TpoqE_0.txt\\n            │       │       │       ├── folder_wTvun_3_4/\\n            │       │       │       │       ├── file_GyhyE_1.txt\\n            │       │       │       │       ├── file_POsla_2.txt\\n            │       │       │       │       └── file_tSsvk_0.txt\\n            │       │       │       ├── file_irNju_0.txt\\n            │       │       │       └── file_jYBRm_1.txt\\n            │       │       ├── folder_YlJLI_2_0/\\n            │       │       │       └── file_FpFSL_0.txt\\n            │       │       ├── file_cFgBr_2.txt\\n            │       │       ├── file_lKEWN_1.txt\\n            │       │       └── file_ZEWFP_0.txt\\n            │       └── file_ayUCH_0.txt\\n            ├── folder_xtgyi_0_0/\\n            │       └── file_BvSOB_0.txt\\n            ├── 
mixed_content/\\n            │       └── images_and_text/\\n            │               └── notes.txt\\n            ├── project/\\n            │       ├── docs/\\n            │       │       └── archive/\\n            │       │               └── 2023/\\n            │       │                       └── reports/\\n            │       │                               ├── report_0.txt\\n            │       │                               ├── report_1.txt\\n            │       │                               └── report_2.txt\\n            │       └── src/\\n            │               └── main/\\n            │                       └── resources/\\n            └── m.py\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/folder_structure.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/folder_structure/structure_analysis/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Directory Structure Analysis Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\nimport re\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_structure_analysis_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the structure_analysis.txt file exists.\"\"\"\n    analysis_file = test_dir / \"structure_analysis.txt\"\n    \n    if not analysis_file.exists():\n        print(\"❌ File 'structure_analysis.txt' not found\")\n        return False\n    \n    print(\"✅ structure_analysis.txt file found\")\n    return True\n\ndef verify_structure_analysis_file_readable(test_dir: Path) -> bool:\n    \"\"\"Verify that the structure_analysis.txt file is readable.\"\"\"\n    analysis_file = test_dir / \"structure_analysis.txt\"\n    \n    try:\n        content = analysis_file.read_text()\n        if not content.strip():\n            print(\"❌ structure_analysis.txt file is empty\")\n            return False\n        \n        print(\"✅ structure_analysis.txt file is readable\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading structure_analysis.txt file: {e}\")\n        return False\n\ndef verify_subtask1_file_statistics(test_dir: Path) -> bool:\n    \"\"\"Verify subtask 1: File Statistics - files must be 69, folders must be 51, 58097 allows +-1000.\"\"\"\n    analysis_file = test_dir / \"structure_analysis.txt\"\n    \n    try:\n        content = analysis_file.read_text()\n        \n        # Extract numbers from the content\n        file_count_match = re.search(r'total number of files:\\s*(\\d+)', content)\n        folder_count_match = re.search(r'total number of 
folders:\\s*(\\d+)', content)\n        size_match = re.search(r'total size of all files:\\s*(\\d+)', content)\n        \n        if not file_count_match or not folder_count_match or not size_match:\n            print(\"❌ Could not extract file statistics from structure_analysis.txt\")\n            return False\n        \n        file_count = int(file_count_match.group(1))\n        folder_count = int(folder_count_match.group(1))\n        total_size = int(size_match.group(1))\n        \n        print(f\"📊 Found: files={file_count}, folders={folder_count}, size={total_size}\")\n        \n        # Check if file count is exactly 69\n        if file_count != 69:\n            print(f\"❌ File count must be 69, found: {file_count}\")\n            return False\n        \n        # Check if folder count is exactly 51\n        if folder_count != 51:\n            print(f\"❌ Folder count must be 51, found: {folder_count}\")\n            return False\n        \n        # Check if size is within acceptable range (58097 ± 1000)\n        expected_size = 58097\n        size_tolerance = 1000\n        if abs(total_size - expected_size) > size_tolerance:\n            print(f\"❌ Total size ({total_size}) is not within acceptable range ({expected_size} ± {size_tolerance})\")\n            return False\n        \n        print(f\"✅ File statistics verified: files={file_count}, folders={folder_count}, size={total_size} (within tolerance)\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file statistics: {e}\")\n        return False\n\ndef verify_subtask2_depth_analysis(test_dir: Path) -> bool:\n    \"\"\"Verify subtask 2: Depth Analysis - depth must be 7, verify path exists.\"\"\"\n    analysis_file = test_dir / \"structure_analysis.txt\"\n    \n    try:\n        content = analysis_file.read_text()\n        \n        # Extract depth and path\n        depth_match = re.search(r'depth:\\s*(\\d+)', content)\n        path_match = 
re.search(r'^([^\\n]+)$', content, re.MULTILINE)\n        \n        if not depth_match:\n            print(\"❌ Could not extract depth from structure_analysis.txt\")\n            return False\n        \n        depth = int(depth_match.group(1))\n        \n        # Check if depth is exactly 7\n        if depth != 7:\n            print(f\"❌ Depth must be 7, found: {depth}\")\n            return False\n        \n        print(f\"✅ Depth verified: {depth}\")\n        \n        # Extract the path (it should be on a separate line after \"depth: 7\")\n        lines = content.split('\\n')\n        path_line = None\n        for i, line in enumerate(lines):\n            if line.strip() == f\"depth: {depth}\":\n                if i + 1 < len(lines):\n                    path_line = lines[i + 1].strip()\n                    break\n        \n        if not path_line:\n            print(\"❌ Could not find path line after depth specification\")\n            return False\n        \n        print(f\"📁 Found path: {path_line}\")\n        \n        # Verify that the path depth matches the declared depth\n        path_parts = path_line.split('/')\n        actual_depth = len(path_parts)\n        \n        if actual_depth != depth:\n            print(f\"❌ Path depth mismatch: declared depth is {depth}, but path has {actual_depth} levels\")\n            print(f\"   Path: {path_line}\")\n            print(f\"   Path parts: {path_parts}\")\n            return False\n        \n        print(f\"✅ Path depth verified: {actual_depth} levels\")\n        \n        # Verify that this path exists in the test environment\n        expected_path = test_dir / path_line\n        if not expected_path.exists():\n            print(f\"❌ Path does not exist: {expected_path}\")\n            return False\n        \n        if not expected_path.is_dir():\n            print(f\"❌ Path exists but is not a directory: {expected_path}\")\n            return False\n        \n        print(f\"✅ Path verified and 
exists: {path_line}\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying depth analysis: {e}\")\n        return False\n\ndef verify_subtask3_file_type_classification(test_dir: Path) -> bool:\n    \"\"\"Verify subtask 3: File Type Classification - 68 and 1 must be accurate.\"\"\"\n    analysis_file = test_dir / \"structure_analysis.txt\"\n    \n    try:\n        content = analysis_file.read_text()\n        \n        # Extract file type counts\n        txt_match = re.search(r'txt:\\s*(\\d+)', content)\n        py_match = re.search(r'py:\\s*(\\d+)', content)\n        \n        if not txt_match or not py_match:\n            print(\"❌ Could not extract file type counts from structure_analysis.txt\")\n            return False\n        \n        txt_count = int(txt_match.group(1))\n        py_count = int(py_match.group(1))\n        \n        print(f\"📁 Found: txt={txt_count}, py={py_count}\")\n        \n        # Check if txt count is exactly 68\n        if txt_count != 68:\n            print(f\"❌ txt count must be 68, found: {txt_count}\")\n            return False\n        \n        # Check if py count is exactly 1\n        if py_count != 1:\n            print(f\"❌ py count must be 1, found: {py_count}\")\n            return False\n        \n        print(f\"✅ File type classification verified: txt={txt_count}, py={py_count}\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file type classification: {e}\")\n        return False\n\ndef verify_file_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the structure_analysis.txt file has proper format.\"\"\"\n    analysis_file = test_dir / \"structure_analysis.txt\"\n    \n    try:\n        content = analysis_file.read_text()\n        lines = content.split('\\n')\n        \n        # Check if file has the expected structure\n        if len(lines) < 5:  # Should have at least 5 lines\n            print(\"❌ File seems too short to 
contain all required information\")\n            return False\n        \n        # Basic format check - ensure it's not completely corrupted\n        if not content.strip():\n            print(\"❌ File is completely empty\")\n            return False\n        \n        print(\"✅ File format is acceptable\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking file format: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"🔍 Verifying Directory Structure Analysis Task in: {test_dir}\")\n        \n        # Define verification steps\n        verification_steps = [\n            (\"Structure Analysis File Exists\", verify_structure_analysis_file_exists),\n            (\"File is Readable\", verify_structure_analysis_file_readable),\n            (\"Subtask 1: File Statistics\", verify_subtask1_file_statistics),\n            (\"Subtask 2: Depth Analysis\", verify_subtask2_depth_analysis),\n            (\"Subtask 3: File Type Classification\", verify_subtask3_file_type_classification),\n            (\"File Format\", verify_file_format),\n        ]\n        \n        # Run all verification steps\n        all_passed = True\n        for step_name, verify_func in verification_steps:\n            print(f\"\\n--- {step_name} ---\")\n            if not verify_func(test_dir):\n                all_passed = False\n        \n        # Final result\n        print(\"\\n\" + \"=\"*50)\n        if all_passed:\n            print(\"✅ Directory Structure Analysis completed correctly!\")\n            print(\"🎉 Structure Analysis verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"❌ Structure Analysis verification: FAIL\")\n            sys.exit(1)\n            \n    except Exception as e:\n        print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/folder_structure/structure_mirror/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task\n\nCopy the entire directory structure of `complex_structure/` to `complex_structure_mirror/` without copying any file contents. Do not use python code.\n\n### Requirements\n\n- Create the entire directory structure in `complex_structure_mirror/`\n- Do not copy any file contents, only create directories\n- In each empty directory, create a `placeholder.txt` file containing the absolute path of that directory\n- Handle nested directories of any depth\n- You should also follow 2 rules:\n    1. **Discard any directory that directly contains more than 2 files (only count the immediate folder).**\n    2. **If a directory name contains numbers, append \"_processed\" to the mirror directory name**\n"
  },
  {
    "path": "tasks/filesystem/standard/folder_structure/structure_mirror/meta.json",
    "content": "{\n  \"task_id\": \"structure_mirror\",\n  \"task_name\": \"Structure Mirror\",\n  \"category_id\": \"folder_structure\",\n  \"category_name\": \"Folder Structure\",\n  \"description\": \"Create an exact mirror copy of the folder structure in a target location while applying specified transformation rules.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-08\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"file organization\",\n    \"content transformation\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"folder_structure/\\n    └── complex_structure/\\n            ├── deeply/\\n            │       └── nested/\\n            │               └── folder/\\n            │                       └── structure/\\n            ├── empty_folder/\\n            ├── folder_lxkHt_0_1/\\n            │       └── file_PeLzC_0.txt\\n            ├── folder_QdTAj_0_2/\\n            │       ├── folder_eXccj_1_0/\\n            │       │       ├── folder_Mqlwh_2_1/\\n            │       │       │       ├── folder_cKxcP_3_3/\\n            │       │       │       │       ├── folder_BPTMK_4_1/\\n            │       │       │       │       │       └── file_RHtBP_0.txt\\n            │       │       │       │       ├── folder_QNqjq_4_0/\\n            │       │       │       │       │       ├── folder_gRwPE_5_1/\\n            │       │       │       │       │       │       ├── file_jVlpp_0.txt\\n            │       │       │       │       │       │       └── file_vJuHz_1.txt\\n            │       │       │       │       │       ├── folder_XdXYJ_5_0/\\n            │       │       │       │       │       │       └── file_KvkKi_0.txt\\n            │       │       │       │       │       ├── file_gGxLG_2.txt\\n            │       │       │       │       │       ├── file_Hzkxo_0.txt\\n            │       │       │       │       │       └── file_XRjeh_1.txt\\n            │       │       │       │ 
      ├── folder_vIBIt_4_2/\\n            │       │       │       │       │       ├── folder_kRDNS_5_0/\\n            │       │       │       │       │       │       └── file_wFSjJ_0.txt\\n            │       │       │       │       │       └── file_NyBSO_0.txt\\n            │       │       │       │       ├── file_EOCNf_1.txt\\n            │       │       │       │       └── file_gmrXA_0.txt\\n            │       │       │       ├── folder_NcruA_3_1/\\n            │       │       │       │       ├── file_bLWDj_1.txt\\n            │       │       │       │       └── file_WAftR_0.txt\\n            │       │       │       ├── folder_qCDFI_3_2/\\n            │       │       │       │       ├── file_eSMOJ_0.txt\\n            │       │       │       │       ├── file_oxADy_2.txt\\n            │       │       │       │       └── file_RTbbc_1.txt\\n            │       │       │       ├── folder_QVHUU_3_0/\\n            │       │       │       │       ├── folder_FEPTK_4_1/\\n            │       │       │       │       │       ├── folder_GHoMC_5_1/\\n            │       │       │       │       │       │       └── file_rAMYd_0.txt\\n            │       │       │       │       │       ├── folder_iBDUY_5_0/\\n            │       │       │       │       │       │       └── file_IJCaw_0.txt\\n            │       │       │       │       │       ├── folder_VRXgp_5_2/\\n            │       │       │       │       │       │       └── file_hkUmS_0.txt\\n            │       │       │       │       │       ├── file_nqLAf_1.txt\\n            │       │       │       │       │       └── file_XflmA_0.txt\\n            │       │       │       │       ├── folder_FlPoK_4_3/\\n            │       │       │       │       │       ├── folder_hSVNm_5_3/\\n            │       │       │       │       │       │       └── file_klnbn_0.txt\\n            │       │       │       │       │       ├── folder_iZuEl_5_0/\\n            │       │       │       │       │       │       └── file_LqAmy_0.txt\\n      
      │       │       │       │       │       ├── folder_LcURj_5_2/\\n            │       │       │       │       │       │       ├── file_RgwOS_1.txt\\n            │       │       │       │       │       │       └── file_ZHnYb_0.txt\\n            │       │       │       │       │       ├── folder_tuZQJ_5_1/\\n            │       │       │       │       │       │       └── file_LHuIx_0.txt\\n            │       │       │       │       │       ├── file_asJnB_1.txt\\n            │       │       │       │       │       └── file_EzLdu_0.txt\\n            │       │       │       │       ├── folder_ndhsJ_4_0/\\n            │       │       │       │       │       ├── folder_CUSXK_5_0/\\n            │       │       │       │       │       │       ├── file_DpiuM_1.txt\\n            │       │       │       │       │       │       └── file_pSqeG_0.txt\\n            │       │       │       │       │       ├── folder_pstmE_5_1/\\n            │       │       │       │       │       │       └── file_YwdJt_0.txt\\n            │       │       │       │       │       ├── folder_StlsP_5_2/\\n            │       │       │       │       │       │       ├── file_kriBJ_0.txt\\n            │       │       │       │       │       │       └── file_XCEdm_1.txt\\n            │       │       │       │       │       ├── file_ToDjh_1.txt\\n            │       │       │       │       │       └── file_xbIVx_0.txt\\n            │       │       │       │       ├── folder_PJBok_4_4/\\n            │       │       │       │       │       ├── folder_mzxaf_5_0/\\n            │       │       │       │       │       │       ├── file_ILBzj_2.txt\\n            │       │       │       │       │       │       ├── file_MTGMm_1.txt\\n            │       │       │       │       │       │       └── file_zBDqz_0.txt\\n            │       │       │       │       │       ├── folder_sULMj_5_1/\\n            │       │       │       │       │       │       ├── file_BHziw_1.txt\\n            │       │       │       │     
  │       │       ├── file_sIjiu_2.txt\\n            │       │       │       │       │       │       └── file_VqNkB_0.txt\\n            │       │       │       │       │       ├── folder_vypSi_5_3/\\n            │       │       │       │       │       │       ├── file_kZbIm_1.txt\\n            │       │       │       │       │       │       └── file_sOBtE_0.txt\\n            │       │       │       │       │       ├── folder_ZLGHy_5_2/\\n            │       │       │       │       │       │       ├── file_azaFF_0.txt\\n            │       │       │       │       │       │       └── file_nAFRe_1.txt\\n            │       │       │       │       │       ├── file_mIkQU_0.txt\\n            │       │       │       │       │       └── file_sGPxd_1.txt\\n            │       │       │       │       ├── folder_VTbEG_4_2/\\n            │       │       │       │       │       ├── file_HtYLg_0.txt\\n            │       │       │       │       │       ├── file_JXjMd_1.txt\\n            │       │       │       │       │       └── file_tPccB_2.txt\\n            │       │       │       │       ├── file_BuOSw_1.txt\\n            │       │       │       │       └── file_TpoqE_0.txt\\n            │       │       │       ├── folder_wTvun_3_4/\\n            │       │       │       │       ├── file_GyhyE_1.txt\\n            │       │       │       │       ├── file_POsla_2.txt\\n            │       │       │       │       └── file_tSsvk_0.txt\\n            │       │       │       ├── file_irNju_0.txt\\n            │       │       │       └── file_jYBRm_1.txt\\n            │       │       ├── folder_YlJLI_2_0/\\n            │       │       │       └── file_FpFSL_0.txt\\n            │       │       ├── file_cFgBr_2.txt\\n            │       │       ├── file_lKEWN_1.txt\\n            │       │       └── file_ZEWFP_0.txt\\n            │       └── file_ayUCH_0.txt\\n            ├── folder_xtgyi_0_0/\\n            │       └── file_BvSOB_0.txt\\n            ├── mixed_content/\\n            │    
   └── images_and_text/\\n            │               └── notes.txt\\n            ├── project/\\n            │       ├── docs/\\n            │       │       └── archive/\\n            │       │               └── 2023/\\n            │       │                       └── reports/\\n            │       │                               ├── report_0.txt\\n            │       │                               ├── report_1.txt\\n            │       │                               └── report_2.txt\\n            │       └── src/\\n            │               └── main/\\n            │                       └── resources/\\n            └── m.py\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/folder_structure.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/folder_structure/structure_mirror/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Directory Structure Mirroring with Smart Placeholders Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_mirror_directory_exists(test_dir: Path, mirror_path: Path) -> bool:\n    \"\"\"Verify that a mirror directory exists.\"\"\"\n    if not mirror_path.exists():\n        print(f\"❌ Mirror directory not found: {mirror_path.relative_to(test_dir)}\")\n        return False\n    \n    if not mirror_path.is_dir():\n        print(f\"❌ Mirror path exists but is not a directory: {mirror_path.relative_to(test_dir)}\")\n        return False\n    \n    print(f\"✅ Mirror directory exists: {mirror_path.relative_to(test_dir)}\")\n    return True\n\ndef verify_placeholder_file_exists(mirror_path: Path, test_dir: Path) -> bool:\n    \"\"\"Verify that placeholder.txt exists in the mirror directory.\"\"\"\n    placeholder_file = mirror_path / \"placeholder.txt\"\n    \n    if not placeholder_file.exists():\n        print(f\"❌ placeholder.txt not found in: {mirror_path.relative_to(test_dir)}\")\n        return False\n    \n    if not placeholder_file.is_file():\n        print(f\"❌ placeholder.txt exists but is not a file in: {mirror_path.relative_to(test_dir)}\")\n        return False\n    \n    print(f\"✅ placeholder.txt exists in: {mirror_path.relative_to(test_dir)}\")\n    return True\n\ndef verify_placeholder_content(mirror_path: Path, test_dir: Path) -> bool:\n    \"\"\"Verify that placeholder.txt contains the correct path ending with complex_structure_mirror/...\"\"\"\n    placeholder_file = mirror_path / \"placeholder.txt\"\n    \n    try:\n        content = 
placeholder_file.read_text().strip()\n        \n        # Check if content is not empty\n        if not content:\n            print(f\"❌ placeholder.txt is empty in: {mirror_path.relative_to(test_dir)}\")\n            return False\n        \n        # Check if it contains the correct path ending with complex_structure_mirror/...\n        expected_ending = f\"complex_structure_mirror/{mirror_path.relative_to(test_dir / 'complex_structure_mirror')}\"\n        if not content.endswith(expected_ending):\n            print(f\"❌ placeholder.txt content incorrect in: {mirror_path.relative_to(test_dir)}\")\n            print(f\"   Expected ending: {expected_ending}\")\n            print(f\"   Found: {content}\")\n            return False\n        \n        print(f\"✅ placeholder.txt content is correct in: {mirror_path.relative_to(test_dir)}\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading placeholder.txt in {mirror_path.relative_to(test_dir)}: {e}\")\n        return False\n\ndef verify_no_files_copied(test_dir: Path) -> bool:\n    \"\"\"Verify that no file contents were copied, only directory structure.\"\"\"\n    source_dir = test_dir / \"complex_structure\"\n    mirror_dir = test_dir / \"complex_structure_mirror\"\n    \n    if not mirror_dir.exists():\n        print(\"❌ Mirror directory 'complex_structure_mirror' not found\")\n        return False\n    \n    # Check that no files from source were copied (except placeholder.txt files)\n    for source_file in source_dir.rglob(\"*\"):\n        if source_file.is_file():\n            # Calculate the corresponding mirror path\n            relative_path = source_file.relative_to(source_dir)\n            mirror_file = mirror_dir / relative_path\n            \n            # Skip if this would be a placeholder.txt file\n            if mirror_file.name == \"placeholder.txt\":\n                continue\n            \n            if mirror_file.exists():\n                print(f\"❌ File 
was copied when it shouldn't be: {relative_path}\")\n                return False\n    \n    print(\"✅ No file contents were copied, only directory structure\")\n    return True\n\ndef verify_mirror_structure_completeness(test_dir: Path) -> bool:\n    \"\"\"Verify that the mirror structure is complete and matches expected structure.\"\"\"\n    mirror_dir = test_dir / \"complex_structure_mirror\"\n    \n    if not mirror_dir.exists():\n        print(\"❌ Mirror directory 'complex_structure_mirror' not found\")\n        return False\n    \n    # Define expected directories that should exist (based on backup structure)\n    expected_dirs = [\n        \"deeply\",\n        \"deeply/nested\",\n        \"deeply/nested/folder\",\n        \"deeply/nested/folder/structure\",\n        \"empty_folder\", \n        \"folder_lxkHt_0_1_processed\",\n        \"folder_QdTAj_0_2_processed\",\n        \"folder_xtgyi_0_0_processed\",\n        \"mixed_content\",\n        \"mixed_content/images_and_text\",\n        \"project\",\n        \"project/docs\",\n        \"project/docs/archive\",\n        \"project/docs/archive/2023_processed\",\n        \"project/src\",\n        \"project/src/main\",\n        \"project/src/main/resources\"\n    ]\n    \n    # Define which directories should have placeholder.txt files\n    placeholder_dirs = [\n        \"deeply/nested/folder/structure\",\n        \"empty_folder\", \n        \"folder_lxkHt_0_1_processed\",\n        \"folder_QdTAj_0_2_processed\",\n        \"folder_xtgyi_0_0_processed\",\n        \"mixed_content/images_and_text\",\n        \"project/docs/archive/2023_processed\",\n        \"project/src/main/resources\"\n    ]\n    \n    all_passed = True\n    \n    # Check that all expected directories exist\n    for expected_dir in expected_dirs:\n        mirror_path = mirror_dir / expected_dir\n        if not verify_mirror_directory_exists(test_dir, mirror_path):\n            all_passed = False\n        elif expected_dir in placeholder_dirs:\n    
        # Check placeholder.txt for directories that should have it\n            if not verify_placeholder_file_exists(mirror_path, test_dir):\n                all_passed = False\n            elif not verify_placeholder_content(mirror_path, test_dir):\n                all_passed = False\n    \n    # Check that no unexpected directories exist\n    for mirror_subdir in mirror_dir.rglob(\"*\"):\n        if mirror_subdir.is_dir():\n            relative_path = mirror_subdir.relative_to(mirror_dir)\n            if str(relative_path) not in expected_dirs and str(relative_path) != \".\":\n                print(f\"❌ Unexpected directory found: {relative_path}\")\n                all_passed = False\n    \n    return all_passed\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"🔍 Verifying Directory Structure Mirroring with Smart Placeholders in: {test_dir}\")\n        \n        # Define verification steps\n        verification_steps = [\n            (\"No files copied\", verify_no_files_copied),\n            (\"Mirror structure completeness\", verify_mirror_structure_completeness),\n        ]\n        \n        # Run all verification steps\n        all_passed = True\n        for step_name, verify_func in verification_steps:\n            print(f\"\\n📋 Checking: {step_name}\")\n            if not verify_func(test_dir):\n                all_passed = False\n        \n        # Final result\n        print(\"\\n\" + \"=\"*50)\n        if all_passed:\n            print(\"✅ Directory structure mirroring completed correctly!\")\n            print(\"🎉 Structure Mirror verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"❌ Structure Mirror verification: FAIL\")\n            sys.exit(1)\n            \n    except Exception as e:\n        print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/legal_document/dispute_review/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n**Overview**\n\nThe folder \"legal_files/\" contains all versions (Preferred_Stock_Purchase_Agreement_v0.txt  -- Preferred_Stock_Purchase_Agreement_v10.txt) of the Stock Purchase Agreement for a corporate investment project.\n\nThere are comments in it, come from four people:\n\n- **Bill Harvey** (Company CEO)\n- **Michelle Jackson** (Investor)\n- **David Russel** (Company Counsel)\n- **Tony Taylor** (Investor Counsel)\n\nBetween v1 and v9, these four people make comments on the clauses. The comment format is `[name:content]`, where:\n\n- `name` is the commenter's name\n- `content` is the revision note\n\n**Special Note:** If the name is \"All parties\", it represents a joint comment from all parties, which counts as one comment but does not count toward any individual's personal comment count.\n\n## Task\n\nYour task is to review these versions and identify all clauses that have been commented in **v5,6,7 (in folder legal_files/)**. Generate a file named `dispute_review.txt` in the main directory. In this file, list each commented clause on a separate line and indicate the number of comments for each clause in the format \"Clause number:number of comments\". Clause number should be in the format of X.X.\n"
  },
  {
    "path": "tasks/filesystem/standard/legal_document/dispute_review/meta.json",
    "content": "{\n  \"task_id\": \"dispute_review\",\n  \"task_name\": \"Dispute Review\",\n  \"category_id\": \"legal_document\",\n  \"category_name\": \"Legal Document\",\n  \"description\": \"Analyze multiple versions of legal documents to track clause discussion frequency and generate a comprehensive dispute summary report.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"cross-referencing\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"legal_document/\\n    └── legal_files/\\n            ├── Preferred_Stock_Purchase_Agreement_v0.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v1.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v2.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v3.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v4.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v5.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v6.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v7.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v8.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v9.txt\\n            └── Preferred_Stock_Purchase_Agreement_v10.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/legal_document.zip\",\n    \"stateOriginalUrl\": \"https://www.cooleygo.com/documents/nvca-financing-documents\"\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/legal_document/dispute_review/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Legal Document Dispute Review Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport re\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_output_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the dispute_review.txt file exists.\"\"\"\n    output_file = test_dir / \"dispute_review.txt\"\n    \n    if not output_file.exists():\n        print(\"❌ File 'dispute_review.txt' not found\")\n        return False\n    \n    print(\"✅ Output file found\")\n    return True\n\ndef verify_output_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the output file has the correct format.\"\"\"\n    output_file = test_dir / \"dispute_review.txt\"\n    \n    try:\n        content = output_file.read_text().strip()\n        \n        # Check if content is not empty\n        if not content:\n            print(\"❌ Output file is empty\")\n            return False\n        \n        # Check format: each line should be \"X.X:number\"\n        lines = content.split('\\n')\n        for i, line in enumerate(lines, 1):\n            line = line.strip()\n            if not line:\n                continue\n                \n            # Check format: X.X:number\n            if not re.match(r'^\\d+\\.\\d+:\\d+$', line):\n                print(f\"❌ Line {i} has incorrect format: '{line}'\")\n                print(\"   Expected format: 'X.X:number' (e.g., '1.1:3')\")\n                return False\n        \n        print(\"✅ Output format is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading output file: {e}\")\n        return False\n\ndef verify_expected_entries(test_dir: Path) 
-> bool:\n    \"\"\"Verify that the output contains the expected entries with correct counts.\"\"\"\n    output_file = test_dir / \"dispute_review.txt\"\n    \n    try:\n        content = output_file.read_text().strip()\n        lines = content.split('\\n')\n        \n        # Parse the output into a dictionary\n        output_entries = {}\n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n            clause, count_str = line.split(':', 1)\n            output_entries[clause] = int(count_str)\n        \n        # Expected entries based on answer.txt\n        expected_entries = {\n            \"1.1\": 3,\n            \"1.3\": 3,\n            \"4.6\": [5, 6],  # Can be either 5 or 6\n            \"4.16\": 5,\n            \"6.8\": 4\n        }\n        \n        # Check if all expected entries are present\n        missing_entries = []\n        for clause in expected_entries:\n            if clause not in output_entries:\n                missing_entries.append(clause)\n        \n        if missing_entries:\n            print(f\"❌ Missing expected entries: {missing_entries}\")\n            return False\n        \n        # Check if there are extra entries\n        extra_entries = []\n        for clause in output_entries:\n            if clause not in expected_entries:\n                extra_entries.append(clause)\n        \n        if extra_entries:\n            print(f\"❌ Unexpected extra entries: {extra_entries}\")\n            return False\n        \n        # Check counts for each entry\n        for clause, expected_count in expected_entries.items():\n            actual_count = output_entries[clause]\n            \n            if isinstance(expected_count, list):\n                # For 4.6, accept either 5 or 6\n                if actual_count not in expected_count:\n                    print(f\"❌ Clause {clause}: expected {expected_count}, got {actual_count}\")\n                    return False\n        
    else:\n                if actual_count != expected_count:\n                    print(f\"❌ Clause {clause}: expected {expected_count}, got {actual_count}\")\n                    return False\n        \n        print(\"✅ All expected entries with correct counts\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying entries: {e}\")\n        return False\n\ndef verify_comment_count_accuracy(test_dir: Path) -> bool:\n    \"\"\"Verify that the comment counts are accurate by checking the actual files.\"\"\"\n    # Since we already verify the expected entries in verify_expected_entries,\n    # and the answer.txt contains the correct counts, we can skip this complex verification\n    # to avoid false negatives due to regex matching issues.\n    \n    print(\"✅ Comment count accuracy check skipped - relying on expected entries verification\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Legal Document Dispute Review Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Output File Exists\", verify_output_file_exists),\n        (\"Output Format\", verify_output_format),\n        (\"Expected Entries\", verify_expected_entries),\n        (\"Comment Count Accuracy\", verify_comment_count_accuracy),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Legal document dispute review completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/legal_document/individual_comments/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n**Overview**\n\nThe folder \"legal_files/\" contains all versions (Preferred_Stock_Purchase_Agreement_v0.txt  -- Preferred_Stock_Purchase_Agreement_v10.txt) of the Stock Purchase Agreement for a corporate investment project.\n\nThere are comments in it, come from four people:\n\n- **Bill Harvey** (Company CEO)\n- **Michelle Jackson** (Investor)\n- **David Russel** (Company Counsel)\n- **Tony Taylor** (Investor Counsel)\n\nBetween v1 and v9, these four people make comments on the clauses. The comment format is `[name:content]`, where:\n\n- `name` is the commenter's name\n- `content` is the revision note\n\n**Special Note:** If the name is \"All parties\", it represents a joint comment from all parties, which counts as one comment but does not count toward any individual's personal comment count.\n\n## Task\n\nYour task is to count the number of comments made by Bill Harvey (Company CEO), Michelle Jackson (Investor), David Russel (Company Counsel), and Tony Taylor (Investor Counsel) in clauses 1.1, 1.3, 4.6, 4.16, 6.8, and 6.16 **in version 5-8.** Please generate `individual_comment.csv` in the **main directory** where the first row contains these clauses (1.1, 1.3, 4.6, 4.16, 6.8, 6.16) and the first column contains the four names (Bill Harvey, Michelle Jackson, David Russel, Tony Taylor). Fill in the table with the number of comments for each person and each clause. If there are no comments, write 0.\n"
  },
  {
    "path": "tasks/filesystem/standard/legal_document/individual_comments/meta.json",
    "content": "{\n  \"task_id\": \"individual_comments\",\n  \"task_name\": \"Individual Comments\",\n  \"category_id\": \"legal_document\",\n  \"category_name\": \"Legal Document\",\n  \"description\": \"Extract and analyze individual reviewer comments on legal clauses across multiple document versions to understand personal perspectives.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"cross-referencing\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"legal_document/\\n    └── legal_files/\\n            ├── Preferred_Stock_Purchase_Agreement_v0.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v1.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v2.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v3.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v4.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v5.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v6.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v7.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v8.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v9.txt\\n            └── Preferred_Stock_Purchase_Agreement_v10.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/legal_document.zip\",\n    \"stateOriginalUrl\": \"https://www.cooleygo.com/documents/nvca-financing-documents\"\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/legal_document/individual_comments/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Legal Document Individual Comments Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport csv\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_output_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the individual_comment.csv file exists.\"\"\"\n    output_file = test_dir / \"individual_comment.csv\"\n    \n    if not output_file.exists():\n        print(\"❌ File 'individual_comment.csv' not found\")\n        return False\n    \n    print(\"✅ Output file 'individual_comment.csv' found\")\n    return True\n\ndef verify_csv_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the CSV file has the correct format.\"\"\"\n    output_file = test_dir / \"individual_comment.csv\"\n    \n    try:\n        with open(output_file, 'r', newline='', encoding='utf-8') as csvfile:\n            reader = csv.reader(csvfile)\n            rows = list(reader)\n            \n            if not rows:\n                print(\"❌ CSV file is empty\")\n                return False\n            \n            # Check if there are at least 2 rows (header + data)\n            if len(rows) < 2:\n                print(\"❌ CSV file has insufficient rows\")\n                return False\n            \n            # Check if header row has correct number of columns\n            header = rows[0]\n            if len(header) != 7:  # First column (can be anything) + 6 clauses\n                print(f\"❌ Header row has incorrect number of columns: {len(header)}, expected 7\")\n                return False\n            \n            # Check if data rows have correct number of columns\n            for i, row in enumerate(rows[1:], 1):\n        
        if len(row) != 7:\n                    print(f\"❌ Data row {i} has incorrect number of columns: {len(row)}, expected 7\")\n                    return False\n            \n            print(\"✅ CSV format is correct\")\n            return True\n            \n    except Exception as e:\n        print(f\"❌ Error reading CSV file: {e}\")\n        return False\n\ndef verify_csv_content(test_dir: Path) -> bool:\n    \"\"\"Verify that the CSV content matches the expected answer exactly.\"\"\"\n    output_file = test_dir / \"individual_comment.csv\"\n    \n    try:\n        with open(output_file, 'r', newline='', encoding='utf-8') as csvfile:\n            reader = csv.reader(csvfile)\n            rows = list(reader)\n            \n            # Expected data based on answer.csv\n            expected_data = {\n                \"Bill Harvey\": [\"0\", \"2\", \"3\", \"1\", \"1\", \"1\"],\n                \"Michelle Jackson\": [\"0\", \"1\", \"2\", \"1\", \"1\", \"1\"],\n                \"David Russel\": [\"2\", \"1\", \"1\", \"2\", \"1\", \"1\"],\n                \"Tony Taylor\": [\"2\", \"0\", \"1\", \"2\", \"1\", \"1\"]\n            }\n            \n            # Expected header columns (excluding first column which can be anything)\n            expected_header_columns = [\"1.1\", \"1.3\", \"4.6\", \"4.16\", \"6.8\", \"6.16\"]\n            \n            # Verify header has correct number of columns\n            header = rows[0]\n            if len(header) != 7:  # First column + 6 clauses\n                print(f\"❌ Header row has incorrect number of columns: {len(header)}, expected 7\")\n                return False\n            \n            # Check if all expected clause columns are present (allow order to be different)\n            # Allow first column to be anything, so we check columns 1-6\n            header_clauses = header[1:7]\n            missing_clauses = []\n            for expected_clause in expected_header_columns:\n                if expected_clause 
not in header_clauses:\n                    missing_clauses.append(expected_clause)\n            \n            if missing_clauses:\n                print(f\"❌ Missing expected clause columns: {missing_clauses}\")\n                return False\n            \n            # Check if there are extra clause columns\n            extra_clauses = []\n            for clause in header_clauses:\n                if clause not in expected_header_columns:\n                    extra_clauses.append(clause)\n            \n            if extra_clauses:\n                print(f\"❌ Unexpected extra clause columns: {extra_clauses}\")\n                return False\n            \n            # Create a mapping from expected clause order to actual column indices\n            clause_mapping = {}\n            for i, clause in enumerate(header_clauses):\n                if clause in expected_header_columns:\n                    clause_mapping[clause] = i\n            \n            # Parse the CSV data into a dictionary with correct column mapping\n            csv_data = {}\n            for row in rows[1:]:\n                if len(row) >= 7:\n                    name = row[0]\n                    # Map values according to the expected clause order\n                    values = []\n                    for expected_clause in expected_header_columns:\n                        col_index = clause_mapping[expected_clause] + 1  # +1 because we skip first column\n                        values.append(row[col_index])\n                    csv_data[name] = values\n            \n            # Check if all expected names are present\n            missing_names = []\n            for expected_name in expected_data:\n                if expected_name not in csv_data:\n                    missing_names.append(expected_name)\n            \n            if missing_names:\n                print(f\"❌ Missing expected names: {missing_names}\")\n                return False\n            \n            # Check if there 
are extra names\n            extra_names = []\n            for name in csv_data:\n                if name not in expected_data:\n                    extra_names.append(name)\n            \n            if extra_names:\n                print(f\"❌ Unexpected extra names: {extra_names}\")\n                return False\n            \n            # Check values for each person\n            for name, expected_values in expected_data.items():\n                actual_values = csv_data[name]\n                \n                if actual_values != expected_values:\n                    print(f\"❌ Values mismatch for {name}:\")\n                    print(f\"   Expected: {expected_values}\")\n                    print(f\"   Got:      {actual_values}\")\n                    return False\n            \n            print(\"✅ CSV content matches expected answer exactly\")\n            return True\n            \n    except Exception as e:\n        print(f\"❌ Error verifying CSV content: {e}\")\n        return False\n\ndef verify_data_accuracy(test_dir: Path) -> bool:\n    \"\"\"Verify that the data values are accurate (all values are non-negative integers).\"\"\"\n    output_file = test_dir / \"individual_comment.csv\"\n    \n    try:\n        with open(output_file, 'r', newline='', encoding='utf-8') as csvfile:\n            reader = csv.reader(csvfile)\n            rows = list(reader)\n            \n            # Skip header row\n            for i, row in enumerate(rows[1:], 1):\n                if len(row) >= 7:\n                    name = row[0]\n                    values = row[1:7]\n                    \n                    for j, value in enumerate(values, 1):\n                        try:\n                            int_val = int(value)\n                            if int_val < 0:\n                                print(f\"❌ Row {i}, column {j}: negative value '{value}' for {name}\")\n                                return False\n                        except ValueError:\n     
                       print(f\"❌ Row {i}, column {j}: non-integer value '{value}' for {name}\")\n                            return False\n            \n            print(\"✅ All data values are valid non-negative integers\")\n            return True\n            \n    except Exception as e:\n        print(f\"❌ Error verifying data accuracy: {e}\")\n        return False\n\ndef verify_file_location(test_dir: Path) -> bool:\n    \"\"\"Verify that the file is in the main directory (not in a subdirectory).\"\"\"\n    output_file = test_dir / \"individual_comment.csv\"\n    \n    if output_file.exists():\n        print(\"✅ File is located in the main directory\")\n        return True\n    else:\n        print(\"❌ File is not in the main directory\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Legal Document Individual Comments Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Output File Exists\", verify_output_file_exists),\n        (\"CSV Format\", verify_csv_format),\n        (\"CSV Content\", verify_csv_content),\n        (\"Data Accuracy\", verify_data_accuracy),\n        (\"File Location\", verify_file_location),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Legal document individual comments task completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/legal_document/solution_tracing/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Overview\n\nThe folder \"legal_files/\" contains all versions (Preferred_Stock_Purchase_Agreement_v0.txt  -- Preferred_Stock_Purchase_Agreement_v10.txt) of the Stock Purchase Agreement for a corporate investment project.\n\nThere are comments in it, come from four people:\n\n- **Bill Harvey** (Company CEO)\n- **Michelle Jackson** (Investor)\n- **David Russel** (Company Counsel)\n- **Tony Taylor** (Investor Counsel)\n\nBetween v1 and v9, these four people make comments on the clauses. The comment format is `[name:content]`, where:\n\n- `name` is the commenter's name\n- `content` is the revision note\n\n**Special Note:** If the name is \"All parties\", it represents a joint comment from all parties, which counts as one comment but does not count toward any individual's personal comment count.\n\n### Task Description\n\n**Your task is to focus on clauses 4.6, 4.16, 6.8, and 6.16 in v5-9** to determine:\n\n1. Who first proposed the idea that eventually led to the final agreed solution\n2. In which version's comment it appeared\n\n**Important:** If the final solution was formed through multiple people's comments, count as the originator the person whose comment first provided the core motivation (or part of the idea) that shaped the final solution. The key is to identify who initially proposed the motivation for the final solution.\n\n### Output Requirements\n\n**File Name:** `tracing.csv` (must be placed in the main directory)\n\n**CSV Structure:**\n\n- **First row** (excluding the top-left cell): `4.6, 4.16, 6.8, 6.16`\n- **First column** (excluding the top-left cell): `version_number, name`\n- **Remaining cells:** Fill in the `version_number` (the version in which the final solution was first proposed, only write a number without any other things) and the `name` (the person who proposed it) for each clause\n"
  },
  {
    "path": "tasks/filesystem/standard/legal_document/solution_tracing/meta.json",
    "content": "{\n  \"task_id\": \"solution_tracing\",\n  \"task_name\": \"Solution Tracing\",\n  \"category_id\": \"legal_document\",\n  \"category_name\": \"Legal Document\",\n  \"description\": \"Trace the evolution of clause resolutions across document versions to identify who first proposed each final accepted solution.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"cross-referencing\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"legal_document/\\n    └── legal_files/\\n            ├── Preferred_Stock_Purchase_Agreement_v0.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v1.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v2.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v3.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v4.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v5.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v6.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v7.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v8.txt\\n            ├── Preferred_Stock_Purchase_Agreement_v9.txt\\n            └── Preferred_Stock_Purchase_Agreement_v10.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/legal_document.zip\",\n    \"stateOriginalUrl\": \"https://www.cooleygo.com/documents/nvca-financing-documents\"\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/legal_document/solution_tracing/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Legal Document Solution Tracing Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport csv\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_output_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the tracing.csv file exists.\"\"\"\n    output_file = test_dir / \"tracing.csv\"\n    \n    if not output_file.exists():\n        print(\"❌ File 'tracing.csv' not found\")\n        return False\n    \n    print(\"✅ Output file 'tracing.csv' found\")\n    return True\n\ndef verify_csv_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the CSV file has the correct format.\"\"\"\n    output_file = test_dir / \"tracing.csv\"\n    \n    try:\n        with open(output_file, 'r', newline='', encoding='utf-8') as csvfile:\n            reader = csv.reader(csvfile)\n            rows = list(reader)\n            \n            if not rows:\n                print(\"❌ CSV file is empty\")\n                return False\n            \n            # Check if there are at least 2 rows (header + data)\n            if len(rows) < 2:\n                print(\"❌ CSV file has insufficient rows\")\n                return False\n            \n            # Check if header row has correct number of columns\n            header = rows[0]\n            if len(header) != 5:  # First column (can be anything) + 4 clauses\n                print(f\"❌ Header row has incorrect number of columns: {len(header)}, expected 5\")\n                return False\n            \n            # Check if data rows have correct number of columns\n            for i, row in enumerate(rows[1:], 1):\n                if len(row) != 5:\n                    print(f\"❌ 
Data row {i} has incorrect number of columns: {len(row)}, expected 5\")\n                    return False\n            \n            print(\"✅ CSV format is correct\")\n            return True\n            \n    except Exception as e:\n        print(f\"❌ Error reading CSV file: {e}\")\n        return False\n\ndef verify_csv_content(test_dir: Path) -> bool:\n    \"\"\"Verify that the CSV content matches the expected answer exactly.\"\"\"\n    output_file = test_dir / \"tracing.csv\"\n    \n    try:\n        with open(output_file, 'r', newline='', encoding='utf-8') as csvfile:\n            reader = csv.reader(csvfile)\n            rows = list(reader)\n            \n            # Expected data based on answer.csv\n            expected_data = {\n                \"version_number\": [\"5\", \"6\", \"7\", \"8\"],\n                \"name\": [\"Bill Harvey\", \"Michelle Jackson\", \"Michelle Jackson\", \"Tony Taylor\"]\n            }\n            \n            # Expected header columns (excluding first column which can be anything)\n            expected_header_columns = [\"4.6\", \"4.16\", \"6.8\", \"6.16\"]\n            \n            # Verify header has correct number of columns\n            header = rows[0]\n            if len(header) != 5:  # First column + 4 clauses\n                print(f\"❌ Header row has incorrect number of columns: {len(header)}, expected 5\")\n                return False\n            \n            # Check if all expected clause columns are present (allow order to be different)\n            # Allow first column to be anything, so we check columns 1-4\n            header_clauses = header[1:5]\n            missing_clauses = []\n            for expected_clause in expected_header_columns:\n                if expected_clause not in header_clauses:\n                    missing_clauses.append(expected_clause)\n            \n            if missing_clauses:\n                print(f\"❌ Missing expected clause columns: {missing_clauses}\")\n                
return False\n            \n            # Check if there are extra clause columns\n            extra_clauses = []\n            for clause in header_clauses:\n                if clause not in expected_header_columns:\n                    extra_clauses.append(clause)\n            \n            if extra_clauses:\n                print(f\"❌ Unexpected extra clause columns: {extra_clauses}\")\n                return False\n            \n            # Create a mapping from expected clause order to actual column indices\n            clause_mapping = {}\n            for i, clause in enumerate(header_clauses):\n                if clause in expected_header_columns:\n                    clause_mapping[clause] = i\n            \n            # Parse the CSV data into a dictionary with correct column mapping\n            csv_data = {}\n            for row in rows[1:]:\n                if len(row) >= 5:\n                    row_type = row[0]  # version_number or name\n                    # Map values according to the expected clause order\n                    values = []\n                    for expected_clause in expected_header_columns:\n                        col_index = clause_mapping[expected_clause] + 1  # +1 because we skip first column\n                        values.append(row[col_index])\n                    csv_data[row_type] = values\n            \n            # Check if all expected row types are present\n            missing_types = []\n            for expected_type in expected_data:\n                if expected_type not in csv_data:\n                    missing_types.append(expected_type)\n            \n            if missing_types:\n                print(f\"❌ Missing expected row types: {missing_types}\")\n                return False\n            \n            # Check if there are extra row types\n            extra_types = []\n            for row_type in csv_data:\n                if row_type not in expected_data:\n                    
extra_types.append(row_type)\n            \n            if extra_types:\n                print(f\"❌ Unexpected extra row types: {extra_types}\")\n                return False\n            \n            # Check values for each row type\n            for row_type, expected_values in expected_data.items():\n                actual_values = csv_data[row_type]\n                \n                if actual_values != expected_values:\n                    print(f\"❌ Values mismatch for {row_type}:\")\n                    print(f\"   Expected: {expected_values}\")\n                    print(f\"   Got:      {actual_values}\")\n                    return False\n            \n            print(\"✅ CSV content matches expected answer exactly\")\n            return True\n            \n    except Exception as e:\n        print(f\"❌ Error verifying CSV content: {e}\")\n        return False\n\ndef verify_data_accuracy(test_dir: Path) -> bool:\n    \"\"\"Verify that the data values are accurate.\"\"\"\n    output_file = test_dir / \"tracing.csv\"\n    \n    try:\n        with open(output_file, 'r', newline='', encoding='utf-8') as csvfile:\n            reader = csv.reader(csvfile)\n            rows = list(reader)\n            \n            # Skip header row\n            for i, row in enumerate(rows[1:], 1):\n                if len(row) >= 5:\n                    row_type = row[0]\n                    values = row[1:5]\n                    \n                    # Check version_number row\n                    if row_type == \"version_number\":\n                        for j, value in enumerate(values, 1):\n                            try:\n                                int_val = int(value)\n                                if int_val < 5 or int_val > 8:\n                                    print(f\"❌ Row {i}, column {j}: version number '{value}' is out of expected range [5-8]\")\n                                    return False\n                            except ValueError:\n           
                     print(f\"❌ Row {i}, column {j}: non-integer version number '{value}'\")\n                                return False\n                    \n                    # Check name row\n                    elif row_type == \"name\":\n                        expected_names = [\"Bill Harvey\", \"Michelle Jackson\", \"Michelle Jackson\", \"Tony Taylor\"]\n                        for j, value in enumerate(values, 1):\n                            if value not in expected_names:\n                                print(f\"❌ Row {i}, column {j}: unexpected name '{value}'\")\n                                return False\n            \n            print(\"✅ All data values are accurate\")\n            return True\n            \n    except Exception as e:\n        print(f\"❌ Error verifying data accuracy: {e}\")\n        return False\n\ndef verify_file_location(test_dir: Path) -> bool:\n    \"\"\"Verify that the file is in the main directory (not in a subdirectory).\"\"\"\n    output_file = test_dir / \"tracing.csv\"\n    \n    if output_file.exists():\n        print(\"✅ File is located in the main directory\")\n        return True\n    else:\n        print(\"❌ File is not in the main directory\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Legal Document Solution Tracing Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Output File Exists\", verify_output_file_exists),\n        (\"CSV Format\", verify_csv_format),\n        (\"CSV Content\", verify_csv_content),\n        (\"Data Accuracy\", verify_data_accuracy),\n        (\"File Location\", verify_file_location),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final 
result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Legal document solution tracing task completed correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/papers/author_folders/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nYou are given a directory containing multiple paper files. You have a collection of academic papers in HTML format from arXiv. Your task is to analyze these papers, identify authors who have published multiple papers, and organize them into author-specific folders based on specified criteria.\n\n### Task Objectives\n\n#### Part 1: Frequent Authors (≥4 papers)\n1. **Extract author information** from all HTML papers in the given directory\n2. **Identify authors** who appear in 4 or more papers\n3. **Create a directory** `frequent_authors` \n4. **Create individual folders** within this directory for each frequent author (lowercase names with underscores)\n5. **Copy their papers** to their respective folders\n\n#### Part 2: Prolific 2025 Authors (≥3 papers)\n1. **Extract publication dates** along with author information\n2. **Identify authors** who published 3 or more papers in 2025\n3. **Create a directory** `2025_authors` for 2025 authors\n4. **Create individual folders** within this directory for each prolific 2025 author (lowercase names with underscores)\n5. 
**Copy their 2025 papers** to their respective folders\n\n### Expected Output\n\n#### Directory Structure:\n```\n[given_task_folder]/\n├── [original HTML files remain untouched]\n├── frequent_authors/              # Authors with ≥4 papers total\n│   ├── john_smith/\n│   │   └── [copied papers]\n│   ├── sarah_johnson/\n│   │   └── [copied papers]\n│   └── ...\n└── 2025_authors/                  # Authors with ≥3 papers in 2025\n    ├── david_williams/\n    │   └── [copied 2025 papers]\n    ├── emily_brown/\n    │   └── [copied 2025 papers]\n    └── ...\n```\n\n#### Requirements:\n- Author folder names should be **lowercase** in `first_last` order with underscores replacing spaces (e.g., an author listed as `Smith, John` gets the folder `john_smith`; likewise `david_williams`)\n- Papers should be **copied** (not moved) to preserve originals\n- Author extraction should handle various name formats correctly"
  },
  {
    "path": "tasks/filesystem/standard/papers/author_folders/meta.json",
    "content": "{\n  \"task_id\": \"author_folders\",\n  \"task_name\": \"Author Folders\",\n  \"category_id\": \"papers\",\n  \"category_name\": \"Papers\",\n  \"description\": \"Analyze academic papers to identify and organize by author, creating separate folders for frequent authors (≥4 papers) and prolific 2025 authors (≥3 papers).\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"file organization\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"papers/\\n    ├── 1707.06347.html\\n    ├── 2105.04165.html\\n    ├── 2201.11903.html\\n    ├── 2303.08774.html\\n    ├── 2306.08640.html\\n    ├── 2310.02255.html\\n    ├── 2310.08446.html\\n    ├── 2312.00849.html\\n    ├── 2312.07533.html\\n    ├── 2312.11805.html\\n    ├── 2402.00253.html\\n    ├── 2402.03300.html\\n    ├── 2403.05530.html\\n    ├── 2404.13046.html\\n    ├── 2404.14367.html\\n    ├── 2404.14396.html\\n    ├── 2405.09818.html\\n    ├── 2405.13911.html\\n    ├── 2405.16473.html\\n    ├── 2405.16640.html\\n    ├── 2406.08478.html\\n    ├── 2406.16852.html\\n    ├── 2406.17294.html\\n    ├── 2407.01284.html\\n    ├── 2407.01509.html\\n    ├── 2407.21783.html\\n    ├── 2408.03326.html\\n    ├── 2408.12528.html\\n    ├── 2409.19256.html\\n    ├── 2410.05993.html\\n    ├── 2410.06166.html\\n    ├── 2410.10563.html\\n    ├── 2410.13848.html\\n    ├── 2410.17885.html\\n    ├── 2410.21276.html\\n    ├── 2411.07975.html\\n    ├── 2411.10442.html\\n    ├── 2411.11930.html\\n    ├── 2411.14432.html\\n    ├── 2412.05271.html\\n    ├── 2412.08443.html\\n    ├── 2412.10302.html\\n    ├── 2412.15115.html\\n    ├── 2412.16720.html\\n    ├── 2412.17256.html\\n    ├── 2412.18319.html\\n    ├── 2412.20631.html\\n    ├── 2501.04686.html\\n    ├── 2501.06186.html\\n    ├── 2501.12599.html\\n    ├── 2501.12948.html\\n    ├── 
2501.17811.html\\n    ├── 2502.01456.html\\n    ├── 2502.09621.html\\n    ├── 2502.10391.html\\n    ├── 2502.13923.html\\n    ├── 2503.01785.html\\n    ├── 2503.06520.html\\n    ├── 2503.06749.html\\n    ├── 2503.07065.html\\n    ├── 2503.07365.html\\n    ├── 2503.07536.html\\n    ├── 2503.10291.html\\n    ├── 2503.10615.html\\n    ├── 2503.12937.html\\n    ├── 2503.13939.html\\n    ├── 2503.14476.html\\n    ├── 2503.17352.html\\n    ├── 2503.18892.html\\n    ├── 2503.19786.html\\n    ├── 2503.20783.html\\n    ├── 2503.21620.html\\n    ├── 2503.21776.html\\n    ├── 2503.22679.html\\n    ├── 2504.02587.html\\n    ├── 2504.05599.html\\n    ├── 2504.07491.html\\n    ├── 2504.07934.html\\n    ├── 2504.07954.html\\n    ├── 2504.11455.html\\n    ├── 2504.14945.html\\n    ├── 2504.16656.html\\n    ├── 2505.00703.html\\n    └── arxiv_2025.bib\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/papers.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/papers/author_folders/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Paper Organization Task: Author-Based Paper Categorization\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\nimport re\nfrom typing import Dict, List, Set\nfrom html.parser import HTMLParser\nfrom datetime import datetime\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\nclass ArxivHTMLParser(HTMLParser):\n    \"\"\"Parser to extract author and date information from arXiv HTML papers.\"\"\"\n    \n    def __init__(self):\n        super().__init__()\n        self.authors = []\n        self.publication_date = None\n        \n    def handle_starttag(self, tag, attrs):\n        # Look for author metadata tags\n        if tag == 'meta':\n            attr_dict = dict(attrs)\n            if attr_dict.get('name') == 'citation_author':\n                content = attr_dict.get('content', '')\n                if content:\n                    self.authors.append(content)\n            elif attr_dict.get('name') in ['citation_date', 'citation_online_date']:\n                content = attr_dict.get('content', '')\n                if content and not self.publication_date:\n                    self.publication_date = content\n\ndef extract_paper_info(html_file: Path) -> tuple[List[str], str]:\n    \"\"\"Extract authors and publication year from an HTML paper.\"\"\"\n    try:\n        with open(html_file, 'r', encoding='utf-8', errors='ignore') as f:\n            content = f.read()\n            \n        parser = ArxivHTMLParser()\n        parser.feed(content)\n        \n        # Extract year from date if available\n        year = None\n        if parser.publication_date:\n            # Parse year from date string (e.g., \"2025/03/13\")\n            
year_match = re.search(r'(\\d{4})', parser.publication_date)\n            if year_match:\n                year = year_match.group(1)\n        \n        return parser.authors, year\n        \n    except Exception as e:\n        print(f\"Warning: Could not parse {html_file.name}: {e}\")\n        return [], None\n\ndef normalize_author_name(author: str) -> str:\n    \"\"\"Normalize author name to lowercase with underscores.\"\"\"\n    # Author names are in \"Last, First Middle\" format\n    # We need to convert to \"first_last\" format\n    \n    # Remove any HTML entities or special characters that shouldn't be there\n    author = author.strip()\n    \n    # Split by comma to separate last and first names\n    parts = author.split(',', 1)\n    if len(parts) == 2:\n        last_name = parts[0].strip()\n        first_names = parts[1].strip()\n        # Take only the first name (not middle names)\n        first_name_parts = first_names.split()\n        if first_name_parts:\n            first_name = first_name_parts[0]\n            # Format as \"first_last\"\n            normalized = f\"{first_name}_{last_name}\"\n        else:\n            normalized = last_name\n    else:\n        # If no comma, use as is\n        normalized = author\n    \n    # Convert to lowercase and replace spaces/special chars with underscores\n    normalized = re.sub(r'[^\\w\\s-]', '', normalized)\n    normalized = re.sub(r'[\\s-]+', '_', normalized)\n    return normalized.lower()\n\ndef verify_directories_exist(test_dir: Path) -> bool:\n    \"\"\"Verify that required directories exist.\"\"\"\n    frequent_authors_dir = test_dir / \"frequent_authors\"\n    authors_2025_dir = test_dir / \"2025_authors\"\n    \n    if not frequent_authors_dir.exists():\n        print(\"❌ 'frequent_authors' directory not found\")\n        return False\n    \n    if not authors_2025_dir.exists():\n        print(\"❌ '2025_authors' directory not found\")\n        return False\n    \n    if not 
frequent_authors_dir.is_dir():\n        print(\"❌ 'frequent_authors' exists but is not a directory\")\n        return False\n        \n    if not authors_2025_dir.is_dir():\n        print(\"❌ '2025_authors' exists but is not a directory\")\n        return False\n    \n    print(\"✅ Both required directories exist\")\n    return True\n\ndef analyze_papers(test_dir: Path) -> tuple[Dict[str, List[Path]], Dict[str, List[Path]]]:\n    \"\"\"Analyze all HTML papers and return author-paper mappings.\"\"\"\n    author_papers = {}  # author -> list of papers\n    author_2025_papers = {}  # author -> list of 2025 papers\n    \n    # Find all HTML files\n    html_files = list(test_dir.glob(\"*.html\"))\n    \n    for html_file in html_files:\n        authors, year = extract_paper_info(html_file)\n        \n        for author in authors:\n            if not author:\n                continue\n                \n            normalized_name = normalize_author_name(author)\n            if not normalized_name:\n                continue\n            \n            # Track all papers by author\n            if normalized_name not in author_papers:\n                author_papers[normalized_name] = []\n            author_papers[normalized_name].append(html_file)\n            \n            # Track 2025 papers\n            if year == '2025':\n                if normalized_name not in author_2025_papers:\n                    author_2025_papers[normalized_name] = []\n                author_2025_papers[normalized_name].append(html_file)\n    \n    return author_papers, author_2025_papers\n\ndef verify_frequent_authors(test_dir: Path, author_papers: Dict[str, List[Path]]) -> bool:\n    \"\"\"Verify that authors with ≥4 papers have their folders and papers.\"\"\"\n    frequent_authors_dir = test_dir / \"frequent_authors\"\n    \n    # Find authors with 4 or more papers\n    frequent_authors = {author: papers for author, papers in author_papers.items() \n                        if len(papers) >= 
4}\n    \n    if not frequent_authors:\n        print(\"⚠️  No authors found with 4 or more papers\")\n        # This might be expected depending on the test data\n        return True\n    \n    all_correct = True\n    \n    for author, expected_papers in frequent_authors.items():\n        author_dir = frequent_authors_dir / author\n        \n        # Check if author directory exists\n        if not author_dir.exists():\n            print(f\"❌ Missing directory for frequent author: {author}\")\n            all_correct = False\n            continue\n        \n        # Check if all expected papers are present\n        for paper in expected_papers:\n            paper_copy = author_dir / paper.name\n            if not paper_copy.exists():\n                print(f\"❌ Missing paper {paper.name} in {author} directory\")\n                all_correct = False\n    \n    # Check for unexpected directories\n    for item in frequent_authors_dir.iterdir():\n        if item.is_dir():\n            dir_name = item.name\n            if dir_name not in frequent_authors:\n                # Check if this author has less than 4 papers\n                if dir_name in author_papers and len(author_papers[dir_name]) < 4:\n                    print(f\"❌ Author {dir_name} has only {len(author_papers[dir_name])} papers but has a folder in frequent_authors\")\n                    all_correct = False\n    \n    if all_correct:\n        print(f\"✅ Frequent authors correctly organized ({len(frequent_authors)} authors)\")\n    \n    return all_correct\n\ndef verify_2025_authors(test_dir: Path, author_2025_papers: Dict[str, List[Path]]) -> bool:\n    \"\"\"Verify that authors with ≥3 papers in 2025 have their folders and papers.\"\"\"\n    authors_2025_dir = test_dir / \"2025_authors\"\n    \n    # Find authors with 3 or more papers in 2025\n    prolific_2025_authors = {author: papers for author, papers in author_2025_papers.items() \n                             if len(papers) >= 3}\n    \n    if 
not prolific_2025_authors:\n        print(\"⚠️  No authors found with 3 or more papers in 2025\")\n        # This might be expected depending on the test data\n        return True\n    \n    all_correct = True\n    \n    for author, expected_papers in prolific_2025_authors.items():\n        author_dir = authors_2025_dir / author\n        \n        # Check if author directory exists\n        if not author_dir.exists():\n            print(f\"❌ Missing directory for 2025 author: {author}\")\n            all_correct = False\n            continue\n        \n        # Check if all expected 2025 papers are present\n        for paper in expected_papers:\n            paper_copy = author_dir / paper.name\n            if not paper_copy.exists():\n                print(f\"❌ Missing 2025 paper {paper.name} in {author} directory\")\n                all_correct = False\n    \n    # Check for unexpected directories\n    for item in authors_2025_dir.iterdir():\n        if item.is_dir():\n            dir_name = item.name\n            if dir_name not in prolific_2025_authors:\n                # Check if this author has less than 3 papers in 2025\n                if dir_name in author_2025_papers and len(author_2025_papers[dir_name]) < 3:\n                    print(f\"❌ Author {dir_name} has only {len(author_2025_papers[dir_name])} papers in 2025 but has a folder in 2025_authors\")\n                    all_correct = False\n    \n    if all_correct:\n        print(f\"✅ 2025 authors correctly organized ({len(prolific_2025_authors)} authors)\")\n    \n    return all_correct\n\ndef verify_original_files_intact(test_dir: Path) -> bool:\n    \"\"\"Verify that original HTML files are still present (not moved).\"\"\"\n    html_files = list(test_dir.glob(\"*.html\"))\n    \n    if not html_files:\n        print(\"❌ No original HTML files found in root directory\")\n        return False\n    \n    print(f\"✅ Original HTML files remain intact ({len(html_files)} files)\")\n    return True\n\ndef 
verify_naming_convention(test_dir: Path) -> bool:\n    \"\"\"Verify that author folder names follow the correct naming convention.\"\"\"\n    frequent_authors_dir = test_dir / \"frequent_authors\"\n    authors_2025_dir = test_dir / \"2025_authors\"\n    \n    all_correct = True\n    \n    # Check frequent_authors subdirectories\n    for author_dir in frequent_authors_dir.iterdir():\n        if author_dir.is_dir():\n            name = author_dir.name\n            # Check for lowercase and underscores only\n            if not re.match(r'^[a-z0-9_]+$', name):\n                print(f\"❌ Invalid folder name in frequent_authors: {name} (should be lowercase with underscores)\")\n                all_correct = False\n    \n    # Check 2025_authors subdirectories\n    for author_dir in authors_2025_dir.iterdir():\n        if author_dir.is_dir():\n            name = author_dir.name\n            # Check for lowercase and underscores only\n            if not re.match(r'^[a-z0-9_]+$', name):\n                print(f\"❌ Invalid folder name in 2025_authors: {name} (should be lowercase with underscores)\")\n                all_correct = False\n    \n    if all_correct:\n        print(\"✅ All author folder names follow correct naming convention\")\n    \n    return all_correct\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"🔍 Verifying paper organization in: {test_dir}\")\n        \n        # Analyze papers first\n        print(\"\\n📊 Analyzing papers...\")\n        author_papers, author_2025_papers = analyze_papers(test_dir)\n        \n        # Run verification checks\n        checks = [\n            (\"Directory existence\", lambda: verify_directories_exist(test_dir)),\n            (\"Original files intact\", lambda: verify_original_files_intact(test_dir)),\n            (\"Frequent authors organization\", lambda: verify_frequent_authors(test_dir, author_papers)),\n            (\"2025 authors 
organization\", lambda: verify_2025_authors(test_dir, author_2025_papers)),\n            (\"Naming conventions\", lambda: verify_naming_convention(test_dir))\n        ]\n        \n        all_passed = True\n        for check_name, check_func in checks:\n            print(f\"\\n📋 Checking: {check_name}\")\n            if not check_func():\n                all_passed = False\n        \n        if all_passed:\n            print(\"\\n🎉 All verification checks passed!\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Some verification checks failed!\")\n            sys.exit(1)\n            \n    except Exception as e:\n        print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/papers/find_math_paper/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\nYou are given a directory containing multiple paper files. Please help me find a math-related benchmark paper. I don’t remember its name, but I remember it not only checks whether the answer is correct, but also analyzes whether the model suffers from insufficient knowledge, lacks generalization ability, or relies on rote memorization. After finding this paper, rename its corresponding HTML file to `answer.html`."
  },
  {
    "path": "tasks/filesystem/standard/papers/find_math_paper/meta.json",
    "content": "{\n  \"task_id\": \"find_math_paper\",\n  \"task_name\": \"Find Math Paper\",\n  \"category_id\": \"papers\",\n  \"category_name\": \"Papers\",\n  \"description\": \"Search through academic papers to identify and locate mathematics-related content that satisfies specific mathematical criteria and research requirements.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"pattern analysis\",\n    \"data extraction\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"papers/\\n    ├── 1707.06347.html\\n    ├── 2105.04165.html\\n    ├── 2201.11903.html\\n    ├── 2303.08774.html\\n    ├── 2306.08640.html\\n    ├── 2310.02255.html\\n    ├── 2310.08446.html\\n    ├── 2312.00849.html\\n    ├── 2312.07533.html\\n    ├── 2312.11805.html\\n    ├── 2402.00253.html\\n    ├── 2402.03300.html\\n    ├── 2403.05530.html\\n    ├── 2404.13046.html\\n    ├── 2404.14367.html\\n    ├── 2404.14396.html\\n    ├── 2405.09818.html\\n    ├── 2405.13911.html\\n    ├── 2405.16473.html\\n    ├── 2405.16640.html\\n    ├── 2406.08478.html\\n    ├── 2406.16852.html\\n    ├── 2406.17294.html\\n    ├── 2407.01284.html\\n    ├── 2407.01509.html\\n    ├── 2407.21783.html\\n    ├── 2408.03326.html\\n    ├── 2408.12528.html\\n    ├── 2409.19256.html\\n    ├── 2410.05993.html\\n    ├── 2410.06166.html\\n    ├── 2410.10563.html\\n    ├── 2410.13848.html\\n    ├── 2410.17885.html\\n    ├── 2410.21276.html\\n    ├── 2411.07975.html\\n    ├── 2411.10442.html\\n    ├── 2411.11930.html\\n    ├── 2411.14432.html\\n    ├── 2412.05271.html\\n    ├── 2412.08443.html\\n    ├── 2412.10302.html\\n    ├── 2412.15115.html\\n    ├── 2412.16720.html\\n    ├── 2412.17256.html\\n    ├── 2412.18319.html\\n    ├── 2412.20631.html\\n    ├── 2501.04686.html\\n    ├── 2501.06186.html\\n    ├── 2501.12599.html\\n    ├── 2501.12948.html\\n    ├── 2501.17811.html\\n    ├── 
2502.01456.html\\n    ├── 2502.09621.html\\n    ├── 2502.10391.html\\n    ├── 2502.13923.html\\n    ├── 2503.01785.html\\n    ├── 2503.06520.html\\n    ├── 2503.06749.html\\n    ├── 2503.07065.html\\n    ├── 2503.07365.html\\n    ├── 2503.07536.html\\n    ├── 2503.10291.html\\n    ├── 2503.10615.html\\n    ├── 2503.12937.html\\n    ├── 2503.13939.html\\n    ├── 2503.14476.html\\n    ├── 2503.17352.html\\n    ├── 2503.18892.html\\n    ├── 2503.19786.html\\n    ├── 2503.20783.html\\n    ├── 2503.21620.html\\n    ├── 2503.21776.html\\n    ├── 2503.22679.html\\n    ├── 2504.02587.html\\n    ├── 2504.05599.html\\n    ├── 2504.07491.html\\n    ├── 2504.07934.html\\n    ├── 2504.07954.html\\n    ├── 2504.11455.html\\n    ├── 2504.14945.html\\n    ├── 2504.16656.html\\n    ├── 2505.00703.html\\n    └── arxiv_2025.bib\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/papers.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/papers/find_math_paper/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Find Math Paper Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_answer_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that answer.html exists in the papers directory.\"\"\"\n    answer_file = test_dir  / \"answer.html\"\n    \n    if not answer_file.exists():\n        print(\"❌ File 'answer.html' not found\")\n        return False\n    \n    print(\"✅ answer.html found\")\n    return True\n\ndef verify_original_file_removed(test_dir: Path) -> bool:\n    \"\"\"Verify that the original file (2407.01284.html) no longer exists.\"\"\"\n    original_file = test_dir  / \"2407.01284.html\"\n    \n    if original_file.exists():\n        print(\"❌ Original file 2407.01284.html still exists\")\n        return False\n    \n    print(\"✅ Original file has been renamed\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Find Math Paper Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Answer File Exists\", verify_answer_file_exists),\n        (\"Original File Renamed\", verify_original_file_removed),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Paper correctly renamed to answer.html!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    
else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/papers/organize_legacy_papers/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nYou are given a directory containing multiple paper files. You have a collection of arXiv papers saved as HTML files in the papers directory, along with a BibTeX file. Your task is to organize the older papers (2023 and earlier) into a structured year-based hierarchy with proper documentation, while leaving newer papers in the original location.\n\n### Task Objectives\n\n1. **Organize by year**: Create a year-based directory structure for papers from 2023 and earlier\n2. **Generate documentation**: Create INDEX.md files for each year with paper metadata\n3. **Create summary**: Build a master SUMMARY.md file linking to all year indexes\n\n### Detailed Requirements\n\n#### Step 1: Organization\n- Create directory structure: `organized/{year}/` where year is extracted from the arXiv ID\n  - Example: `1707.06347.html` → `organized/2017/1707.06347.html`\n- Move each HTML file from 2023 and earlier to its corresponding year folder, keeping original filenames\n- Papers from 2024 onwards (arXiv IDs starting with `24` or `25`) should remain in the original papers directory\n\n#### Step 2: Year Index Files\nFor each year folder, create an `INDEX.md` file containing:\n- A markdown table with three columns: `ArXiv ID | Authors | Local Path`\n- Extract authors from `<meta name=\"citation_author\" content=\"...\"/>` tags, keeping only the first 3 authors\n- If there are more than 3 authors, list the first 3 followed by \"et al.\"\n- Format authors as: \"Author1, Author2, Author3\" or \"Author1, Author2, Author3, et al.\"\n- Local Path should be just the filename (e.g., `1707.06347.html`)\n- Sort entries by arXiv ID in ascending order\n\n#### Step 3: Master Summary\nCreate `organized/SUMMARY.md` with:\n- A markdown table with columns: `Year | Paper Count | Index Link`\n- Index Link should be a relative markdown link (e.g., `[View Index](2017/INDEX.md)`)\n- Sort by year in 
ascending order\n\n### Expected Output Structure\n\n```\npapers/\n├── arxiv_2025.bib (remains here)\n├── (2024+ HTML files remain here)\n└── organized/\n    ├── SUMMARY.md\n    ├── 2017/\n    │   ├── INDEX.md\n    │   └── 1707.06347.html\n    ├── 2021/\n    │   ├── INDEX.md\n    │   └── 2105.04165.html\n    ├── 2022/\n    │   ├── INDEX.md\n    │   └── 2201.11903.html\n    └── 2023/\n        ├── INDEX.md\n        ├── 2303.08774.html\n        ├── 2306.08640.html\n        ├── 2310.02255.html\n        ├── 2310.08446.html\n        ├── 2312.00849.html\n        ├── 2312.07533.html\n        └── 2312.11805.html\n```"
  },
  {
    "path": "tasks/filesystem/standard/papers/organize_legacy_papers/meta.json",
    "content": "{\n  \"task_id\": \"organize_legacy_papers\",\n  \"task_name\": \"Organize Legacy Papers\",\n  \"category_id\": \"papers\",\n  \"category_name\": \"Papers\",\n  \"description\": \"Structure and organize older academic papers from 2023 and earlier into a year-based hierarchical directory system with proper documentation.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"file organization\",\n    \"data extraction\",\n    \"cross-referencing\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"papers/\\n    ├── 1707.06347.html\\n    ├── 2105.04165.html\\n    ├── 2201.11903.html\\n    ├── 2303.08774.html\\n    ├── 2306.08640.html\\n    ├── 2310.02255.html\\n    ├── 2310.08446.html\\n    ├── 2312.00849.html\\n    ├── 2312.07533.html\\n    ├── 2312.11805.html\\n    ├── 2402.00253.html\\n    ├── 2402.03300.html\\n    ├── 2403.05530.html\\n    ├── 2404.13046.html\\n    ├── 2404.14367.html\\n    ├── 2404.14396.html\\n    ├── 2405.09818.html\\n    ├── 2405.13911.html\\n    ├── 2405.16473.html\\n    ├── 2405.16640.html\\n    ├── 2406.08478.html\\n    ├── 2406.16852.html\\n    ├── 2406.17294.html\\n    ├── 2407.01284.html\\n    ├── 2407.01509.html\\n    ├── 2407.21783.html\\n    ├── 2408.03326.html\\n    ├── 2408.12528.html\\n    ├── 2409.19256.html\\n    ├── 2410.05993.html\\n    ├── 2410.06166.html\\n    ├── 2410.10563.html\\n    ├── 2410.13848.html\\n    ├── 2410.17885.html\\n    ├── 2410.21276.html\\n    ├── 2411.07975.html\\n    ├── 2411.10442.html\\n    ├── 2411.11930.html\\n    ├── 2411.14432.html\\n    ├── 2412.05271.html\\n    ├── 2412.08443.html\\n    ├── 2412.10302.html\\n    ├── 2412.15115.html\\n    ├── 2412.16720.html\\n    ├── 2412.17256.html\\n    ├── 2412.18319.html\\n    ├── 2412.20631.html\\n    ├── 2501.04686.html\\n    ├── 2501.06186.html\\n    ├── 2501.12599.html\\n    ├── 2501.12948.html\\n    ├── 
2501.17811.html\\n    ├── 2502.01456.html\\n    ├── 2502.09621.html\\n    ├── 2502.10391.html\\n    ├── 2502.13923.html\\n    ├── 2503.01785.html\\n    ├── 2503.06520.html\\n    ├── 2503.06749.html\\n    ├── 2503.07065.html\\n    ├── 2503.07365.html\\n    ├── 2503.07536.html\\n    ├── 2503.10291.html\\n    ├── 2503.10615.html\\n    ├── 2503.12937.html\\n    ├── 2503.13939.html\\n    ├── 2503.14476.html\\n    ├── 2503.17352.html\\n    ├── 2503.18892.html\\n    ├── 2503.19786.html\\n    ├── 2503.20783.html\\n    ├── 2503.21620.html\\n    ├── 2503.21776.html\\n    ├── 2503.22679.html\\n    ├── 2504.02587.html\\n    ├── 2504.05599.html\\n    ├── 2504.07491.html\\n    ├── 2504.07934.html\\n    ├── 2504.07954.html\\n    ├── 2504.11455.html\\n    ├── 2504.14945.html\\n    ├── 2504.16656.html\\n    ├── 2505.00703.html\\n    └── arxiv_2025.bib\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/papers.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/papers/organize_legacy_papers/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Papers Collection Cleanup and Organization Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport re\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_papers_remain(test_dir: Path) -> bool:\n    \"\"\"Verify that BibTeX and 2024+ papers remain in original directory.\"\"\"\n    papers_dir = test_dir\n    \n    # Check BibTeX file still exists\n    bib_file = papers_dir / \"arxiv_2025.bib\"\n    if not bib_file.exists():\n        print(\"❌ BibTeX file arxiv_2025.bib not found\")\n        return False\n    print(\"✅ BibTeX file remains in place\")\n    \n    # Check that 2024+ papers remain in original directory\n    found_2024_plus = False\n    if papers_dir.exists():\n        for html_file in papers_dir.glob(\"*.html\"):\n            arxiv_id = html_file.stem\n            year_part = arxiv_id[:2] if len(arxiv_id) >= 2 else \"\"\n            if year_part.isdigit():\n                year = int(year_part)\n                if year >= 24:\n                    found_2024_plus = True\n                    break\n    \n    if found_2024_plus:\n        print(\"✅ 2024+ papers remain in original directory\")\n    else:\n        print(\"⚠️ No 2024+ papers found (this may be expected if none existed)\")\n    \n    # Check that pre-2024 papers are NOT in original directory\n    pre_2024_found = []\n    if papers_dir.exists():\n        for html_file in papers_dir.glob(\"*.html\"):\n            arxiv_id = html_file.stem\n            year_part = arxiv_id[:2] if len(arxiv_id) >= 2 else \"\"\n            if year_part.isdigit():\n                year = int(year_part)\n                if year < 24:\n                    
pre_2024_found.append(html_file.name)\n    \n    if pre_2024_found:\n        print(f\"❌ Pre-2024 papers still in original directory: {pre_2024_found[:3]}...\")\n        return False\n    \n    print(\"✅ Pre-2024 papers have been moved\")\n    return True\n\ndef verify_directory_structure(test_dir: Path) -> bool:\n    \"\"\"Verify the organized directory structure exists.\"\"\"\n    organized_dir = test_dir / \"organized\"\n    \n    if not organized_dir.exists():\n        print(\"❌ organized/ directory not found\")\n        return False\n    print(\"✅ organized/ directory exists\")\n    \n    # Expected years based on pre-2024 papers\n    expected_years = [\"2017\", \"2021\", \"2022\", \"2023\"]\n    found_years = []\n    \n    for year in expected_years:\n        year_dir = organized_dir / year\n        if year_dir.exists() and year_dir.is_dir():\n            found_years.append(year)\n    \n    if len(found_years) != len(expected_years):\n        print(f\"❌ Expected year directories {expected_years}, found {found_years}\")\n        return False\n    \n    print(f\"✅ All expected year directories exist: {found_years}\")\n    return True\n\ndef verify_papers_moved(test_dir: Path) -> bool:\n    \"\"\"Verify papers are correctly moved to year folders.\"\"\"\n    organized_dir = test_dir / \"organized\"\n    \n    # Expected paper distribution\n    expected_papers = {\n        \"2017\": [\"1707.06347.html\"],\n        \"2021\": [\"2105.04165.html\"],\n        \"2022\": [\"2201.11903.html\"],\n        \"2023\": [\"2303.08774.html\", \"2306.08640.html\", \"2310.02255.html\", \n                 \"2310.08446.html\", \"2312.00849.html\", \"2312.07533.html\", \n                 \"2312.11805.html\"]\n    }\n    \n    all_correct = True\n    for year, papers in expected_papers.items():\n        year_dir = organized_dir / year\n        if not year_dir.exists():\n            print(f\"❌ Year directory {year} doesn't exist\")\n            return False\n        \n        
actual_papers = sorted([f.name for f in year_dir.glob(\"*.html\")])\n        expected_sorted = sorted(papers)\n        \n        if actual_papers != expected_sorted:\n            print(f\"❌ Papers in {year}/: expected {expected_sorted}, found {actual_papers}\")\n            all_correct = False\n        else:\n            print(f\"✅ Correct papers in {year}/: {len(actual_papers)} files\")\n    \n    return all_correct\n\ndef verify_index_files(test_dir: Path) -> bool:\n    \"\"\"Verify INDEX.md files exist and have correct format.\"\"\"\n    organized_dir = test_dir / \"organized\"\n    years = [\"2017\", \"2021\", \"2022\", \"2023\"]\n    \n    for year in years:\n        index_file = organized_dir / year / \"INDEX.md\"\n        \n        if not index_file.exists():\n            print(f\"❌ INDEX.md missing in {year}/\")\n            return False\n        \n        content = index_file.read_text()\n        \n        # Check for table format\n        if \"ArXiv ID\" not in content or \"Authors\" not in content or \"Local Path\" not in content:\n            print(f\"❌ INDEX.md in {year}/ missing required columns\")\n            return False\n        \n        \n        # Check that papers are listed\n        year_dir = organized_dir / year\n        html_files = list(year_dir.glob(\"*.html\"))\n        for html_file in html_files:\n            arxiv_id = html_file.stem\n            if arxiv_id not in content:\n                print(f\"❌ INDEX.md in {year}/ missing paper {arxiv_id}\")\n                return False\n        \n        print(f\"✅ INDEX.md in {year}/ has correct format\")\n    \n    return True\n\ndef verify_author_extraction(test_dir: Path) -> bool:\n    \"\"\"Verify that authors are correctly extracted from HTML metadata (max 3 authors).\"\"\"\n    organized_dir = test_dir / \"organized\"\n    \n    # Check a sample paper's authors\n    sample_file = organized_dir / \"2017\" / \"1707.06347.html\"\n    if not sample_file.exists():\n        print(\"❌ Cannot 
verify author extraction - sample file missing\")\n        return False\n    \n    # Read the HTML to get expected authors\n    html_content = sample_file.read_text()\n    author_pattern = r'<meta name=\"citation_author\" content=\"([^\"]+)\"'\n    all_authors = re.findall(author_pattern, html_content)\n    \n    if not all_authors:\n        print(\"❌ No authors found in sample HTML file\")\n        return False\n    \n    # Build expected author string (max 3 authors)\n    if len(all_authors) <= 3:\n        expected_author_str = \", \".join(all_authors)\n    else:\n        expected_author_str = \", \".join(all_authors[:3]) + \", et al.\"\n    \n    # Check if INDEX.md contains these authors\n    index_file = organized_dir / \"2017\" / \"INDEX.md\"\n    index_content = index_file.read_text()\n    \n    # Find the line with this paper\n    found = False\n    for line in index_content.split('\\n'):\n        if \"1707.06347\" in line:\n            found = True\n            # Check if authors are correctly formatted\n            if len(all_authors) > 3:\n                # Should have first 3 authors and \"et al.\"\n                if \"et al.\" not in line:\n                    print(\"❌ Missing 'et al.' for paper with >3 authors\")\n                    return False\n                # Check first 3 authors are present\n                for author in all_authors[:3]:\n                    if author not in line:\n                        print(f\"❌ Author '{author}' not found in INDEX.md\")\n                        return False\n                # Check that 4th author is NOT present\n                if len(all_authors) > 3 and all_authors[3] in line:\n                    print(f\"❌ Fourth author '{all_authors[3]}' should not be in INDEX.md\")\n                    return False\n            else:\n                # Should have all authors, no \"et al.\"\n                if \"et al.\" in line:\n                    print(\"❌ Should not have 'et al.' 
for paper with ≤3 authors\")\n                    return False\n                for author in all_authors:\n                    if author not in line:\n                        print(f\"❌ Author '{author}' not found in INDEX.md\")\n                        return False\n            break\n    \n    if not found:\n        print(\"❌ Paper 1707.06347 not found in INDEX.md\")\n        return False\n    \n    print(\"✅ Authors correctly extracted (max 3) from HTML metadata\")\n    \n    # Additional check: verify 3-author limit across all papers\n    print(\"\\nVerifying 3-author limit across all papers...\")\n    years = [\"2017\", \"2021\", \"2022\", \"2023\"]\n    for year in years:\n        year_dir = organized_dir / year\n        if not year_dir.exists():\n            continue\n            \n        index_file = year_dir / \"INDEX.md\"\n        if not index_file.exists():\n            continue\n            \n        index_content = index_file.read_text()\n        \n        # Check each HTML file in the year directory\n        for html_file in year_dir.glob(\"*.html\"):\n            arxiv_id = html_file.stem\n            \n            # Get actual authors from HTML\n            html_content = html_file.read_text()\n            authors = re.findall(r'<meta name=\"citation_author\" content=\"([^\"]+)\"', html_content)\n            \n            # Find corresponding line in INDEX.md\n            for line in index_content.split('\\n'):\n                if arxiv_id in line and '|' in line and 'ArXiv ID' not in line:\n                    # Count authors in the line (split by comma)\n                    author_parts = line.split('|')[1] if '|' in line else \"\"\n                    \n                    # Check et al. usage\n                    if len(authors) > 3:\n                        if \"et al.\" not in line:\n                            print(f\"❌ {year}/{arxiv_id}: Missing 'et al.' 
for {len(authors)} authors\")\n                            return False\n                    elif \"et al.\" in line:\n                        print(f\"❌ {year}/{arxiv_id}: Unexpected 'et al.' for {len(authors)} authors\")\n                        return False\n                    \n                    # Verify no more than 3 authors are listed\n                    author_count = author_parts.count(',') + 1 if author_parts.strip() else 0\n                    if \"et al.\" in author_parts:\n                        author_count -= 1  # Don't count \"et al.\" as an author\n                    \n                    if author_count > 3:\n                        print(f\"❌ {year}/{arxiv_id}: More than 3 authors listed\")\n                        return False\n                    \n                    break\n    \n    print(\"✅ All papers respect the 3-author limit\")\n    return True\n\ndef verify_summary_file(test_dir: Path) -> bool:\n    \"\"\"Verify SUMMARY.md exists and has correct content.\"\"\"\n    summary_file = test_dir / \"organized\" / \"SUMMARY.md\"\n    \n    if not summary_file.exists():\n        print(\"❌ SUMMARY.md not found\")\n        return False\n    \n    content = summary_file.read_text()\n    \n    # Check for required columns\n    if \"Year\" not in content or \"Paper Count\" not in content or \"Index Link\" not in content:\n        print(\"❌ SUMMARY.md missing required columns\")\n        return False\n    \n    \n    # Check for year entries\n    expected_years = [\"2017\", \"2021\", \"2022\", \"2023\"]\n    for year in expected_years:\n        if year not in content:\n            print(f\"❌ SUMMARY.md missing year {year}\")\n            return False\n    \n    # Check for links to INDEX.md files\n    expected_links = [\n        f\"{year}/INDEX.md\" for year in expected_years\n    ]\n    for link in expected_links:\n        if link not in content:\n            print(f\"❌ SUMMARY.md missing link to {link}\")\n            return False\n    \n    # 
Check paper counts\n    expected_counts = {\n        \"2017\": 1,\n        \"2021\": 1,\n        \"2022\": 1,\n        \"2023\": 7\n    }\n    \n    for year, count in expected_counts.items():\n        # Look for the row with this year\n        for line in content.split('\\n'):\n            if f\"| {year}\" in line or f\"|{year}\" in line:\n                if str(count) not in line:\n                    print(f\"❌ SUMMARY.md has incorrect paper count for {year}\")\n                    return False\n                break\n    \n    print(\"✅ SUMMARY.md has correct format and content\")\n    return True\n\ndef verify_sorting(test_dir: Path) -> bool:\n    \"\"\"Verify that entries are sorted correctly.\"\"\"\n    organized_dir = test_dir / \"organized\"\n    \n    # Check SUMMARY.md year sorting\n    summary_file = organized_dir / \"SUMMARY.md\"\n    content = summary_file.read_text()\n    \n    # Extract years from table rows\n    years_in_summary = []\n    for line in content.split('\\n'):\n        if '|' in line and any(year in line for year in [\"2017\", \"2021\", \"2022\", \"2023\"]):\n            # Extract year from the line\n            for year in [\"2017\", \"2021\", \"2022\", \"2023\"]:\n                if year in line:\n                    years_in_summary.append(year)\n                    break\n    \n    if years_in_summary != sorted(years_in_summary):\n        print(f\"❌ SUMMARY.md years not sorted: {years_in_summary}\")\n        return False\n    \n    print(\"✅ SUMMARY.md years sorted correctly\")\n    \n    # Check INDEX.md arxiv ID sorting for one year\n    index_file = organized_dir / \"2023\" / \"INDEX.md\"\n    if index_file.exists():\n        content = index_file.read_text()\n        arxiv_ids = []\n        for line in content.split('\\n'):\n            if '|' in line and '23' in line and 'ArXiv ID' not in line and '---' not in line:\n                # Extract arxiv ID\n                match = re.search(r'23\\d{2}\\.\\d{5}', line)\n               
 if match:\n                    arxiv_ids.append(match.group())\n        \n        if arxiv_ids != sorted(arxiv_ids):\n            print(f\"❌ INDEX.md arxiv IDs not sorted in 2023/\")\n            return False\n        \n        print(\"✅ INDEX.md entries sorted by arxiv ID\")\n    \n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Papers Collection Cleanup and Organization...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Papers Remain/Move Verification\", verify_papers_remain),\n        (\"Directory Structure\", verify_directory_structure),\n        (\"Papers Moved Correctly\", verify_papers_moved),\n        (\"Index Files Format\", verify_index_files),\n        (\"Author Extraction\", verify_author_extraction),\n        (\"Summary File\", verify_summary_file),\n        (\"Sorting Verification\", verify_sorting),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        try:\n            if not verify_func(test_dir):\n                all_passed = False\n        except Exception as e:\n            print(f\"❌ Error in {step_name}: {e}\")\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Papers organized correctly!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/student_database/duplicate_name/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\nPlease help me identify duplicate names from the list of all the 150 students. Do not use python code. Then generate a `namesake.txt` file to record the results in the following format, with each group written in three lines:\n\nname: xxx\ncount: xxx\nids: xxx, xxx, ...\n\nLeave one blank line between every two groups. If there are multiple duplicates, just list all corresponding IDs in the third line.\n"
  },
  {
    "path": "tasks/filesystem/standard/student_database/duplicate_name/meta.json",
    "content": "{\n  \"task_id\": \"duplicate_name\",\n  \"task_name\": \"Duplicate Name\",\n  \"category_id\": \"student_database\",\n  \"category_name\": \"Student Database\",\n  \"description\": \"Identify students with identical names from a 150-student database and generate a formatted namesake grouping report file.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-10\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"pattern analysis\",\n    \"data extraction\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"student_database/\\n    ├── 20101250_Patricia_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20101701_Isabella_Davis/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20102572_Michael_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104233_Robert_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104498_Sarah_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104653_Sophia_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104675_Michael_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104846_Christopher_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20107487_Mia_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20108742_Sarah_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20109144_Emma_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20109803_Oliver_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20111634_Isabella_Thomas/\\n 
   │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20112439_Christopher_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20113368_William_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20113603_Robert_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20114397_Isabella_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20114869_Ethan_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115252_Mason_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115632_Elizabeth_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115753_Charlotte_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115924_Michael_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20116232_Olivia_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20119528_Thomas_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20122427_Karen_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20122977_Evelyn_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20123376_Joseph_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20125451_Barbara_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20126203_Barbara_Davis/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20126394_Olivia_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 
20126471_Ethan_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20127423_John_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20128249_Oliver_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20128879_Christopher_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20129898_Jessica_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20131271_Olivia_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20131518_Sophia_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20132026_Isabella_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20132370_James_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20132669_Noah_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20133527_Mason_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20133697_Isabella_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20135821_Thomas_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20136681_Benjamin_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20136890_Benjamin_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20137514_Lucas_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20139234_Harper_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20139637_Noah_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n   
 ├── 20139647_Patricia_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20141421_Linda_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20142085_William_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20142383_Amelia_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20143406_Susan_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20143830_James_Garcia/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20146035_Christopher_Garcia/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20146277_William_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20146279_Christopher_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20147301_James_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20147789_James_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20148681_John_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20148778_Susan_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20149712_Jessica_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20151012_Harper_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20153174_Benjamin_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20153412_Charlotte_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20153606_James_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── 
recommendation_letter.txt\\n    ├── 20153687_Richard_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20154518_John_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20154710_Benjamin_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20156469_Jennifer_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20156522_Jennifer_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20156851_Noah_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20157943_Harper_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20158266_Sophia_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20158294_Sophia_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20158819_Sarah_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20159113_John_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20159695_James_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20161279_William_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20162253_Mason_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20162542_Mia_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20163356_Ava_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20164515_Patricia_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20164801_Noah_Rodriguez/\\n    │       ├── basic_info.txt\\n  
  │       └── recommendation_letter.txt\\n    ├── 20165511_Mary_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166436_Christopher_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166487_Barbara_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166564_Ava_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166998_Ava_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20168311_Lucas_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20168491_Karen_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20169515_Thomas_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20171050_Christopher_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20171406_Mary_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20171613_Ethan_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20172106_Isabella_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173259_Michael_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173492_Richard_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173501_Mary_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173517_Susan_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20174207_Richard_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20174369_Mary_Garcia/\\n    │       ├── 
basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20175314_William_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20176169_Lucas_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20176947_Noah_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20177389_James_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20178687_Isabella_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20179461_William_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20179690_Linda_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20181056_Sarah_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20182020_Patricia_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20182390_Ethan_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20183149_David_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20183219_Charlotte_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20184489_Jessica_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20186154_Charlotte_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20186510_James_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187107_David_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187144_Mary_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187892_Christopher_Taylor/\\n   
 │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187921_Mary_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187967_Sarah_Davis/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20188937_James_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189123_Mary_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189192_Olivia_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189268_Emma_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189854_William_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20191265_Joseph_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20192725_Robert_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194054_Michael_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194160_Benjamin_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194164_Sarah_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194525_John_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20195164_Jennifer_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20195982_David_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196776_William_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196896_Olivia_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196961_Joseph_Thomas/\\n    │      
 ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196998_Ethan_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20198548_Evelyn_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199036_Benjamin_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199583_Mary_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199735_Mason_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199872_Sophia_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199980_James_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20201385_John_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20201800_John_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20202548_Robert_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20203855_Mia_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    └── 20204611_Sarah_Wilson/\\n            ├── basic_info.txt\\n            └── recommendation_letter.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/student_database.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/student_database/duplicate_name/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Student Database Task: Find Duplicate Names\nSimplified version that only checks against expected results without folder validation\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_namesake_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the namesake.txt file exists.\"\"\"\n    namesake_file = test_dir / \"namesake.txt\"\n    \n    if not namesake_file.exists():\n        print(\"❌ File 'namesake.txt' not found\")\n        return False\n    \n    print(\"✅ Namesake file found\")\n    return True\n\ndef parse_namesake_file(test_dir: Path) -> dict:\n    \"\"\"Parse the namesake.txt file and return structured data.\"\"\"\n    namesake_file = test_dir / \"namesake.txt\"\n    \n    try:\n        content = namesake_file.read_text()\n        lines = content.strip().split('\\n')\n        \n        namesakes = {}\n        current_line = 0\n        \n        while current_line < len(lines):\n            # Skip blank lines\n            if not lines[current_line].strip():\n                current_line += 1\n                continue\n            \n            # Check if we have enough lines for a complete group\n            if current_line + 2 >= len(lines):\n                print(f\"❌ Incomplete group at line {current_line + 1}\")\n                return {}\n            \n            # Parse group\n            name_line = lines[current_line].strip()\n            count_line = lines[current_line + 1].strip()\n            ids_line = lines[current_line + 2].strip()\n            \n            # Extract name\n            if not name_line.startswith(\"name: \"):\n                
print(f\"❌ Invalid name line format at line {current_line + 1}: {name_line}\")\n                return {}\n            name = name_line.replace(\"name: \", \"\").strip()\n            \n            # Extract count\n            if not count_line.startswith(\"count: \"):\n                print(f\"❌ Invalid count line format at line {current_line + 2}: {count_line}\")\n                return {}\n            count_str = count_line.replace(\"count: \", \"\").strip()\n            try:\n                count = int(count_str)\n            except ValueError:\n                print(f\"❌ Invalid count format: {count_str}\")\n                return {}\n            \n            # Extract IDs\n            if not ids_line.startswith(\"ids: \"):\n                print(f\"❌ Invalid ids line format at line {current_line + 3}: {ids_line}\")\n                return {}\n            ids_str = ids_line.replace(\"ids: \", \"\").strip()\n            ids = [id.strip() for id in ids_str.split(\",\")]\n            \n            namesakes[name] = {\n                'count': count,\n                'ids': ids\n            }\n            \n            current_line += 4  # Skip to next group (after blank line)\n        \n        return namesakes\n        \n    except Exception as e:\n        print(f\"❌ Error parsing namesake file: {e}\")\n        return {}\n\ndef verify_against_expected_results(namesakes: dict) -> bool:\n    \"\"\"Verify that the results match the expected answer.md content exactly.\"\"\"\n    \n    # Expected duplicate names from answer.md (hardcoded)\n    expected_duplicates = {\n        'Isabella Smith': ['20132026', '20133697'],\n        'Ava Lopez': ['20166564', '20166998'],\n        'James Moore': ['20159695', '20188937'],\n        'William Taylor': ['20175314', '20189854'],\n        'Ethan Wilson': ['20182390', '20196998'],\n        'Christopher Taylor': ['20128879', '20187892'],\n        'William Anderson': ['20142085', '20146277'],\n        'James Anderson': ['20147789', 
'20153606'],\n        'Olivia Jones': ['20189192', '20196896'],\n        'Mason Johnson': ['20115252', '20199735'],\n        'Benjamin Jackson': ['20153174', '20194160'],\n        'John Taylor': ['20194525', '20201385'],\n        'Susan Anderson': ['20148778', '20173517'],\n        'Christopher Moore': ['20112439', '20146279'],\n        'Sarah Wilson': ['20158819', '20204611'],\n        'Sarah Brown': ['20104498', '20108742']\n    }\n    \n    # Check if exactly 16 duplicate names are found\n    if len(namesakes) != 16:\n        print(f\"❌ Expected exactly 16 duplicate names, but found {len(namesakes)}\")\n        return False\n    \n    # Check if all expected duplicate names are present\n    for expected_name in expected_duplicates:\n        if expected_name not in namesakes:\n            print(f\"❌ Missing expected duplicate name: '{expected_name}'\")\n            return False\n    \n    # Check if all namesakes in the file are actually duplicates\n    for name, data in namesakes.items():\n        if name not in expected_duplicates:\n            print(f\"❌ Unexpected duplicate name found: '{name}' (not in expected list)\")\n            return False\n        \n        expected_ids = set(expected_duplicates[name])\n        stated_ids = set(data['ids'])\n        \n        if expected_ids != stated_ids:\n            print(f\"❌ ID mismatch for '{name}':\")\n            print(f\"   Expected: {sorted(expected_ids)}\")\n            print(f\"   Stated: {sorted(stated_ids)}\")\n            return False\n        \n        # Verify count matches\n        if data['count'] != 2:\n            print(f\"❌ Count mismatch for '{name}': expected 2, got {data['count']}\")\n            return False\n    \n    print(\"✅ All 16 expected duplicate names are correctly identified\")\n    print(\"✅ All student IDs match expected results\")\n    print(\"✅ All counts are correct (2 for each duplicate name)\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    
test_dir = get_test_directory()\n    print(\"🔍 Verifying Student Database Task: Find Duplicate Names...\")\n    \n    # Check if namesake file exists\n    print(\"\\n--- File Existence Check ---\")\n    if not verify_namesake_file_exists(test_dir):\n        print(\"\\n❌ Basic verification failed, cannot proceed with content verification\")\n        sys.exit(1)\n    \n    # Parse the file and run content verification\n    print(\"\\n--- Content Verification ---\")\n    namesakes = parse_namesake_file(test_dir)\n    \n    if not namesakes:\n        print(\"❌ Failed to parse namesake file\")\n        sys.exit(1)\n    \n    # Verify against expected results\n    print(\"\\n--- Results Verification ---\")\n    if not verify_against_expected_results(namesakes):\n        print(\"\\n❌ Task verification: FAIL\")\n        sys.exit(1)\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    print(\"✅ Namesake identification completed correctly!\")\n    print(f\"🎉 Found exactly {len(namesakes)} duplicate names (16 expected)\")\n    print(\"🎉 Task verification: PASS\")\n    sys.exit(0)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/student_database/english_talent/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\nWe are now recruiting students proficient in English to be responsible for the school’s English media operations. To contact with students, from the total of 150 students, select those who **meet both of the following criteria** :\n\n1. Rated ****S** or** ****A** grade level in** `recommendation_letter.txt` by their teachers.\n2. TOEFL score in the basic info is **higher than or equal to 100** .\n\n Please compile all their names, ids and emails into a  `qualified_students.txt` file, with the format:\n\n    name: xxx\n\tid: xxx\n\temail: xxx\n\nEach person’s information should occupy three lines, with one blank line between each block.\n"
  },
  {
    "path": "tasks/filesystem/standard/student_database/english_talent/meta.json",
    "content": "{\n  \"task_id\": \"english_talent\",\n  \"task_name\": \"English Talent\",\n  \"category_id\": \"student_database\",\n  \"category_name\": \"Student Database\",\n  \"description\": \"Select qualified students with S/A recommendation grades and TOEFL scores ≥100 for English media operations recruitment opportunities.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-10\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"cross-referencing\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"student_database/\\n    ├── 20101250_Patricia_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20101701_Isabella_Davis/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20102572_Michael_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104233_Robert_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104498_Sarah_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104653_Sophia_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104675_Michael_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104846_Christopher_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20107487_Mia_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20108742_Sarah_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20109144_Emma_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20109803_Oliver_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── 
recommendation_letter.txt\\n    ├── 20111634_Isabella_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20112439_Christopher_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20113368_William_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20113603_Robert_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20114397_Isabella_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20114869_Ethan_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115252_Mason_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115632_Elizabeth_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115753_Charlotte_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115924_Michael_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20116232_Olivia_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20119528_Thomas_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20122427_Karen_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20122977_Evelyn_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20123376_Joseph_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20125451_Barbara_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20126203_Barbara_Davis/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20126394_Olivia_Williams/\\n    │       ├── 
basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20126471_Ethan_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20127423_John_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20128249_Oliver_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20128879_Christopher_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20129898_Jessica_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20131271_Olivia_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20131518_Sophia_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20132026_Isabella_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20132370_James_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20132669_Noah_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20133527_Mason_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20133697_Isabella_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20135821_Thomas_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20136681_Benjamin_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20136890_Benjamin_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20137514_Lucas_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20139234_Harper_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20139637_Noah_Johnson/\\n    │      
 ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20139647_Patricia_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20141421_Linda_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20142085_William_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20142383_Amelia_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20143406_Susan_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20143830_James_Garcia/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20146035_Christopher_Garcia/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20146277_William_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20146279_Christopher_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20147301_James_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20147789_James_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20148681_John_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20148778_Susan_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20149712_Jessica_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20151012_Harper_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20153174_Benjamin_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20153412_Charlotte_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 
20153606_James_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20153687_Richard_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20154518_John_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20154710_Benjamin_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20156469_Jennifer_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20156522_Jennifer_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20156851_Noah_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20157943_Harper_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20158266_Sophia_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20158294_Sophia_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20158819_Sarah_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20159113_John_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20159695_James_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20161279_William_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20162253_Mason_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20162542_Mia_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20163356_Ava_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20164515_Patricia_Moore/\\n    │       ├── basic_info.txt\\n    │       └── 
recommendation_letter.txt\\n    ├── 20164801_Noah_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20165511_Mary_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166436_Christopher_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166487_Barbara_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166564_Ava_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166998_Ava_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20168311_Lucas_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20168491_Karen_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20169515_Thomas_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20171050_Christopher_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20171406_Mary_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20171613_Ethan_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20172106_Isabella_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173259_Michael_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173492_Richard_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173501_Mary_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173517_Susan_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20174207_Richard_Wilson/\\n    │       ├── 
basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20174369_Mary_Garcia/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20175314_William_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20176169_Lucas_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20176947_Noah_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20177389_James_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20178687_Isabella_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20179461_William_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20179690_Linda_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20181056_Sarah_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20182020_Patricia_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20182390_Ethan_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20183149_David_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20183219_Charlotte_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20184489_Jessica_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20186154_Charlotte_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20186510_James_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187107_David_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187144_Mary_Jackson/\\n    │     
  ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187892_Christopher_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187921_Mary_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187967_Sarah_Davis/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20188937_James_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189123_Mary_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189192_Olivia_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189268_Emma_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189854_William_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20191265_Joseph_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20192725_Robert_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194054_Michael_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194160_Benjamin_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194164_Sarah_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194525_John_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20195164_Jennifer_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20195982_David_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196776_William_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196896_Olivia_Jones/\\n    │       
├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196961_Joseph_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196998_Ethan_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20198548_Evelyn_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199036_Benjamin_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199583_Mary_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199735_Mason_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199872_Sophia_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199980_James_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20201385_John_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20201800_John_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20202548_Robert_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20203855_Mia_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    └── 20204611_Sarah_Wilson/\\n            ├── basic_info.txt\\n            └── recommendation_letter.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/student_database.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/student_database/english_talent/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Student Database Task: English Talent Recruitment\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport re\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_qualified_students_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the qualified_students.txt file exists.\"\"\"\n    answer_file = test_dir / \"qualified_students.txt\"\n    \n    if not answer_file.exists():\n        print(\"❌ File 'qualified_students.txt' not found\")\n        return False\n    \n    print(\"✅ Qualified students file found\")\n    return True\n\ndef verify_file_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the qualified_students.txt file has the correct format.\"\"\"\n    answer_file = test_dir / \"qualified_students.txt\"\n    \n    try:\n        content = answer_file.read_text()\n        lines = content.strip().split('\\n')\n        \n        if not lines:\n            print(\"❌ File is empty\")\n            return False\n        \n        # Check if content follows the expected pattern\n        # Each student should have 3 lines: name, id, email\n        # Students should be separated by blank lines\n        current_line = 0\n        student_count = 0\n        \n        while current_line < len(lines):\n            # Skip blank lines\n            if not lines[current_line].strip():\n                current_line += 1\n                continue\n            \n            # Check if we have enough lines for a complete student\n            if current_line + 2 >= len(lines):\n                print(f\"❌ Incomplete student entry at line {current_line + 1}\")\n                return False\n            \n            # Verify name 
line format\n            if not lines[current_line].strip().startswith(\"name: \"):\n                print(f\"❌ Invalid name line format at line {current_line + 1}: {lines[current_line]}\")\n                return False\n            \n            # Verify id line format\n            if not lines[current_line + 1].strip().startswith(\"id: \"):\n                print(f\"❌ Invalid id line format at line {current_line + 2}: {lines[current_line + 1]}\")\n                return False\n            \n            # Verify email line format\n            if not lines[current_line + 2].strip().startswith(\"email: \"):\n                print(f\"❌ Invalid email line format at line {current_line + 3}: {lines[current_line + 2]}\")\n                return False\n            \n            student_count += 1\n            current_line += 3\n            \n            # Check for blank line separator (except for the last student)\n            if current_line < len(lines) and lines[current_line].strip():\n                print(f\"❌ Missing blank line separator after student {student_count}\")\n                return False\n            \n            current_line += 1\n        \n        if student_count == 0:\n            print(\"❌ No valid student entries found\")\n            return False\n        \n        print(f\"✅ File format is correct with {student_count} students\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading qualified students file: {e}\")\n        return False\n\ndef parse_qualified_students_file(test_dir: Path) -> list:\n    \"\"\"Parse the qualified_students.txt file and return structured data.\"\"\"\n    answer_file = test_dir / \"qualified_students.txt\"\n    \n    try:\n        content = answer_file.read_text()\n        lines = content.strip().split('\\n')\n        \n        students = []\n        current_line = 0\n        \n        while current_line < len(lines):\n            # Skip blank lines\n            if not 
lines[current_line].strip():\n                current_line += 1\n                continue\n            \n            # Parse student entry\n            name_line = lines[current_line].strip()\n            id_line = lines[current_line + 1].strip()\n            email_line = lines[current_line + 2].strip()\n            \n            # Extract name\n            name = name_line.replace(\"name: \", \"\").strip()\n            \n            # Extract id\n            student_id = id_line.replace(\"id: \", \"\").strip()\n            \n            # Extract email\n            email = email_line.replace(\"email: \", \"\").strip()\n            \n            students.append({\n                'name': name,\n                'id': student_id,\n                'email': email\n            })\n            \n            current_line += 4  # Skip to next student (after blank line)\n        \n        return students\n        \n    except Exception as e:\n        print(f\"❌ Error parsing qualified students file: {e}\")\n        return []\n\ndef verify_student_count(students: list) -> bool:\n    \"\"\"Verify that exactly 19 students are found.\"\"\"\n    expected_count = 19\n    actual_count = len(students)\n    \n    if actual_count != expected_count:\n        print(f\"❌ Expected {expected_count} students, but found {actual_count}\")\n        return False\n    \n    print(f\"✅ Found exactly {expected_count} students\")\n    return True\n\ndef verify_expected_students(students: list) -> bool:\n    \"\"\"Verify that all expected students are present with correct details.\"\"\"\n    # Expected students from answer.md\n    expected_students = {\n        'James Smith': {'id': '20177389', 'email': 'james.smith30@outlook.com'},\n        'Ava Lopez': {'id': '20166998', 'email': 'ava.lopez67@outlook.com'},\n        'James Anderson': {'id': '20153606', 'email': 'james.anderson71@yahoo.com'},\n        'Benjamin Anderson': {'id': '20136681', 'email': 'benjamin.anderson37@qq.com'},\n        'Sarah 
Wilson': {'id': '20158819', 'email': 'sarah.wilson96@outlook.com'},\n        'Isabella Davis': {'id': '20101701', 'email': 'isabella.davis89@gmail.com'},\n        'James Moore': {'id': '20188937', 'email': 'james.moore62@gmail.com'},\n        'Harper Williams': {'id': '20157943', 'email': 'harper.williams38@163.com'},\n        'Noah Smith': {'id': '20132669', 'email': 'noah.smith45@163.com'},\n        'Emma Thomas': {'id': '20109144', 'email': 'emma.thomas68@163.com'},\n        'Mary Brown': {'id': '20199583', 'email': 'mary.brown27@yahoo.com'},\n        'John Jones': {'id': '20201800', 'email': 'john.jones46@gmail.com'},\n        'Mia Anderson': {'id': '20162542', 'email': 'mia.anderson3@outlook.com'},\n        'Barbara Davis': {'id': '20126203', 'email': 'barbara.davis67@163.com'},\n        'Thomas Brown': {'id': '20119528', 'email': 'thomas.brown43@163.com'},\n        'Susan Anderson': {'id': '20148778', 'email': 'susan.anderson16@163.com'},\n        'Mary Garcia': {'id': '20174369', 'email': 'mary.garcia58@gmail.com'},\n        'Richard Wilson': {'id': '20174207', 'email': 'richard.wilson39@outlook.com'},\n        'Joseph Lopez': {'id': '20191265', 'email': 'joseph.lopez93@yahoo.com'}\n    }\n    \n    # Check if all expected students are present\n    found_students = set()\n    for student in students:\n        found_students.add(student['name'])\n    \n    missing_students = set(expected_students.keys()) - found_students\n    if missing_students:\n        print(f\"❌ Missing expected students: {missing_students}\")\n        return False\n    \n    # Check if all found students are expected\n    unexpected_students = found_students - set(expected_students.keys())\n    if unexpected_students:\n        print(f\"❌ Unexpected students found: {unexpected_students}\")\n        return False\n    \n    # Check if student details match exactly\n    for student in students:\n        expected = expected_students[student['name']]\n        if student['id'] != 
expected['id']:\n            print(f\"❌ ID mismatch for {student['name']}: expected {expected['id']}, got {student['id']}\")\n            return False\n        if student['email'] != expected['email']:\n            print(f\"❌ Email mismatch for {student['name']}: expected {expected['email']}, got {student['email']}\")\n            return False\n    \n    print(\"✅ All expected students are present with correct details\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Student Database Task: English Talent Recruitment...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Qualified Students File Exists\", verify_qualified_students_file_exists),\n        (\"File Format\", verify_file_format),\n    ]\n    \n    # Run basic verification steps first\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n            break\n    \n    if not all_passed:\n        print(\"\\n❌ Basic verification failed, cannot proceed with content verification\")\n        sys.exit(1)\n    \n    # Parse the file and run content verification\n    print(\"\\n--- Content Verification ---\")\n    students = parse_qualified_students_file(test_dir)\n    \n    if not students:\n        print(\"❌ Failed to parse qualified students file\")\n        sys.exit(1)\n    \n    content_verification_steps = [\n        (\"Student Count\", lambda: verify_student_count(students)),\n        (\"Expected Students\", lambda: verify_expected_students(students)),\n    ]\n    \n    for step_name, verify_func in content_verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func():\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ English talent recruitment 
completed correctly!\")\n        print(f\"🎉 Found exactly {len(students)} qualified students\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/student_database/gradebased_score/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Simple Grade Calculation\n\n1. Read Student Data:\n\n* Process all student basic_info.txt files from the database\n* Extract scores for Chinese, Math, and English subjects\n\n2. Calculate Basic Grades:\n\n* Use a simple grade scale: A (90+), B (80-89), C (70-79), D (60-69), F (<60)\n* Apply this same scale to all subjects\n\n### Generate Output Files\n\n1. Create student_grades.csv:\n\n* Columns: student_id, name, chinese_score, chinese_grade, math_score, math_grade, english_score, english_grade\n* Must contain every student exactly once\n* One row per student\n\n2. Create grade_summary.txt:\n\n* Total number of students processed\n* Number of A's, B's, C's, D's, and F's for each subject\n* Simple count of students with passing grades (A, B, C) vs failing grades (D, F) for each subject\n"
  },
  {
    "path": "tasks/filesystem/standard/student_database/gradebased_score/meta.json",
    "content": "{\n  \"task_id\": \"gradebased_score\",\n  \"task_name\": \"Gradebased Score\",\n  \"category_id\": \"student_database\",\n  \"category_name\": \"Student Database\",\n  \"description\": \"Process student numerical scores to calculate letter grades using A-F scale and produce comprehensive grade distribution analysis reports.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-10\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"content transformation\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"student_database/\\n    ├── 20101250_Patricia_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20101701_Isabella_Davis/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20102572_Michael_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104233_Robert_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104498_Sarah_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104653_Sophia_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104675_Michael_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20104846_Christopher_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20107487_Mia_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20108742_Sarah_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20109144_Emma_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20109803_Oliver_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── 
recommendation_letter.txt\\n    ├── 20111634_Isabella_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20112439_Christopher_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20113368_William_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20113603_Robert_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20114397_Isabella_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20114869_Ethan_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115252_Mason_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115632_Elizabeth_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115753_Charlotte_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20115924_Michael_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20116232_Olivia_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20119528_Thomas_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20122427_Karen_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20122977_Evelyn_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20123376_Joseph_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20125451_Barbara_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20126203_Barbara_Davis/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20126394_Olivia_Williams/\\n    │       ├── 
basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20126471_Ethan_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20127423_John_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20128249_Oliver_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20128879_Christopher_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20129898_Jessica_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20131271_Olivia_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20131518_Sophia_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20132026_Isabella_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20132370_James_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20132669_Noah_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20133527_Mason_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20133697_Isabella_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20135821_Thomas_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20136681_Benjamin_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20136890_Benjamin_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20137514_Lucas_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20139234_Harper_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20139637_Noah_Johnson/\\n    │      
 ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20139647_Patricia_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20141421_Linda_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20142085_William_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20142383_Amelia_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20143406_Susan_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20143830_James_Garcia/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20146035_Christopher_Garcia/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20146277_William_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20146279_Christopher_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20147301_James_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20147789_James_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20148681_John_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20148778_Susan_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20149712_Jessica_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20151012_Harper_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20153174_Benjamin_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20153412_Charlotte_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 
20153606_James_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20153687_Richard_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20154518_John_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20154710_Benjamin_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20156469_Jennifer_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20156522_Jennifer_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20156851_Noah_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20157943_Harper_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20158266_Sophia_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20158294_Sophia_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20158819_Sarah_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20159113_John_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20159695_James_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20161279_William_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20162253_Mason_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20162542_Mia_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20163356_Ava_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20164515_Patricia_Moore/\\n    │       ├── basic_info.txt\\n    │       └── 
recommendation_letter.txt\\n    ├── 20164801_Noah_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20165511_Mary_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166436_Christopher_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166487_Barbara_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166564_Ava_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20166998_Ava_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20168311_Lucas_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20168491_Karen_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20169515_Thomas_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20171050_Christopher_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20171406_Mary_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20171613_Ethan_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20172106_Isabella_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173259_Michael_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173492_Richard_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173501_Mary_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20173517_Susan_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20174207_Richard_Wilson/\\n    │       ├── 
basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20174369_Mary_Garcia/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20175314_William_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20176169_Lucas_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20176947_Noah_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20177389_James_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20178687_Isabella_Anderson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20179461_William_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20179690_Linda_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20181056_Sarah_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20182020_Patricia_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20182390_Ethan_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20183149_David_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20183219_Charlotte_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20184489_Jessica_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20186154_Charlotte_Smith/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20186510_James_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187107_David_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187144_Mary_Jackson/\\n    │     
  ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187892_Christopher_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187921_Mary_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20187967_Sarah_Davis/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20188937_James_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189123_Mary_Martin/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189192_Olivia_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189268_Emma_Williams/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20189854_William_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20191265_Joseph_Lopez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20192725_Robert_Martinez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194054_Michael_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194160_Benjamin_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194164_Sarah_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20194525_John_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20195164_Jennifer_Gonzalez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20195982_David_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196776_William_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196896_Olivia_Jones/\\n    │       
├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196961_Joseph_Thomas/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20196998_Ethan_Wilson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20198548_Evelyn_Moore/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199036_Benjamin_Hernandez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199583_Mary_Brown/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199735_Mason_Johnson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199872_Sophia_Jackson/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20199980_James_Rodriguez/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20201385_John_Taylor/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20201800_John_Jones/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20202548_Robert_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    ├── 20203855_Mia_Miller/\\n    │       ├── basic_info.txt\\n    │       └── recommendation_letter.txt\\n    └── 20204611_Sarah_Wilson/\\n            ├── basic_info.txt\\n            └── recommendation_letter.txt\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/student_database.zip\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/student_database/gradebased_score/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Student Database Grade-Based Score Analysis Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\nimport re\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_grade_summary_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that grade_summary.txt file exists.\"\"\"\n    grade_summary_file = test_dir / \"grade_summary.txt\"\n    \n    if not grade_summary_file.exists():\n        print(\"❌ File 'grade_summary.txt' not found\")\n        return False\n    \n    print(\"✅ grade_summary.txt file found\")\n    return True\n\ndef verify_grade_summary_readable(test_dir: Path) -> bool:\n    \"\"\"Verify that the grade_summary.txt file is readable.\"\"\"\n    grade_summary_file = test_dir / \"grade_summary.txt\"\n    \n    try:\n        content = grade_summary_file.read_text()\n        if not content.strip():\n            print(\"❌ grade_summary.txt file is empty\")\n            return False\n        \n        print(\"✅ grade_summary.txt file is readable\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading grade_summary.txt file: {e}\")\n        return False\n\ndef extract_numbers_from_text(text: str) -> list:\n    \"\"\"Extract all numbers from text.\"\"\"\n    numbers = re.findall(r'\\d+', text)\n    return [int(num) for num in numbers]\n\ndef verify_three_subjects_present(test_dir: Path) -> bool:\n    \"\"\"Verify that grade_summary.txt contains all three subjects (case insensitive).\"\"\"\n    grade_summary_file = test_dir / \"grade_summary.txt\"\n    \n    try:\n        content = grade_summary_file.read_text()\n        \n        # Check if all three subjects are mentioned (case 
insensitive)\n        subjects = [\"chinese\", \"math\", \"english\"]\n        missing_subjects = []\n        \n        for subject in subjects:\n            if subject.lower() not in content.lower():\n                missing_subjects.append(subject)\n        \n        if missing_subjects:\n            print(f\"❌ Missing subjects in grade_summary.txt: {missing_subjects}\")\n            return False\n        \n        print(\"✅ All three subjects (Chinese, Math, English) found in grade_summary.txt\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking subjects: {e}\")\n        return False\n\ndef verify_grade_summary_content(test_dir: Path) -> bool:\n    \"\"\"Verify that grade_summary.txt contains the correct statistics from answer.md.\"\"\"\n    grade_summary_file = test_dir / \"grade_summary.txt\"\n    \n    try:\n        content = grade_summary_file.read_text()\n        \n        # Extract all numbers from the content\n        found_numbers = extract_numbers_from_text(content)\n        \n        if not found_numbers:\n            print(\"❌ No numbers found in grade_summary.txt\")\n            return False\n        \n        # Expected numbers from answer.md\n        # Format: [total_students, chinese_A, chinese_B, chinese_C, chinese_D, chinese_pass, chinese_fail,\n        #          math_A, math_B, math_C, math_D, math_pass, math_fail,\n        #          english_A, english_B, english_C, english_D, english_F, english_pass, english_fail]\n        expected_numbers = [\n            # Total students\n            150,\n            # Chinese grades: A(42), B(37), C(43), D(28), Pass(122), Fail(28)\n            42, 37, 43, 28, 122, 28,\n            # Math grades: A(31), B(38), C(47), D(34), Pass(116), Fail(34)  \n            31, 38, 47, 34, 116, 34,\n            # English grades: A(32), B(38), C(38), D(41), F(1), Pass(108), Fail(42)\n            32, 38, 38, 41, 1, 108, 42\n        ]\n        \n        # Check if all expected 
numbers are present in the found numbers\n        missing_numbers = []\n        for expected in expected_numbers:\n            if expected not in found_numbers:\n                missing_numbers.append(expected)\n        \n        if missing_numbers:\n            print(f\"❌ Missing expected numbers: {missing_numbers}\")\n            print(f\"   Found numbers: {found_numbers}\")\n            return False\n        \n        # Check if the counts match (each number should appear the expected number of times)\n        for expected in expected_numbers:\n            expected_count = expected_numbers.count(expected)\n            found_count = found_numbers.count(expected)\n            if found_count < expected_count:\n                print(f\"❌ Number {expected} appears {found_count} times, expected {expected_count} times\")\n                return False\n        \n        print(\"✅ All expected grade statistics found in grade_summary.txt\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying grade summary content: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        test_dir = get_test_directory()\n        print(f\"🔍 Verifying Student Database Grade-Based Score Analysis in: {test_dir}\")\n        \n        # Define verification steps\n        verification_steps = [\n            (\"Grade Summary File Exists\", verify_grade_summary_exists),\n            (\"File is Readable\", verify_grade_summary_readable),\n            (\"Three Subjects Present\", verify_three_subjects_present),\n            (\"Grade Statistics Content\", verify_grade_summary_content),\n        ]\n        \n        # Run all verification steps\n        all_passed = True\n        for step_name, verify_func in verification_steps:\n            print(f\"\\n--- {step_name} ---\")\n            if not verify_func(test_dir):\n                all_passed = False\n        \n        # Final result\n        print(\"\\n\" + 
\"=\"*50)\n        if all_passed:\n            print(\"✅ Student grade analysis completed correctly!\")\n            print(\"🎉 Grade-Based Score Analysis verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"❌ Grade-Based Score Analysis verification: FAIL\")\n            sys.exit(1)\n            \n    except Exception as e:\n        print(f\"❌ Verification failed with error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/threestudio/code_locating/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nThreeStudio is a comprehensive codebase that implements various diffusion-based text-to-3D models, including a NeRF-based rendering stage and a diffusion guidance stage. Your task is to explore the codebase and identify the specific file that defines the guidance functionality for the Zero123 model.\n\n### Task Objectives\n\n1. **Explore the ThreeStudio codebase** using filesystem MCP tools\n2. **Search through the project structure** to understand the codebase organization\n3. **Identify the file** that contains the Zero123 guidance implementation\n4. **Create an answer file** with the correct file path\n\n### Expected Output\n\nCreate a file named `answer.txt` in the test directory root containing the file path.\n\n**Requirements:**\n- Only include the file path, no additional text or explanation\n- Use forward slashes (/) for path separators\n- Include the full relative path from the project root\n- Ensure the path points to the actual file that defines Zero123 guidance"
  },
  {
    "path": "tasks/filesystem/standard/threestudio/code_locating/meta.json",
    "content": "{\n  \"task_id\": \"code_locating\",\n  \"task_name\": \"Code Locating\",\n  \"category_id\": \"threestudio\",\n  \"category_name\": \"Threestudio\",\n  \"description\": \"Navigate the ThreeStudio codebase to locate and identify the specific file that defines Zero123 guidance functionality implementation.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-05\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"code exploration\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"threestudio/\\n    ├── configs/\\n    │       ├── debugging/\\n    │       │       ├── controlnet-canny.yaml\\n    │       │       ├── controlnet-normal.yaml\\n    │       │       ├── instructpix2pix.yaml\\n    │       │       └── stablediffusion.yaml\\n    │       ├── experimental/\\n    │       │       ├── unified-guidance/\\n    │       │       │       ├── dreamfusion-sd.yaml\\n    │       │       │       ├── hifa.yaml\\n    │       │       │       ├── prolificdreamer-hifa.yaml\\n    │       │       │       ├── prolificdreamer.yaml\\n    │       │       │       └── zero123-simple.yaml\\n    │       │       ├── co3d-imagecondition.yaml\\n    │       │       ├── imagecondition.yaml\\n    │       │       ├── imagecondition_zero123nerf.yaml\\n    │       │       ├── imagecondition_zero123nerf_refine.yaml\\n    │       │       ├── prolificdreamer-importance.yaml\\n    │       │       ├── prolificdreamer-neus-importance.yaml\\n    │       │       ├── prolificdreamer-propnet.yaml\\n    │       │       └── textmesh-if-importance.yaml\\n    │       ├── gradio/\\n    │       │       ├── dreamfusion-if.yaml\\n    │       │       ├── dreamfusion-sd.yaml\\n    │       │       ├── fantasia3d.yaml\\n    │       │       ├── latentnerf.yaml\\n    │       │       ├── sjc.yaml\\n    │       │       └── textmesh-if.yaml\\n    │       ├── control4d-static.yaml\\n    │       ├── dreamfusion-if.yaml\\n    │       
├── dreamfusion-sd-eff.yaml\\n    │       ├── dreamfusion-sd.yaml\\n    │       ├── fantasia3d-texture.yaml\\n    │       ├── fantasia3d.yaml\\n    │       ├── hifa.yaml\\n    │       ├── instructnerf2nerf.yaml\\n    │       ├── latentnerf-refine.yaml\\n    │       ├── latentnerf.yaml\\n    │       ├── magic123-coarse-sd.yaml\\n    │       ├── magic123-hifa-coarse-sd.yaml\\n    │       ├── magic123-hifa-refine-sd.yaml\\n    │       ├── magic123-refine-sd.yaml\\n    │       ├── magic3d-coarse-if.yaml\\n    │       ├── magic3d-coarse-sd.yaml\\n    │       ├── magic3d-refine-sd.yaml\\n    │       ├── prolificdreamer-geometry.yaml\\n    │       ├── prolificdreamer-hifa.yaml\\n    │       ├── prolificdreamer-patch.yaml\\n    │       ├── prolificdreamer-scene-hifa.yaml\\n    │       ├── prolificdreamer-scene.yaml\\n    │       ├── prolificdreamer-texture.yaml\\n    │       ├── prolificdreamer.yaml\\n    │       ├── sdi.yaml\\n    │       ├── sjc.yaml\\n    │       ├── sketchshape-refine.yaml\\n    │       ├── sketchshape.yaml\\n    │       ├── stable-zero123.yaml\\n    │       ├── textmesh-if.yaml\\n    │       ├── zero123-geometry.yaml\\n    │       └── zero123.yaml\\n    ├── custom/\\n    │       └── put_custom_extensions_here\\n    ├── docker/\\n    │       ├── compose.yaml\\n    │       └── Dockerfile\\n    ├── docs/\\n    │       └── installation.md\\n    ├── extern/\\n    │       ├── ldm_zero123/\\n    │       │       ├── models/\\n    │       │       │       ├── diffusion/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       ├── classifier.py\\n    │       │       │       │       ├── ddim.py\\n    │       │       │       │       ├── ddpm.py\\n    │       │       │       │       ├── plms.py\\n    │       │       │       │       └── sampling_util.py\\n    │       │       │       └── autoencoder.py\\n    │       │       ├── modules/\\n    │       │       │       ├── diffusionmodules/\\n    │       │       │       │       ├── 
__init__.py\\n    │       │       │       │       ├── model.py\\n    │       │       │       │       ├── openaimodel.py\\n    │       │       │       │       └── util.py\\n    │       │       │       ├── distributions/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       └── distributions.py\\n    │       │       │       ├── encoders/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       └── modules.py\\n    │       │       │       ├── evaluate/\\n    │       │       │       │       ├── adm_evaluator.py\\n    │       │       │       │       ├── evaluate_perceptualsim.py\\n    │       │       │       │       ├── frechet_video_distance.py\\n    │       │       │       │       ├── ssim.py\\n    │       │       │       │       └── torch_frechet_video_distance.py\\n    │       │       │       ├── image_degradation/\\n    │       │       │       │       ├── utils/\\n    │       │       │       │       │       └── test.png\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       ├── bsrgan.py\\n    │       │       │       │       ├── bsrgan_light.py\\n    │       │       │       │       └── utils_image.py\\n    │       │       │       ├── losses/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       ├── contperceptual.py\\n    │       │       │       │       └── vqperceptual.py\\n    │       │       │       ├── attention.py\\n    │       │       │       ├── ema.py\\n    │       │       │       └── x_transformer.py\\n    │       │       ├── thirdp/\\n    │       │       │       └── psp/\\n    │       │       │               ├── helpers.py\\n    │       │       │               ├── id_loss.py\\n    │       │       │               └── model_irse.py\\n    │       │       ├── __init__.py\\n    │       │       ├── extras.py\\n    │       │       ├── guidance.py\\n    │       │       ├── lr_scheduler.py\\n    │       │       └── 
util.py\\n    │       ├── __init__.py\\n    │       └── zero123.py\\n    ├── load/\\n    │       ├── images/\\n    │       │       ├── anya_front.png\\n    │       │       ├── anya_front_depth.png\\n    │       │       ├── anya_front_normal.png\\n    │       │       ├── anya_front_rgba.png\\n    │       │       ├── baby_phoenix_on_ice.png\\n    │       │       ├── baby_phoenix_on_ice_depth.png\\n    │       │       ├── baby_phoenix_on_ice_normal.png\\n    │       │       ├── baby_phoenix_on_ice_rgba.png\\n    │       │       ├── beach_house_1.png\\n    │       │       ├── beach_house_1_depth.png\\n    │       │       ├── beach_house_1_normal.png\\n    │       │       ├── beach_house_1_rgba.png\\n    │       │       ├── beach_house_2.png\\n    │       │       ├── beach_house_2_depth.png\\n    │       │       ├── beach_house_2_normal.png\\n    │       │       ├── beach_house_2_rgba.png\\n    │       │       ├── bollywood_actress.png\\n    │       │       ├── bollywood_actress_depth.png\\n    │       │       ├── bollywood_actress_normal.png\\n    │       │       ├── bollywood_actress_rgba.png\\n    │       │       ├── cactus.png\\n    │       │       ├── cactus_depth.png\\n    │       │       ├── cactus_normal.png\\n    │       │       ├── cactus_rgba.png\\n    │       │       ├── catstatue.png\\n    │       │       ├── catstatue_depth.png\\n    │       │       ├── catstatue_normal.png\\n    │       │       ├── catstatue_rgba.png\\n    │       │       ├── church_ruins.png\\n    │       │       ├── church_ruins_depth.png\\n    │       │       ├── church_ruins_normal.png\\n    │       │       ├── church_ruins_rgba.png\\n    │       │       ├── dog1_rgba.png\\n    │       │       ├── dragon2_rgba.png\\n    │       │       ├── firekeeper.jpg\\n    │       │       ├── firekeeper_depth.png\\n    │       │       ├── firekeeper_normal.png\\n    │       │       ├── firekeeper_rgba.png\\n    │       │       ├── futuristic_car.png\\n    │       │       ├── 
futuristic_car_depth.png\\n    │       │       ├── futuristic_car_normal.png\\n    │       │       ├── futuristic_car_rgba.png\\n    │       │       ├── grootplant_rgba.png\\n    │       │       ├── hamburger.png\\n    │       │       ├── hamburger_depth.png\\n    │       │       ├── hamburger_rgba.png\\n    │       │       ├── mona_lisa.png\\n    │       │       ├── mona_lisa_depth.png\\n    │       │       ├── mona_lisa_normal.png\\n    │       │       ├── mona_lisa_rgba.png\\n    │       │       ├── robot_rgba.png\\n    │       │       ├── teddy.png\\n    │       │       ├── teddy_depth.png\\n    │       │       ├── teddy_normal.png\\n    │       │       ├── teddy_rgba.png\\n    │       │       └── thorhammer_rgba.png\\n    │       ├── lights/\\n    │       │       ├── bsdf_256_256.bin\\n    │       │       ├── LICENSE.txt\\n    │       │       └── mud_road_puresky_1k.hdr\\n    │       ├── shapes/\\n    │       │       ├── animal.obj\\n    │       │       ├── blub.obj\\n    │       │       ├── cabin.obj\\n    │       │       ├── env_sphere.obj\\n    │       │       ├── hand_prismatic.obj\\n    │       │       ├── human.obj\\n    │       │       ├── nascar.obj\\n    │       │       ├── potion.obj\\n    │       │       ├── README.md\\n    │       │       └── teddy.obj\\n    │       ├── tets/\\n    │       │       ├── 128_tets.npz\\n    │       │       ├── 32_tets.npz\\n    │       │       ├── 64_tets.npz\\n    │       │       └── generate_tets.py\\n    │       ├── zero123/\\n    │       │       ├── download.sh\\n    │       │       └── sd-objaverse-finetune-c_concat-256.yaml\\n    │       ├── make_prompt_library.py\\n    │       └── prompt_library.json\\n    ├── scripts/\\n    │       └── convert_zero123_to_diffusers.py\\n    ├── threestudio/\\n    │       ├── data/\\n    │       │       ├── __init__.py\\n    │       │       ├── co3d.py\\n    │       │       ├── image.py\\n    │       │       ├── multiview.py\\n    │       │       ├── uncond.py\\n    │       │     
  └── uncond_eff.py\\n    │       ├── models/\\n    │       │       ├── background/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── neural_environment_map_background.py\\n    │       │       │       ├── solid_color_background.py\\n    │       │       │       └── textured_background.py\\n    │       │       ├── exporters/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       └── mesh_exporter.py\\n    │       │       ├── geometry/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── custom_mesh.py\\n    │       │       │       ├── implicit_sdf.py\\n    │       │       │       ├── implicit_volume.py\\n    │       │       │       ├── tetrahedra_sdf_grid.py\\n    │       │       │       └── volume_grid.py\\n    │       │       ├── guidance/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── controlnet_guidance.py\\n    │       │       │       ├── deep_floyd_guidance.py\\n    │       │       │       ├── instructpix2pix_guidance.py\\n    │       │       │       ├── stable_diffusion_guidance.py\\n    │       │       │       ├── stable_diffusion_sdi_guidance.py\\n    │       │       │       ├── stable_diffusion_unified_guidance.py\\n    │       │       │       ├── stable_diffusion_vsd_guidance.py\\n    │       │       │       ├── stable_zero123_guidance.py\\n    │       │       │       ├── zero123_guidance.py\\n    │       │       │       └── zero123_unified_guidance.py\\n    │       │       ├── materials/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── diffuse_with_point_light_material.py\\n    │       │       │       ├── hybrid_rgb_latent_material.py\\n    │       │       │       ├── neural_radiance_material.py\\n    │       │       │       ├── no_material.py\\n    │ 
      │       │       ├── pbr_material.py\\n    │       │       │       └── sd_latent_adapter_material.py\\n    │       │       ├── prompt_processors/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── deepfloyd_prompt_processor.py\\n    │       │       │       ├── dummy_prompt_processor.py\\n    │       │       │       └── stable_diffusion_prompt_processor.py\\n    │       │       ├── renderers/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── deferred_volume_renderer.py\\n    │       │       │       ├── gan_volume_renderer.py\\n    │       │       │       ├── nerf_volume_renderer.py\\n    │       │       │       ├── neus_volume_renderer.py\\n    │       │       │       ├── nvdiff_rasterizer.py\\n    │       │       │       └── patch_renderer.py\\n    │       │       ├── __init__.py\\n    │       │       ├── estimators.py\\n    │       │       ├── isosurface.py\\n    │       │       ├── mesh.py\\n    │       │       └── networks.py\\n    │       ├── scripts/\\n    │       │       ├── make_training_vid.py\\n    │       │       ├── run_zero123.sh\\n    │       │       ├── run_zero123_comparison.sh\\n    │       │       ├── run_zero123_phase.sh\\n    │       │       ├── run_zero123_phase2.sh\\n    │       │       ├── run_zero123_sbatch.py\\n    │       │       ├── zero123_demo.py\\n    │       │       └── zero123_sbatch.sh\\n    │       ├── systems/\\n    │       │       ├── __init__.py\\n    │       │       ├── base.py\\n    │       │       ├── control4d_multiview.py\\n    │       │       ├── dreamfusion.py\\n    │       │       ├── eff_dreamfusion.py\\n    │       │       ├── fantasia3d.py\\n    │       │       ├── imagedreamfusion.py\\n    │       │       ├── instructnerf2nerf.py\\n    │       │       ├── latentnerf.py\\n    │       │       ├── magic123.py\\n    │       │       ├── magic3d.py\\n    │       │       ├── 
optimizers.py\\n    │       │       ├── prolificdreamer.py\\n    │       │       ├── sdi.py\\n    │       │       ├── sjc.py\\n    │       │       ├── textmesh.py\\n    │       │       ├── utils.py\\n    │       │       ├── zero123.py\\n    │       │       └── zero123_simple.py\\n    │       ├── utils/\\n    │       │       ├── GAN/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── attention.py\\n    │       │       │       ├── discriminator.py\\n    │       │       │       ├── distribution.py\\n    │       │       │       ├── loss.py\\n    │       │       │       ├── mobilenet.py\\n    │       │       │       ├── network_util.py\\n    │       │       │       ├── util.py\\n    │       │       │       └── vae.py\\n    │       │       ├── perceptual/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── perceptual.py\\n    │       │       │       └── utils.py\\n    │       │       ├── __init__.py\\n    │       │       ├── base.py\\n    │       │       ├── callbacks.py\\n    │       │       ├── config.py\\n    │       │       ├── loss.py\\n    │       │       ├── misc.py\\n    │       │       ├── ops.py\\n    │       │       ├── rasterize.py\\n    │       │       ├── saving.py\\n    │       │       └── typing.py\\n    │       └── __init__.py\\n    ├── .editorconfig\\n    ├── .pre-commit-config.yaml\\n    ├── .pylintrc\\n    ├── 2dplayground.ipynb\\n    ├── 2dplayground_SDI_version.ipynb\\n    ├── CHANGELOG.md\\n    ├── DOCUMENTATION.md\\n    ├── gradio_app.py\\n    ├── launch.py\\n    ├── LICENSE\\n    ├── README.md\\n    ├── requirements-dev.txt\\n    ├── requirements.txt\\n    ├── setup.py\\n    └── threestudio.ipynb\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/threestudio.zip\",\n    \"stateOriginalUrl\": \"https://github.com/threestudio-project/threestudio\"\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/threestudio/code_locating/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for ThreeStudio Task 1: Find Zero123 Guidance Implementation\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport re\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_answer_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer.txt file exists.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    if not answer_file.exists():\n        print(\"❌ File 'answer.txt' not found\")\n        return False\n    \n    print(\"✅ Answer file found\")\n    return True\n\ndef verify_answer_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer file has the correct format.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # Check if content is not empty\n        if not content:\n            print(\"❌ Answer file is empty\")\n            return False\n        \n        # Check if it contains only the file path (no additional text)\n        if len(content.split('\\n')) > 1:\n            print(\"❌ Answer file contains multiple lines or additional text\")\n            return False\n        \n        # Check if it uses forward slashes\n        if '\\\\' in content:\n            print(\"❌ Answer uses backslashes instead of forward slashes\")\n            return False\n        \n        # Check if it's a relative path\n        if content.startswith('/') or ':' in content:\n            print(\"❌ Answer appears to be an absolute path\")\n            return False\n        \n        print(\"✅ Answer format is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading answer file: 
{e}\")\n        return False\n\ndef verify_file_path_structure(test_dir: Path) -> bool:\n    \"\"\"Verify that the file path has the expected structure.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # Expected path components for Zero123 guidance\n        # In backup directories, the path is threestudio/models/guidance/zero123_guidance.py\n        # In test_environments, the path is threestudio/threestudio/models/guidance/zero123_guidance.py\n        expected_components = [\"threestudio\", \"models\", \"guidance\", \"zero123_guidance.py\"]\n        \n        # Check if all expected components are in the path\n        for component in expected_components:\n            if component not in content:\n                print(f\"❌ Path missing expected component: {component}\")\n                return False\n        \n        print(\"✅ File path structure is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file path structure: {e}\")\n        return False\n\ndef verify_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the identified file actually exists.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # Try the path as provided in the answer file\n        file_path = test_dir / content\n        \n        # If that doesn't exist, try with the correct path structure\n        # The answer file might have threestudio/models/guidance/zero123_guidance.py\n        # but the actual path is threestudio/threestudio/models/guidance/zero123_guidance.py\n        if not file_path.exists():\n            # Try to fix the path by adding the missing threestudio prefix\n            if content.startswith(\"threestudio/models/\"):\n                corrected_path = content.replace(\"threestudio/models/\", \"threestudio/threestudio/models/\")\n          
      file_path = test_dir / corrected_path\n                if file_path.exists():\n                    print(f\"✅ File exists with corrected path: {corrected_path}\")\n                    return True\n        \n        if not file_path.exists():\n            print(f\"❌ Identified file does not exist: {content}\")\n            return False\n        \n        print(\"✅ Identified file exists\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file existence: {e}\")\n        return False\n\ndef verify_zero123_guidance_content(test_dir: Path) -> bool:\n    \"\"\"Verify that the identified file actually contains Zero123 guidance implementation.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # Try the path as provided in the answer file\n        file_path = test_dir / content\n        \n        # If that doesn't exist, try with the correct path structure\n        if not file_path.exists():\n            # Try to fix the path by adding the missing threestudio prefix\n            if content.startswith(\"threestudio/models/\"):\n                corrected_path = content.replace(\"threestudio/models/\", \"threestudio/threestudio/models/\")\n                file_path = test_dir / corrected_path\n        \n        if not file_path.exists():\n            print(f\"❌ Cannot find file for content verification: {content}\")\n            return False\n        \n        file_content = file_path.read_text()\n        \n        # Check for the main Zero123 guidance implementation\n        # The main implementation should have the class name \"Zero123Guidance\" and register as \"zero123-guidance\"\n        main_zero123_indicators = [\n            r'class Zero123Guidance',  # Main class name\n            r'@threestudio\\.register\\(\"zero123-guidance\"\\)',  # Correct registration\n            r'BaseObject',  # Base class\n            r'zero123',  # 
General zero123 reference\n        ]\n        \n        found_indicators = []\n        for indicator in main_zero123_indicators:\n            if re.search(indicator, file_content, re.IGNORECASE):\n                found_indicators.append(indicator)\n        \n        # Check if this is the main Zero123 guidance implementation\n        is_main_implementation = (\n            'class Zero123Guidance' in file_content and \n            '@threestudio.register(\"zero123-guidance\")' in file_content\n        )\n        \n        if not is_main_implementation:\n            print(f\"❌ File is not the main Zero123 guidance implementation\")\n            print(f\"   Expected: class Zero123Guidance and @threestudio.register('zero123-guidance')\")\n            return False\n        \n        print(f\"✅ File contains main Zero123 guidance implementation indicators: {found_indicators}\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file content: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying ThreeStudio Task 1: Find Zero123 Guidance Implementation...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Answer File Exists\", verify_answer_file_exists),\n        (\"Answer Format\", verify_answer_format),\n        (\"File Path Structure\", verify_file_path_structure),\n        (\"File Exists\", verify_file_exists),\n        (\"Zero123 Guidance Content\", verify_zero123_guidance_content),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Zero123 guidance file path identified correctly!\")\n        print(\"🎉 Task 1 
verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task 1 verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/threestudio/output_analysis/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nThreeStudio is a comprehensive codebase that implements various diffusion-based text-to-3D models, including NeRF-based rendering stage and diffusion guidance stage. Your task is to explore the codebase and identify the specific file that defines the guidance functionality for the Zero123 model.\n\n### Task\n\nWhat is the output of `guidance_out`, returned by the code at line 137 in `threestudio/systems/zero123.py`?\n\nClearly state the structure of it and where you find the answer (file and line numbers).Write your answer in a file named `answer.txt` in the test directory root. Do not add extra explanation or formatting beyond what is required by the task.\n"
  },
  {
    "path": "tasks/filesystem/standard/threestudio/output_analysis/meta.json",
    "content": "{\n  \"task_id\": \"output_analysis\",\n  \"task_name\": \"Output Analysis\",\n  \"category_id\": \"threestudio\",\n  \"category_name\": \"Threestudio\",\n  \"description\": \"Analyze the structure and components of guidance_out object returned by Zero123 guidance code at line 137 for understanding output format.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-05\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"code exploration\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"threestudio/\\n    ├── configs/\\n    │       ├── debugging/\\n    │       │       ├── controlnet-canny.yaml\\n    │       │       ├── controlnet-normal.yaml\\n    │       │       ├── instructpix2pix.yaml\\n    │       │       └── stablediffusion.yaml\\n    │       ├── experimental/\\n    │       │       ├── unified-guidance/\\n    │       │       │       ├── dreamfusion-sd.yaml\\n    │       │       │       ├── hifa.yaml\\n    │       │       │       ├── prolificdreamer-hifa.yaml\\n    │       │       │       ├── prolificdreamer.yaml\\n    │       │       │       └── zero123-simple.yaml\\n    │       │       ├── co3d-imagecondition.yaml\\n    │       │       ├── imagecondition.yaml\\n    │       │       ├── imagecondition_zero123nerf.yaml\\n    │       │       ├── imagecondition_zero123nerf_refine.yaml\\n    │       │       ├── prolificdreamer-importance.yaml\\n    │       │       ├── prolificdreamer-neus-importance.yaml\\n    │       │       ├── prolificdreamer-propnet.yaml\\n    │       │       └── textmesh-if-importance.yaml\\n    │       ├── gradio/\\n    │       │       ├── dreamfusion-if.yaml\\n    │       │       ├── dreamfusion-sd.yaml\\n    │       │       ├── fantasia3d.yaml\\n    │       │       ├── latentnerf.yaml\\n    │       │       ├── sjc.yaml\\n    │       │       └── textmesh-if.yaml\\n    │       ├── control4d-static.yaml\\n    │       
├── dreamfusion-if.yaml\\n    │       ├── dreamfusion-sd-eff.yaml\\n    │       ├── dreamfusion-sd.yaml\\n    │       ├── fantasia3d-texture.yaml\\n    │       ├── fantasia3d.yaml\\n    │       ├── hifa.yaml\\n    │       ├── instructnerf2nerf.yaml\\n    │       ├── latentnerf-refine.yaml\\n    │       ├── latentnerf.yaml\\n    │       ├── magic123-coarse-sd.yaml\\n    │       ├── magic123-hifa-coarse-sd.yaml\\n    │       ├── magic123-hifa-refine-sd.yaml\\n    │       ├── magic123-refine-sd.yaml\\n    │       ├── magic3d-coarse-if.yaml\\n    │       ├── magic3d-coarse-sd.yaml\\n    │       ├── magic3d-refine-sd.yaml\\n    │       ├── prolificdreamer-geometry.yaml\\n    │       ├── prolificdreamer-hifa.yaml\\n    │       ├── prolificdreamer-patch.yaml\\n    │       ├── prolificdreamer-scene-hifa.yaml\\n    │       ├── prolificdreamer-scene.yaml\\n    │       ├── prolificdreamer-texture.yaml\\n    │       ├── prolificdreamer.yaml\\n    │       ├── sdi.yaml\\n    │       ├── sjc.yaml\\n    │       ├── sketchshape-refine.yaml\\n    │       ├── sketchshape.yaml\\n    │       ├── stable-zero123.yaml\\n    │       ├── textmesh-if.yaml\\n    │       ├── zero123-geometry.yaml\\n    │       └── zero123.yaml\\n    ├── custom/\\n    │       └── put_custom_extensions_here\\n    ├── docker/\\n    │       ├── compose.yaml\\n    │       └── Dockerfile\\n    ├── docs/\\n    │       └── installation.md\\n    ├── extern/\\n    │       ├── ldm_zero123/\\n    │       │       ├── models/\\n    │       │       │       ├── diffusion/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       ├── classifier.py\\n    │       │       │       │       ├── ddim.py\\n    │       │       │       │       ├── ddpm.py\\n    │       │       │       │       ├── plms.py\\n    │       │       │       │       └── sampling_util.py\\n    │       │       │       └── autoencoder.py\\n    │       │       ├── modules/\\n    │       │       │       ├── diffusionmodules/\\n    │ 
      │       │       │       ├── __init__.py\\n    │       │       │       │       ├── model.py\\n    │       │       │       │       ├── openaimodel.py\\n    │       │       │       │       └── util.py\\n    │       │       │       ├── distributions/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       └── distributions.py\\n    │       │       │       ├── encoders/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       └── modules.py\\n    │       │       │       ├── evaluate/\\n    │       │       │       │       ├── adm_evaluator.py\\n    │       │       │       │       ├── evaluate_perceptualsim.py\\n    │       │       │       │       ├── frechet_video_distance.py\\n    │       │       │       │       ├── ssim.py\\n    │       │       │       │       └── torch_frechet_video_distance.py\\n    │       │       │       ├── image_degradation/\\n    │       │       │       │       ├── utils/\\n    │       │       │       │       │       └── test.png\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       ├── bsrgan.py\\n    │       │       │       │       ├── bsrgan_light.py\\n    │       │       │       │       └── utils_image.py\\n    │       │       │       ├── losses/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       ├── contperceptual.py\\n    │       │       │       │       └── vqperceptual.py\\n    │       │       │       ├── attention.py\\n    │       │       │       ├── ema.py\\n    │       │       │       └── x_transformer.py\\n    │       │       ├── thirdp/\\n    │       │       │       └── psp/\\n    │       │       │               ├── helpers.py\\n    │       │       │               ├── id_loss.py\\n    │       │       │               └── model_irse.py\\n    │       │       ├── __init__.py\\n    │       │       ├── extras.py\\n    │       │       ├── guidance.py\\n    │       │       ├── 
lr_scheduler.py\\n    │       │       └── util.py\\n    │       ├── __init__.py\\n    │       └── zero123.py\\n    ├── load/\\n    │       ├── images/\\n    │       │       ├── anya_front.png\\n    │       │       ├── anya_front_depth.png\\n    │       │       ├── anya_front_normal.png\\n    │       │       ├── anya_front_rgba.png\\n    │       │       ├── baby_phoenix_on_ice.png\\n    │       │       ├── baby_phoenix_on_ice_depth.png\\n    │       │       ├── baby_phoenix_on_ice_normal.png\\n    │       │       ├── baby_phoenix_on_ice_rgba.png\\n    │       │       ├── beach_house_1.png\\n    │       │       ├── beach_house_1_depth.png\\n    │       │       ├── beach_house_1_normal.png\\n    │       │       ├── beach_house_1_rgba.png\\n    │       │       ├── beach_house_2.png\\n    │       │       ├── beach_house_2_depth.png\\n    │       │       ├── beach_house_2_normal.png\\n    │       │       ├── beach_house_2_rgba.png\\n    │       │       ├── bollywood_actress.png\\n    │       │       ├── bollywood_actress_depth.png\\n    │       │       ├── bollywood_actress_normal.png\\n    │       │       ├── bollywood_actress_rgba.png\\n    │       │       ├── cactus.png\\n    │       │       ├── cactus_depth.png\\n    │       │       ├── cactus_normal.png\\n    │       │       ├── cactus_rgba.png\\n    │       │       ├── catstatue.png\\n    │       │       ├── catstatue_depth.png\\n    │       │       ├── catstatue_normal.png\\n    │       │       ├── catstatue_rgba.png\\n    │       │       ├── church_ruins.png\\n    │       │       ├── church_ruins_depth.png\\n    │       │       ├── church_ruins_normal.png\\n    │       │       ├── church_ruins_rgba.png\\n    │       │       ├── dog1_rgba.png\\n    │       │       ├── dragon2_rgba.png\\n    │       │       ├── firekeeper.jpg\\n    │       │       ├── firekeeper_depth.png\\n    │       │       ├── firekeeper_normal.png\\n    │       │       ├── firekeeper_rgba.png\\n    │       │       ├── futuristic_car.png\\n    
│       │       ├── futuristic_car_depth.png\\n    │       │       ├── futuristic_car_normal.png\\n    │       │       ├── futuristic_car_rgba.png\\n    │       │       ├── grootplant_rgba.png\\n    │       │       ├── hamburger.png\\n    │       │       ├── hamburger_depth.png\\n    │       │       ├── hamburger_rgba.png\\n    │       │       ├── mona_lisa.png\\n    │       │       ├── mona_lisa_depth.png\\n    │       │       ├── mona_lisa_normal.png\\n    │       │       ├── mona_lisa_rgba.png\\n    │       │       ├── robot_rgba.png\\n    │       │       ├── teddy.png\\n    │       │       ├── teddy_depth.png\\n    │       │       ├── teddy_normal.png\\n    │       │       ├── teddy_rgba.png\\n    │       │       └── thorhammer_rgba.png\\n    │       ├── lights/\\n    │       │       ├── bsdf_256_256.bin\\n    │       │       ├── LICENSE.txt\\n    │       │       └── mud_road_puresky_1k.hdr\\n    │       ├── shapes/\\n    │       │       ├── animal.obj\\n    │       │       ├── blub.obj\\n    │       │       ├── cabin.obj\\n    │       │       ├── env_sphere.obj\\n    │       │       ├── hand_prismatic.obj\\n    │       │       ├── human.obj\\n    │       │       ├── nascar.obj\\n    │       │       ├── potion.obj\\n    │       │       ├── README.md\\n    │       │       └── teddy.obj\\n    │       ├── tets/\\n    │       │       ├── 128_tets.npz\\n    │       │       ├── 32_tets.npz\\n    │       │       ├── 64_tets.npz\\n    │       │       └── generate_tets.py\\n    │       ├── zero123/\\n    │       │       ├── download.sh\\n    │       │       └── sd-objaverse-finetune-c_concat-256.yaml\\n    │       ├── make_prompt_library.py\\n    │       └── prompt_library.json\\n    ├── scripts/\\n    │       └── convert_zero123_to_diffusers.py\\n    ├── threestudio/\\n    │       ├── data/\\n    │       │       ├── __init__.py\\n    │       │       ├── co3d.py\\n    │       │       ├── image.py\\n    │       │       ├── multiview.py\\n    │       │       ├── 
uncond.py\\n    │       │       └── uncond_eff.py\\n    │       ├── models/\\n    │       │       ├── background/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── neural_environment_map_background.py\\n    │       │       │       ├── solid_color_background.py\\n    │       │       │       └── textured_background.py\\n    │       │       ├── exporters/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       └── mesh_exporter.py\\n    │       │       ├── geometry/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── custom_mesh.py\\n    │       │       │       ├── implicit_sdf.py\\n    │       │       │       ├── implicit_volume.py\\n    │       │       │       ├── tetrahedra_sdf_grid.py\\n    │       │       │       └── volume_grid.py\\n    │       │       ├── guidance/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── controlnet_guidance.py\\n    │       │       │       ├── deep_floyd_guidance.py\\n    │       │       │       ├── instructpix2pix_guidance.py\\n    │       │       │       ├── stable_diffusion_guidance.py\\n    │       │       │       ├── stable_diffusion_sdi_guidance.py\\n    │       │       │       ├── stable_diffusion_unified_guidance.py\\n    │       │       │       ├── stable_diffusion_vsd_guidance.py\\n    │       │       │       ├── stable_zero123_guidance.py\\n    │       │       │       ├── zero123_guidance.py\\n    │       │       │       └── zero123_unified_guidance.py\\n    │       │       ├── materials/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── diffuse_with_point_light_material.py\\n    │       │       │       ├── hybrid_rgb_latent_material.py\\n    │       │       │       ├── neural_radiance_material.py\\n    │       │       │    
   ├── no_material.py\\n    │       │       │       ├── pbr_material.py\\n    │       │       │       └── sd_latent_adapter_material.py\\n    │       │       ├── prompt_processors/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── deepfloyd_prompt_processor.py\\n    │       │       │       ├── dummy_prompt_processor.py\\n    │       │       │       └── stable_diffusion_prompt_processor.py\\n    │       │       ├── renderers/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── deferred_volume_renderer.py\\n    │       │       │       ├── gan_volume_renderer.py\\n    │       │       │       ├── nerf_volume_renderer.py\\n    │       │       │       ├── neus_volume_renderer.py\\n    │       │       │       ├── nvdiff_rasterizer.py\\n    │       │       │       └── patch_renderer.py\\n    │       │       ├── __init__.py\\n    │       │       ├── estimators.py\\n    │       │       ├── isosurface.py\\n    │       │       ├── mesh.py\\n    │       │       └── networks.py\\n    │       ├── scripts/\\n    │       │       ├── make_training_vid.py\\n    │       │       ├── run_zero123.sh\\n    │       │       ├── run_zero123_comparison.sh\\n    │       │       ├── run_zero123_phase.sh\\n    │       │       ├── run_zero123_phase2.sh\\n    │       │       ├── run_zero123_sbatch.py\\n    │       │       ├── zero123_demo.py\\n    │       │       └── zero123_sbatch.sh\\n    │       ├── systems/\\n    │       │       ├── __init__.py\\n    │       │       ├── base.py\\n    │       │       ├── control4d_multiview.py\\n    │       │       ├── dreamfusion.py\\n    │       │       ├── eff_dreamfusion.py\\n    │       │       ├── fantasia3d.py\\n    │       │       ├── imagedreamfusion.py\\n    │       │       ├── instructnerf2nerf.py\\n    │       │       ├── latentnerf.py\\n    │       │       ├── magic123.py\\n    │       │       ├── 
magic3d.py\\n    │       │       ├── optimizers.py\\n    │       │       ├── prolificdreamer.py\\n    │       │       ├── sdi.py\\n    │       │       ├── sjc.py\\n    │       │       ├── textmesh.py\\n    │       │       ├── utils.py\\n    │       │       ├── zero123.py\\n    │       │       └── zero123_simple.py\\n    │       ├── utils/\\n    │       │       ├── GAN/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── attention.py\\n    │       │       │       ├── discriminator.py\\n    │       │       │       ├── distribution.py\\n    │       │       │       ├── loss.py\\n    │       │       │       ├── mobilenet.py\\n    │       │       │       ├── network_util.py\\n    │       │       │       ├── util.py\\n    │       │       │       └── vae.py\\n    │       │       ├── perceptual/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── perceptual.py\\n    │       │       │       └── utils.py\\n    │       │       ├── __init__.py\\n    │       │       ├── base.py\\n    │       │       ├── callbacks.py\\n    │       │       ├── config.py\\n    │       │       ├── loss.py\\n    │       │       ├── misc.py\\n    │       │       ├── ops.py\\n    │       │       ├── rasterize.py\\n    │       │       ├── saving.py\\n    │       │       └── typing.py\\n    │       └── __init__.py\\n    ├── .editorconfig\\n    ├── .pre-commit-config.yaml\\n    ├── .pylintrc\\n    ├── 2dplayground.ipynb\\n    ├── 2dplayground_SDI_version.ipynb\\n    ├── CHANGELOG.md\\n    ├── DOCUMENTATION.md\\n    ├── gradio_app.py\\n    ├── launch.py\\n    ├── LICENSE\\n    ├── README.md\\n    ├── requirements-dev.txt\\n    ├── requirements.txt\\n    ├── setup.py\\n    └── threestudio.ipynb\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/threestudio.zip\",\n    \"stateOriginalUrl\": \"https://github.com/threestudio-project/threestudio\"\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/threestudio/output_analysis/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for ThreeStudio Task 2: Analyze Zero123 Guidance Output Structure\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport re\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_answer_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer.txt file exists.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    if not answer_file.exists():\n        print(\"❌ File 'answer.txt' not found\")\n        return False\n    \n    print(\"✅ Answer file found\")\n    return True\n\ndef verify_required_strings(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer contains the four required strings.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text()\n        \n        # Check for required strings\n        required_strings = [\"loss_sds\", \"grad_norm\", \"min_step\", \"max_step\"]\n        missing_strings = []\n        \n        for string in required_strings:\n            if string not in content:\n                missing_strings.append(string)\n        \n        if missing_strings:\n            print(f\"❌ Missing required strings: {missing_strings}\")\n            return False\n        \n        print(\"✅ All required strings found\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading answer file: {e}\")\n        return False\n\ndef verify_line_numbers(test_dir: Path) -> bool:\n    \"\"\"Verify that line numbers contain (323 or 324) AND (327 or 328).\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text()\n        \n        # Check for first number (323 or 
324)\n        has_first = \"323\" in content or \"324\" in content\n        \n        # Check for second number (327 or 328)\n        has_second = \"327\" in content or \"328\" in content\n        \n        if not has_first:\n            print(\"❌ Missing first line number (323 or 324)\")\n            return False\n        \n        if not has_second:\n            print(\"❌ Missing second line number (327 or 328)\")\n            return False\n        \n        print(\"✅ Line numbers found: contains (323 or 324) and (327 or 328)\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying line numbers: {e}\")\n        return False\n\ndef verify_file_path(test_dir: Path) -> bool:\n    \"\"\"Verify that the file path contains the exact expected path string.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text()\n        \n        # Check for the exact expected file path\n        expected_path = \"threestudio/models/guidance/zero123_guidance.py\"\n        \n        if expected_path not in content:\n            print(f\"❌ Missing expected file path: {expected_path}\")\n            return False\n        \n        print(\"✅ File path found: threestudio/models/guidance/zero123_guidance.py\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file path: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying ThreeStudio Task 2: Analyze Zero123 Guidance Output Structure...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Answer File Exists\", verify_answer_file_exists),\n        (\"Required Strings\", verify_required_strings),\n        (\"Line Numbers Range\", verify_line_numbers),\n        (\"File Path Components\", verify_file_path),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for 
step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Zero123 guidance output structure analyzed correctly!\")\n        print(\"🎉 Task 2 verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task 2 verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/threestudio/requirements_completion/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nThe `requirements.txt` file in the ThreeStudio project is used to install necessary Python libraries. However, the Zero123-related dependencies were accidentally deleted from the file. Your task is to restore these missing dependencies.\n\n### Task Objectives\n\n1. **Locate the requirements.txt file** in the test environment\n2. **Identify the missing Zero123 dependencies** that need to be restored\n3. **Add the required dependencies** to the requirements.txt file\n4. **Ensure the file format is correct** (one dependency per line)\n"
  },
  {
    "path": "tasks/filesystem/standard/threestudio/requirements_completion/meta.json",
    "content": "{\n  \"task_id\": \"requirements_completion\",\n  \"task_name\": \"Requirements Completion\",\n  \"category_id\": \"threestudio\",\n  \"category_name\": \"Threestudio\",\n  \"description\": \"Restore and complete missing Zero123-related dependencies in the requirements.txt file to ensure proper ThreeStudio project configuration.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-05\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"code exploration\",\n    \"cross-referencing\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"threestudio/\\n    ├── configs/\\n    │       ├── debugging/\\n    │       │       ├── controlnet-canny.yaml\\n    │       │       ├── controlnet-normal.yaml\\n    │       │       ├── instructpix2pix.yaml\\n    │       │       └── stablediffusion.yaml\\n    │       ├── experimental/\\n    │       │       ├── unified-guidance/\\n    │       │       │       ├── dreamfusion-sd.yaml\\n    │       │       │       ├── hifa.yaml\\n    │       │       │       ├── prolificdreamer-hifa.yaml\\n    │       │       │       ├── prolificdreamer.yaml\\n    │       │       │       └── zero123-simple.yaml\\n    │       │       ├── co3d-imagecondition.yaml\\n    │       │       ├── imagecondition.yaml\\n    │       │       ├── imagecondition_zero123nerf.yaml\\n    │       │       ├── imagecondition_zero123nerf_refine.yaml\\n    │       │       ├── prolificdreamer-importance.yaml\\n    │       │       ├── prolificdreamer-neus-importance.yaml\\n    │       │       ├── prolificdreamer-propnet.yaml\\n    │       │       └── textmesh-if-importance.yaml\\n    │       ├── gradio/\\n    │       │       ├── dreamfusion-if.yaml\\n    │       │       ├── dreamfusion-sd.yaml\\n    │       │       ├── fantasia3d.yaml\\n    │       │       ├── latentnerf.yaml\\n    │       │       ├── sjc.yaml\\n    │       │       └── textmesh-if.yaml\\n    │       ├── 
control4d-static.yaml\\n    │       ├── dreamfusion-if.yaml\\n    │       ├── dreamfusion-sd-eff.yaml\\n    │       ├── dreamfusion-sd.yaml\\n    │       ├── fantasia3d-texture.yaml\\n    │       ├── fantasia3d.yaml\\n    │       ├── hifa.yaml\\n    │       ├── instructnerf2nerf.yaml\\n    │       ├── latentnerf-refine.yaml\\n    │       ├── latentnerf.yaml\\n    │       ├── magic123-coarse-sd.yaml\\n    │       ├── magic123-hifa-coarse-sd.yaml\\n    │       ├── magic123-hifa-refine-sd.yaml\\n    │       ├── magic123-refine-sd.yaml\\n    │       ├── magic3d-coarse-if.yaml\\n    │       ├── magic3d-coarse-sd.yaml\\n    │       ├── magic3d-refine-sd.yaml\\n    │       ├── prolificdreamer-geometry.yaml\\n    │       ├── prolificdreamer-hifa.yaml\\n    │       ├── prolificdreamer-patch.yaml\\n    │       ├── prolificdreamer-scene-hifa.yaml\\n    │       ├── prolificdreamer-scene.yaml\\n    │       ├── prolificdreamer-texture.yaml\\n    │       ├── prolificdreamer.yaml\\n    │       ├── sdi.yaml\\n    │       ├── sjc.yaml\\n    │       ├── sketchshape-refine.yaml\\n    │       ├── sketchshape.yaml\\n    │       ├── stable-zero123.yaml\\n    │       ├── textmesh-if.yaml\\n    │       ├── zero123-geometry.yaml\\n    │       └── zero123.yaml\\n    ├── custom/\\n    │       └── put_custom_extensions_here\\n    ├── docker/\\n    │       ├── compose.yaml\\n    │       └── Dockerfile\\n    ├── docs/\\n    │       └── installation.md\\n    ├── extern/\\n    │       ├── ldm_zero123/\\n    │       │       ├── models/\\n    │       │       │       ├── diffusion/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       ├── classifier.py\\n    │       │       │       │       ├── ddim.py\\n    │       │       │       │       ├── ddpm.py\\n    │       │       │       │       ├── plms.py\\n    │       │       │       │       └── sampling_util.py\\n    │       │       │       └── autoencoder.py\\n    │       │       ├── modules/\\n    │       │       │ 
      ├── diffusionmodules/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       ├── model.py\\n    │       │       │       │       ├── openaimodel.py\\n    │       │       │       │       └── util.py\\n    │       │       │       ├── distributions/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       └── distributions.py\\n    │       │       │       ├── encoders/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       └── modules.py\\n    │       │       │       ├── evaluate/\\n    │       │       │       │       ├── adm_evaluator.py\\n    │       │       │       │       ├── evaluate_perceptualsim.py\\n    │       │       │       │       ├── frechet_video_distance.py\\n    │       │       │       │       ├── ssim.py\\n    │       │       │       │       └── torch_frechet_video_distance.py\\n    │       │       │       ├── image_degradation/\\n    │       │       │       │       ├── utils/\\n    │       │       │       │       │       └── test.png\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       ├── bsrgan.py\\n    │       │       │       │       ├── bsrgan_light.py\\n    │       │       │       │       └── utils_image.py\\n    │       │       │       ├── losses/\\n    │       │       │       │       ├── __init__.py\\n    │       │       │       │       ├── contperceptual.py\\n    │       │       │       │       └── vqperceptual.py\\n    │       │       │       ├── attention.py\\n    │       │       │       ├── ema.py\\n    │       │       │       └── x_transformer.py\\n    │       │       ├── thirdp/\\n    │       │       │       └── psp/\\n    │       │       │               ├── helpers.py\\n    │       │       │               ├── id_loss.py\\n    │       │       │               └── model_irse.py\\n    │       │       ├── __init__.py\\n    │       │       ├── extras.py\\n    │       │       ├── 
guidance.py\\n    │       │       ├── lr_scheduler.py\\n    │       │       └── util.py\\n    │       ├── __init__.py\\n    │       └── zero123.py\\n    ├── load/\\n    │       ├── images/\\n    │       │       ├── anya_front.png\\n    │       │       ├── anya_front_depth.png\\n    │       │       ├── anya_front_normal.png\\n    │       │       ├── anya_front_rgba.png\\n    │       │       ├── baby_phoenix_on_ice.png\\n    │       │       ├── baby_phoenix_on_ice_depth.png\\n    │       │       ├── baby_phoenix_on_ice_normal.png\\n    │       │       ├── baby_phoenix_on_ice_rgba.png\\n    │       │       ├── beach_house_1.png\\n    │       │       ├── beach_house_1_depth.png\\n    │       │       ├── beach_house_1_normal.png\\n    │       │       ├── beach_house_1_rgba.png\\n    │       │       ├── beach_house_2.png\\n    │       │       ├── beach_house_2_depth.png\\n    │       │       ├── beach_house_2_normal.png\\n    │       │       ├── beach_house_2_rgba.png\\n    │       │       ├── bollywood_actress.png\\n    │       │       ├── bollywood_actress_depth.png\\n    │       │       ├── bollywood_actress_normal.png\\n    │       │       ├── bollywood_actress_rgba.png\\n    │       │       ├── cactus.png\\n    │       │       ├── cactus_depth.png\\n    │       │       ├── cactus_normal.png\\n    │       │       ├── cactus_rgba.png\\n    │       │       ├── catstatue.png\\n    │       │       ├── catstatue_depth.png\\n    │       │       ├── catstatue_normal.png\\n    │       │       ├── catstatue_rgba.png\\n    │       │       ├── church_ruins.png\\n    │       │       ├── church_ruins_depth.png\\n    │       │       ├── church_ruins_normal.png\\n    │       │       ├── church_ruins_rgba.png\\n    │       │       ├── dog1_rgba.png\\n    │       │       ├── dragon2_rgba.png\\n    │       │       ├── firekeeper.jpg\\n    │       │       ├── firekeeper_depth.png\\n    │       │       ├── firekeeper_normal.png\\n    │       │       ├── firekeeper_rgba.png\\n    │       
│       ├── futuristic_car.png\\n    │       │       ├── futuristic_car_depth.png\\n    │       │       ├── futuristic_car_normal.png\\n    │       │       ├── futuristic_car_rgba.png\\n    │       │       ├── grootplant_rgba.png\\n    │       │       ├── hamburger.png\\n    │       │       ├── hamburger_depth.png\\n    │       │       ├── hamburger_rgba.png\\n    │       │       ├── mona_lisa.png\\n    │       │       ├── mona_lisa_depth.png\\n    │       │       ├── mona_lisa_normal.png\\n    │       │       ├── mona_lisa_rgba.png\\n    │       │       ├── robot_rgba.png\\n    │       │       ├── teddy.png\\n    │       │       ├── teddy_depth.png\\n    │       │       ├── teddy_normal.png\\n    │       │       ├── teddy_rgba.png\\n    │       │       └── thorhammer_rgba.png\\n    │       ├── lights/\\n    │       │       ├── bsdf_256_256.bin\\n    │       │       ├── LICENSE.txt\\n    │       │       └── mud_road_puresky_1k.hdr\\n    │       ├── shapes/\\n    │       │       ├── animal.obj\\n    │       │       ├── blub.obj\\n    │       │       ├── cabin.obj\\n    │       │       ├── env_sphere.obj\\n    │       │       ├── hand_prismatic.obj\\n    │       │       ├── human.obj\\n    │       │       ├── nascar.obj\\n    │       │       ├── potion.obj\\n    │       │       ├── README.md\\n    │       │       └── teddy.obj\\n    │       ├── tets/\\n    │       │       ├── 128_tets.npz\\n    │       │       ├── 32_tets.npz\\n    │       │       ├── 64_tets.npz\\n    │       │       └── generate_tets.py\\n    │       ├── zero123/\\n    │       │       ├── download.sh\\n    │       │       └── sd-objaverse-finetune-c_concat-256.yaml\\n    │       ├── make_prompt_library.py\\n    │       └── prompt_library.json\\n    ├── scripts/\\n    │       └── convert_zero123_to_diffusers.py\\n    ├── threestudio/\\n    │       ├── data/\\n    │       │       ├── __init__.py\\n    │       │       ├── co3d.py\\n    │       │       ├── image.py\\n    │       │       ├── 
multiview.py\\n    │       │       ├── uncond.py\\n    │       │       └── uncond_eff.py\\n    │       ├── models/\\n    │       │       ├── background/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── neural_environment_map_background.py\\n    │       │       │       ├── solid_color_background.py\\n    │       │       │       └── textured_background.py\\n    │       │       ├── exporters/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       └── mesh_exporter.py\\n    │       │       ├── geometry/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── custom_mesh.py\\n    │       │       │       ├── implicit_sdf.py\\n    │       │       │       ├── implicit_volume.py\\n    │       │       │       ├── tetrahedra_sdf_grid.py\\n    │       │       │       └── volume_grid.py\\n    │       │       ├── guidance/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── controlnet_guidance.py\\n    │       │       │       ├── deep_floyd_guidance.py\\n    │       │       │       ├── instructpix2pix_guidance.py\\n    │       │       │       ├── stable_diffusion_guidance.py\\n    │       │       │       ├── stable_diffusion_sdi_guidance.py\\n    │       │       │       ├── stable_diffusion_unified_guidance.py\\n    │       │       │       ├── stable_diffusion_vsd_guidance.py\\n    │       │       │       ├── stable_zero123_guidance.py\\n    │       │       │       ├── zero123_guidance.py\\n    │       │       │       └── zero123_unified_guidance.py\\n    │       │       ├── materials/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── diffuse_with_point_light_material.py\\n    │       │       │       ├── hybrid_rgb_latent_material.py\\n    │       │       │       ├── 
neural_radiance_material.py\\n    │       │       │       ├── no_material.py\\n    │       │       │       ├── pbr_material.py\\n    │       │       │       └── sd_latent_adapter_material.py\\n    │       │       ├── prompt_processors/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── deepfloyd_prompt_processor.py\\n    │       │       │       ├── dummy_prompt_processor.py\\n    │       │       │       └── stable_diffusion_prompt_processor.py\\n    │       │       ├── renderers/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── base.py\\n    │       │       │       ├── deferred_volume_renderer.py\\n    │       │       │       ├── gan_volume_renderer.py\\n    │       │       │       ├── nerf_volume_renderer.py\\n    │       │       │       ├── neus_volume_renderer.py\\n    │       │       │       ├── nvdiff_rasterizer.py\\n    │       │       │       └── patch_renderer.py\\n    │       │       ├── __init__.py\\n    │       │       ├── estimators.py\\n    │       │       ├── isosurface.py\\n    │       │       ├── mesh.py\\n    │       │       └── networks.py\\n    │       ├── scripts/\\n    │       │       ├── make_training_vid.py\\n    │       │       ├── run_zero123.sh\\n    │       │       ├── run_zero123_comparison.sh\\n    │       │       ├── run_zero123_phase.sh\\n    │       │       ├── run_zero123_phase2.sh\\n    │       │       ├── run_zero123_sbatch.py\\n    │       │       ├── zero123_demo.py\\n    │       │       └── zero123_sbatch.sh\\n    │       ├── systems/\\n    │       │       ├── __init__.py\\n    │       │       ├── base.py\\n    │       │       ├── control4d_multiview.py\\n    │       │       ├── dreamfusion.py\\n    │       │       ├── eff_dreamfusion.py\\n    │       │       ├── fantasia3d.py\\n    │       │       ├── imagedreamfusion.py\\n    │       │       ├── instructnerf2nerf.py\\n    │       │       ├── latentnerf.py\\n    │       │      
 ├── magic123.py\\n    │       │       ├── magic3d.py\\n    │       │       ├── optimizers.py\\n    │       │       ├── prolificdreamer.py\\n    │       │       ├── sdi.py\\n    │       │       ├── sjc.py\\n    │       │       ├── textmesh.py\\n    │       │       ├── utils.py\\n    │       │       ├── zero123.py\\n    │       │       └── zero123_simple.py\\n    │       ├── utils/\\n    │       │       ├── GAN/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── attention.py\\n    │       │       │       ├── discriminator.py\\n    │       │       │       ├── distribution.py\\n    │       │       │       ├── loss.py\\n    │       │       │       ├── mobilenet.py\\n    │       │       │       ├── network_util.py\\n    │       │       │       ├── util.py\\n    │       │       │       └── vae.py\\n    │       │       ├── perceptual/\\n    │       │       │       ├── __init__.py\\n    │       │       │       ├── perceptual.py\\n    │       │       │       └── utils.py\\n    │       │       ├── __init__.py\\n    │       │       ├── base.py\\n    │       │       ├── callbacks.py\\n    │       │       ├── config.py\\n    │       │       ├── loss.py\\n    │       │       ├── misc.py\\n    │       │       ├── ops.py\\n    │       │       ├── rasterize.py\\n    │       │       ├── saving.py\\n    │       │       └── typing.py\\n    │       └── __init__.py\\n    ├── .editorconfig\\n    ├── .pre-commit-config.yaml\\n    ├── .pylintrc\\n    ├── 2dplayground.ipynb\\n    ├── 2dplayground_SDI_version.ipynb\\n    ├── CHANGELOG.md\\n    ├── DOCUMENTATION.md\\n    ├── gradio_app.py\\n    ├── launch.py\\n    ├── LICENSE\\n    ├── README.md\\n    ├── requirements-dev.txt\\n    ├── requirements.txt\\n    ├── setup.py\\n    └── threestudio.ipynb\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/threestudio.zip\",\n    \"stateOriginalUrl\": \"https://github.com/threestudio-project/threestudio\"\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/threestudio/requirements_completion/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for ThreeStudio Task 3: Restore Zero123 Dependencies in Requirements.txt\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport re\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_requirements_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the requirements.txt file exists.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    if not requirements_file.exists():\n        print(\"❌ File 'requirements.txt' not found\")\n        return False\n    \n    print(\"✅ Requirements.txt file found\")\n    return True\n\ndef verify_requirements_file_readable(test_dir: Path) -> bool:\n    \"\"\"Verify that the requirements.txt file is readable.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    try:\n        content = requirements_file.read_text()\n        if not content.strip():\n            print(\"❌ Requirements.txt file is empty\")\n            return False\n        \n        print(\"✅ Requirements.txt file is readable\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading requirements.txt file: {e}\")\n        return False\n\ndef verify_required_dependencies_present(test_dir: Path) -> bool:\n    \"\"\"Verify that all required Zero123 dependencies are present.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    try:\n        content = requirements_file.read_text()\n        \n        # Required dependencies to check for (simplified)\n        required_deps = [\n            \"einops\",\n            \"kornia\", \n            \"taming\",\n            \"openai\",\n            \"clip\"\n        ]\n        \n        
missing_deps = []\n        found_deps = []\n        \n        for dep in required_deps:\n            if dep.lower() in content.lower():\n                found_deps.append(dep)\n            else:\n                missing_deps.append(dep)\n        \n        if missing_deps:\n            print(f\"❌ Missing required dependencies: {missing_deps}\")\n            return False\n        \n        print(f\"✅ All required dependencies found: {found_deps}\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking dependencies: {e}\")\n        return False\n\ndef verify_specific_dependency_entries(test_dir: Path) -> bool:\n    \"\"\"Verify that the specific dependency entries are present.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    try:\n        content = requirements_file.read_text()\n        \n        # Check for specific dependency entries (simplified)\n        # For taming, we only need to check if \"taming\" is present, not the full package name\n        required_checks = [\n            (\"einops\", \"einops\"),\n            (\"kornia\", \"kornia\"),\n            (\"taming\", \"taming\"),  # Just check for \"taming\" substring\n        ]\n        \n        missing_entries = []\n        found_entries = []\n        \n        for check_name, full_entry in required_checks:\n            if check_name in content.lower():\n                found_entries.append(check_name)\n            else:\n                missing_entries.append(check_name)\n        \n        # Special check for openai and clip - they should be on the same line\n        lines = content.split('\\n')\n        openai_clip_found = False\n        for line in lines:\n            line_lower = line.lower()\n            if \"openai\" in line_lower and \"clip\" in line_lower:\n                openai_clip_found = True\n                break\n        \n        if openai_clip_found:\n            found_entries.append(\"openai+clip\")\n        else:\n           
 missing_entries.append(\"openai+clip\")\n        \n        if missing_entries:\n            print(f\"❌ Missing required dependency checks: {missing_entries}\")\n            return False\n        \n        print(f\"✅ All required dependency checks passed: {found_entries}\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking specific entries: {e}\")\n        return False\n\ndef verify_file_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the requirements.txt file has proper format.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    try:\n        content = requirements_file.read_text()\n        lines = content.split('\\n')\n        \n        # Basic format check - just ensure file is not completely empty\n        if not content.strip():\n            print(\"❌ File is completely empty\")\n            return False\n        \n        print(\"✅ File format is acceptable\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking file format: {e}\")\n        return False\n\ndef verify_no_duplicate_entries(test_dir: Path) -> bool:\n    \"\"\"Verify that there are no duplicate dependency entries.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    try:\n        content = requirements_file.read_text()\n        \n        # Simplified duplicate check - just ensure the file is not completely corrupted\n        if len(content) < 10:  # Basic sanity check\n            print(\"❌ File seems too short to be valid\")\n            return False\n        \n        print(\"✅ File appears to be valid\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking file: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying ThreeStudio Task 3: Restore Zero123 Dependencies in Requirements.txt...\")\n    \n    # Define verification 
steps\n    verification_steps = [\n        (\"Requirements File Exists\", verify_requirements_file_exists),\n        (\"File is Readable\", verify_requirements_file_readable),\n        (\"Required Dependencies Present\", verify_required_dependencies_present),\n        (\"Specific Entries Present\", verify_specific_dependency_entries),\n        (\"File Format\", verify_file_format),\n        (\"File Validity\", verify_no_duplicate_entries),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Zero123 dependencies successfully restored in requirements.txt!\")\n        print(\"🎉 Task 3 verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task 3 verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/filesystem/standard/votenet/dataset_comparison/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nAnalyze the codebase to map ScanNet object categories to SUN RGB-D categories and calculate object counts.\n\n### Task Objectives\n\n1. **Primary Goal**: Use SUN RGB-D's 10-category classification system as the target taxonomy\n2. **Mapping Requirement**: Map each ScanNet object category (using the \"category\" field, not \"raw_category\") to the corresponding SUN RGB-D category\n3. **Calculation**: For each SUN RGB-D category, calculate the total count of objects from ScanNet that map to that category (an object only counts if its category name, not its raw category name, is exactly the same as the SUN RGB-D category name; treat night_stand and nightstand as the same name)\n4. **Output**: Generate an analysis.txt file in the main directory showing the mapping and counts\n\n### Expected Output\n\nCreate a file named `analysis.txt` in the test directory root with the following format:\n\n- Each SUN RGB-D category should be represented as a 2-line block\n- Line 1: category name\n- Line 2: total count\n- Each block should be separated by one empty line\n"
  },
  {
    "path": "tasks/filesystem/standard/votenet/dataset_comparison/meta.json",
    "content": "{\n  \"task_id\": \"dataset_comparison\",\n  \"task_name\": \"Dataset Comparison\",\n  \"category_id\": \"votenet\",\n  \"category_name\": \"Votenet\",\n  \"description\": \"Map ScanNet object categories to their SUN RGB-D equivalents and calculate detailed object counts for each mapped category.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-13\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"cross-referencing\",\n    \"data extraction\",\n    \"pattern analysis\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"votenet/\\n    ├── doc/\\n    │       ├── teaser.jpg\\n    │       └── tips.md\\n    ├── models/\\n    │       ├── ap_helper.py\\n    │       ├── backbone_module.py\\n    │       ├── boxnet.py\\n    │       ├── dump_helper.py\\n    │       ├── loss_helper.py\\n    │       ├── loss_helper_boxnet.py\\n    │       ├── proposal_module.py\\n    │       ├── votenet.py\\n    │       └── voting_module.py\\n    ├── pointnet2/\\n    │       ├── _ext_src/\\n    │       │       ├── include/\\n    │       │       │       ├── ball_query.h\\n    │       │       │       ├── cuda_utils.h\\n    │       │       │       ├── group_points.h\\n    │       │       │       ├── interpolate.h\\n    │       │       │       ├── sampling.h\\n    │       │       │       └── utils.h\\n    │       │       └── src/\\n    │       │               ├── ball_query.cpp\\n    │       │               ├── ball_query_gpu.cu\\n    │       │               ├── bindings.cpp\\n    │       │               ├── group_points.cpp\\n    │       │               ├── group_points_gpu.cu\\n    │       │               ├── interpolate.cpp\\n    │       │               ├── interpolate_gpu.cu\\n    │       │               ├── sampling.cpp\\n    │       │               └── sampling_gpu.cu\\n    │       ├── pointnet2_modules.py\\n    │       ├── pointnet2_test.py\\n    │       ├── pointnet2_utils.py\\n    │       
├── pytorch_utils.py\\n    │       └── setup.py\\n    ├── scannet/\\n    │       ├── meta_data/\\n    │       │       ├── scannet_means.npz\\n    │       │       ├── scannet_train.txt\\n    │       │       ├── scannetv2-labels.combined.tsv\\n    │       │       ├── scannetv2_test.txt\\n    │       │       ├── scannetv2_train.txt\\n    │       │       └── scannetv2_val.txt\\n    │       ├── scans/\\n    │       ├── batch_load_scannet_data.py\\n    │       ├── data_viz.py\\n    │       ├── load_scannet_data.py\\n    │       ├── model_util_scannet.py\\n    │       ├── README.md\\n    │       ├── scannet_detection_dataset.py\\n    │       └── scannet_utils.py\\n    ├── sunrgbd/\\n    │       ├── matlab/\\n    │       │       ├── extract_rgbd_data_v1.m\\n    │       │       ├── extract_rgbd_data_v2.m\\n    │       │       └── extract_split.m\\n    │       ├── OFFICIAL_SUNRGBD/\\n    │       ├── sunrgbd_trainval/\\n    │       ├── model_util_sunrgbd.py\\n    │       ├── README.md\\n    │       ├── sunrgbd_data.py\\n    │       ├── sunrgbd_detection_dataset.py\\n    │       └── sunrgbd_utils.py\\n    ├── utils/\\n    │       ├── box_util.py\\n    │       ├── eval_det.py\\n    │       ├── metric_util.py\\n    │       ├── nms.py\\n    │       ├── nn_distance.py\\n    │       ├── pc_util.py\\n    │       ├── tf_logger.py\\n    │       └── tf_visualizer.py\\n    ├── CODE_OF_CONDUCT.md\\n    ├── CONTRIBUTING.md\\n    ├── demo.py\\n    ├── eval.py\\n    ├── LICENSE\\n    ├── README.md\\n    └── train.py\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/votenet.zip\",\n    \"stateOriginalUrl\": \"https://github.com/facebookresearch/votenet\"\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/votenet/dataset_comparison/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Votenet Dataset Comparison Task\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport re\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_analysis_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the analysis.txt file exists.\"\"\"\n    analysis_file = test_dir / \"analysis.txt\"\n    \n    if not analysis_file.exists():\n        print(\"❌ File 'analysis.txt' not found\")\n        return False\n    \n    print(\"✅ Analysis file found\")\n    return True\n\ndef verify_analysis_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the analysis file has the correct format.\"\"\"\n    analysis_file = test_dir / \"analysis.txt\"\n    \n    try:\n        content = analysis_file.read_text()\n        lines = content.split('\\n')\n        \n        # Check if content is not empty\n        if not content.strip():\n            print(\"❌ Analysis file is empty\")\n            return False\n        \n        # Check if we have enough lines for at least one category block\n        if len(lines) < 2:\n            print(\"❌ Analysis file doesn't have enough lines for a category block\")\n            return False\n        \n        # Check if the format follows the 2-line block pattern with empty lines between blocks\n        # Each block should have: category_name, count, empty_line\n        line_index = 0\n        block_count = 0\n        \n        while line_index < len(lines):\n            # Skip leading empty lines\n            while line_index < len(lines) and lines[line_index].strip() == \"\":\n                line_index += 1\n            \n            if line_index >= len(lines):\n                break\n            
\n            # Check if we have at least 2 lines for a block\n            if line_index + 1 >= len(lines):\n                print(\"❌ Incomplete category block at the end\")\n                return False\n            \n            # Line 1 should be category name\n            category_line = lines[line_index].strip()\n            if not category_line:\n                print(f\"❌ Empty category name at line {line_index + 1}\")\n                return False\n            \n            # Line 2 should be count\n            count_line = lines[line_index + 1].strip()\n            if not count_line:\n                print(f\"❌ Empty count at line {line_index + 2}\")\n                return False\n            \n            # Check if count line contains a number\n            if not re.search(r'\\d+', count_line):\n                print(f\"❌ Count line doesn't contain a number at line {line_index + 2}: '{count_line}'\")\n                return False\n            \n            block_count += 1\n            line_index += 2\n            \n            # Skip empty line between blocks (if not at the end)\n            if line_index < len(lines) and lines[line_index].strip() == \"\":\n                line_index += 1\n        \n        if block_count == 0:\n            print(\"❌ No valid category blocks found\")\n            return False\n        \n        print(f\"✅ Analysis format is correct with {block_count} category blocks\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading analysis file: {e}\")\n        return False\n\ndef verify_required_categories(test_dir: Path) -> bool:\n    \"\"\"Verify that all required SUN RGB-D categories are present.\"\"\"\n    analysis_file = test_dir / \"analysis.txt\"\n    \n    try:\n        content = analysis_file.read_text()\n        lines = content.split('\\n')\n        \n        # Extract category names from the file\n        categories_found = []\n        line_index = 0\n        \n        while 
line_index < len(lines):\n            # Skip empty lines\n            while line_index < len(lines) and lines[line_index].strip() == \"\":\n                line_index += 1\n            \n            if line_index >= len(lines):\n                break\n            \n            # Get category name\n            category_line = lines[line_index].strip()\n            if category_line:\n                categories_found.append(category_line.lower())\n            \n            # Skip to next block\n            line_index += 2\n            while line_index < len(lines) and lines[line_index].strip() == \"\":\n                line_index += 1\n        \n        # Required categories\n        required_categories = {\n            'chair', 'table', 'bed', 'bookshelf', 'desk', \n            'toilet', 'dresser', 'bathtub', 'sofa', 'night_stand'\n        }\n        \n        # Check if all required categories are present\n        missing_categories = required_categories - set(categories_found)\n        if missing_categories:\n            print(f\"❌ Missing required categories: {missing_categories}\")\n            return False\n        \n        # Check for extra categories\n        extra_categories = set(categories_found) - required_categories\n        if extra_categories:\n            print(f\"⚠️  Extra categories found: {extra_categories}\")\n        \n        print(f\"✅ All required categories present: {sorted(required_categories)}\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying required categories: {e}\")\n        return False\n\ndef verify_category_counts(test_dir: Path) -> bool:\n    \"\"\"Verify that the category counts match the expected values.\"\"\"\n    analysis_file = test_dir / \"analysis.txt\"\n    \n    try:\n        content = analysis_file.read_text()\n        lines = content.split('\\n')\n        \n        # Expected counts from answer.txt\n        expected_counts = {\n            'chair': 4681,\n            
'table': 1170,\n            'bed': 370,\n            'bookshelf': 377,\n            'desk': 680,\n            'toilet': 256,\n            'dresser': 213,\n            'bathtub': 144,\n            'sofa': 1,\n            'night_stand': 224\n        }\n        \n        # Extract category counts from the file\n        category_counts = {}\n        line_index = 0\n        \n        while line_index < len(lines):\n            # Skip empty lines\n            while line_index < len(lines) and lines[line_index].strip() == \"\":\n                line_index += 1\n            \n            if line_index >= len(lines):\n                break\n            \n            # Get category name\n            category_line = lines[line_index].strip()\n            if not category_line:\n                line_index += 1\n                continue\n            \n            # Get count\n            if line_index + 1 < len(lines):\n                count_line = lines[line_index + 1].strip()\n                if count_line:\n                    # Extract number from count line\n                    count_match = re.search(r'(\\d+)', count_line)\n                    if count_match:\n                        category = category_line.lower()\n                        count = int(count_match.group(1))\n                        category_counts[category] = count\n            \n            # Skip to next block\n            line_index += 2\n            while line_index < len(lines) and lines[line_index].strip() == \"\":\n                line_index += 1\n        \n        # Verify counts match expected values\n        all_counts_correct = True\n        for category, expected_count in expected_counts.items():\n            if category in category_counts:\n                actual_count = category_counts[category]\n                if actual_count != expected_count:\n                    print(f\"❌ Count mismatch for {category}: expected {expected_count}, got {actual_count}\")\n                    
all_counts_correct = False\n            else:\n                print(f\"❌ Category {category} not found in analysis\")\n                all_counts_correct = False\n        \n        if all_counts_correct:\n            print(\"✅ All category counts match expected values\")\n            return True\n        else:\n            return False\n        \n    except Exception as e:\n        print(f\"❌ Error verifying category counts: {e}\")\n        return False\n\ndef verify_file_structure(test_dir: Path) -> bool:\n    \"\"\"Verify that the analysis.txt file is in the correct location.\"\"\"\n    analysis_file = test_dir / \"analysis.txt\"\n    \n    if not analysis_file.exists():\n        print(\"❌ Analysis file not found in test directory root\")\n        return False\n    \n    # Check if it's directly in the test directory root, not in a subdirectory\n    if analysis_file.parent != test_dir:\n        print(\"❌ Analysis file should be in the test directory root\")\n        return False\n    \n    print(\"✅ Analysis file is in the correct location\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying Votenet Dataset Comparison Task...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Analysis File Exists\", verify_analysis_file_exists),\n        (\"File Location\", verify_file_structure),\n        (\"File Format\", verify_analysis_format),\n        (\"Required Categories\", verify_required_categories),\n        (\"Category Counts\", verify_category_counts),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Votenet dataset comparison task completed correctly!\")\n        
print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/votenet/debugging/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nThere is a bug in the VoteNet backbone module that needs to be identified and fixed.\n\n### Task Objectives\n\n1. **Examine the codebase** using filesystem MCP tools\n2. **Identify the bug** inside the whole process\n3. **Fix the bug** in the code\n4. **Create an answer file** with the bug location\n\n### Expected Output\n\n1. **Fix the bug** in the code file directly\n2. **Create `answer.txt`** in the test directory root with the format: `path`\n\n**Requirements:**\n\n- Only include the bug's file path in answer.txt\n- No additional text or explanation\n\n### Hint\n\n**The bug is not in demo.py**; please look deeper inside the codebase.\n"
  },
  {
    "path": "tasks/filesystem/standard/votenet/debugging/meta.json",
    "content": "{\n  \"task_id\": \"debugging\",\n  \"task_name\": \"Debugging\",\n  \"category_id\": \"votenet\",\n  \"category_name\": \"Votenet\",\n  \"description\": \"Identify and fix bugs in the VoteNet backbone module by examining the codebase and implementing necessary corrections.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-13\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"code exploration\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"votenet/\\n    ├── doc/\\n    │       ├── teaser.jpg\\n    │       └── tips.md\\n    ├── models/\\n    │       ├── ap_helper.py\\n    │       ├── backbone_module.py\\n    │       ├── boxnet.py\\n    │       ├── dump_helper.py\\n    │       ├── loss_helper.py\\n    │       ├── loss_helper_boxnet.py\\n    │       ├── proposal_module.py\\n    │       ├── votenet.py\\n    │       └── voting_module.py\\n    ├── pointnet2/\\n    │       ├── _ext_src/\\n    │       │       ├── include/\\n    │       │       │       ├── ball_query.h\\n    │       │       │       ├── cuda_utils.h\\n    │       │       │       ├── group_points.h\\n    │       │       │       ├── interpolate.h\\n    │       │       │       ├── sampling.h\\n    │       │       │       └── utils.h\\n    │       │       └── src/\\n    │       │               ├── ball_query.cpp\\n    │       │               ├── ball_query_gpu.cu\\n    │       │               ├── bindings.cpp\\n    │       │               ├── group_points.cpp\\n    │       │               ├── group_points_gpu.cu\\n    │       │               ├── interpolate.cpp\\n    │       │               ├── interpolate_gpu.cu\\n    │       │               ├── sampling.cpp\\n    │       │               └── sampling_gpu.cu\\n    │       ├── pointnet2_modules.py\\n    │       ├── pointnet2_test.py\\n    │       ├── pointnet2_utils.py\\n    │       ├── pytorch_utils.py\\n    │       └── setup.py\\n    ├── scannet/\\n    │   
    ├── meta_data/\\n    │       │       ├── scannet_means.npz\\n    │       │       ├── scannet_train.txt\\n    │       │       ├── scannetv2-labels.combined.tsv\\n    │       │       ├── scannetv2_test.txt\\n    │       │       ├── scannetv2_train.txt\\n    │       │       └── scannetv2_val.txt\\n    │       ├── scans/\\n    │       ├── batch_load_scannet_data.py\\n    │       ├── data_viz.py\\n    │       ├── load_scannet_data.py\\n    │       ├── model_util_scannet.py\\n    │       ├── README.md\\n    │       ├── scannet_detection_dataset.py\\n    │       └── scannet_utils.py\\n    ├── sunrgbd/\\n    │       ├── matlab/\\n    │       │       ├── extract_rgbd_data_v1.m\\n    │       │       ├── extract_rgbd_data_v2.m\\n    │       │       └── extract_split.m\\n    │       ├── OFFICIAL_SUNRGBD/\\n    │       ├── sunrgbd_trainval/\\n    │       ├── model_util_sunrgbd.py\\n    │       ├── README.md\\n    │       ├── sunrgbd_data.py\\n    │       ├── sunrgbd_detection_dataset.py\\n    │       └── sunrgbd_utils.py\\n    ├── utils/\\n    │       ├── box_util.py\\n    │       ├── eval_det.py\\n    │       ├── metric_util.py\\n    │       ├── nms.py\\n    │       ├── nn_distance.py\\n    │       ├── pc_util.py\\n    │       ├── tf_logger.py\\n    │       └── tf_visualizer.py\\n    ├── CODE_OF_CONDUCT.md\\n    ├── CONTRIBUTING.md\\n    ├── demo.py\\n    ├── eval.py\\n    ├── LICENSE\\n    ├── README.md\\n    └── train.py\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/votenet.zip\",\n    \"stateOriginalUrl\": \"https://github.com/facebookresearch/votenet\"\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/votenet/debugging/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for VoteNet Task: Debug Backbone Module\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport re\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_answer_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer.txt file exists.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    if not answer_file.exists():\n        print(\"❌ File 'answer.txt' not found\")\n        return False\n    \n    print(\"✅ Answer file found\")\n    return True\n\ndef verify_answer_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the answer file has the correct format.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # Check if content is not empty\n        if not content:\n            print(\"❌ Answer file is empty\")\n            return False\n        \n        # Check if it contains only one line (no additional text)\n        if len(content.split('\\n')) > 1:\n            print(\"❌ Answer file contains multiple lines or additional text\")\n            return False\n        \n        # Check if path contains the expected components\n        if 'models/backbone_module.py' not in content:\n            print(\"❌ Answer should contain 'models/backbone_module.py'\")\n            return False\n        \n        print(\"✅ Answer format is correct\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading answer file: {e}\")\n        return False\n\ndef verify_file_path_structure(test_dir: Path) -> bool:\n    \"\"\"Verify that the file path has the expected structure.\"\"\"\n    answer_file = test_dir / 
\"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # Expected path components for backbone module\n        expected_components = [\"models\", \"backbone_module.py\"]\n        \n        # Check if all expected components are in the content\n        for component in expected_components:\n            if component not in content:\n                print(f\"❌ Answer missing expected component: {component}\")\n                return False\n        \n        print(\"✅ Answer contains expected components\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying answer structure: {e}\")\n        return False\n\ndef verify_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the identified file actually exists.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        # Try the expected path\n        file_path = test_dir / \"models/backbone_module.py\"\n        \n        if not file_path.exists():\n            print(f\"❌ Expected file does not exist: models/backbone_module.py\")\n            return False\n        \n        print(\"✅ Expected file exists\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying file existence: {e}\")\n        return False\n\ndef verify_bug_fix(test_dir: Path) -> bool:\n    \"\"\"Verify that the bug has been fixed in the code.\"\"\"\n    answer_file = test_dir / \"answer.txt\"\n    \n    try:\n        content = answer_file.read_text().strip()\n        \n        file_path = test_dir / \"models/backbone_module.py\"\n        \n        if not file_path.exists():\n            print(f\"❌ Cannot find file for bug fix verification: models/backbone_module.py\")\n            return False\n        \n        # Read the file and search for the specific line containing self.fp2 = PointnetFPModule\n        file_content = file_path.read_text()\n    
    lines = file_content.split('\\n')\n        \n        # Find the line containing self.fp2 = PointnetFPModule\n        target_line = None\n        target_line_number = None\n        \n        for i, line in enumerate(lines):\n            if \"self.fp2 = PointnetFPModule\" in line:\n                target_line = line.strip()\n                target_line_number = i + 1  # Convert to 1-based line number\n                break\n        \n        if target_line is None:\n            print(\"❌ Could not find line containing 'self.fp2 = PointnetFPModule'\")\n            return False\n        \n        # Check if the original buggy line still exists\n        original_bug = \"self.fp2 = PointnetFPModule(mlp=[256,256,256])\"\n        if original_bug in target_line:\n            print(f\"❌ Bug has not been fixed - original line still exists at line {target_line_number}\")\n            print(f\"   Line {target_line_number} content: {target_line}\")\n            return False\n        \n        # Check for the correct fix\n        correct_fixes = [\n            \"self.fp2 = PointnetFPModule(mlp=[256+256,256,256])\",\n            \"self.fp2 = PointnetFPModule(mlp=[512,256,256])\"\n        ]\n        \n        fix_found = False\n        for fix in correct_fixes:\n            if fix in target_line:\n                fix_found = True\n                break\n        \n        if not fix_found:\n            print(f\"❌ Bug fix not found at line {target_line_number}\")\n            print(f\"   Line {target_line_number} content: {target_line}\")\n            print(\"   Expected one of:\")\n            for fix in correct_fixes:\n                print(f\"   - {fix}\")\n            return False\n        \n        print(f\"✅ Bug has been fixed correctly at line {target_line_number}\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error verifying bug fix: {e}\")\n        return False\n\n\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    
test_dir = get_test_directory()\n    print(\"🔍 Verifying VoteNet Task: Debug Backbone Module...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Answer File Exists\", verify_answer_file_exists),\n        (\"Answer Format\", verify_answer_format),\n        (\"Answer Structure\", verify_file_path_structure),\n        (\"File Exists\", verify_file_exists),\n        (\"Bug Fix Applied\", verify_bug_fix),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ VoteNet backbone module bug has been correctly identified and fixed!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/filesystem/standard/votenet/requirements_writing/description.md",
    "content": "Please use FileSystem tools to finish the following task:\n\n### Task Description\n\nThe VoteNet project is a 3D object detection framework for point clouds. Your task is to create a `requirements.txt` file that lists all the necessary Python dependencies for running this codebase.\n\n### Task Objectives\n\n1. **Create a requirements.txt file** in the main directory\n2. **Include all essential dependencies** needed to run the VoteNet codebase\n3. **Ensure the file format is correct** (one dependency per line)\n4. **Save the file as `requirements.txt`** in the current working directory\n5. **Not just** pip install or conda install: your answer should contain **every necessary dependency in the whole process of VoteNet**.\n\n### Requirements\n\nThe requirements.txt file should contain Python packages that are necessary for:\n\n- 3D point cloud processing\n- Deep learning frameworks\n- Computer vision libraries\n- Data visualization\n- 3D mesh processing\n- Network/graph operations\n\n### Note\n\n- You can examine the codebase structure and README to understand what packages are needed\n- The file should be saved as `requirements.txt` in the current directory\n- Each dependency should be on a separate line\n"
  },
  {
    "path": "tasks/filesystem/standard/votenet/requirements_writing/meta.json",
    "content": "{\n  \"task_id\": \"requirements_writing\",\n  \"task_name\": \"Requirements Writing\",\n  \"category_id\": \"votenet\",\n  \"category_name\": \"VoteNet\",\n  \"description\": \"Generate a complete requirements.txt file containing all necessary Python dependencies for running the VoteNet codebase successfully.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-13\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"code exploration\",\n    \"cross-referencing\"\n  ],\n  \"mcp\": [\n    \"filesystem\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"votenet/\\n    ├── doc/\\n    │       ├── teaser.jpg\\n    │       └── tips.md\\n    ├── models/\\n    │       ├── ap_helper.py\\n    │       ├── backbone_module.py\\n    │       ├── boxnet.py\\n    │       ├── dump_helper.py\\n    │       ├── loss_helper.py\\n    │       ├── loss_helper_boxnet.py\\n    │       ├── proposal_module.py\\n    │       ├── votenet.py\\n    │       └── voting_module.py\\n    ├── pointnet2/\\n    │       ├── _ext_src/\\n    │       │       ├── include/\\n    │       │       │       ├── ball_query.h\\n    │       │       │       ├── cuda_utils.h\\n    │       │       │       ├── group_points.h\\n    │       │       │       ├── interpolate.h\\n    │       │       │       ├── sampling.h\\n    │       │       │       └── utils.h\\n    │       │       └── src/\\n    │       │               ├── ball_query.cpp\\n    │       │               ├── ball_query_gpu.cu\\n    │       │               ├── bindings.cpp\\n    │       │               ├── group_points.cpp\\n    │       │               ├── group_points_gpu.cu\\n    │       │               ├── interpolate.cpp\\n    │       │               ├── interpolate_gpu.cu\\n    │       │               ├── sampling.cpp\\n    │       │               └── sampling_gpu.cu\\n    │       ├── pointnet2_modules.py\\n    │       ├── pointnet2_test.py\\n    │       ├── pointnet2_utils.py\\n    │       ├── 
pytorch_utils.py\\n    │       └── setup.py\\n    ├── scannet/\\n    │       ├── meta_data/\\n    │       │       ├── scannet_means.npz\\n    │       │       ├── scannet_train.txt\\n    │       │       ├── scannetv2-labels.combined.tsv\\n    │       │       ├── scannetv2_test.txt\\n    │       │       ├── scannetv2_train.txt\\n    │       │       └── scannetv2_val.txt\\n    │       ├── scans/\\n    │       ├── batch_load_scannet_data.py\\n    │       ├── data_viz.py\\n    │       ├── load_scannet_data.py\\n    │       ├── model_util_scannet.py\\n    │       ├── README.md\\n    │       ├── scannet_detection_dataset.py\\n    │       └── scannet_utils.py\\n    ├── sunrgbd/\\n    │       ├── matlab/\\n    │       │       ├── extract_rgbd_data_v1.m\\n    │       │       ├── extract_rgbd_data_v2.m\\n    │       │       └── extract_split.m\\n    │       ├── OFFICIAL_SUNRGBD/\\n    │       ├── sunrgbd_trainval/\\n    │       ├── model_util_sunrgbd.py\\n    │       ├── README.md\\n    │       ├── sunrgbd_data.py\\n    │       ├── sunrgbd_detection_dataset.py\\n    │       └── sunrgbd_utils.py\\n    ├── utils/\\n    │       ├── box_util.py\\n    │       ├── eval_det.py\\n    │       ├── metric_util.py\\n    │       ├── nms.py\\n    │       ├── nn_distance.py\\n    │       ├── pc_util.py\\n    │       ├── tf_logger.py\\n    │       └── tf_visualizer.py\\n    ├── CODE_OF_CONDUCT.md\\n    ├── CONTRIBUTING.md\\n    ├── demo.py\\n    ├── eval.py\\n    ├── LICENSE\\n    ├── README.md\\n    └── train.py\",\n    \"stateUrl\": \"https://storage.mcpmark.ai/filesystem/votenet.zip\",\n    \"stateOriginalUrl\": \"https://github.com/facebookresearch/votenet\"\n  }\n}"
  },
  {
    "path": "tasks/filesystem/standard/votenet/requirements_writing/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for VoteNet Task: Create Requirements.txt File\n\"\"\"\n\nimport sys\nfrom pathlib import Path\nimport os\n\ndef get_test_directory() -> Path:\n    \"\"\"Get the test directory from FILESYSTEM_TEST_DIR env var.\"\"\"\n    test_root = os.environ.get(\"FILESYSTEM_TEST_DIR\")\n    if not test_root:\n        raise ValueError(\"FILESYSTEM_TEST_DIR environment variable is required\")\n    return Path(test_root)\n\ndef verify_requirements_file_exists(test_dir: Path) -> bool:\n    \"\"\"Verify that the requirements.txt file exists.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    if not requirements_file.exists():\n        print(\"❌ File 'requirements.txt' not found\")\n        return False\n    \n    print(\"✅ Requirements.txt file found\")\n    return True\n\ndef verify_requirements_file_readable(test_dir: Path) -> bool:\n    \"\"\"Verify that the requirements.txt file is readable.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    try:\n        content = requirements_file.read_text()\n        if not content.strip():\n            print(\"❌ Requirements.txt file is empty\")\n            return False\n        \n        print(\"✅ Requirements.txt file is readable\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error reading requirements.txt file: {e}\")\n        return False\n\ndef verify_required_dependencies_present(test_dir: Path) -> bool:\n    \"\"\"Verify that all required dependencies are present.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    try:\n        content = requirements_file.read_text()\n        \n        # Required dependencies from answer.txt\n        required_deps = [\n            \"matplotlib\",\n            \"opencv\", \n            \"plyfile\",\n            \"trimesh\",\n            \"pointnet2\",\n            \"networkx\"\n        ]\n        \n        missing_deps = []\n        
found_deps = []\n        \n        for dep in required_deps:\n            if dep.lower() in content.lower():\n                found_deps.append(dep)\n            else:\n                missing_deps.append(dep)\n        \n        if missing_deps:\n            print(f\"❌ Missing required dependencies: {missing_deps}\")\n            return False\n        \n        print(f\"✅ All required dependencies found: {found_deps}\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking dependencies: {e}\")\n        return False\n\ndef verify_file_format(test_dir: Path) -> bool:\n    \"\"\"Verify that the requirements.txt file has proper format.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    try:\n        content = requirements_file.read_text()\n        lines = content.split('\\n')\n        \n        # Check if file has content and proper line structure\n        if not content.strip():\n            print(\"❌ File is completely empty\")\n            return False\n        \n        # Check if there are multiple lines (indicating multiple dependencies)\n        non_empty_lines = [line.strip() for line in lines if line.strip()]\n        if len(non_empty_lines) < 3:  # Should have at least 3 dependencies\n            print(\"❌ File seems to have too few dependencies\")\n            return False\n        \n        print(\"✅ File format is acceptable\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking file format: {e}\")\n        return False\n\ndef verify_no_duplicate_entries(test_dir: Path) -> bool:\n    \"\"\"Verify that there are no duplicate dependency entries.\"\"\"\n    requirements_file = test_dir / \"requirements.txt\"\n    \n    try:\n        content = requirements_file.read_text()\n        lines = [line.strip().lower() for line in content.split('\\n') if line.strip()]\n        \n        # Check for duplicates\n        if len(lines) != len(set(lines)):\n            
print(\"❌ File contains duplicate entries\")\n            return False\n        \n        print(\"✅ No duplicate entries found\")\n        return True\n        \n    except Exception as e:\n        print(f\"❌ Error checking for duplicates: {e}\")\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    test_dir = get_test_directory()\n    print(\"🔍 Verifying VoteNet Task: Create Requirements.txt File...\")\n    \n    # Define verification steps\n    verification_steps = [\n        (\"Requirements File Exists\", verify_requirements_file_exists),\n        (\"File is Readable\", verify_requirements_file_readable),\n        (\"Required Dependencies Present\", verify_required_dependencies_present),\n        (\"File Format\", verify_file_format),\n        (\"No Duplicate Entries\", verify_no_duplicate_entries),\n    ]\n    \n    # Run all verification steps\n    all_passed = True\n    for step_name, verify_func in verification_steps:\n        print(f\"\\n--- {step_name} ---\")\n        if not verify_func(test_dir):\n            all_passed = False\n    \n    # Final result\n    print(\"\\n\" + \"=\"*50)\n    if all_passed:\n        print(\"✅ Requirements.txt file successfully created with all required dependencies!\")\n        print(\"🎉 Task verification: PASS\")\n        sys.exit(0)\n    else:\n        print(\"❌ Task verification: FAIL\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/github/easy/build-your-own-x/close_commented_issues/description.md",
    "content": "Use the GitHub MCP tools to close every issue in `mcpmark-eval/build-your-own-x` that already has at least one comment. Leave all other issues unchanged.\n"
  },
  {
    "path": "tasks/github/easy/build-your-own-x/close_commented_issues/meta.json",
    "content": "{\n  \"task_id\": \"close_commented_issues\",\n  \"task_name\": \"Close Commented Issues\",\n  \"category_id\": \"build-your-own-x\",\n  \"category_name\": \"Build Your Own X (Easy)\",\n  \"description\": \"Use GitHub MCP tools to close every issue with comments in build-your-own-x and leave everything else alone.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"issue management\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/build-your-own-x\",\n    \"stateOriginalUrl\": \"https://github.com/codecrafters-io/build-your-own-x\"\n  }\n}\n"
  },
  {
    "path": "tasks/github/easy/build-your-own-x/close_commented_issues/verify.py",
    "content": "import os\nimport sys\nfrom typing import Optional\n\nimport requests\nfrom dotenv import load_dotenv\n\nREPO_NAME = \"build-your-own-x\"\nTARGET_ISSUES = [23, 25]\n\n\ndef _fetch_issue(org: str, token: str, number: int) -> Optional[dict]:\n    url = f\"https://api.github.com/repos/{org}/{REPO_NAME}/issues/{number}\"\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n\n    try:\n        response = requests.get(url, headers=headers, timeout=30)\n    except Exception as exc:\n        print(f\"Request error for issue #{number}: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} when fetching issue #{number}\",\n            file=sys.stderr,\n        )\n        return None\n\n    try:\n        return response.json()\n    except Exception as exc:\n        print(f\"Unable to parse issue #{number}: {exc}\", file=sys.stderr)\n        return None\n\n\ndef verify() -> bool:\n    load_dotenv(\".mcp_env\")\n\n    token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not token:\n        print(\"MCP_GITHUB_TOKEN is missing\", file=sys.stderr)\n        return False\n\n    if not org:\n        print(\"GITHUB_EVAL_ORG is missing\", file=sys.stderr)\n        return False\n\n    print(\"Checking issue states in remote repository...\")\n    success = True\n\n    for issue_number in TARGET_ISSUES:\n        data = _fetch_issue(org, token, issue_number)\n        if data is None:\n            success = False\n            continue\n\n        state = data.get(\"state\", \"\").lower()\n        if state != \"closed\":\n            print(\n                f\"Issue #{issue_number} is '{state}' but must be closed.\",\n                file=sys.stderr,\n            )\n            success = False\n        else:\n            print(f\"Issue 
#{issue_number} is closed as expected.\")\n\n    return success\n\n\nif __name__ == \"__main__\":\n    sys.exit(0 if verify() else 1)\n"
  },
  {
    "path": "tasks/github/easy/build-your-own-x/record_recent_commits/description.md",
    "content": "Use the GitHub MCP tools to work in the `mcpmark-eval/build-your-own-x` repository.\n\n1. Retrieve the newest five commits on the default branch.\n2. Open a new issue titled exactly `Latest 5 Commit Snapshot`.\n3. Set the issue body to exactly this format (newest commit first):\n\n```\nLatest 5 commits (newest first)\n1. <full-sha> | <author name> | <commit subject>\n2. <full-sha> | <author name> | <commit subject>\n3. <full-sha> | <author name> | <commit subject>\n4. <full-sha> | <author name> | <commit subject>\n5. <full-sha> | <author name> | <commit subject>\n```\n\nUse the full 40-character SHA and only the first line of each commit message. The `<author name>` must come from the commit metadata's author name field (not the GitHub username/login). Leave the issue open and do not touch other issues.\n"
  },
  {
    "path": "tasks/github/easy/build-your-own-x/record_recent_commits/meta.json",
    "content": "{\n  \"task_id\": \"record_recent_commits\",\n  \"task_name\": \"Record Recent Commits\",\n  \"category_id\": \"build-your-own-x\",\n  \"category_name\": \"Build Your Own X (Easy)\",\n  \"description\": \"Summarize the latest five commits by opening an issue with their SHAs, authors, and subjects.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"commits\",\n    \"issue\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/build-your-own-x\",\n    \"stateOriginalUrl\": \"https://github.com/codecrafters-io/build-your-own-x\"\n  }\n}\n"
  },
  {
    "path": "tasks/github/easy/build-your-own-x/record_recent_commits/verify.py",
    "content": "import os\nimport sys\nfrom typing import List, Optional\n\nimport requests\nfrom dotenv import load_dotenv\n\nREPO_NAME = \"build-your-own-x\"\nBRANCH = \"master\"\nISSUE_TITLE = \"Latest 5 Commit Snapshot\"\nEXPECTED_HEADER = \"latest 5 commits (newest first)\"\n\n\ndef _request(url: str, token: str) -> Optional[requests.Response]:\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n    try:\n        response = requests.get(url, headers=headers, timeout=30)\n    except Exception as exc:  # pragma: no cover - network errors\n        print(f\"Request error for {url}: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} for {url}\",\n            file=sys.stderr,\n        )\n        return None\n\n    return response\n\n\ndef _fetch_commits(org: str, token: str) -> Optional[List[dict]]:\n    url = (\n        f\"https://api.github.com/repos/{org}/{REPO_NAME}/commits\"\n        f\"?per_page=5&sha={BRANCH}\"\n    )\n    response = _request(url, token)\n    if response is None:\n        return None\n\n    try:\n        return response.json()\n    except Exception as exc:\n        print(f\"Unable to parse commits: {exc}\", file=sys.stderr)\n        return None\n\n\ndef _find_issue(org: str, token: str) -> Optional[dict]:\n    page = 1\n    while True:\n        url = (\n            f\"https://api.github.com/repos/{org}/{REPO_NAME}/issues\"\n            f\"?state=open&per_page=100&page={page}\"\n        )\n        response = _request(url, token)\n        if response is None:\n            return None\n\n        try:\n            issues = response.json()\n        except Exception as exc:\n            print(f\"Unable to parse issues: {exc}\", file=sys.stderr)\n            return None\n\n        if not issues:\n            break\n\n        for issue in issues:\n            if 
issue.get(\"title\") == ISSUE_TITLE:\n                # Exclude pull requests\n                if \"pull_request\" in issue:\n                    continue\n                return issue\n\n        page += 1\n\n    print(\n        f\"No open issue titled '{ISSUE_TITLE}' was found.\",\n        file=sys.stderr,\n    )\n    return None\n\n\ndef verify() -> bool:\n    load_dotenv(\".mcp_env\")\n\n    token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not token:\n        print(\"MCP_GITHUB_TOKEN is missing\", file=sys.stderr)\n        return False\n\n    if not org:\n        print(\"GITHUB_EVAL_ORG is missing\", file=sys.stderr)\n        return False\n\n    commits = _fetch_commits(org, token)\n    if commits is None:\n        return False\n\n    if len(commits) < 5:\n        print(\"Less than five commits returned; cannot verify.\", file=sys.stderr)\n        return False\n\n    issue = _find_issue(org, token)\n    if issue is None:\n        return False\n\n    if issue.get(\"title\") != ISSUE_TITLE:\n        print(\n            f\"Found issue title '{issue.get('title')}', expected '{ISSUE_TITLE}'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    if (issue.get(\"state\") or \"\").lower() != \"open\":\n        print(\"Issue must remain open.\", file=sys.stderr)\n        return False\n\n    body = issue.get(\"body\") or \"\"\n    if not body.strip():\n        print(\"Issue body is empty.\", file=sys.stderr)\n        return False\n\n    lines = [line.strip() for line in body.splitlines() if line.strip()]\n    if not lines:\n        print(\"Issue body contains no content.\", file=sys.stderr)\n        return False\n\n    header = lines[0].lower()\n    if header != EXPECTED_HEADER:\n        print(\n            \"Issue body must start with 'Latest 5 commits (newest first)'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    entries = lines[1:]\n    if len(entries) != 5:\n        
print(\"Issue body must list exactly five commit entries.\", file=sys.stderr)\n        return False\n\n    for idx in range(5):\n        commit = commits[idx]\n        sha = commit.get(\"sha\", \"\")\n        subject = (commit.get(\"commit\", {}).get(\"message\", \"\").splitlines()[0]).strip()\n        author = commit.get(\"commit\", {}).get(\"author\", {}).get(\"name\", \"\")\n\n        expected_line = f\"{idx + 1}. {sha} | {author} | {subject}\"\n        actual_line = entries[idx]\n        if actual_line != expected_line:\n            print(\n                f\"Entry {idx + 1} mismatch.\\nExpected: {expected_line}\\nFound:    {actual_line}\",\n                file=sys.stderr,\n            )\n            return False\n\n    print(\"Issue contains the expected latest five commits.\")\n    return True\n\n\nif __name__ == \"__main__\":\n    sys.exit(0 if verify() else 1)\n"
  },
  {
    "path": "tasks/github/easy/claude-code/add_terminal_shortcuts_doc/description.md",
    "content": "Use the GitHub MCP tools to edit the `mcpmark-eval/claude-code` repository.\n\n1. On the `main` branch, add a new file `docs/TERMINAL_SHORTCUTS.md` containing exactly:\n\n```\n# Terminal Shortcuts\n\n- `claude plan`: Outline the next steps before making edits.\n- `claude apply`: Run the plan and apply the queued changes.\n- `claude check`: Re-run relevant tests or linters to validate the edits.\n```\n\n2. Commit with the message `docs: add terminal shortcuts reference` and push directly to `main`.\n"
  },
  {
    "path": "tasks/github/easy/claude-code/add_terminal_shortcuts_doc/meta.json",
    "content": "{\n  \"task_id\": \"add_terminal_shortcuts_doc\",\n  \"task_name\": \"Add Terminal Shortcuts Doc\",\n  \"category_id\": \"claude-code\",\n  \"category_name\": \"Claude Code (Easy)\",\n  \"description\": \"Add a simple terminal shortcuts reference file to docs/TERMINAL_SHORTCUTS.md and push it to main.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"docs update\",\n    \"content creation\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/claude-code\",\n    \"stateOriginalUrl\": \"https://github.com/anthropics/claude-code\"\n  }\n}\n"
  },
  {
    "path": "tasks/github/easy/claude-code/add_terminal_shortcuts_doc/verify.py",
    "content": "import base64\nimport os\nimport sys\nfrom typing import Optional\n\nimport requests\nfrom dotenv import load_dotenv\n\nREPO_NAME = \"claude-code\"\nTARGET_FILE = \"docs/TERMINAL_SHORTCUTS.md\"\nBRANCH = \"main\"\nEXPECTED_CONTENT = \"\"\"# Terminal Shortcuts\n\n- `claude plan`: Outline the next steps before making edits.\n- `claude apply`: Run the plan and apply the queued changes.\n- `claude check`: Re-run relevant tests or linters to validate the edits.\n\"\"\".strip()\n\n\ndef _download_file(org: str, token: str) -> Optional[str]:\n    url = f\"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{TARGET_FILE}?ref={BRANCH}\"\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n\n    try:\n        response = requests.get(url, headers=headers, timeout=30)\n    except Exception as exc:\n        print(f\"Request error for {TARGET_FILE}: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} when fetching {TARGET_FILE}\",\n            file=sys.stderr,\n        )\n        return None\n\n    data = response.json()\n    try:\n        content = base64.b64decode(data.get(\"content\", \"\")).decode(\"utf-8\").strip()\n    except Exception as exc:\n        print(f\"Unable to decode {TARGET_FILE}: {exc}\", file=sys.stderr)\n        return None\n\n    return content\n\n\ndef verify() -> bool:\n    load_dotenv(\".mcp_env\")\n\n    token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not token:\n        print(\"MCP_GITHUB_TOKEN is missing\", file=sys.stderr)\n        return False\n\n    if not org:\n        print(\"GITHUB_EVAL_ORG is missing\", file=sys.stderr)\n        return False\n\n    print(f\"Checking {TARGET_FILE} in remote repository...\")\n    content = _download_file(org, token)\n\n    if content is None:\n        return 
False\n\n    normalized = content.strip()\n    if normalized != EXPECTED_CONTENT:\n        print(\"TERMINAL_SHORTCUTS.md does not match the expected content.\", file=sys.stderr)\n        print(\"Expected:\")\n        print(EXPECTED_CONTENT)\n        print(\"Found:\")\n        print(content)\n        return False\n\n    print(\"All checks passed! docs/TERMINAL_SHORTCUTS.md contains the expected text.\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/easy/claude-code/thank_docker_pr_author/description.md",
    "content": "Use the GitHub MCP tools to comment on the pull request in `mcpmark-eval/claude-code` that proposes automating Docker image builds with GitHub Actions.\n\n1. Skim the PR description so you understand it’s the Docker workflow automation proposal.\n2. Add a new comment on that PR that thanks the author and contains all of these keywords: `Docker workflow`, `automation`, `review`.\n"
  },
  {
    "path": "tasks/github/easy/claude-code/thank_docker_pr_author/meta.json",
    "content": "{\n  \"task_id\": \"thank_docker_pr_author\",\n  \"task_name\": \"Thank Docker PR Author\",\n  \"category_id\": \"claude-code\",\n  \"category_name\": \"Claude Code (Easy)\",\n  \"description\": \"Leave a thank-you comment on the Docker automation PR mentioning the workflow automation review keywords.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"pull request\",\n    \"comment\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/claude-code\",\n    \"stateOriginalUrl\": \"https://github.com/anthropics/claude-code\"\n  }\n}\n"
  },
  {
    "path": "tasks/github/easy/claude-code/thank_docker_pr_author/verify.py",
    "content": "import os\nimport sys\nfrom typing import Optional, Union\n\nimport requests\nfrom dotenv import load_dotenv\n\nREPO_NAME = \"claude-code\"\nPR_NUMBER = 53\nKEYWORDS = [\"docker workflow\", \"automation\", \"review\"]\n\n\ndef _github_get(org: str, token: str, path: str) -> Optional[Union[list, dict]]:\n    url = f\"https://api.github.com/repos/{org}/{REPO_NAME}/{path}\"\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n\n    try:\n        response = requests.get(url, headers=headers, timeout=30)\n    except Exception as exc:\n        print(f\"Request error for {path}: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} for {path}\",\n            file=sys.stderr,\n        )\n        return None\n\n    return response.json()\n\n\ndef verify() -> bool:\n    load_dotenv(\".mcp_env\")\n\n    token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not token:\n        print(\"MCP_GITHUB_TOKEN is missing\", file=sys.stderr)\n        return False\n\n    if not org:\n        print(\"GITHUB_EVAL_ORG is missing\", file=sys.stderr)\n        return False\n\n    comments = _github_get(org, token, f\"issues/{PR_NUMBER}/comments?per_page=100\")\n    if comments is None:\n        return False\n\n    for comment in comments:\n        body = comment.get(\"body\", \"\").strip()\n        lowered = body.lower()\n        if not body:\n            continue\n\n        if not any(thank_word in lowered for thank_word in (\"thanks\", \"thank you\")):\n            continue\n\n        if all(keyword in lowered for keyword in KEYWORDS):\n            print(\"All checks passed! 
Keyword-rich thank-you comment found on PR #53.\")\n            return True\n\n    print(\n        \"Did not find a thank-you comment containing all required keywords on PR #53.\",\n        file=sys.stderr,\n    )\n    return False\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/easy/claude-code/triage_missing_tool_result_issue/description.md",
    "content": "Use the GitHub MCP tools to triage issue #24 in the `mcpmark-eval/claude-code` repository.\n\n1. Read the issue details to understand the reported API error.\n2. Add a triage comment on the issue that explicitly includes all of the following keywords: `invalid_request_error`, `toolu_01Kjp7i9iF3xJ3z9aH4pSaRw`, `tool_result`, `tool_use`. Use them while confirming the API error and asking for the missing result block.\n3. Remove the `area:packaging` label from issue #24.\n"
  },
  {
    "path": "tasks/github/easy/claude-code/triage_missing_tool_result_issue/meta.json",
    "content": "{\n  \"task_id\": \"triage_missing_tool_result_issue\",\n  \"task_name\": \"Triage Missing Tool Result Issue\",\n  \"category_id\": \"claude-code\",\n  \"category_name\": \"Claude Code (Easy)\",\n  \"description\": \"Leave a predefined triage comment on issue #24 and remove the area:packaging label.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"issue triage\",\n    \"github\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/claude-code\",\n    \"stateOriginalUrl\": \"https://github.com/anthropics/claude-code\"\n  }\n}\n"
  },
  {
    "path": "tasks/github/easy/claude-code/triage_missing_tool_result_issue/verify.py",
    "content": "import os\nimport sys\nfrom typing import Optional\n\nimport requests\nfrom dotenv import load_dotenv\n\nREPO_NAME = \"claude-code\"\nISSUE_NUMBER = 24\nKEYWORDS = [\n    \"invalid_request_error\",\n    \"toolu_01kjp7i9if3xj3z9ah4psarw\",\n    \"tool_result\",\n    \"tool_use\",\n]\nREMOVED_LABEL = \"area:packaging\"\n\n\ndef _github_get(org: str, token: str, path: str) -> Optional[dict]:\n    url = f\"https://api.github.com/repos/{org}/{REPO_NAME}/{path}\"\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n\n    try:\n        response = requests.get(url, headers=headers, timeout=30)\n    except Exception as exc:\n        print(f\"Request error for {path}: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} for {path}\",\n            file=sys.stderr,\n        )\n        return None\n\n    return response.json()\n\n\ndef verify() -> bool:\n    load_dotenv(\".mcp_env\")\n\n    token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not token:\n        print(\"MCP_GITHUB_TOKEN is missing\", file=sys.stderr)\n        return False\n\n    if not org:\n        print(\"GITHUB_EVAL_ORG is missing\", file=sys.stderr)\n        return False\n\n    issue = _github_get(org, token, f\"issues/{ISSUE_NUMBER}\")\n    if issue is None:\n        return False\n\n    label_names = {label.get(\"name\", \"\") for label in issue.get(\"labels\", [])}\n    if REMOVED_LABEL in label_names:\n        print(f\"Label '{REMOVED_LABEL}' is still present on issue #{ISSUE_NUMBER}.\", file=sys.stderr)\n        return False\n\n    comments = _github_get(org, token, f\"issues/{ISSUE_NUMBER}/comments?per_page=100\")\n    if comments is None:\n        return False\n\n    found = False\n    for comment in comments:\n        body = comment.get(\"body\", 
\"\").strip().lower()\n        if all(keyword in body for keyword in KEYWORDS):\n            found = True\n            break\n\n    if not found:\n        print(\n            \"Did not find a triage comment containing all required keywords.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"All checks passed! Comment added and label removed.\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/easy/mcpmark-cicd/basic_ci_checks/description.md",
    "content": "Use the GitHub MCP tools to update the `mcpmark-eval/mcpmark-cicd` repository with a very small CI workflow.\n\n## Goal\nAdd a GitHub Actions workflow named **Basic CI Checks** that automatically runs linting and unit tests any time work is pushed to or proposed for the `main` branch.\n\n## Requirements\n1. Create a branch called `basic-ci-checks` from `main`.\n2. Add `.github/workflows/basic-ci.yml` with the following characteristics:\n   - Workflow name: `Basic CI Checks`.\n   - Trigger on both `push` and `pull_request`, limited to the `main` branch.\n   - Single job called `quality-checks` that runs on `ubuntu-latest` and uses Node.js 18 (`actions/setup-node`).\n   - Steps must include `actions/checkout`, `npm ci`, `npm run lint`, and `npm test` in that order after Node is configured.\n3. Commit the workflow to your branch, open a pull request titled `Add basic CI checks`, and merge it so the workflow exists on `main`.\n\nThat's it—no caching, matrix builds, or issue automation required. Keep it lightweight and focused on verifying the existing lint/test scripts.\n"
  },
  {
    "path": "tasks/github/easy/mcpmark-cicd/basic_ci_checks/meta.json",
    "content": "{\n  \"task_id\": \"basic_ci_checks\",\n  \"task_name\": \"Basic CI Checks\",\n  \"category_id\": \"mcpmark-cicd\",\n  \"category_name\": \"MCPMark CI/CD (Easy)\",\n  \"description\": \"Add a lightweight GitHub Actions workflow that runs npm ci, npm run lint, and npm test whenever main is updated or receives a pull request.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"ci/cd\",\n    \"github actions\",\n    \"workflow basics\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/mcpmark-cicd\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/github/easy/mcpmark-cicd/basic_ci_checks/verify.py",
    "content": "import base64\nimport os\nimport sys\nfrom typing import List, Optional\n\nimport requests\nfrom dotenv import load_dotenv\n\nREPO_NAME = \"mcpmark-cicd\"\nWORKFLOW_PATH = \".github/workflows/basic-ci.yml\"\nBRANCH = \"main\"\n\n\ndef _download_file(org: str, token: str, path: str) -> Optional[str]:\n    url = f\"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{path}?ref={BRANCH}\"\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n\n    try:\n        response = requests.get(url, headers=headers, timeout=30)\n    except Exception as exc:  # pragma: no cover - network failure\n        print(f\"Request error for {path}: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} when fetching {path}\",\n            file=sys.stderr,\n        )\n        return None\n\n    data = response.json()\n    try:\n        content = base64.b64decode(data.get(\"content\", \"\")).decode(\"utf-8\")\n    except Exception as exc:\n        print(f\"Unable to decode {path}: {exc}\", file=sys.stderr)\n        return None\n\n    return content\n\n\ndef _line_index(lines: List[str], needle: str) -> int:\n    for idx, line in enumerate(lines):\n        if needle in line:\n            return idx\n    return -1\n\n\ndef verify() -> bool:\n    load_dotenv(\".mcp_env\")\n\n    token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not token:\n        print(\"MCP_GITHUB_TOKEN is missing\", file=sys.stderr)\n        return False\n\n    if not org:\n        print(\"GITHUB_EVAL_ORG is missing\", file=sys.stderr)\n        return False\n\n    content = _download_file(org, token, WORKFLOW_PATH)\n    if content is None:\n        print(\n            \"Workflow file .github/workflows/basic-ci.yml was not found on main\",\n            file=sys.stderr,\n        
)\n        return False\n\n    normalized = content.lower()\n    normalized_lines = [line.strip().lower() for line in content.splitlines()]\n\n    errors = []\n\n    required_snippets = {\n        \"workflow name\": \"name: basic ci checks\",\n        \"job name\": \"quality-checks\",\n        \"checkout step\": \"actions/checkout\",\n        \"setup-node step\": \"actions/setup-node\",\n        \"node version\": \"node-version: 18\",\n        \"ubuntu runner\": \"runs-on: ubuntu-latest\",\n        \"push trigger\": \"push:\",\n        \"pull_request trigger\": \"pull_request:\",\n    }\n\n    for label, snippet in required_snippets.items():\n        if snippet not in normalized:\n            errors.append(f\"Missing {label} ({snippet}) in workflow\")\n\n    branch_limited = \"- main\" in normalized or \"[main]\" in normalized\n    if not branch_limited:\n        errors.append(\"Workflow triggers must be limited to the main branch\")\n\n    for command in [\"npm ci\", \"npm run lint\", \"npm test\"]:\n        if command not in normalized:\n            errors.append(f\"Missing '{command}' step\")\n\n    # Ensure npm commands happen in the expected order\n    ci_index = _line_index(normalized_lines, \"npm ci\")\n    lint_index = _line_index(normalized_lines, \"npm run lint\")\n    test_index = _line_index(normalized_lines, \"npm test\")\n\n    if ci_index == -1 or lint_index == -1 or test_index == -1:\n        errors.append(\"Could not find all npm commands to validate ordering\")\n    else:\n        if not (ci_index < lint_index < test_index):\n            errors.append(\"npm commands must run in order: ci -> lint -> test\")\n\n    if errors:\n        print(\"Verification failed:\")\n        for err in errors:\n            print(f\" - {err}\", file=sys.stderr)\n        return False\n\n    print(\"✅ basic-ci workflow found with required steps and triggers\")\n    return True\n\n\nif __name__ == \"__main__\":\n    sys.exit(0 if verify() else 1)\n"
  },
  {
    "path": "tasks/github/easy/mcpmark-cicd/issue_lint_guard/description.md",
    "content": "Use the GitHub MCP tools to wire up a tiny issue-triggered lint check for `mcpmark-eval/mcpmark-cicd`.\n\n## Goal\nWhenever a maintainer opens the tracking issue **Lint workflow check**, the repo should automatically run `npm run lint` via GitHub Actions. Keep it simple—just prove the workflow fires for issue events.\n\n## Requirements\n1. Create a branch called `issue-lint-workflow` from `main`.\n2. Add `.github/workflows/issue-lint.yml` with:\n   - Workflow name **Issue Lint Guard**.\n   - Trigger: `issues` with `types: [opened]` (no push/PR triggers).\n   - Single job `lint` on `ubuntu-latest` using Node.js 18 via `actions/setup-node`.\n   - Steps in order: `actions/checkout`, `npm ci`, `npm run lint`.\n3. Open a pull request titled `Add issue lint workflow`, get it merged so the workflow exists on `main`.\n4. After the merge, open a new issue titled **Lint workflow check** to trigger the workflow and wait until the matching run finishes successfully. Leave the issue open; we only care that the run went green.\n"
  },
  {
    "path": "tasks/github/easy/mcpmark-cicd/issue_lint_guard/meta.json",
    "content": "{\n  \"task_id\": \"issue_lint_guard\",\n  \"task_name\": \"Issue Lint Guard\",\n  \"category_id\": \"mcpmark-cicd\",\n  \"category_name\": \"MCPMark CI/CD (Easy)\",\n  \"description\": \"Add an issue-triggered lint workflow and prove it runs when the tracking issue is opened.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"ci/cd\",\n    \"github actions\",\n    \"issues\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/mcpmark-cicd\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/github/easy/mcpmark-cicd/issue_lint_guard/verify.py",
    "content": "import base64\nimport os\nimport sys\nimport time\nfrom typing import List, Optional\n\nimport requests\nfrom dotenv import load_dotenv\n\nREPO_NAME = \"mcpmark-cicd\"\nWORKFLOW_PATH = \".github/workflows/issue-lint.yml\"\nWORKFLOW_FILE = \"issue-lint.yml\"\nTARGET_BRANCH = \"main\"\nTRACKING_ISSUE_TITLE = \"Lint workflow check\"\nMAX_POLL_ATTEMPTS = 12\nPOLL_INTERVAL_SECONDS = 10\n\n\ndef _download_file(org: str, token: str, path: str) -> Optional[str]:\n    url = f\"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{path}?ref={TARGET_BRANCH}\"\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n\n    try:\n        response = requests.get(url, headers=headers, timeout=30)\n    except Exception as exc:  # pragma: no cover - network error handling\n        print(f\"Request error for {path}: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} when fetching {path}\",\n            file=sys.stderr,\n        )\n        return None\n\n    data = response.json()\n    try:\n        content = base64.b64decode(data.get(\"content\", \"\")).decode(\"utf-8\")\n    except Exception as exc:  # pragma: no cover - decode error\n        print(f\"Unable to decode {path}: {exc}\", file=sys.stderr)\n        return None\n\n    return content\n\n\ndef _line_index(lines: List[str], needle: str) -> int:\n    for idx, line in enumerate(lines):\n        if needle in line:\n            return idx\n    return -1\n\n\ndef _list_workflow_runs(org: str, token: str) -> Optional[List[dict]]:\n    url = (\n        f\"https://api.github.com/repos/{org}/{REPO_NAME}/actions/workflows/{WORKFLOW_FILE}/runs\"\n        f\"?event=issues&per_page=15\"\n    )\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n\n    try:\n        
response = requests.get(url, headers=headers, timeout=30)\n    except Exception as exc:  # pragma: no cover - network error handling\n        print(f\"Request error when listing workflow runs: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} when listing workflow runs\",\n            file=sys.stderr,\n        )\n        return None\n\n    data = response.json()\n    return data.get(\"workflow_runs\", [])\n\n\ndef _wait_for_tracking_issue_run(org: str, token: str) -> bool:\n    for attempt in range(1, MAX_POLL_ATTEMPTS + 1):\n        runs = _list_workflow_runs(org, token)\n        if runs is None:\n            return False\n\n        relevant = [\n            run\n            for run in runs\n            if run.get(\"display_title\") == TRACKING_ISSUE_TITLE\n        ]\n\n        if not relevant:\n            print(\n                f\"[{attempt}/{MAX_POLL_ATTEMPTS}] No Issue Lint Guard run for '{TRACKING_ISSUE_TITLE}' yet; waiting...\"\n            )\n            time.sleep(POLL_INTERVAL_SECONDS)\n            continue\n\n        latest = relevant[0]\n        status = latest.get(\"status\")\n        conclusion = latest.get(\"conclusion\")\n        html_url = latest.get(\"html_url\")\n\n        if status != \"completed\":\n            print(\n                f\"[{attempt}/{MAX_POLL_ATTEMPTS}] Latest run is '{status}'; waiting for completion...\"\n            )\n            time.sleep(POLL_INTERVAL_SECONDS)\n            continue\n\n        if conclusion != \"success\":\n            print(\n                \"Latest Issue Lint Guard run finished without success.\",\n                file=sys.stderr,\n            )\n            print(f\"Status: {status}, Conclusion: {conclusion}\", file=sys.stderr)\n            if html_url:\n                print(f\"Run URL: {html_url}\", file=sys.stderr)\n            return False\n\n        if html_url:\n            print(f\"✅ 
Latest Issue Lint Guard run succeeded: {html_url}\")\n        else:\n            print(\"✅ Latest Issue Lint Guard run succeeded\")\n        return True\n\n    print(\n        f\"Timed out waiting for a successful Issue Lint Guard run for '{TRACKING_ISSUE_TITLE}'\",\n        file=sys.stderr,\n    )\n    return False\n\n\ndef verify() -> bool:\n    load_dotenv(\".mcp_env\")\n\n    token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not token:\n        print(\"MCP_GITHUB_TOKEN is missing\", file=sys.stderr)\n        return False\n\n    if not org:\n        print(\"GITHUB_EVAL_ORG is missing\", file=sys.stderr)\n        return False\n\n    content = _download_file(org, token, WORKFLOW_PATH)\n    if content is None:\n        print(\n            \"Workflow file .github/workflows/issue-lint.yml was not found on main\",\n            file=sys.stderr,\n        )\n        return False\n\n    normalized = content.lower()\n    normalized_lines = [line.strip().lower() for line in content.splitlines()]\n\n    errors = []\n\n    required_snippets = {\n        \"workflow name\": \"name: issue lint guard\",\n        \"issues trigger\": \"issues:\",\n        \"types opened\": \"types:\",\n        \"job name\": \"lint:\",\n        \"runner\": \"runs-on: ubuntu-latest\",\n        \"checkout\": \"actions/checkout\",\n        \"setup-node\": \"actions/setup-node\",\n        \"node version\": \"node-version: 18\",\n        \"npm ci\": \"npm ci\",\n        \"npm run lint\": \"npm run lint\",\n    }\n\n    for label, snippet in required_snippets.items():\n        if snippet not in normalized:\n            errors.append(f\"Missing {label} ({snippet}) in workflow\")\n\n    types_line = next(\n        (line for line in normalized_lines if \"types\" in line and \"opened\" in line),\n        None,\n    )\n    if types_line is None:\n        errors.append(\"issues trigger must limit types to include 'opened'\")\n\n    checkout_idx = 
_line_index(normalized_lines, \"actions/checkout\")\n    setup_idx = _line_index(normalized_lines, \"actions/setup-node\")\n    ci_idx = _line_index(normalized_lines, \"npm ci\")\n    lint_idx = _line_index(normalized_lines, \"npm run lint\")\n\n    if -1 in [checkout_idx, setup_idx, ci_idx, lint_idx]:\n        errors.append(\"Could not determine workflow step ordering\")\n    else:\n        if not (checkout_idx < setup_idx < ci_idx < lint_idx):\n            errors.append(\n                \"Steps must run in order: checkout -> setup-node -> npm ci -> npm run lint\"\n            )\n\n    if errors:\n        print(\"Workflow validation failed:\")\n        for err in errors:\n            print(f\" - {err}\", file=sys.stderr)\n        return False\n\n    print(\"✅ issue-lint workflow file looks correct\")\n\n    return _wait_for_tracking_issue_run(org, token)\n\n\nif __name__ == \"__main__\":\n    sys.exit(0 if verify() else 1)\n"
  },
  {
    "path": "tasks/github/easy/mcpmark-cicd/nightly_health_check/description.md",
    "content": "Use the GitHub MCP tools to add a tiny bit of automation to `mcpmark-eval/mcpmark-cicd`.\n\nGoal: every night the repo should run the existing health check script.\n\nDo the usual branch/PR flow with a branch named `nightly-health` and a PR titled `Add nightly health check`.\n\nCreate `.github/workflows/nightly-health.yml` with:\n- workflow name `Nightly Health Check`\n- triggers: `workflow_dispatch` plus a cron schedule `0 2 * * *`\n- one job called `health-check` on `ubuntu-latest`\n- use Node.js 18 via `actions/setup-node`\n- steps in order: checkout, npm ci, `npm run health-check`\n\nMerge the PR so the workflow lives on `main`.\n"
  },
  {
    "path": "tasks/github/easy/mcpmark-cicd/nightly_health_check/meta.json",
    "content": "{\n  \"task_id\": \"nightly_health_check\",\n  \"task_name\": \"Nightly Health Check\",\n  \"category_id\": \"mcpmark-cicd\",\n  \"category_name\": \"MCPMark CI/CD (Easy)\",\n  \"description\": \"Add a scheduled workflow that runs the npm health check script every night.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"ci/cd\",\n    \"github actions\",\n    \"scheduling\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/mcpmark-cicd\",\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/github/easy/mcpmark-cicd/nightly_health_check/verify.py",
    "content": "import base64\nimport os\nimport sys\nfrom typing import List, Optional\n\nimport requests\nfrom dotenv import load_dotenv\n\nREPO_NAME = \"mcpmark-cicd\"\nWORKFLOW_PATH = \".github/workflows/nightly-health.yml\"\nBRANCH = \"main\"\n\n\ndef _download_file(org: str, token: str, path: str) -> Optional[str]:\n    url = f\"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{path}?ref={BRANCH}\"\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n\n    try:\n        response = requests.get(url, headers=headers, timeout=30)\n    except Exception as exc:  # pragma: no cover\n        print(f\"Request error for {path}: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} when fetching {path}\",\n            file=sys.stderr,\n        )\n        return None\n\n    data = response.json()\n    try:\n        content = base64.b64decode(data.get(\"content\", \"\")).decode(\"utf-8\")\n    except Exception as exc:\n        print(f\"Unable to decode {path}: {exc}\", file=sys.stderr)\n        return None\n\n    return content\n\n\ndef _line_index(lines: List[str], needle: str) -> int:\n    for idx, line in enumerate(lines):\n        if needle in line:\n            return idx\n    return -1\n\n\ndef verify() -> bool:\n    load_dotenv(\".mcp_env\")\n\n    token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not token:\n        print(\"MCP_GITHUB_TOKEN is missing\", file=sys.stderr)\n        return False\n\n    if not org:\n        print(\"GITHUB_EVAL_ORG is missing\", file=sys.stderr)\n        return False\n\n    content = _download_file(org, token, WORKFLOW_PATH)\n    if content is None:\n        print(\n            \"Workflow file .github/workflows/nightly-health.yml was not found on main\",\n            file=sys.stderr,\n        )\n   
     return False\n\n    normalized = content.lower()\n    normalized_lines = [line.strip().lower() for line in content.splitlines()]\n\n    errors = []\n\n    required_bits = {\n        \"workflow name\": \"name: nightly health check\",\n        \"workflow_dispatch trigger\": \"workflow_dispatch:\",\n        \"schedule\": \"schedule:\",\n        \"cron\": \"0 2 * * *\",\n        \"job name\": \"health-check:\",\n        \"runner\": \"runs-on: ubuntu-latest\",\n        \"checkout\": \"actions/checkout\",\n        \"setup-node\": \"actions/setup-node\",\n        \"node version\": \"node-version: 18\",\n        \"npm ci\": \"npm ci\",\n        \"health script\": \"npm run health-check\",\n    }\n\n    for label, snippet in required_bits.items():\n        if snippet not in normalized:\n            errors.append(f\"Missing {label} ({snippet}) in workflow\")\n\n    schedule_index = _line_index(normalized_lines, \"schedule:\")\n    cron_index = _line_index(normalized_lines, \"- cron: '0 2 * * *'\")\n    if cron_index == -1:\n        cron_index = _line_index(normalized_lines, \"cron: '0 2 * * *'\")\n    if cron_index == -1:\n        cron_index = _line_index(normalized_lines, 'cron: \"0 2 * * *\"')\n\n    if schedule_index == -1 or cron_index == -1 or cron_index < schedule_index:\n        errors.append(\"Cron expression must appear under schedule trigger\")\n\n    ci_index = _line_index(normalized_lines, \"npm ci\")\n    health_index = _line_index(normalized_lines, \"npm run health-check\")\n    if ci_index == -1 or health_index == -1:\n        errors.append(\"npm ci and npm run health-check must both appear\")\n    else:\n        if not ci_index < health_index:\n            errors.append(\"npm ci must run before npm run health-check\")\n\n    if errors:\n        print(\"Verification failed:\")\n        for err in errors:\n            print(f\" - {err}\", file=sys.stderr)\n        return False\n\n    print(\"✅ nightly-health workflow found with required schedule and 
steps\")\n    return True\n\n\nif __name__ == \"__main__\":\n    sys.exit(0 if verify() else 1)\n"
  },
  {
    "path": "tasks/github/easy/missing-semester/count_translations/description.md",
    "content": "Use the GitHub MCP tools to inspect the `mcpmark-eval/missing-semester` repository.\n\n1. Navigate the repository to find the list of community translations that appears on the site's home page.\n2. Determine how many translation links are currently listed.\n3. Record both the count and the specific file you used as evidence by creating an `ANSWER.md` file in the repository root that contains exactly:\n\n```\nTranslation Count: <number>\nSource: <filename>\n```\n\n4. Commit the new file and push the change to `master`.\n"
  },
  {
    "path": "tasks/github/easy/missing-semester/count_translations/meta.json",
    "content": "{\n  \"task_id\": \"count_translations\",\n  \"task_name\": \"Count Translations\",\n  \"category_id\": \"missing-semester\",\n  \"category_name\": \"Missing Semester (Easy)\",\n  \"description\": \"Use GitHub MCP to count the translations listed on the home page, record the value in ANSWER.md, and push the change to master.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"content search\",\n    \"answer file\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/missing-semester\",\n    \"stateOriginalUrl\": \"https://github.com/missing-semester/missing-semester\"\n  }\n}\n"
  },
  {
    "path": "tasks/github/easy/missing-semester/count_translations/verify.py",
    "content": "import base64\nimport os\nimport sys\nfrom typing import Optional\n\nimport requests\nfrom dotenv import load_dotenv\n\nREPO_NAME = \"missing-semester\"\nTARGET_FILE = \"ANSWER.md\"\nBRANCH = \"master\"\nEXPECTED_COUNT = \"translation count: 14\"\nEXPECTED_SOURCE = \"source: index.md\"\n\n\ndef _download_file(org: str, token: str, path: str) -> Optional[str]:\n    url = f\"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{path}?ref={BRANCH}\"\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n\n    try:\n        response = requests.get(url, headers=headers, timeout=30)\n    except Exception as exc:\n        print(f\"Request error for {path}: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} when fetching {path}\",\n            file=sys.stderr,\n        )\n        return None\n\n    data = response.json()\n    try:\n        content = base64.b64decode(data.get(\"content\", \"\")).decode(\"utf-8\").strip()\n    except Exception as exc:\n        print(f\"Unable to decode {path}: {exc}\", file=sys.stderr)\n        return None\n\n    return content\n\n\ndef verify() -> bool:\n    load_dotenv(\".mcp_env\")\n\n    token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not token:\n        print(\"MCP_GITHUB_TOKEN is missing\", file=sys.stderr)\n        return False\n\n    if not org:\n        print(\"GITHUB_EVAL_ORG is missing\", file=sys.stderr)\n        return False\n\n    print(\"Checking ANSWER.md in remote repository...\")\n    answer_content = _download_file(org, token, TARGET_FILE)\n\n    if answer_content is None:\n        return False\n\n    normalized = \" \".join(answer_content.lower().split())\n\n    if EXPECTED_COUNT not in normalized:\n        print(\n            \"ANSWER.md must include 'Translation Count: 
14' (spacing/casing ignored).\",\n            file=sys.stderr,\n        )\n        print(\"Found:\")\n        print(answer_content)\n        return False\n\n    if EXPECTED_SOURCE not in normalized:\n        print(\n            \"ANSWER.md must include 'Source: index.md' (spacing/casing ignored).\",\n            file=sys.stderr,\n        )\n        print(\"Found:\")\n        print(answer_content)\n        return False\n\n    print(\"All checks passed! ANSWER.md contains the expected count and source.\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/easy/missing-semester/find_ga_tracking_id/description.md",
    "content": "Use the GitHub MCP tools to inspect the `mcpmark-eval/missing-semester` repository.\n\n1. Determine the Analytics tracking ID that the Missing Semester site declares in its configuration.\n2. Create an `ANSWER.md` file in the repository root that contains exactly:\n\n```\nAnalytics Tracking ID: <value you found>\n```\n\n3. Commit the new file and push the change to `master`.\n"
  },
  {
    "path": "tasks/github/easy/missing-semester/find_ga_tracking_id/meta.json",
    "content": "{\n  \"task_id\": \"find_ga_tracking_id\",\n  \"task_name\": \"Find GA Tracking ID\",\n  \"category_id\": \"missing-semester\",\n  \"category_name\": \"Missing Semester (Easy)\",\n  \"description\": \"Use GitHub MCP to discover the single Google Analytics tracking ID declared in the site configuration, write it to ANSWER.md, and push the change to master.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"config search\",\n    \"analytics\",\n    \"answer file\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/missing-semester\",\n    \"stateOriginalUrl\": \"https://github.com/missing-semester/missing-semester\"\n  }\n}\n"
  },
  {
    "path": "tasks/github/easy/missing-semester/find_ga_tracking_id/verify.py",
    "content": "import base64\nimport os\nimport sys\nfrom typing import Optional\n\nimport requests\nfrom dotenv import load_dotenv\n\n# Accept either wording, regardless of casing\nEXPECTED_VARIANTS = {\n    \"google analytics tracking id: g-p7wvhd84d1\",\n    \"analytics tracking id: g-p7wvhd84d1\",\n}\nREPO_NAME = \"missing-semester\"\nTARGET_FILE = \"ANSWER.md\"\nBRANCH = \"master\"\n\n\ndef _download_file(org: str, token: str) -> Optional[str]:\n    url = f\"https://api.github.com/repos/{org}/{REPO_NAME}/contents/{TARGET_FILE}?ref={BRANCH}\"\n    headers = {\n        \"Authorization\": f\"Bearer {token}\",\n        \"Accept\": \"application/vnd.github+json\",\n    }\n\n    try:\n        response = requests.get(url, headers=headers)\n    except Exception as exc:\n        print(f\"Request error for {TARGET_FILE}: {exc}\", file=sys.stderr)\n        return None\n\n    if response.status_code != 200:\n        print(\n            f\"GitHub API returned {response.status_code} when fetching {TARGET_FILE}\",\n            file=sys.stderr,\n        )\n        return None\n\n    data = response.json()\n    try:\n        content = base64.b64decode(data.get(\"content\", \"\")).decode(\"utf-8\").strip()\n    except Exception as exc:\n        print(f\"Unable to decode {TARGET_FILE}: {exc}\", file=sys.stderr)\n        return None\n\n    return content\n\n\ndef verify() -> bool:\n    load_dotenv(\".mcp_env\")\n\n    token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not token:\n        print(\"MCP_GITHUB_TOKEN is missing\", file=sys.stderr)\n        return False\n\n    if not org:\n        print(\"GITHUB_EVAL_ORG is missing\", file=sys.stderr)\n        return False\n\n    print(\"Checking ANSWER.md in remote repository...\")\n    answer_content = _download_file(org, token)\n\n    if answer_content is None:\n        return False\n\n    normalized = answer_content.strip().lower()\n    if normalized not in EXPECTED_VARIANTS:\n    
    print(\"ANSWER.md does not contain an accepted tracking ID format\", file=sys.stderr)\n        print(\"Accepted variants:\", file=sys.stderr)\n        for variant in EXPECTED_VARIANTS:\n            print(f\"  - {variant}\", file=sys.stderr)\n        print(f\"Found: {answer_content}\", file=sys.stderr)\n        return False\n\n    print(\"All checks passed! ANSWER.md matches an accepted content variant.\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/build_your_own_x/find_commit_date/description.md",
    "content": "Find out when the entries in the Voxel Engine section were first created by Daniel Stefanovic. After finding this information, create an ANSWER.md file in the repository with the content being the date in [YYYY]-[MM]-[DD] format (e.g., 2000-06-02)."
  },
  {
    "path": "tasks/github/standard/build_your_own_x/find_commit_date/meta.json",
    "content": "{\n  \"task_id\": \"find_commit_date\",\n  \"task_name\": \"Find Commit Date\",\n  \"category_id\": \"build_your_own_x\",\n  \"category_name\": \"Build Your Own X\",\n  \"description\": \"Find when Voxel Engine entries were first created by Daniel Stefanovic and document the date.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"repository analysis\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/build-your-own-x\",\n    \"stateOriginalUrl\": \"https://github.com/codecrafters-io/build-your-own-x\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/build_your_own_x/find_commit_date/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, Optional, Tuple\nimport base64\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"build-your-own-x\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _get_file_content(\n    file_path: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"build-your-own-x\",\n    ref: str = \"master\",\n) -> Optional[str]:\n    \"\"\"Get the content of a file from the repository.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={ref}\", headers, org, repo\n    )\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef verify_task() -> bool:\n    \"\"\"Verify the find commit data task for Voxel Engine entries.\"\"\"\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN 
environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    print(\"Verifying Voxel Engine commit date task...\")\n\n    # 1. Check if ANSWER.md exists in the repository\n    print(\"1. Checking if ANSWER.md exists...\")\n    content = _get_file_content(\"ANSWER.md\", headers, github_org)\n    if not content:\n        print(\"Error: ANSWER.md not found in repository\", file=sys.stderr)\n        return False\n    print(\"✓ ANSWER.md found\")\n\n    # 2. Check the content format\n    print(\"2. Checking content format...\")\n    content = content.strip()\n    \n    # The expected date when Daniel Stefanovic added Voxel Engine entries\n    # Based on historical records, this should be 2018-07-07\n    expected_date = \"2018-07-07\"\n    \n    # Check if the content matches the expected date format (YYYY-MM-DD)\n    import re\n    date_pattern = r'^\\d{4}-\\d{2}-\\d{2}$'\n    if not re.match(date_pattern, content):\n        print(f\"Error: Invalid date format. Expected YYYY-MM-DD, got: {content}\", file=sys.stderr)\n        return False\n    print(\"✓ Date format is correct\")\n\n    # 3. Verify the date is correct\n    print(\"3. Verifying the date...\")\n    if content != expected_date:\n        print(f\"Error: Incorrect date. Expected {expected_date}, got: {content}\", file=sys.stderr)\n        return False\n    print(f\"✓ Date is correct: {content}\")\n\n    # 4. Verify README.md contains Voxel Engine section\n    print(\"4. 
Checking if README.md contains Voxel Engine section...\")\n    readme_content = _get_file_content(\"README.md\", headers, github_org)\n    if not readme_content:\n        print(\"Error: README.md not found in repository\", file=sys.stderr)\n        return False\n    \n    if \"Voxel Engine\" not in readme_content:\n        print(\"Error: Voxel Engine section not found in README.md\", file=sys.stderr)\n        return False\n    \n    # Check for specific Voxel Engine entries\n    voxel_entries = [\n        \"Let's Make a Voxel Engine\",\n        \"Java Voxel Engine Tutorial\"\n    ]\n    \n    for entry in voxel_entries:\n        if entry not in readme_content:\n            print(f\"Warning: Voxel Engine entry '{entry}' not found in README.md\", file=sys.stderr)\n    \n    print(\"✓ Voxel Engine section found in README.md\")\n\n    print(\"\\n✅ All verification checks passed!\")\n    print(\"Task completed successfully:\")\n    print(f\"  - ANSWER.md created with date: {content}\")\n    print(\"  - Date format is correct (YYYY-MM-DD)\")\n    print(\"  - Date matches expected creation date for Voxel Engine entries by Daniel Stefanovic\")\n    print(\"  - Voxel Engine section exists in README.md\")\n\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify_task()\n    sys.exit(0 if success else 1)"
  },
  {
    "path": "tasks/github/standard/build_your_own_x/find_rag_commit/description.md",
    "content": "Find out the specific commit SHA of adding an entry about \"RAG for Document Search\". After finding this information, create an ANSWER.md file in the repository with the content being the commit SHA (e.g., 023dfa35694db2709057488ad338afdbc89fb226).\n\nHint: It should be in an \"AI model\" section I think."
  },
  {
    "path": "tasks/github/standard/build_your_own_x/find_rag_commit/meta.json",
    "content": "{\n  \"task_id\": \"find_rag_commit\",\n  \"task_name\": \"Find Rag Commit\",\n  \"category_id\": \"build_your_own_x\",\n  \"category_name\": \"Build Your Own X\",\n  \"description\": \"Identify the specific commit SHA that added the RAG for Document Search entry to the repository.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"repository analysis\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/build-your-own-x\",\n    \"stateOriginalUrl\": \"https://github.com/codecrafters-io/build-your-own-x\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/build_your_own_x/find_rag_commit/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, Optional, Tuple\nimport base64\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"build-your-own-x\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _get_file_content(\n    file_path: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"build-your-own-x\",\n    ref: str = \"master\",\n) -> Optional[str]:\n    \"\"\"Get the content of a file from the repository.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={ref}\", headers, org, repo\n    )\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef verify_task() -> bool:\n    \"\"\"Verify the find RAG commit SHA task.\"\"\"\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not 
set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    print(\"Verifying RAG commit SHA task...\")\n\n    # Expected commit SHA for RAG for Document Search\n    expected_sha = \"048cd3b3de70e4b429057891576ea394a50cdf48\"\n\n    # 1. Check if ANSWER.md exists in the repository\n    print(\"1. Checking if ANSWER.md exists...\")\n    content = _get_file_content(\"ANSWER.md\", headers, github_org)\n    if not content:\n        print(\"Error: ANSWER.md not found in repository\", file=sys.stderr)\n        return False\n    print(\"✓ ANSWER.md found\")\n\n    # 2. Check the content matches expected SHA\n    print(\"2. Checking commit SHA...\")\n    content = content.strip()\n    \n    if content != expected_sha:\n        print(f\"Error: Incorrect commit SHA. Expected {expected_sha}, got: {content}\", file=sys.stderr)\n        return False\n    print(\"✓ Commit SHA is correct\")\n\n    # 3. Verify the commit exists\n    print(\"3. Verifying the commit exists...\")\n    success, commit_data = _get_github_api(f\"commits/{content}\", headers, github_org)\n    if not success or not commit_data:\n        print(f\"Error: Commit {content} not found in repository\", file=sys.stderr)\n        return False\n    print(f\"✓ Commit {content} exists\")\n\n    print(\"\\n✅ All verification checks passed!\")\n    print(\"Task completed successfully:\")\n    print(f\"  - ANSWER.md created with correct commit SHA: {content}\")\n    print(f\"  - Commit exists in the repository\")\n    print(f\"  - Commit message: {commit_data.get('commit', {}).get('message', '')}\")\n\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify_task()\n    sys.exit(0 if success else 1)"
  },
  {
    "path": "tasks/github/standard/claude-code/automated_changelog_generation/description.md",
    "content": "I need you to analyze all recently closed issues and open pull requests in the repository, then generate comprehensive documentation and organize them properly.\n\n**Step 1: Create Documentation Branch**\nCreate a new branch called 'docs/changelog-and-migration' from the main branch.\n\n**Step 2: Generate Changelog from Closed Issues**\nFind all closed issues in the repository and create the file `CHANGELOG-GENERATED.md` on your branch with:\n- A heading \"# Changelog - Recent Fixes\"\n- A \"### 🐛 Bug Fixes\" section listing all closed issues with bug label, formatted as: \"- **#[NUMBER]**: [Title] ([labels])\"\n- A \"### 📚 Documentation\" section for closed issues with documentation label\n- A \"### 🔄 Duplicates\" section for issues marked as duplicate\n- A \"### 📊 Statistics\" section with:\n  - Total number of closed issues\n  - Distribution by platform labels (platform:macos, platform:linux, etc.)\n  - Distribution by area labels (area:core, area:tools, etc.)\n\n**Step 3: Create Migration Guide for Open PRs**\nAnalyze all open pull requests and create the file `docs/MIGRATION_GUIDE.md` with:\n- A heading \"# Migration Guide for Pending Features\"\n- For each open PR, create a section with:\n  - PR title and number\n  - Summary of changes based on the PR description\n  - Any new configuration or environment variables mentioned\n  - Installation or usage instructions if applicable\n\n**Step 4: Create Issue Analysis Report**\nCreate the file `reports/ISSUE_ANALYSIS.md` with:\n- A heading \"# Issue Analysis Report\"\n- A \"## Closed Issues by Category\" section grouping closed issues by their primary label\n- A \"## Resolution Patterns\" section identifying common themes\n- A \"## Platform Impact Analysis\" section showing which platforms were most affected\n- Include references to specific issues that had cross-project impact or memory-related problems\n\n**Step 5: Create PR Integration Plan**\nCreate the file `reports/PR_INTEGRATION_PLAN.md` 
with:\n- A heading \"# Pull Request Integration Strategy\"\n- A \"## Open PRs Overview\" section listing each open PR with a technical summary\n- A \"## Dependencies and Conflicts\" section analyzing potential conflicts between PRs\n- A \"## Recommended Merge Order\" section with reasoning\n- A \"## Risk Assessment\" section linking any risks to previously closed issues\n\n**Step 6: Create Documentation PR**\nCreate a pull request from 'docs/changelog-and-migration' to 'main' with:\n- Title: \"docs: Generated changelog and migration documentation\"\n- Body including:\n  - A \"## Summary\" section describing what was generated\n  - A \"## Files Created\" section listing all new documentation\n  - A \"## Issues Processed\" section mentioning the number of closed issues analyzed\n  - A \"## PRs Analyzed\" section mentioning the open PRs reviewed\n\n**Step 7: Merge Documentation PR**\nMerge the documentation pull request using the \"squash\" merge method."
  },
  {
    "path": "tasks/github/standard/claude-code/automated_changelog_generation/meta.json",
    "content": "{\n  \"task_id\": \"automated_changelog_generation\",\n  \"task_name\": \"Automated Changelog Generation\",\n  \"category_id\": \"claude-code\",\n  \"category_name\": \"Claude Code\",\n  \"description\": \"Analyze closed issues and open PRs to generate comprehensive documentation including changelog, migration guide, and analysis reports.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"release coordination\",\n    \"workflow automation\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/claude-code\",\n    \"stateOriginalUrl\": \"https://github.com/anthropics/claude-code\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/claude-code/automated_changelog_generation/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nimport base64\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _check_branch_exists(\n    branch_name: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> bool:\n    \"\"\"Verify that a branch exists in the repository.\"\"\"\n    success, _ = _get_github_api(f\"branches/{branch_name}\", headers, org, repo)\n    return success\n\n\ndef _get_file_content(\n    file_path: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"claude-code\",\n    ref: str = \"main\",\n) -> Optional[str]:\n    \"\"\"Get the content of a file from the repository.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={ref}\", headers, org, repo\n    )\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef _find_pr_by_title_keyword(\n    keyword: str, headers: Dict[str, str], org: str, repo: str = 
\"claude-code\"\n) -> Optional[Dict]:\n    \"\"\"Find a PR by title keyword and return the PR data.\"\"\"\n    for state in [\"open\", \"closed\"]:\n        success, prs = _get_github_api(\n            f\"pulls?state={state}&per_page=100\", headers, org, repo\n        )\n        if success and prs:\n            for pr in prs:\n                if keyword.lower() in pr.get(\"title\", \"\").lower():\n                    return pr\n    return None\n\n\ndef _get_pr_merge_commit(\n    pr_number: int, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> Optional[Dict]:\n    \"\"\"Get the merge commit for a PR to check merge method.\"\"\"\n    success, pr = _get_github_api(f\"pulls/{pr_number}\", headers, org, repo)\n    if success and pr:\n        merge_commit_sha = pr.get(\"merge_commit_sha\")\n        if merge_commit_sha:\n            success, commit = _get_github_api(\n                f\"commits/{merge_commit_sha}\", headers, org, repo\n            )\n            if success:\n                return commit\n    return None\n\n\ndef _check_file_sections(content: str, required_sections: List[str]) -> bool:\n    \"\"\"Check if file content contains required sections.\"\"\"\n    if not content:\n        return False\n    return all(section in content for section in required_sections)\n\n\ndef _check_issue_references(text: str, issue_numbers: List[int]) -> int:\n    \"\"\"Count how many of the specified issue numbers are referenced in the text.\"\"\"\n    if not text:\n        return 0\n    count = 0\n    for num in issue_numbers:\n        if f\"#{num}\" in text:\n            count += 1\n    return count\n\n\ndef _check_pr_references(text: str, pr_numbers: List[int]) -> int:\n    \"\"\"Count how many of the specified PR numbers are referenced in the text.\"\"\"\n    if not text:\n        return 0\n    count = 0\n    for num in pr_numbers:\n        if f\"#{num}\" in text or f\"PR #{num}\" in text:\n            count += 1\n    return count\n\n\ndef verify() -> 
bool:\n    \"\"\"\n    Programmatically verify that the changelog and migration documentation workflow\n    meets the requirements described in description.md.\n    \"\"\"\n    # Configuration constants - these are known to us but not explicitly told to the model\n    DOCS_BRANCH_NAME = \"docs/changelog-and-migration\"\n    DOCS_PR_KEYWORD = \"Generated changelog and migration\"\n\n    # Known issue and PR numbers for verification\n    EXPECTED_BUG_ISSUES = [12, 13, 15, 21, 22, 23, 25, 37, 39, 48, 50]\n    EXPECTED_OPEN_PRS = [51, 52, 53]\n\n    # Expected file sections\n    CHANGELOG_SECTIONS = [\n        \"# Changelog - Recent Fixes\",\n        \"### 🐛 Bug Fixes\",\n        \"### 📚 Documentation\",\n        \"### 🔄 Duplicates\",\n        \"### 📊 Statistics\",\n    ]\n\n    MIGRATION_GUIDE_SECTIONS = [\"# Migration Guide for Pending Features\"]\n\n    ISSUE_ANALYSIS_SECTIONS = [\n        \"# Issue Analysis Report\",\n        \"## Closed Issues by Category\",\n        \"## Resolution Patterns\",\n        \"## Platform Impact Analysis\",\n    ]\n\n    PR_INTEGRATION_SECTIONS = [\n        \"# Pull Request Integration Strategy\",\n        \"## Open PRs Overview\",\n        \"## Dependencies and Conflicts\",\n        \"## Recommended Merge Order\",\n        \"## Risk Assessment\",\n    ]\n\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    
print(\"Verifying changelog and migration documentation workflow...\")\n\n    # 1. Check that documentation branch exists\n    print(\"1. Verifying documentation branch exists...\")\n    if not _check_branch_exists(DOCS_BRANCH_NAME, headers, github_org):\n        print(f\"Error: Branch '{DOCS_BRANCH_NAME}' not found\", file=sys.stderr)\n        return False\n    print(\"✓ Documentation branch created\")\n\n    # 2. Check changelog file\n    print(\"2. Verifying CHANGELOG-GENERATED.md...\")\n    changelog_content = _get_file_content(\n        \"CHANGELOG-GENERATED.md\", headers, github_org, \"claude-code\", DOCS_BRANCH_NAME\n    )\n    if not changelog_content:\n        print(\"Error: CHANGELOG-GENERATED.md not found\", file=sys.stderr)\n        return False\n\n    if not _check_file_sections(changelog_content, CHANGELOG_SECTIONS):\n        print(\n            \"Error: CHANGELOG-GENERATED.md missing required sections\", file=sys.stderr\n        )\n        return False\n\n    # Check that bug issues are referenced\n    bug_refs = _check_issue_references(changelog_content, EXPECTED_BUG_ISSUES)\n    if bug_refs < 8:  # At least 8 of the bug issues\n        print(\n            f\"Error: CHANGELOG-GENERATED.md only references {bug_refs} bug issues, expected at least 8\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Check for platform and area statistics\n    if (\n        \"platform:\" not in changelog_content.lower()\n        or \"area:\" not in changelog_content.lower()\n    ):\n        print(\n            \"Error: CHANGELOG-GENERATED.md missing platform or area distribution\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"✓ Changelog created with proper content\")\n\n    # 3. Check migration guide\n    print(\"3. 
Verifying MIGRATION_GUIDE.md...\")\n    migration_content = _get_file_content(\n        \"docs/MIGRATION_GUIDE.md\", headers, github_org, \"claude-code\", DOCS_BRANCH_NAME\n    )\n    if not migration_content:\n        print(\"Error: docs/MIGRATION_GUIDE.md not found\", file=sys.stderr)\n        return False\n\n    if not _check_file_sections(migration_content, MIGRATION_GUIDE_SECTIONS):\n        print(\"Error: MIGRATION_GUIDE.md missing required sections\", file=sys.stderr)\n        return False\n\n    # Check that all expected open PRs are mentioned\n    pr_refs = _check_pr_references(migration_content, EXPECTED_OPEN_PRS)\n    if pr_refs < 3:\n        print(\n            f\"Error: MIGRATION_GUIDE.md only references {pr_refs}/3 open PRs\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"✓ Migration guide created with proper content\")\n\n    # 4. Check issue analysis report\n    print(\"4. Verifying ISSUE_ANALYSIS.md...\")\n    issue_analysis_content = _get_file_content(\n        \"reports/ISSUE_ANALYSIS.md\",\n        headers,\n        github_org,\n        \"claude-code\",\n        DOCS_BRANCH_NAME,\n    )\n    if not issue_analysis_content:\n        print(\"Error: reports/ISSUE_ANALYSIS.md not found\", file=sys.stderr)\n        return False\n\n    if not _check_file_sections(issue_analysis_content, ISSUE_ANALYSIS_SECTIONS):\n        print(\"Error: ISSUE_ANALYSIS.md missing required sections\", file=sys.stderr)\n        return False\n\n    # Check for cross-project and memory issue mentions\n    if \"#50\" not in issue_analysis_content and \"#48\" not in issue_analysis_content:\n        print(\n            \"Warning: ISSUE_ANALYSIS.md may be missing cross-project issue references\",\n            file=sys.stderr,\n        )\n\n    print(\"✓ Issue analysis report created\")\n\n    # 5. Check PR integration plan\n    print(\"5. 
Verifying PR_INTEGRATION_PLAN.md...\")\n    pr_plan_content = _get_file_content(\n        \"reports/PR_INTEGRATION_PLAN.md\",\n        headers,\n        github_org,\n        \"claude-code\",\n        DOCS_BRANCH_NAME,\n    )\n    if not pr_plan_content:\n        print(\"Error: reports/PR_INTEGRATION_PLAN.md not found\", file=sys.stderr)\n        return False\n\n    if not _check_file_sections(pr_plan_content, PR_INTEGRATION_SECTIONS):\n        print(\n            \"Error: PR_INTEGRATION_PLAN.md missing required sections\", file=sys.stderr\n        )\n        return False\n\n    # Check that all open PRs are analyzed\n    pr_refs_in_plan = _check_pr_references(pr_plan_content, EXPECTED_OPEN_PRS)\n    if pr_refs_in_plan < 3:\n        print(\n            f\"Error: PR_INTEGRATION_PLAN.md only references {pr_refs_in_plan}/3 open PRs\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"✓ PR integration plan created\")\n\n    # 6. Find and verify the documentation PR\n    print(\"6. 
Verifying documentation pull request...\")\n    docs_pr = _find_pr_by_title_keyword(DOCS_PR_KEYWORD, headers, github_org)\n    if not docs_pr:\n        # Try alternative keyword\n        docs_pr = _find_pr_by_title_keyword(\n            \"changelog and migration\", headers, github_org\n        )\n\n    if not docs_pr:\n        print(\"Error: Documentation PR not found\", file=sys.stderr)\n        return False\n\n    pr_body = docs_pr.get(\"body\", \"\")\n    pr_number = docs_pr.get(\"number\")\n\n    # Check PR body sections\n    required_sections = [\n        \"## Summary\",\n        \"## Files Created\",\n        \"## Issues Processed\",\n        \"## PRs Analyzed\",\n    ]\n    missing_sections = []\n    for section in required_sections:\n        if section not in pr_body:\n            missing_sections.append(section)\n\n    if len(missing_sections) > 1:  # Allow 1 missing section for flexibility\n        print(\n            f\"Error: Documentation PR missing sections: {missing_sections}\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"✓ Documentation PR created\")\n\n    # 7. Check that the documentation PR has been merged with squash method\n    print(\"7. 
Verifying documentation PR merge with squash method...\")\n    if docs_pr.get(\"state\") != \"closed\" or not docs_pr.get(\"merged_at\"):\n        print(\"Error: Documentation PR has not been merged\", file=sys.stderr)\n        return False\n\n    # Check merge method was squash by examining the merge commit\n    merge_commit = _get_pr_merge_commit(pr_number, headers, github_org)\n    if merge_commit:\n        # Squash merges typically have only one parent (the base branch)\n        parents = merge_commit.get(\"parents\", [])\n        if len(parents) != 1:\n            print(\n                f\"Warning: Merge commit has {len(parents)} parents, may not be squash merge\",\n                file=sys.stderr,\n            )\n\n        # Check commit message pattern typical of squash merges\n        commit_message = merge_commit.get(\"commit\", {}).get(\"message\", \"\")\n        if f\"#{pr_number}\" not in commit_message:\n            print(\n                \"Warning: Merge commit message may not follow squash merge pattern\",\n                file=sys.stderr,\n            )\n    else:\n        print(\"Warning: Could not retrieve merge commit details\", file=sys.stderr)\n\n    merged_at = docs_pr.get(\"merged_at\")\n    if not merged_at:\n        print(\"Error: Documentation PR merge timestamp not found\", file=sys.stderr)\n        return False\n\n    print(\"✓ Documentation PR merged successfully\")\n\n    print(\"\\n✅ All verification checks passed!\")\n    print(\"Changelog and migration documentation completed successfully:\")\n    print(f\"  - Documentation PR #{pr_number} (merged)\")\n    print(f\"  - Branch: {DOCS_BRANCH_NAME}\")\n    print(\"  - Files created: 4 documentation files\")\n    print(f\"  - Bug issues referenced: {bug_refs}/{len(EXPECTED_BUG_ISSUES)}\")\n    print(f\"  - Open PRs analyzed: {pr_refs}/{len(EXPECTED_OPEN_PRS)}\")\n\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/claude-code/claude_collaboration_analysis/description.md",
    "content": "I need you to analyze the collaboration patterns between human developers and Claude (the AI assistant) in the repository by examining all available commit history, then create a comprehensive analysis report and submit it as a new file to the repository.\n\n**Step 1: Commit History Analysis**\nAnalyze ALL commits in the repository to identify:\n\n1. **Claude Co-Authored Commits**: Find all commits that were co-authored by Claude (look for \"Co-Authored-By: Claude <noreply@anthropic.com>\" in commit messages)\n2. **Top Claude Collaborators**: Identify the top 3 human developers who most frequently collaborated with Claude\n\n**Step 2: Create Collaboration Analysis Report**\nCreate a file called `CLAUDE_COLLABORATION_ANALYSIS.md` in the repository root with:\n\n- A \"# Claude AI Collaboration Analysis\" title\n- A \"## Summary Statistics\" section with these exact format requirements:\n  - \"Total commits analyzed: [NUMBER]\"\n  - \"Number of Claude co-authored commits found: [NUMBER]\"\n  - \"Percentage of commits with Claude collaboration: [NUMBER]%\"\n  - \"Number of unique human collaborators who worked with Claude: [NUMBER]\"\n\n- A \"## Top Claude Collaborators\" section with this exact table format:\n```markdown\n| Developer | GitHub Username | Claude Collaborations |\n|-----------|----------------|----------------------|\n```\nInclude the top 3 developers by number of Claude collaborations.\n\n**Step 3: Commit Analysis to Repository**\nCommit the `CLAUDE_COLLABORATION_ANALYSIS.md` file to the main branch with:\n- Commit message: \"Add Claude AI collaboration analysis report\"\n- Ensure all statistics are accurate based on actual commit data"
  },
  {
    "path": "tasks/github/standard/claude-code/claude_collaboration_analysis/meta.json",
    "content": "{\n  \"task_id\": \"claude_collaboration_analysis\",\n  \"task_name\": \"Claude Collaboration Analysis\",\n  \"category_id\": \"claude-code\",\n  \"category_name\": \"Claude Code\",\n  \"description\": \"Analyze Claude AI collaboration patterns in commit history and create a comprehensive report of co-authored commits and top collaborators.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"repository analysis\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/claude-code\",\n    \"stateOriginalUrl\": \"https://github.com/anthropics/claude-code\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/claude-code/claude_collaboration_analysis/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nimport base64\nimport re\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _get_file_content(\n    file_path: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"claude-code\",\n    ref: str = \"main\",\n) -> Optional[str]:\n    \"\"\"Get the content of a file from the repository.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={ref}\", headers, org, repo\n    )\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef _parse_summary_statistics(content: str) -> Dict:\n    \"\"\"Parse the summary statistics section from the report.\"\"\"\n    stats = {}\n\n    lines = content.split(\"\\n\")\n    in_summary = False\n\n    for line in lines:\n        if \"## Summary Statistics\" in line:\n            in_summary = True\n            continue\n\n        if in_summary:\n            if \"##\" in line and 
\"Summary Statistics\" not in line:\n                break\n\n            # Parse statistics lines\n            if \"Total commits analyzed\" in line:\n                match = re.search(r\"(\\d+)\", line)\n                if match:\n                    stats[\"total_analyzed\"] = int(match.group(1))\n            elif \"Number of Claude co-authored commits\" in line:\n                match = re.search(r\"(\\d+)\", line)\n                if match:\n                    stats[\"claude_commits\"] = int(match.group(1))\n            elif \"Percentage of commits with Claude collaboration\" in line:\n                match = re.search(r\"([\\d.]+)%\", line)\n                if match:\n                    stats[\"percentage\"] = float(match.group(1))\n            elif \"Number of unique human collaborators\" in line:\n                match = re.search(r\"(\\d+)\", line)\n                if match:\n                    stats[\"unique_collaborators\"] = int(match.group(1))\n\n    return stats\n\n\ndef _parse_collaborators_table(content: str) -> List[Dict]:\n    \"\"\"Parse the top collaborators table from the report.\"\"\"\n    collaborators = []\n\n    lines = content.split(\"\\n\")\n    in_table = False\n\n    for line in lines:\n        if \"| Developer | GitHub Username | Claude Collaborations |\" in line:\n            in_table = True\n            continue\n        if in_table and line.startswith(\"|---\"):\n            continue\n\n        if in_table and line.startswith(\"|\"):\n            parts = [p.strip() for p in line.split(\"|\")]\n            if len(parts) >= 4:  # Should have 3 columns plus empty parts\n                developer = parts[1].strip()\n                username = parts[2].strip()\n                collaborations = parts[3].strip()\n\n                if developer and username and collaborations:\n                    try:\n                        collaborators.append(\n                            {\n                                \"developer\": 
developer,\n                                \"username\": username,\n                                \"collaborations\": int(collaborations),\n                            }\n                        )\n                    except ValueError:\n                        pass\n\n        if in_table and line and not line.startswith(\"|\") and \"##\" in line:\n            break\n\n    return collaborators\n\n\ndef verify_task() -> bool:\n    \"\"\"Verify the Claude collaboration analysis task.\"\"\"\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Pre-computed expected values based on repository analysis\n    # These are the correct answers the agent should find\n    EXPECTED_TOP_COLLABORATORS = [\n        {\n            \"username\": \"bcherny\",\n            \"min_collaborations\": 14,\n        },  # Boris Cherny has many Claude collaborations\n        {\"username\": \"ashwin-ant\", \"min_collaborations\": 5},  # Ashwin Bhat has some\n        {\"username\": \"ant-kurt\", \"min_collaborations\": 3},  # Kurt Carpenter has several\n    ]\n\n    # Expected exact values for summary statistics\n    EXPECTED_STATS = {\n        \"total_analyzed\": 158,\n        \"claude_commits\": 25,\n        \"percentage\": 15.82,\n        \"unique_collaborators\": 6,\n    }\n\n    print(\"Verifying Claude collaboration analysis task...\")\n\n    # 1. 
Check if CLAUDE_COLLABORATION_ANALYSIS.md exists in main branch\n    print(\"1. Checking if CLAUDE_COLLABORATION_ANALYSIS.md exists...\")\n    content = _get_file_content(\"CLAUDE_COLLABORATION_ANALYSIS.md\", headers, github_org)\n    if not content:\n        print(\n            \"Error: CLAUDE_COLLABORATION_ANALYSIS.md not found in main branch\",\n            file=sys.stderr,\n        )\n        return False\n    print(\"✓ CLAUDE_COLLABORATION_ANALYSIS.md found\")\n\n    # 2. Check required sections exist\n    print(\"2. Checking required sections...\")\n    required_sections = [\n        \"# Claude AI Collaboration Analysis\",\n        \"## Summary Statistics\",\n        \"## Top Claude Collaborators\",\n    ]\n\n    for section in required_sections:\n        if section not in content:\n            print(f\"Error: Missing required section '{section}'\", file=sys.stderr)\n            return False\n    print(\"✓ All required sections present\")\n\n    # 3. Parse and validate summary statistics\n    print(\"3. 
Validating summary statistics...\")\n    stats = _parse_summary_statistics(content)\n\n    if \"total_analyzed\" not in stats:\n        print(\"Error: Total commits analyzed not found\", file=sys.stderr)\n        return False\n\n    # Check exact values against expected statistics\n    if stats.get(\"total_analyzed\") != EXPECTED_STATS[\"total_analyzed\"]:\n        print(\n            f\"Error: Total analyzed should be {EXPECTED_STATS['total_analyzed']}, found {stats.get('total_analyzed')}\",\n            file=sys.stderr,\n        )\n        return False\n\n    if stats.get(\"claude_commits\") != EXPECTED_STATS[\"claude_commits\"]:\n        print(\n            f\"Error: Claude commits should be {EXPECTED_STATS['claude_commits']}, found {stats.get('claude_commits')}\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Allow 0.1% tolerance for percentage\n    expected_percentage = EXPECTED_STATS[\"percentage\"]\n    actual_percentage = stats.get(\"percentage\", 0)\n    if abs(actual_percentage - expected_percentage) > 0.1:\n        print(\n            f\"Error: Percentage should be around {expected_percentage}% (±0.1%), found {actual_percentage}%\",\n            file=sys.stderr,\n        )\n        return False\n\n    if stats.get(\"unique_collaborators\") != EXPECTED_STATS[\"unique_collaborators\"]:\n        print(\n            f\"Error: Unique collaborators should be {EXPECTED_STATS['unique_collaborators']}, found {stats.get('unique_collaborators')}\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"✓ Summary statistics validated\")\n\n    # 4. Validate top collaborators table\n    print(\"4. 
Validating top collaborators...\")\n    collaborators = _parse_collaborators_table(content)\n\n    if len(collaborators) < 3:\n        print(\n            f\"Error: Expected 3 top collaborators, found {len(collaborators)}\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Check that expected top collaborators are present\n    found_usernames = [c[\"username\"] for c in collaborators]\n\n    # The top 3 should include at least 2 of our expected collaborators\n    expected_found = 0\n    for expected in EXPECTED_TOP_COLLABORATORS:\n        if expected[\"username\"] in found_usernames[:3]:\n            expected_found += 1\n            # Also check they have reasonable collaboration counts\n            for collab in collaborators:\n                if collab[\"username\"] == expected[\"username\"]:\n                    if collab[\"collaborations\"] < expected[\"min_collaborations\"]:\n                        print(\n                            f\"Error: {expected['username']} should have at least {expected['min_collaborations']} collaborations, found {collab['collaborations']}\",\n                            file=sys.stderr,\n                        )\n                        return False\n\n    if expected_found < 2:\n        print(\n            f\"Error: Expected to find at least 2 of the known top collaborators in top 3, found {expected_found}\",\n            file=sys.stderr,\n        )\n        print(\n            f\"Expected to see at least 2 of: {[e['username'] for e in EXPECTED_TOP_COLLABORATORS]}\",\n            file=sys.stderr,\n        )\n        print(f\"Found: {found_usernames[:3]}\", file=sys.stderr)\n        return False\n\n    print(\"✓ Top collaborators validated\")\n\n    # 5. Check commit message verification\n    print(\"5. 
Verifying commit message...\")\n    success, latest_commits = _get_github_api(\n        \"commits?per_page=10\", headers, github_org\n    )\n    if not success:\n        print(\"Error: Failed to fetch recent commits\", file=sys.stderr)\n        return False\n\n    # Look for commit with expected message\n    expected_commit_message = \"Add Claude AI collaboration analysis report\"\n    commit_found = False\n    for commit in latest_commits:\n        if commit[\"commit\"][\"message\"].startswith(expected_commit_message):\n            commit_found = True\n            break\n\n    if not commit_found:\n        print(\n            f\"Error: Expected commit message '{expected_commit_message}' not found in recent commits\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"✓ Commit message verified\")\n\n    # 6. Additional validation: Check unique collaborators count\n    print(\"6. Final validation complete...\")\n    print(\"✓ All statistics match expected values\")\n\n    print(\"\\n✅ All verification checks passed!\")\n    print(\"Claude collaboration analysis completed successfully:\")\n    print(\"  - File: CLAUDE_COLLABORATION_ANALYSIS.md created in main branch\")\n    print(f\"  - Commits analyzed: {stats.get('total_analyzed', 'N/A')}\")\n    print(f\"  - Claude collaborations found: {stats.get('claude_commits', 'N/A')}\")\n    print(f\"  - Top collaborators identified: {len(collaborators)}\")\n    print(\"  - All statistics verified\")\n    print(\"  - Commit message verified\")\n\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify_task()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/claude-code/critical_issue_hotfix_workflow/description.md",
    "content": "I need you to implement a comprehensive critical issue hotfix workflow for the repository that demonstrates advanced PR management, selective merging, and issue resolution tracking.\n\n**Step 1: Create Critical Bug Tracking Issue**\nCreate a new issue with:\n- Title: \"CRITICAL: Memory and Context Management Issues - Hotfix Tracking\"\n- Body must include:\n  - A \"## Critical Issues\" heading listing issues #49 and #46\n  - A \"## Impact Assessment\" heading describing user impact\n  - A \"## Resolution Strategy\" heading with planned approach\n  - References to existing issues #49, #46, and #47 using \"#\" notation\n  - Keywords: \"memory exhaustion\", \"context auto-compact\", \"JavaScript heap\", \"hotfix priority\"\n\n**Step 2: Create Memory Optimization Hotfix Branch**\nCreate a new branch called 'hotfix/memory-optimization-v1.0.72' from the main branch.\n\n**Step 3: Implement Memory Management Documentation**\nOn the hotfix branch, create the file `docs/MEMORY_OPTIMIZATION.md` with this exact content:\n```markdown\n# Memory Optimization Guide for Claude Code v1.0.72\n\n## Overview\nThis document addresses critical memory issues identified in issues #49 and #46.\n\n## Memory Management Issues\n\n### Context Auto-Compact Problem (Issue #49)\n- **Root Cause**: Context management stuck at 0% completion\n- **Impact**: Tool becomes unusable on macOS platforms\n- **Solution**: Implement progressive context cleanup with configurable thresholds\n\n### JavaScript Heap Exhaustion (Issue #46)\n- **Root Cause**: Memory allocation failure during large MCP operations\n- **Impact**: Complete Claude Code crash requiring restart\n- **Solution**: Add streaming data processing and garbage collection optimization\n\n## Optimization Strategies\n\n### Immediate Fixes\n1. 
**Context Buffer Management**\n   - Implement 10MB default context buffer limit\n   - Add automatic context pruning at 80% threshold\n   - Enable manual context reset via `/memory-reset` command\n\n2. **MCP Operation Streaming**\n   - Process large datasets in 1MB chunks\n   - Implement backpressure for MongoDB operations\n   - Add memory usage monitoring and alerts\n\n### Configuration Options\n```json\n{\n  \"memory\": {\n    \"contextBufferLimit\": \"10MB\",\n    \"autoCompactThreshold\": 0.8,\n    \"streamingChunkSize\": \"1MB\",\n    \"gcOptimization\": true\n  }\n}\n```\n\n## Related Issues\n- Fixes issue #49: Context auto-compact functionality\n- Addresses issue #46: JavaScript heap out of memory crashes\n- Related to issue #47: Cross-project hook execution problems\n```\n```\n\n**Step 4: Create Pull Request with Issue Cross-References**\nCreate a pull request from 'hotfix/memory-optimization-v1.0.72' to 'main' with:\n- Title: \"HOTFIX: Critical memory optimization for issues #49 and #46\"\n- Body must include:\n  - A \"## Summary\" heading describing the memory fixes\n  - A \"## Critical Issues Addressed\" heading listing specific problems\n  - A \"## Documentation Changes\" heading describing the new guide\n  - \"Addresses #49\" and \"Addresses #46\" pattern linking to existing issues\n  - Reference to your tracking issue using \"Tracked in #[ISSUE_NUMBER]\"\n  - Keywords: \"memory optimization\", \"context management\", \"heap exhaustion\", \"v1.0.72 hotfix\"\n\n**Step 5: Update and Merge PR #51 (Statsig Logging)**\nFor the existing PR #51:\n- Update the PR description to include technical implementation details\n- Add a \"## Technical Implementation\" section mentioning \"event logging integration\"\n- Add keywords: \"workflow enhancement\", \"issue management automation\", \"logging consistency\"\n- Merge the PR using the squash merge method\n\n**Step 6: Add Implementation Comment to Tracking Issue**\nAdd a comment to your original tracking issue 
with:\n- Reference to your hotfix PR using \"PR #[NUMBER]\" pattern\n- Reference to actions taken on PR #51\n- Technical details about the memory optimization approach\n- Keywords: \"context buffer management\", \"streaming optimization\", \"progressive cleanup\"\n- Mention of configuration options and thresholds\n\n**Step 7: Close Tracking Issue with Resolution Summary**\nClose your tracking issue by updating its state to 'closed' with:\n- A final comment summarizing completed actions\n- Reference to merged PR #51 and pending hotfix PR\n- Keywords: \"hotfix deployment\", \"memory issues resolved\", \"documentation updated\""
  },
  {
    "path": "tasks/github/standard/claude-code/critical_issue_hotfix_workflow/meta.json",
    "content": "{\n  \"task_id\": \"critical_issue_hotfix_workflow\",\n  \"task_name\": \"Critical Issue Hotfix Workflow\",\n  \"category_id\": \"claude-code\",\n  \"category_name\": \"Claude Code\",\n  \"description\": \"Implement a critical issue hotfix workflow for memory and context management issues with proper PR management and issue tracking.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"issue management\",\n    \"pr workflows\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/claude-code\",\n    \"stateOriginalUrl\": \"https://github.com/anthropics/claude-code\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/claude-code/critical_issue_hotfix_workflow/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nimport base64\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _check_branch_exists(\n    branch_name: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> bool:\n    \"\"\"Verify that a branch exists in the repository.\"\"\"\n    success, _ = _get_github_api(f\"branches/{branch_name}\", headers, org, repo)\n    return success\n\n\ndef _get_file_content(\n    file_path: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"claude-code\",\n    ref: str = \"main\",\n) -> Optional[str]:\n    \"\"\"Get the content of a file from the repository.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={ref}\", headers, org, repo\n    )\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef _find_issue_by_title_keyword(\n    keyword: str, headers: Dict[str, str], org: str, repo: str = 
\"claude-code\"\n) -> Optional[Dict]:\n    \"\"\"Find an issue by title keyword and return the issue data.\"\"\"\n    # Check both open and closed issues\n    for state in [\"open\", \"closed\"]:\n        success, issues = _get_github_api(\n            f\"issues?state={state}&per_page=100\", headers, org, repo\n        )\n        if success and issues:\n            for issue in issues:\n                if keyword.lower() in issue.get(\"title\", \"\").lower():\n                    return issue\n    return None\n\n\ndef _find_pr_by_title_keyword(\n    keyword: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> Optional[Dict]:\n    \"\"\"Find a PR by title keyword and return the PR data.\"\"\"\n    # Check both open and closed PRs\n    for state in [\"open\", \"closed\"]:\n        success, prs = _get_github_api(\n            f\"pulls?state={state}&per_page=100\", headers, org, repo\n        )\n        if success and prs:\n            for pr in prs:\n                if keyword.lower() in pr.get(\"title\", \"\").lower():\n                    return pr\n    return None\n\n\ndef _get_pr_by_number(\n    pr_number: int, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> Optional[Dict]:\n    \"\"\"Get a specific PR by number.\"\"\"\n    success, pr = _get_github_api(f\"pulls/{pr_number}\", headers, org, repo)\n    if success:\n        return pr\n    return None\n\n\ndef _check_issue_references(text: str, reference_numbers: List[str]) -> bool:\n    \"\"\"Check if text contains references to specified issue numbers.\"\"\"\n    if not text:\n        return False\n\n    return all(f\"#{ref}\" in text for ref in reference_numbers)\n\n\ndef _check_addresses_pattern(pr_body: str, issue_numbers: List[str]) -> bool:\n    \"\"\"Check if PR body contains 'Addresses #X' pattern for specified issues.\"\"\"\n    if not pr_body:\n        return False\n\n    return all(\n        f\"Addresses #{num}\" in pr_body or f\"addresses #{num}\" in pr_body\n   
     for num in issue_numbers\n    )\n\n\ndef _get_issue_comments(\n    issue_number: int, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> List[Dict]:\n    \"\"\"Get all comments for an issue.\"\"\"\n    success, comments = _get_github_api(\n        f\"issues/{issue_number}/comments\", headers, org, repo\n    )\n    if success and comments:\n        return comments\n    return []\n\n\ndef _get_pr_reviews(\n    pr_number: int, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> List[Dict]:\n    \"\"\"Get all reviews for a PR.\"\"\"\n    success, reviews = _get_github_api(f\"pulls/{pr_number}/reviews\", headers, org, repo)\n    if success and reviews:\n        return reviews\n    return []\n\n\ndef _check_title_keywords(title: str, required_keywords: List[str]) -> bool:\n    \"\"\"Check if title contains all required keywords.\"\"\"\n    return all(keyword.lower() in title.lower() for keyword in required_keywords)\n\n\ndef _check_headings_and_keywords(\n    body: str, headings: List[str], keywords: List[str]\n) -> bool:\n    \"\"\"Check if body contains required headings and keywords.\"\"\"\n    has_headings = all(heading in body for heading in headings)\n    has_keywords = all(keyword.lower() in body.lower() for keyword in keywords)\n    return has_headings and has_keywords\n\n\ndef _check_exact_file_content(content: str, expected_sections: List[str]) -> bool:\n    \"\"\"Check if file content contains expected sections.\"\"\"\n    return all(section in content for section in expected_sections)\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the critical issue hotfix workflow meets the\n    requirements described in description.md.\n    \"\"\"\n    # Configuration constants\n    HOTFIX_BRANCH_NAME = \"hotfix/memory-optimization-v1.0.72\"\n    TRACKING_ISSUE_KEYWORD = \"Memory and Context Management Issues\"\n    HOTFIX_PR_KEYWORD = \"HOTFIX: Critical memory optimization\"\n\n    # Expected file content 
sections\n    MEMORY_DOC_SECTIONS = [\n        \"# Memory Optimization Guide for Claude Code v1.0.72\",\n        \"## Overview\",\n        \"### Context Auto-Compact Problem (Issue #49)\",\n        \"### JavaScript Heap Exhaustion (Issue #46)\",\n        \"## Optimization Strategies\",\n        \"### Immediate Fixes\",\n        \"### Configuration Options\",\n        \"## Related Issues\",\n    ]\n\n    # Issue content requirements\n    TRACKING_ISSUE_TITLE_KEYWORDS = [\n        \"CRITICAL\",\n        \"Memory\",\n        \"Context Management\",\n        \"Hotfix Tracking\",\n    ]\n    TRACKING_ISSUE_REFERENCE_NUMBERS = [\"49\", \"46\", \"47\"]\n    TRACKING_ISSUE_HEADINGS = [\n        \"## Critical Issues\",\n        \"## Impact Assessment\",\n        \"## Resolution Strategy\",\n    ]\n    TRACKING_ISSUE_KEYWORDS = [\n        \"memory exhaustion\",\n        \"context auto-compact\",\n        \"JavaScript heap\",\n        \"hotfix priority\",\n    ]\n\n    # PR content requirements\n    HOTFIX_PR_TITLE_KEYWORDS = [\n        \"HOTFIX\",\n        \"Critical memory optimization\",\n        \"issues #49\",\n        \"#46\",\n    ]\n    HOTFIX_PR_ADDRESSES_NUMBERS = [\"49\", \"46\"]\n    HOTFIX_PR_HEADINGS = [\n        \"## Summary\",\n        \"## Critical Issues Addressed\",\n        \"## Documentation Changes\",\n    ]\n    HOTFIX_PR_KEYWORDS = [\n        \"memory optimization\",\n        \"context management\",\n        \"heap exhaustion\",\n        \"v1.0.72 hotfix\",\n    ]\n\n    # PR #51 update requirements\n    PR51_UPDATE_KEYWORDS = [\n        \"Technical Implementation\",\n        \"event logging integration\",\n        \"workflow enhancement\",\n    ]\n\n    # Issue comment requirements\n    ISSUE_COMMENT_KEYWORDS = [\n        \"context buffer management\",\n        \"streaming optimization\",\n        \"progressive cleanup\",\n    ]\n\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    
github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    print(\"Verifying critical issue hotfix workflow completion...\")\n\n    # 1. Check that hotfix branch exists\n    print(\"1. Verifying hotfix branch exists...\")\n    if not _check_branch_exists(HOTFIX_BRANCH_NAME, headers, github_org):\n        print(f\"Error: Branch '{HOTFIX_BRANCH_NAME}' not found\", file=sys.stderr)\n        return False\n    print(\"✓ Hotfix branch created\")\n\n    # 2. Check that the memory optimization documentation exists with exact content\n    print(\"2. Verifying MEMORY_OPTIMIZATION.md documentation...\")\n    memory_doc_content = _get_file_content(\n        \"docs/MEMORY_OPTIMIZATION.md\",\n        headers,\n        github_org,\n        \"claude-code\",\n        HOTFIX_BRANCH_NAME,\n    )\n    if not memory_doc_content:\n        print(\n            \"Error: docs/MEMORY_OPTIMIZATION.md not found in hotfix branch\",\n            file=sys.stderr,\n        )\n        return False\n\n    if not _check_exact_file_content(memory_doc_content, MEMORY_DOC_SECTIONS):\n        print(\n            \"Error: MEMORY_OPTIMIZATION.md missing required sections or content\",\n            file=sys.stderr,\n        )\n        return False\n    print(\"✓ Memory optimization documentation created with correct content\")\n\n    # 3. Find and verify the tracking issue\n    print(\"3. 
Verifying tracking issue creation and content...\")\n    tracking_issue = _find_issue_by_title_keyword(\n        TRACKING_ISSUE_KEYWORD, headers, github_org\n    )\n    if not tracking_issue:\n        print(\n            f\"Error: Tracking issue with keyword '{TRACKING_ISSUE_KEYWORD}' not found\",\n            file=sys.stderr,\n        )\n        return False\n\n    tracking_issue_number = tracking_issue.get(\"number\")\n    tracking_issue_title = tracking_issue.get(\"title\", \"\")\n    tracking_issue_body = tracking_issue.get(\"body\", \"\")\n\n    # Check tracking issue title keywords\n    if not _check_title_keywords(tracking_issue_title, TRACKING_ISSUE_TITLE_KEYWORDS):\n        print(\"Error: Tracking issue title missing required keywords\", file=sys.stderr)\n        return False\n\n    # Check tracking issue headings, content and references\n    if not _check_headings_and_keywords(\n        tracking_issue_body, TRACKING_ISSUE_HEADINGS, TRACKING_ISSUE_KEYWORDS\n    ):\n        print(\n            \"Error: Tracking issue missing required headings or keywords\",\n            file=sys.stderr,\n        )\n        return False\n\n    if not _check_issue_references(\n        tracking_issue_body, TRACKING_ISSUE_REFERENCE_NUMBERS\n    ):\n        print(\n            \"Error: Tracking issue does not reference required issues #49, #46, #47\",\n            file=sys.stderr,\n        )\n        return False\n    print(\"✓ Tracking issue created with correct content and references\")\n\n    # 4. Find and verify the hotfix PR\n    print(\"4. 
Verifying hotfix pull request creation and content...\")\n    hotfix_pr = _find_pr_by_title_keyword(HOTFIX_PR_KEYWORD, headers, github_org)\n    if not hotfix_pr:\n        print(\n            f\"Error: Hotfix PR with keyword '{HOTFIX_PR_KEYWORD}' not found\",\n            file=sys.stderr,\n        )\n        return False\n\n    hotfix_pr_number = hotfix_pr.get(\"number\")\n    hotfix_pr_title = hotfix_pr.get(\"title\", \"\")\n    hotfix_pr_body = hotfix_pr.get(\"body\", \"\")\n\n    # Check hotfix PR title keywords\n    if not _check_title_keywords(hotfix_pr_title, HOTFIX_PR_TITLE_KEYWORDS):\n        print(\"Error: Hotfix PR title missing required keywords\", file=sys.stderr)\n        return False\n\n    # Check hotfix PR headings and content\n    if not _check_headings_and_keywords(\n        hotfix_pr_body, HOTFIX_PR_HEADINGS, HOTFIX_PR_KEYWORDS\n    ):\n        print(\"Error: Hotfix PR missing required headings or keywords\", file=sys.stderr)\n        return False\n\n    # Check hotfix PR addresses pattern\n    if not _check_addresses_pattern(hotfix_pr_body, HOTFIX_PR_ADDRESSES_NUMBERS):\n        print(\n            \"Error: Hotfix PR does not properly address issues #49 and #46\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Check reference to tracking issue\n    if f\"#{tracking_issue_number}\" not in hotfix_pr_body:\n        print(\n            f\"Error: Hotfix PR does not reference tracking issue #{tracking_issue_number}\",\n            file=sys.stderr,\n        )\n        return False\n    print(\"✓ Hotfix PR created with correct content and references\")\n\n    # 5. Check PR #51 has been updated and merged\n    print(\"5. 
Verifying PR #51 update and merge...\")\n    pr51 = _get_pr_by_number(51, headers, github_org)\n    if not pr51:\n        print(\"Error: PR #51 not found\", file=sys.stderr)\n        return False\n\n    pr51_body = pr51.get(\"body\", \"\")\n    pr51_state = pr51.get(\"state\", \"\")\n\n    # Check PR #51 has been updated with required content\n    if not _check_headings_and_keywords(\n        pr51_body, [\"## Technical Implementation\"], PR51_UPDATE_KEYWORDS\n    ):\n        print(\n            \"Error: PR #51 missing updated technical implementation section\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Check PR #51 has been merged\n    if pr51_state != \"closed\" or not pr51.get(\"merged_at\"):\n        print(\"Error: PR #51 has not been merged\", file=sys.stderr)\n        return False\n    print(\"✓ PR #51 updated and merged successfully\")\n\n    # 6. Check tracking issue has implementation comment\n    print(\"6. Verifying tracking issue implementation comment...\")\n    tracking_issue_comments = _get_issue_comments(\n        tracking_issue_number, headers, github_org\n    )\n\n    has_implementation_comment = False\n    for comment in tracking_issue_comments:\n        body = comment.get(\"body\", \"\")\n        has_pr_ref = f\"PR #{hotfix_pr_number}\" in body\n        has_pr51_ref = \"PR #51\" in body\n        has_keywords = all(\n            keyword.lower() in body.lower() for keyword in ISSUE_COMMENT_KEYWORDS\n        )\n        if has_pr_ref and has_pr51_ref and has_keywords:\n            has_implementation_comment = True\n            break\n\n    if not has_implementation_comment:\n        print(\n            f\"Error: Tracking issue #{tracking_issue_number} missing implementation comment with required references and keywords\",\n            file=sys.stderr,\n        )\n        return False\n    print(\"✓ Tracking issue has implementation comment with PR references\")\n\n    # 7. Check tracking issue is closed\n    print(\"7. 
Verifying tracking issue closure...\")\n    if tracking_issue.get(\"state\") != \"closed\":\n        print(\n            f\"Error: Tracking issue #{tracking_issue_number} is not closed\",\n            file=sys.stderr,\n        )\n        return False\n    print(\"✓ Tracking issue closed successfully\")\n\n    print(\"\\n✅ All verification checks passed!\")\n    print(\"Critical issue hotfix workflow completed successfully:\")\n    print(f\"  - Tracking Issue #{tracking_issue_number}: {tracking_issue.get('title')}\")\n    print(f\"  - Hotfix PR #{hotfix_pr_number}: {hotfix_pr.get('title')}\")\n    print(f\"  - Branch: {HOTFIX_BRANCH_NAME}\")\n    print(\"  - PR #51 merged: ✓\")\n    print(\"  - Memory optimization documentation: ✓\")\n\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/claude-code/feature_commit_tracking/description.md",
    "content": "I need you to research the development history of the repository across multiple branches and commits, then create a comprehensive feature tracking document and submit it as a new file to the repository.\n\n**Step 1: Multi-Branch Feature Investigation**\nResearch and identify the exact commit SHAs where these specific features were introduced by analyzing commits across different branches:\n\n1. **Shell Completion Scripts**: Find when shell completion functionality was first added to the repository\n2. **CHANGELOG Version 1.0.65**: Find when the changelog was updated to include version 1.0.65 \n3. **Rust Extraction Improvements**: Find when workflow improvements for Rust code extraction were implemented\n\n**Step 2: Create Feature Tracking Documentation**\nCreate a file called `FEATURE_COMMITS.md` in the repository root with:\n\n- A \"# Feature Development Tracking\" title\n- A \"## Overview\" section explaining this tracks major feature additions across repository branches\n- A \"## Feature Commit History\" section with this exact table format:\n```markdown\n| Feature Name | Commit SHA | Author | Branch | Date | Files Changed | Commit Message |\n|-------------|------------|---------|---------|------|---------------|----------------|\n```\n\nFor each feature, populate the table with:\n- Exact commit SHA (full 40-character hash)\n- GitHub username of the commit author\n- Branch where the commit was made\n- Commit date in YYYY-MM-DD format\n- Number of files changed in that commit\n- First line of the commit message\n\n**Step 3: Commit Documentation to Repository**\nCommit the `FEATURE_COMMITS.md` file to the main branch with:\n- Commit message: \"Add feature development tracking documentation\"\n- Ensure the file is properly formatted markdown\n- Verify all commit SHAs in the table are accurate and verifiable\n\nThe verification process will check that your table contains the correct commit SHAs for each specific feature, along with accurate author, 
branch, and date information."
  },
  {
    "path": "tasks/github/standard/claude-code/feature_commit_tracking/meta.json",
    "content": "{\n  \"task_id\": \"feature_commit_tracking\",\n  \"task_name\": \"Feature Commit Tracking\",\n  \"category_id\": \"claude-code\",\n  \"category_name\": \"Claude Code\",\n  \"description\": \"Research development history across branches to track when specific features were introduced and create comprehensive documentation.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"repository analysis\",\n    \"release coordination\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/claude-code\",\n    \"stateOriginalUrl\": \"https://github.com/anthropics/claude-code\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/claude-code/feature_commit_tracking/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nimport base64\nimport re\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _get_file_content(\n    file_path: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"claude-code\",\n    ref: str = \"main\",\n) -> Optional[str]:\n    \"\"\"Get the content of a file from the repository.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={ref}\", headers, org, repo\n    )\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef _verify_commit_exists(\n    commit_sha: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Verify that a commit exists and return its details.\"\"\"\n    success, commit_data = _get_github_api(f\"commits/{commit_sha}\", headers, org, repo)\n    return success, commit_data\n\n\ndef _parse_feature_table(content: str) -> 
List[Dict]:\n    \"\"\"Parse the feature commit table from markdown content.\"\"\"\n    features = []\n\n    lines = content.split(\"\\n\")\n    in_table = False\n\n    for line in lines:\n        # Look for table header\n        if (\n            \"| Feature Name | Commit SHA | Author | Branch | Date | Files Changed | Commit Message |\"\n            in line\n        ):\n            in_table = True\n            continue\n        if in_table and line.startswith(\"|---\"):\n            continue\n\n        # Parse table rows\n        if in_table and line.startswith(\"|\"):\n            parts = [p.strip() for p in line.split(\"|\")]\n            if len(parts) >= 8:  # Should have 7 columns plus empty parts at start/end\n                feature_name = parts[1].strip()\n                commit_sha = parts[2].strip()\n                author = parts[3].strip()\n                branch = parts[4].strip()\n                date = parts[5].strip()\n                files_changed = parts[6].strip()\n                commit_message = parts[7].strip()\n\n                if feature_name and commit_sha and author and branch and date:\n                    features.append(\n                        {\n                            \"name\": feature_name,\n                            \"sha\": commit_sha,\n                            \"author\": author,\n                            \"branch\": branch,\n                            \"date\": date,\n                            \"files_changed\": files_changed,\n                            \"commit_message\": commit_message,\n                        }\n                    )\n\n        # Stop at end of table section\n        if in_table and line and not line.startswith(\"|\") and \"##\" in line:\n            break\n\n    return features\n\n\ndef verify_task() -> bool:\n    \"\"\"Verify the feature commit tracking task.\"\"\"\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    
github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Expected feature commits based on exploration\n    expected_features = {\n        \"Shell Completion Scripts\": \"8a0febdd09bda32f38c351c0881784460d69997d\",\n        \"CHANGELOG Version 1.0.65\": \"94dcaca5d71ad82644ae97f3a2b0c5eb8b63eae0\",\n        \"Rust Extraction Improvements\": \"50e58affdf1bfc7d875202bc040ebe0dcfb7d332\",\n    }\n\n    # Expected authors for each commit\n    expected_authors = {\n        \"8a0febdd09bda32f38c351c0881784460d69997d\": \"gitmpr\",\n        \"94dcaca5d71ad82644ae97f3a2b0c5eb8b63eae0\": \"QwertyJack\",\n        \"50e58affdf1bfc7d875202bc040ebe0dcfb7d332\": \"alokdangre\",\n    }\n\n    # Expected commit messages for each commit\n    expected_messages = {\n        \"8a0febdd09bda32f38c351c0881784460d69997d\": \"feat: add shell completions (bash, zsh, fish)\",\n        \"94dcaca5d71ad82644ae97f3a2b0c5eb8b63eae0\": \"Merge branch 'anthropics:main' into main\",\n        \"50e58affdf1bfc7d875202bc040ebe0dcfb7d332\": \"Enhance Rust extraction and output handling in workflows\",\n    }\n\n    # Expected dates for each commit (YYYY-MM-DD format)\n    expected_dates = {\n        \"8a0febdd09bda32f38c351c0881784460d69997d\": \"2025-08-01\",\n        \"94dcaca5d71ad82644ae97f3a2b0c5eb8b63eae0\": \"2025-08-02\",\n        \"50e58affdf1bfc7d875202bc040ebe0dcfb7d332\": \"2025-08-09\",\n    }\n\n    print(\"Verifying feature commit tracking task...\")\n\n    # 1. 
Check if FEATURE_COMMITS.md exists in main branch\n    print(\"1. Checking if FEATURE_COMMITS.md exists...\")\n    content = _get_file_content(\"FEATURE_COMMITS.md\", headers, github_org)\n    if not content:\n        print(\"Error: FEATURE_COMMITS.md not found in main branch\", file=sys.stderr)\n        return False\n    print(\"✓ FEATURE_COMMITS.md found\")\n\n    # 2. Check required sections exist\n    print(\"2. Checking required sections...\")\n    required_sections = [\n        \"# Feature Development Tracking\",\n        \"## Overview\",\n        \"## Feature Commit History\",\n    ]\n\n    for section in required_sections:\n        if section not in content:\n            print(f\"Error: Missing required section '{section}'\", file=sys.stderr)\n            return False\n    print(\"✓ All required sections present\")\n\n    # 3. Parse and validate feature table\n    print(\"3. Parsing and validating feature table...\")\n    features = _parse_feature_table(content)\n\n    if len(features) < 3:\n        print(\n            f\"Error: Expected at least 3 features, found {len(features)}\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 4. Verify each expected feature is present with correct commit SHA\n    print(\"4. Verifying feature commit SHAs...\")\n    found_features = {}\n    for feature in features:\n        found_features[feature[\"name\"]] = feature[\"sha\"]\n\n    for feature_name, expected_sha in expected_features.items():\n        if feature_name not in found_features:\n            print(\n                f\"Error: Feature '{feature_name}' not found in table\", file=sys.stderr\n            )\n            return False\n\n        actual_sha = found_features[feature_name]\n        if actual_sha != expected_sha:\n            print(\n                f\"Error: Wrong SHA for '{feature_name}'. 
Expected: {expected_sha}, Got: {actual_sha}\",\n                file=sys.stderr,\n            )\n            return False\n\n    print(\"✓ All feature commit SHAs are correct\")\n\n    # 5. Verify each commit exists and has correct author\n    print(\"5. Verifying commit details...\")\n    for feature in features:\n        if feature[\"sha\"] in expected_features.values():\n            success, commit_data = _verify_commit_exists(\n                feature[\"sha\"], headers, github_org\n            )\n            if not success:\n                print(f\"Error: Commit {feature['sha']} not found\", file=sys.stderr)\n                return False\n\n            # Check author\n            expected_author = expected_authors.get(feature[\"sha\"])\n            if expected_author:\n                actual_author = commit_data.get(\"author\", {}).get(\"login\", \"\")\n                if actual_author != expected_author:\n                    print(\n                        f\"Error: Wrong author for {feature['sha']}. Expected: {expected_author}, Got: {actual_author}\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n            # Check commit message (compare with table entry)\n            expected_message = expected_messages.get(feature[\"sha\"])\n            if expected_message and \"commit_message\" in feature:\n                if feature[\"commit_message\"] != expected_message:\n                    print(\n                        f\"Error: Wrong commit message in table for {feature['sha']}. 
Expected: '{expected_message}', Got: '{feature['commit_message']}'\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n            # Also verify against actual commit data\n            if expected_message:\n                actual_message = (\n                    commit_data.get(\"commit\", {}).get(\"message\", \"\").split(\"\\n\")[0]\n                )  # First line only\n                if actual_message != expected_message:\n                    print(\n                        f\"Error: Wrong commit message for {feature['sha']}. Expected: '{expected_message}', Got: '{actual_message}'\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n            # Check date format (YYYY-MM-DD)\n            if not re.match(r\"^\\d{4}-\\d{2}-\\d{2}$\", feature[\"date\"]):\n                print(\n                    f\"Error: Invalid date format for {feature['name']}: {feature['date']}\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Check actual date matches expected\n            expected_date = expected_dates.get(feature[\"sha\"])\n            if expected_date:\n                if feature[\"date\"] != expected_date:\n                    print(\n                        f\"Error: Wrong date for {feature['sha']}. Expected: {expected_date}, Got: {feature['date']}\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n    print(\"✓ All commit details verified\")\n\n    # 6. Verify the table format is correct\n    print(\"6. 
Verifying table format...\")\n    table_header = \"| Feature Name | Commit SHA | Author | Branch | Date | Files Changed | Commit Message |\"\n    if table_header not in content:\n        print(\"Error: Table header format is incorrect\", file=sys.stderr)\n        return False\n\n    # Check that all features have complete information\n    for feature in features:\n        if not all(\n            [\n                feature[\"name\"],\n                feature[\"sha\"],\n                feature[\"author\"],\n                feature[\"branch\"],\n                feature[\"date\"],\n                feature.get(\"commit_message\", \"\"),\n            ]\n        ):\n            print(\n                f\"Error: Incomplete information for feature: {feature['name']}\",\n                file=sys.stderr,\n            )\n            return False\n\n    print(\"✓ Table format is correct and complete\")\n\n    print(\"\\n✅ All verification checks passed!\")\n    print(\"Feature commit tracking completed successfully:\")\n    print(\"  - File: FEATURE_COMMITS.md created in main branch\")\n    print(f\"  - Features tracked: {len(features)}\")\n    print(\"  - All expected commit SHAs verified\")\n    print(\"  - All commit authors verified\")\n    print(\"  - Analysis summary complete\")\n\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify_task()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/claude-code/label_color_standardization/description.md",
    "content": "I need you to implement a comprehensive label documentation and organization workflow for the repository.\n\n**Step 1: Create Label Documentation Issue**\nCreate a new issue with:\n- Title containing: \"Document label organization for better visual organization\" and \"label guide\"\n- Body must include:\n  - A \"## Problem\" heading describing the need for better label documentation\n  - A \"## Proposed Solution\" heading about creating a comprehensive label guide for different label categories\n  - A \"## Benefits\" heading listing improved visual organization and easier issue triage\n  - Keywords: \"label documentation\", \"visual organization\", \"label guide\", \"organization\"\n- Labels: Initially add \"enhancement\" and \"documentation\" labels to the issue\n\n**Step 2: Create Feature Branch**\nCreate a new branch called 'feat/label-color-guide' from main.\n\n**Step 3: Create Label Documentation**\nOn the feature branch, create the file `docs/LABEL_COLORS.md` with:\n- A \"# Label Organization Guide\" title\n- A \"## Label Categories\" section with a table that MUST follow this exact format:\n```markdown\n| Label Name | Category | Description |\n|------------|----------|-------------|\n```\nThe table must include ALL existing labels in the repository. For each label:\n- Group labels by category (e.g., issue-type, platform, area, status, performance)\n- Include a description for each label\n\n- A \"## Usage Guidelines\" section explaining when to use each label category\n\n**Step 4: Apply ALL Labels to the Documentation Issue**\nUpdate the issue you created in Step 1 by adding ALL existing labels from the repository. This serves as a visual demonstration of the label organization. 
The issue should have every single label that exists in the repository applied to it.\n\n**Step 5: Create Pull Request**\nCreate a pull request from 'feat/label-color-guide' to 'main' with:\n- Title containing: \"Add label organization guide\" and \"visual organization\"  \n- Body must include:\n  - A \"## Summary\" heading explaining the label organization documentation\n  - A \"## Changes\" heading with a bullet list of what was added\n  - \"Fixes #[ISSUE_NUMBER]\" pattern linking to your created issue\n  - A \"## Verification\" section stating that all labels have been documented\n  - Keywords: \"label documentation\", \"organization guide\", \"visual improvement\", \"documentation\"\n- Labels: Add a reasonable subset of labels to the PR (at least 5 labels, drawn from different categories)\n\n**Step 6: Document Changes in Issue**\nAdd a comment to the original issue with:\n- Confirmation that the label documentation has been created\n- Total count of labels documented\n- Reference to the PR using \"PR #[NUMBER]\" pattern\n- Keywords: \"documentation created\", \"label guide complete\", \"organization complete\""
  },
  {
    "path": "tasks/github/standard/claude-code/label_color_standardization/meta.json",
    "content": "{\n  \"task_id\": \"label_color_standardization\",\n  \"task_name\": \"Label Color Standardization\",\n  \"category_id\": \"claude-code\",\n  \"category_name\": \"Claude Code\",\n  \"description\": \"Standardize label colors from default gray to a comprehensive color scheme for better visual organization and issue triage.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"issue management\",\n    \"workflow automation\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/claude-code\",\n    \"stateOriginalUrl\": \"https://github.com/anthropics/claude-code\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/claude-code/label_color_standardization/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\n\ndef _check_branch_exists(\n    branch_name: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> bool:\n    \"\"\"Verify that a branch exists in the repository.\"\"\"\n    success, _ = _get_github_api(f\"branches/{branch_name}\", headers, org, repo)\n    return success\n\n\ndef _check_file_content(\n    branch: str,\n    file_path: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"claude-code\",\n) -> Optional[str]:\n    \"\"\"Get file content from a branch.\"\"\"\n    import base64\n\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={branch}\", headers, org, repo\n    )\n    if not success or not result:\n        return None\n\n    if result.get(\"content\"):\n        try:\n            content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n            return content\n        except Exception as e:\n            print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n            return None\n\n    return None\n\n\ndef _parse_label_table(content: str) -> 
List[str]:\n    \"\"\"Parse the label table from markdown content and return label names.\"\"\"\n    documented_labels = []\n\n    # Find the table in the content\n    lines = content.split(\"\\n\")\n    in_table = False\n\n    for line in lines:\n        # Skip header and separator lines\n        if \"| Label Name | Category |\" in line:\n            in_table = True\n            continue\n        if in_table and line.startswith(\"|---\"):\n            continue\n\n        # Parse table rows\n        if in_table and line.startswith(\"|\"):\n            parts = [p.strip() for p in line.split(\"|\")]\n            if len(parts) >= 3:  # Should have at least label, category\n                label_name = parts[1].strip()\n                if label_name:\n                    documented_labels.append(label_name)\n\n        # Stop at end of table\n        if in_table and line and not line.startswith(\"|\"):\n            break\n\n    return documented_labels\n\n\ndef _find_issue_by_title_keywords(\n    title_keywords: List[str],\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"claude-code\",\n) -> Optional[Dict]:\n    \"\"\"Find an issue by title keywords and return the issue data.\"\"\"\n    for state in [\"open\", \"closed\"]:\n        success, issues = _get_github_api(\n            f\"issues?state={state}&per_page=100\", headers, org, repo\n        )\n        if success and issues:\n            for issue in issues:\n                # Skip pull requests\n                if \"pull_request\" in issue:\n                    continue\n                title = issue.get(\"title\", \"\").lower()\n                if all(keyword.lower() in title for keyword in title_keywords):\n                    return issue\n    return None\n\n\ndef _find_pr_by_title_keywords(\n    title_keywords: List[str],\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"claude-code\",\n) -> Optional[Dict]:\n    \"\"\"Find a PR by title keywords and return the PR data.\"\"\"\n    
for state in [\"open\", \"closed\"]:\n        success, prs = _get_github_api(\n            f\"pulls?state={state}&per_page=100\", headers, org, repo\n        )\n        if success and prs:\n            for pr in prs:\n                title = pr.get(\"title\", \"\").lower()\n                if all(keyword.lower() in title for keyword in title_keywords):\n                    return pr\n    return None\n\n\ndef _get_issue_comments(\n    issue_number: int, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> List[Dict]:\n    \"\"\"Get all comments for an issue.\"\"\"\n    success, comments = _get_github_api(\n        f\"issues/{issue_number}/comments\", headers, org, repo\n    )\n    if success and comments:\n        return comments\n    return []\n\n\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the label color standardization workflow meets the\n    requirements described in description.md.\n    \"\"\"\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    # Configuration constants\n    BRANCH_NAME = \"feat/label-color-guide\"\n\n    # Issue requirements\n    ISSUE_TITLE_KEYWORDS = [\"Document label organization\", \"label guide\"]\n    ISSUE_KEYWORDS = [\n        \"label documentation\",\n        \"visual organization\",\n        \"label guide\",\n        \"organization\",\n    ]\n\n    # PR requirements\n    PR_TITLE_KEYWORDS = [\"label organization guide\", \"visual organization\"]\n    PR_KEYWORDS = [\n        \"label documentation\",\n        \"organization guide\",\n        
\"visual improvement\",\n        \"documentation\",\n    ]\n\n    # All expected labels in the repository that are actually used/discoverable via MCP tools\n    # Note: Excludes 'wontfix', 'invalid', 'good first issue', 'help wanted' as they exist\n    # in the repository but are not used by any issues (not discoverable via MCP search)\n    ALL_EXPECTED_LABELS = [\n        \"bug\",\n        \"enhancement\",\n        \"duplicate\",\n        \"question\",\n        \"documentation\",\n        \"platform:macos\",\n        \"platform:linux\",\n        \"platform:windows\",\n        \"area:core\",\n        \"area:tools\",\n        \"area:tui\",\n        \"area:ide\",\n        \"area:mcp\",\n        \"area:api\",\n        \"area:security\",\n        \"area:model\",\n        \"area:auth\",\n        \"area:packaging\",\n        \"has repro\",\n        \"memory\",\n        \"perf:memory\",\n        \"external\",\n    ]\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    print(\"Verifying label color standardization workflow completion...\")\n\n    # 1. Check that feature branch exists\n    print(\"1. Verifying feature branch exists...\")\n    if not _check_branch_exists(BRANCH_NAME, headers, github_org):\n        print(f\"Error: Branch '{BRANCH_NAME}' not found\", file=sys.stderr)\n        return False\n\n    # 2. Check documentation file exists and has correct format\n    print(\"2. 
Verifying label documentation file...\")\n    doc_content = _check_file_content(\n        BRANCH_NAME, \"docs/LABEL_COLORS.md\", headers, github_org\n    )\n    if not doc_content:\n        print(\"Error: docs/LABEL_COLORS.md not found\", file=sys.stderr)\n        return False\n\n    # Parse the label table from documentation\n    documented_labels = _parse_label_table(doc_content)\n    if len(documented_labels) < 20:\n        print(\n            f\"Error: Documentation table incomplete, found only {len(documented_labels)} labels\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 3. Verify labels are documented\n    print(\"3. Verifying expected labels are documented...\")\n    print(f\"  ✓ {len(ALL_EXPECTED_LABELS)} expected labels defined for verification\")\n\n    # 4. Find the created issue\n    print(\"4. Verifying issue creation...\")\n    issue = _find_issue_by_title_keywords(ISSUE_TITLE_KEYWORDS, headers, github_org)\n    if not issue:\n        print(\n            \"Error: Issue with title containing required keywords not found\",\n            file=sys.stderr,\n        )\n        return False\n\n    issue_number = issue.get(\"number\")\n    issue_body = issue.get(\"body\", \"\")\n\n    # Check issue content has required sections and keywords\n    issue_required_sections = [\"## Problem\", \"## Proposed Solution\", \"## Benefits\"]\n    for section in issue_required_sections:\n        if section not in issue_body:\n            print(f\"Error: Issue body missing required section: {section}\", file=sys.stderr)\n            return False\n\n    # Check issue has required keywords\n    if not all(keyword.lower() in issue_body.lower() for keyword in ISSUE_KEYWORDS):\n        missing_keywords = [kw for kw in ISSUE_KEYWORDS if kw.lower() not in issue_body.lower()]\n        print(f\"Error: Issue body missing required keywords: {missing_keywords}\", file=sys.stderr)\n        return False\n\n    # Check issue has initial required labels 
(enhancement and documentation)\n    issue_label_names = [label[\"name\"] for label in issue.get(\"labels\", [])]\n    initial_required_labels = [\"enhancement\", \"documentation\"]\n    for required_label in initial_required_labels:\n        if required_label not in issue_label_names:\n            print(f\"Error: Issue missing initial required label: {required_label}\", file=sys.stderr)\n            return False\n\n    # 5. Find the created PR\n    print(\"5. Verifying pull request creation...\")\n    pr = _find_pr_by_title_keywords(PR_TITLE_KEYWORDS, headers, github_org)\n    if not pr:\n        print(\n            \"Error: PR with title containing required keywords not found\",\n            file=sys.stderr,\n        )\n        return False\n\n    pr_number = pr.get(\"number\")\n    pr_body = pr.get(\"body\", \"\")\n    pr_labels = pr.get(\"labels\", [])\n\n    # Check PR references issue with correct pattern\n    if f\"Fixes #{issue_number}\" not in pr_body and f\"fixes #{issue_number}\" not in pr_body:\n        print(f\"Error: PR does not contain 'Fixes #{issue_number}' pattern\", file=sys.stderr)\n        return False\n\n    # Check PR body has required sections and keywords\n    pr_required_sections = [\"## Summary\", \"## Changes\", \"## Verification\"]\n    for section in pr_required_sections:\n        if section not in pr_body:\n            print(f\"Error: PR body missing required section: {section}\", file=sys.stderr)\n            return False\n\n    # Check PR has required keywords\n    if not all(keyword.lower() in pr_body.lower() for keyword in PR_KEYWORDS):\n        missing_keywords = [kw for kw in PR_KEYWORDS if kw.lower() not in pr_body.lower()]\n        print(f\"Error: PR body missing required keywords: {missing_keywords}\", file=sys.stderr)\n        return False\n\n    # Check PR has sufficient labels (at least 5 from different categories)\n    if len(pr_labels) < 5:\n        print(f\"Error: PR has only {len(pr_labels)} labels, needs at least 5\", 
file=sys.stderr)\n        return False\n\n    # 6. Verify issue has ALL expected/usable labels applied (demonstrates organization)\n    print(\"6. Verifying issue has all expected labels applied...\")\n    issue_label_names = [label[\"name\"] for label in issue.get(\"labels\", [])]\n    # Use our expected labels list instead of all repo labels (excludes unused labels)\n    expected_labels_to_check = ALL_EXPECTED_LABELS\n    missing_labels = []\n\n    for expected_label in expected_labels_to_check:\n        if expected_label not in issue_label_names:\n            missing_labels.append(expected_label)\n\n    if missing_labels:\n        print(\n            f\"Error: Issue missing {len(missing_labels)} expected labels: {missing_labels[:5]}...\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(f\"  ✓ Issue has all {len(expected_labels_to_check)} expected labels applied\")\n\n    # 7. Verify issue has comment documenting changes\n    print(\"7. Verifying issue comment with documentation...\")\n    issue_comments = _get_issue_comments(issue_number, headers, github_org)\n\n    found_update_comment = False\n    comment_required_keywords = [\"documentation created\", \"label guide complete\", \"organization complete\"]\n    \n    for comment in issue_comments:\n        body = comment.get(\"body\", \"\")\n        # Check for PR reference and required keywords\n        if (f\"PR #{pr_number}\" in body and \n            any(keyword.lower() in body.lower() for keyword in comment_required_keywords) and\n            \"total\" in body.lower() and \"labels\" in body.lower()):\n            found_update_comment = True\n            break\n\n    if not found_update_comment:\n        print(\"Error: Issue missing comment documenting changes with required content\", file=sys.stderr)\n        print(\"  Comment should include: PR reference, label count, and completion keywords\", file=sys.stderr)\n        return False\n\n    # 8. 
Final verification of complete workflow\n    print(\"8. Final verification of workflow completion...\")\n    \n    # Skip repository label existence check - we trust that our expected labels \n    # are the ones actually discoverable/usable via MCP tools\n\n    # Ensure expected labels are documented (not all repo labels, since some are unused)\n    documented_label_count = len(documented_labels)\n    expected_label_count = len(ALL_EXPECTED_LABELS)\n\n    if documented_label_count < expected_label_count:\n        print(\n            f\"Error: Documentation incomplete - {documented_label_count} documented vs {expected_label_count} expected\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Check that all expected labels are documented\n    missing_documented_labels = []\n    for expected_label in ALL_EXPECTED_LABELS:\n        if expected_label not in documented_labels:\n            missing_documented_labels.append(expected_label)\n\n    if missing_documented_labels:\n        print(\n            f\"Error: Documentation missing expected labels: {missing_documented_labels}\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(f\"  ✓ All {expected_label_count} expected labels documented\")\n    print(f\"  ✓ All {len(ALL_EXPECTED_LABELS)} expected labels present and documented\")\n\n    print(\"\\n✓ All verification checks passed!\")\n    print(\"Label documentation workflow completed successfully:\")\n    print(\n        f\"  - Issue #{issue_number}: {issue.get('title')} (with all {len(issue_label_names)} labels)\"\n    )\n    print(f\"  - PR #{pr_number}: {pr.get('title')}\")\n    print(f\"  - Branch: {BRANCH_NAME}\")\n    print(\"  - Documentation: docs/LABEL_COLORS.md\")\n    print(f\"  - {expected_label_count} labels documented for better organization\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/easyr1/advanced_branch_strategy/description.md",
    "content": "The EasyR1 repository has a critical production issue: all development happens directly on the `main` branch, which is extremely risky for a project with 25 active issues. A recent commit `098931530606d22f867fd121b1dcb3225a43661f` introduced protocol changes that need to be properly managed through a structured branching workflow. I need you to implement a complete GitFlow strategy by working through a realistic development scenario.\n\n**The Scenario:** You're preparing for the v1.0.0 release while simultaneously handling a critical protocol serialization bug that was introduced in the recent data proto changes.\n\n**Step 1: Initialize GitFlow Structure**\nCreate a `develop` branch from `main` as the new integration branch. Then create a `release/v1.0.0` branch from `develop` to prepare for the upcoming release.\n\n**Step 2: Address the Critical Bug**\nCreate a `feature/protocol-serialization-fix` branch from `develop`. In this branch, create a new file called `PROTOCOL_FIXES.md` with the exact content:\n```\n# Protocol Serialization Fixes\n\n## Critical Fix for Data Proto Issue\n- Enhanced serialization safety check implemented\n- Addresses issue from commit 098931530606d22f867fd121b1dcb3225a43661f\n- Status: Ready for integration testing\n```\n\n**Step 3: Integrate the Fix Through Proper Workflow**\nCreate a pull request from `feature/protocol-serialization-fix` to `develop` to integrate the fix documentation. 
This demonstrates the feature → develop integration pattern.\n\n**Step 4: Update Release Branch**\nMerge the `develop` branch changes into the `release/v1.0.0` branch to include the critical fix in the release.\n\n**Step 5: Document the New Process**\nCreate an issue titled `Implement Advanced Branch Protection Strategy` with exactly these 3 checkboxes in the body:\n- [ ] All development flows through develop branch\n- [ ] Release preparation happens in release/v1.0.0 branch  \n- [ ] Feature integration uses PR workflow\n\nAdd the label `process-implementation` to this issue to track the process implementation."
  },
  {
    "path": "tasks/github/standard/easyr1/advanced_branch_strategy/meta.json",
    "content": "{\n  \"task_id\": \"advanced_branch_strategy\",\n  \"task_name\": \"Advanced Branch Strategy\",\n  \"category_id\": \"easyr1\",\n  \"category_name\": \"EasyR1\",\n  \"description\": \"Implement GitFlow branching strategy with develop, release, and feature branches to replace risky direct-to-main development.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"pr workflows\",\n    \"release coordination\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/EasyR1\",\n    \"stateOriginalUrl\": \"https://github.com/hiyouga/EasyR1\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/easyr1/advanced_branch_strategy/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, Optional, Tuple\nfrom dotenv import load_dotenv\n\nload_dotenv(\".mcp_env\")\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str]\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n    url = f\"https://api.github.com/repos/{github_org}/EasyR1/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _check_gitflow_branches(headers: Dict[str, str]) -> bool:\n    \"\"\"Check if GitFlow branches are properly created from correct base branches.\"\"\"\n    success, branches_data = _get_github_api(\"branches\", headers)\n    if not success or not branches_data:\n        print(\"Error: Could not fetch branches\", file=sys.stderr)\n        return False\n\n    existing_branches = [branch.get(\"name\", \"\") for branch in branches_data]\n    required_branches = [\n        \"develop\",\n        \"release/v1.0.0\",\n        \"feature/protocol-serialization-fix\",\n    ]\n\n    for branch in required_branches:\n        if branch not in existing_branches:\n            print(f\"Error: Required branch '{branch}' not found\", file=sys.stderr)\n            return False\n\n    return True\n\n\ndef _check_protocol_fixes_file(headers: Dict[str, str]) -> bool:\n    \"\"\"Check if PROTOCOL_FIXES.md file exists in feature branch with correct content.\"\"\"\n    success, file_data = _get_github_api(\n        
\"contents/PROTOCOL_FIXES.md?ref=feature/protocol-serialization-fix\", headers\n    )\n    if not success or not file_data:\n        print(\"Error: PROTOCOL_FIXES.md not found in feature branch\", file=sys.stderr)\n        return False\n\n    # Decode base64 content\n    import base64\n\n    content = base64.b64decode(file_data.get(\"content\", \"\")).decode(\"utf-8\")\n\n    # Check for required content elements\n    required_elements = [\n        \"# Protocol Serialization Fixes\",\n        \"## Critical Fix for Data Proto Issue\",\n        \"Enhanced serialization safety check implemented\",\n        \"098931530606d22f867fd121b1dcb3225a43661f\",\n        \"Status: Ready for integration testing\",\n    ]\n\n    for element in required_elements:\n        if element not in content:\n            print(\n                f\"Error: PROTOCOL_FIXES.md missing required content: {element}\",\n                file=sys.stderr,\n            )\n            return False\n\n    return True\n\n\ndef _check_integration_workflow(headers: Dict[str, str]) -> Optional[Dict]:\n    \"\"\"Verify the feature → develop integration pull request exists.\"\"\"\n    # Check both open and closed PRs since the workflow may have completed\n    success, prs = _get_github_api(\"pulls?state=all\", headers)\n    if not success or not prs:\n        print(\"Error: Could not fetch pull requests\", file=sys.stderr)\n        return None\n\n    for pr in prs:\n        head_ref = pr.get(\"head\", {}).get(\"ref\", \"\")\n        base_ref = pr.get(\"base\", {}).get(\"ref\", \"\")\n\n        if head_ref == \"feature/protocol-serialization-fix\" and base_ref == \"develop\":\n            return pr\n\n    print(\n        \"Error: Integration PR from feature/protocol-serialization-fix to develop not found\",\n        file=sys.stderr,\n    )\n    return None\n\n\ndef _check_release_branch_updated(headers: Dict[str, str]) -> bool:\n    \"\"\"Check if release branch contains the develop branch changes.\"\"\"\n    # 
Check if PROTOCOL_FIXES.md exists in release branch\n    success, file_data = _get_github_api(\n        \"contents/PROTOCOL_FIXES.md?ref=release/v1.0.0\", headers\n    )\n    if not success or not file_data:\n        print(\n            \"Error: PROTOCOL_FIXES.md not found in release branch - develop changes not merged\",\n            file=sys.stderr,\n        )\n        return False\n\n    return True\n\n\ndef _check_process_documentation(headers: Dict[str, str]) -> Optional[Dict]:\n    \"\"\"Check if process is properly documented in an issue.\"\"\"\n    success, issues = _get_github_api(\"issues\", headers)\n    if not success or not issues:\n        print(\"Error: Could not fetch issues for documentation check\", file=sys.stderr)\n        return None\n\n    expected_title = \"Implement Advanced Branch Protection Strategy\"\n    expected_checkboxes = [\n        \"All development flows through develop branch\",\n        \"Release preparation happens in release/v1.0.0 branch\",\n        \"Feature integration uses PR workflow\",\n    ]\n\n    for issue in issues:\n        title = issue.get(\"title\", \"\")\n        if title == expected_title:\n            body = issue.get(\"body\", \"\")\n\n            # Check for exactly 3 checkboxes with specific content\n            checkbox_count = body.count(\"- [ ]\") + body.count(\"- [x]\")\n            if checkbox_count != 3:\n                print(\n                    f\"Error: Documentation issue should have 3 checkboxes, found {checkbox_count}\",\n                    file=sys.stderr,\n                )\n                return None\n\n            # Check for specific checkbox content\n            for expected_text in expected_checkboxes:\n                if expected_text not in body:\n                    print(\n                        f\"Error: Documentation issue missing required checkbox: {expected_text}\",\n                        file=sys.stderr,\n                    )\n                    return None\n\n            
# Check label assignment\n            labels = issue.get(\"labels\", [])\n            label_names = [label.get(\"name\") for label in labels]\n            if \"process-implementation\" not in label_names:\n                print(\n                    \"Error: Documentation issue not labeled with 'process-implementation'\",\n                    file=sys.stderr,\n                )\n                return None\n\n            return issue\n\n    print(\"Error: Process documentation issue not found\", file=sys.stderr)\n    return None\n\n\ndef verify() -> bool:\n    \"\"\"\n    Verify the complete GitFlow implementation following the integrated workflow\n    described in description.md.\n    \"\"\"\n    # Get GitHub token\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    print(\"Verifying integrated GitFlow workflow implementation...\")\n\n    # 1. Verify GitFlow structure initialization\n    print(\"1. Checking GitFlow branch structure...\")\n    if not _check_gitflow_branches(headers):\n        return False\n\n    # 2. Verify critical bug fix implementation via new file\n    print(\"2. Checking protocol serialization fix documentation...\")\n    if not _check_protocol_fixes_file(headers):\n        return False\n\n    # 3. Verify integration workflow (feature → develop PR)\n    print(\"3. Checking feature integration workflow...\")\n    integration_pr = _check_integration_workflow(headers)\n    if not integration_pr:\n        return False\n\n    # 4. Verify release branch updated and CI configured\n    print(\"4. Checking release branch sync and CI configuration...\")\n    if not _check_release_branch_updated(headers):\n        return False\n\n    # 5. 
Verify process documentation\n    print(\"5. Checking process documentation...\")\n    doc_issue = _check_process_documentation(headers)\n    if not doc_issue:\n        return False\n\n    print(\"\\n✓ Integrated GitFlow workflow successfully implemented!\")\n    print(\"✓ GitFlow structure: main → develop → release/v1.0.0 branches created\")\n    print(\"✓ Critical fix: Protocol fix documented in PROTOCOL_FIXES.md file\")\n    print(\n        f\"✓ Integration: PR #{integration_pr.get('number')} demonstrates feature → develop workflow\"\n    )\n    print(\n        \"✓ Release prep: Release branch contains develop changes, CI configured for both branches\"\n    )\n    print(\n        f\"✓ Documentation: Process documented in issue #{doc_issue.get('number')} with proper checkboxes\"\n    )\n    print(\n        \"\\nThe repository now has a structured GitFlow workflow ready for implementation!\"\n    )\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/easyr1/config_parameter_audit/description.md",
    "content": "I need you to perform a deep investigation into recent configuration changes in our EasyR1 repository that may be causing training instability issues.\n\n## Task Requirements\n\n### 1. Deep Commit Analysis\nFind the exact commit SHA where the `micro_batch_size_per_device_for_update` parameter was changed from `4` to `1` in the `examples/config.yaml` file. Use GitHub API to:\n- Examine recent commits that modified `examples/config.yaml` \n- Get the specific commit diff showing this parameter change\n- Identify the commit author and timestamp\n\n### 2. Related Parameter Investigation  \nIn the same commit you found above, identify what value the `micro_batch_size_per_device_for_experience` parameter was changed to. Document:\n- The before value for this parameter\n- The after value for this parameter  \n- The specific line numbers in the diff where these changes occurred\n\n### 3. Issue Search and Verification\nSearch through all GitHub issues (both open and closed) to find issues that contain specific keywords. Identify all issue numbers where the issue title or body text contains any of these exact terms:\n- \"OOM\" (case insensitive)\n- \"memory\" (case insensitive) \n- \"batch\" (case insensitive)\n- \"显存\" (GPU memory in Chinese)\n\nYou must find and list ALL issues that contain any of these keywords in their titles or bodies, regardless of whether you think they're related to the parameter changes.\n\n### 4. 
File Creation and Results\nCreate a file named exactly `ANALYSIS_RESULTS.json` in the repository root with this exact structure:\n\n```json\n{\n  \"target_commit_sha\": \"full-40-character-commit-sha\",\n  \"commit_author\": \"author-username\", \n  \"commit_date\": \"YYYY-MM-DD\",\n  \"parameter_changes\": {\n    \"micro_batch_size_per_device_for_update\": {\n      \"before\": 4,\n      \"after\": 1,\n      \"line_number\": 123\n    },\n    \"micro_batch_size_per_device_for_experience\": {\n      \"before\": 16,\n      \"after\": 2, \n      \"line_number\": 124\n    }\n  },\n  \"related_issue_number_list\": [9, 46]\n}\n```\n\n### 5. Verification Requirements\n- The commit SHA must be exactly 40 hexadecimal characters\n- The parameter values must match the actual repository changes  \n- Every issue number in `related_issue_number_list` must reference a real issue in the repository\n- All data must be obtained through GitHub API analysis, not guesswork"
  },
  {
    "path": "tasks/github/standard/easyr1/config_parameter_audit/meta.json",
    "content": "{\n  \"task_id\": \"config_parameter_audit\",\n  \"task_name\": \"Config Parameter Audit\",\n  \"category_id\": \"easyr1\",\n  \"category_name\": \"EasyR1\",\n  \"description\": \"Investigate configuration changes causing training instability by analyzing commits and identifying related memory issues.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"repository analysis\",\n    \"issue management\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/EasyR1\",\n    \"stateOriginalUrl\": \"https://github.com/hiyouga/EasyR1\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/easyr1/config_parameter_audit/verify.py",
    "content": "import sys\nimport os\nimport json\nimport requests\nimport re\nfrom typing import Dict, Optional, Tuple\nfrom dotenv import load_dotenv\n\nload_dotenv(\".mcp_env\")\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str]\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n    url = f\"https://api.github.com/repos/{github_org}/EasyR1/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _get_analysis_results(headers: Dict[str, str]) -> Optional[Dict]:\n    \"\"\"Get ANALYSIS_RESULTS.json file content.\"\"\"\n    success, file_data = _get_github_api(\"contents/ANALYSIS_RESULTS.json\", headers)\n    if not success:\n        return None\n\n    # Decode base64 content\n    import base64\n\n    content = file_data.get(\"content\", \"\")\n    if content:\n        try:\n            decoded_content = base64.b64decode(content).decode(\"utf-8\")\n            return json.loads(decoded_content)\n        except Exception as e:\n            print(f\"Error parsing JSON: {e}\", file=sys.stderr)\n            return None\n    return None\n\n\ndef _verify_commit_data(results: Dict, headers: Dict[str, str]) -> bool:\n    \"\"\"Verify the commit data is accurate.\"\"\"\n    commit_sha = results.get(\"target_commit_sha\")\n\n    # Validate SHA format\n    if not re.match(r\"^[a-f0-9]{40}$\", commit_sha, re.IGNORECASE):\n        print(f\"Error: Invalid commit SHA format: {commit_sha}\", 
file=sys.stderr)\n        return False\n\n    # Get commit details\n    success, commit_data = _get_github_api(f\"commits/{commit_sha}\", headers)\n    if not success:\n        print(f\"Error: Commit {commit_sha} not found in repository\", file=sys.stderr)\n        return False\n\n    # Verify author\n    expected_author = results.get(\"commit_author\")\n    actual_author = commit_data.get(\"author\", {}).get(\"login\")\n    if expected_author != actual_author:\n        print(\n            f\"Error: Commit author mismatch. Expected: {expected_author}, Actual: {actual_author}\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Verify date format\n    commit_date = results.get(\"commit_date\")\n    if not re.match(r\"^\\d{4}-\\d{2}-\\d{2}$\", commit_date):\n        print(\n            f\"Error: Invalid date format: {commit_date}. Expected YYYY-MM-DD\",\n            file=sys.stderr,\n        )\n        return False\n\n    return True\n\n\ndef _verify_parameter_changes(results: Dict, headers: Dict[str, str]) -> bool:\n    \"\"\"Verify the parameter changes are accurate.\"\"\"\n    param_changes = results.get(\"parameter_changes\", {})\n\n    # Check required parameters exist\n    required_params = [\n        \"micro_batch_size_per_device_for_update\",\n        \"micro_batch_size_per_device_for_experience\",\n    ]\n    for param in required_params:\n        if param not in param_changes:\n            print(f\"Error: Missing parameter change data for: {param}\", file=sys.stderr)\n            return False\n\n        change_data = param_changes[param]\n        if not all(key in change_data for key in [\"before\", \"after\", \"line_number\"]):\n            print(\n                f\"Error: Incomplete change data for parameter: {param}\", file=sys.stderr\n            )\n            return False\n\n    # Verify specific expected values based on known repository state\n    update_param = param_changes.get(\"micro_batch_size_per_device_for_update\", 
{})\n    if update_param.get(\"before\") != 4 or update_param.get(\"after\") != 1:\n        print(\n            \"Error: Incorrect values for micro_batch_size_per_device_for_update\",\n            file=sys.stderr,\n        )\n        return False\n\n    experience_param = param_changes.get(\n        \"micro_batch_size_per_device_for_experience\", {}\n    )\n    if experience_param.get(\"before\") != 16 or experience_param.get(\"after\") != 2:\n        print(\n            \"Error: Incorrect values for micro_batch_size_per_device_for_experience\",\n            file=sys.stderr,\n        )\n        return False\n\n    return True\n\n\ndef _get_all_issues_with_keywords(headers: Dict[str, str]) -> set:\n    \"\"\"Find all issues in repository that contain the required keywords.\"\"\"\n    required_keywords = [\"oom\", \"memory\", \"batch\", \"显存\"]\n    keyword_issues = set()\n\n    # Get all issues from repository (both open and closed)\n    page = 1\n    while True:\n        success, issues = _get_github_api(\n            f\"issues?state=all&per_page=100&page={page}\", headers\n        )\n        if not success or not issues:\n            break\n\n        for issue in issues:\n            issue_number = issue.get(\"number\")\n            title = issue.get(\"title\", \"\").lower()\n            body = issue.get(\"body\", \"\").lower() if issue.get(\"body\") else \"\"\n            issue_text = title + \" \" + body\n\n            # Check if any keyword appears in title or body\n            for keyword in required_keywords:\n                if keyword.lower() in issue_text:\n                    keyword_issues.add(issue_number)\n                    break\n\n        # If we got less than 100 issues, we're done\n        if len(issues) < 100:\n            break\n        page += 1\n\n    return keyword_issues\n\n\ndef _verify_issue_references(results: Dict, headers: Dict[str, str]) -> bool:\n    \"\"\"Verify the issue references contain the required keywords.\"\"\"\n    
issue_number_list = results.get(\"related_issue_number_list\")\n\n    if not isinstance(issue_number_list, list) or len(issue_number_list) == 0:\n        print(\n            \"Error: related_issue_number_list must be a non-empty list\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Required keywords to search for (case insensitive)\n    required_keywords = [\"oom\", \"memory\", \"batch\", \"显存\"]\n\n    # First, dynamically find all issues that contain the required keywords\n    expected_issues = _get_all_issues_with_keywords(headers)\n    print(expected_issues)\n    provided_issues = set(issue_number_list)\n\n    # Verify each provided issue contains at least one of the required keywords\n    for issue_number in issue_number_list:\n        if not isinstance(issue_number, int) or issue_number <= 0:\n            print(\n                f\"Error: Invalid issue number format: {issue_number}\", file=sys.stderr\n            )\n            return False\n\n        # Get issue details\n        success, issue_data = _get_github_api(f\"issues/{issue_number}\", headers)\n        if not success:\n            print(\n                f\"Error: Issue #{issue_number} not found in repository\", file=sys.stderr\n            )\n            return False\n\n        # Check if issue title or body contains any required keywords\n        title = issue_data.get(\"title\", \"\").lower()\n        body = issue_data.get(\"body\", \"\").lower() if issue_data.get(\"body\") else \"\"\n        issue_text = title + \" \" + body\n\n        issue_has_keyword = False\n        for keyword in required_keywords:\n            if keyword.lower() in issue_text:\n                issue_has_keyword = True\n                break\n\n        if not issue_has_keyword:\n            print(\n                f\"Error: Issue #{issue_number} does not contain any required keywords: {required_keywords}\",\n                file=sys.stderr,\n            )\n            return False\n\n    # Verify 
agent found exactly the same issues as our dynamic search\n    if provided_issues != expected_issues:\n        missing = expected_issues - provided_issues\n        extra = provided_issues - expected_issues\n        if missing:\n            print(\n                f\"Error: Missing issues that contain required keywords: {missing}\",\n                file=sys.stderr,\n            )\n        if extra:\n            print(\n                f\"Error: Extra issues that don't contain required keywords: {extra}\",\n                file=sys.stderr,\n            )\n        return False\n\n    print(\n        f\"✓ Found all {len(issue_number_list)} issues containing required keywords: {issue_number_list}\"\n    )\n    return True\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the deep commit analysis meets the requirements.\n    \"\"\"\n    # Get GitHub token\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    print(\"Verifying deep commit analysis completion...\")\n\n    # 1. Check ANALYSIS_RESULTS.json exists and is valid JSON\n    print(\"1. Checking ANALYSIS_RESULTS.json exists and is valid...\")\n    results = _get_analysis_results(headers)\n    if not results:\n        print(\"Error: ANALYSIS_RESULTS.json not found or invalid JSON\", file=sys.stderr)\n        return False\n\n    print(\"✓ Found valid ANALYSIS_RESULTS.json\")\n\n    # 2. Verify commit data accuracy\n    print(\"2. Verifying commit data accuracy...\")\n    if not _verify_commit_data(results, headers):\n        return False\n\n    print(\"✓ Commit SHA, author, and date verified\")\n\n    # 3. Verify parameter changes accuracy\n    print(\"3. 
Verifying parameter changes accuracy...\")\n    if not _verify_parameter_changes(results, headers):\n        return False\n\n    print(\"✓ Parameter changes verified with correct before/after values\")\n\n    # 4. Verify issue references\n    print(\"4. Verifying issue references...\")\n    if not _verify_issue_references(results, headers):\n        return False\n\n    print(\"\\n✓ Task completed successfully!\")\n    print(\"Deep commit analysis results verified:\")\n    print(f\"- Found target commit: {results.get('target_commit_sha')}\")\n    print(\n        \"- Verified parameter changes: micro_batch_size_per_device_for_update (4→1), micro_batch_size_per_device_for_experience (16→2)\"\n    )\n    print(\n        f\"- Verified memory/performance issue correlations: {results.get('related_issue_number_list')}\"\n    )\n    print(\"- All data obtained through accurate GitHub API analysis\")\n\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/easyr1/performance_regression_investigation/description.md",
    "content": "In the EasyR1 repo, I've discovered that the recent commit `098931530606d22f867fd121b1dcb3225a43661f` (which fixed data proto) may have introduced performance regressions based on user reports in issues #39 and #41. I need you to create a systematic investigation workflow:\n\n**Step 1: Create Main Tracking Issue**\nCreate a main issue with the exact title \"Performance Regression Analysis: Data Protocol Changes\" and add these 3 labels: \"bug\", \"performance\", \"investigation\".\n\n**Step 2: Create Investigation Branches** \nCreate exactly 3 feature branches from main for different investigation tracks:\n- `investigate-protocol-changes` - for testing protocol-related performance issues\n- `investigate-batch-processing` - for testing batch processing performance issues  \n- `investigate-memory-usage` - for testing memory utilization performance issues\n\n**Step 3: Create Sub-Issues**\nCreate 3 sub-issues and link them to the main tracking issue using sub-issue functionality:\n- \"Test Performance Impact: fix multi modal data oom\" \n- \"Test Performance Impact: upgrade vllm to 0.10\"\n- \"Test Performance Impact: non blocking false by default\"\n\n**Step 4: Document Changes**\nAdd at least 2 comments to the main tracking issue documenting the specific file changes from commit `098931530606d22f867fd121b1dcb3225a43661f`. Reference the exact files `verl/protocol.py` and `examples/config.yaml` with their commit SHA.\n\n**Step 5: Create Analysis PR**\nCreate a pull request from the `investigate-protocol-changes` branch to main with the exact title \"Performance Analysis: Protocol Changes Investigation\"."
  },
  {
    "path": "tasks/github/standard/easyr1/performance_regression_investigation/meta.json",
    "content": "{\n  \"task_id\": \"performance_regression_investigation\",\n  \"task_name\": \"Performance Regression Investigation\",\n  \"category_id\": \"easyr1\",\n  \"category_name\": \"EasyR1\",\n  \"description\": \"Create systematic investigation workflow for performance regressions with tracking issues, investigation branches, and sub-issues.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"issue management\",\n    \"repository analysis\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/EasyR1\",\n    \"stateOriginalUrl\": \"https://github.com/hiyouga/EasyR1\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/easyr1/performance_regression_investigation/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nfrom dotenv import load_dotenv\n\nload_dotenv(\".mcp_env\")\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str]\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n    url = f\"https://api.github.com/repos/{github_org}/EasyR1/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _find_main_tracking_issue(headers: Dict[str, str]) -> Optional[Dict]:\n    \"\"\"Find the main tracking issue with exact title and required labels.\"\"\"\n    success, issues = _get_github_api(\"issues?state=open&per_page=50\", headers)\n    if not success or not issues:\n        return None\n\n    for issue in issues:\n        title = issue.get(\"title\", \"\")\n        if title == \"Performance Regression Analysis: Data Protocol Changes\":\n            # Check labels\n            labels = [label.get(\"name\", \"\") for label in issue.get(\"labels\", [])]\n            required_labels = {\"bug\", \"performance\", \"investigation\"}\n            if required_labels.issubset(set(labels)):\n                return issue\n    return None\n\n\ndef _check_branches_exist(branch_names: List[str], headers: Dict[str, str]) -> bool:\n    \"\"\"Check if all required branches exist.\"\"\"\n    for branch_name in branch_names:\n        success, _ = _get_github_api(f\"branches/{branch_name}\", headers)\n        if not 
success:\n            print(f\"Error: Branch '{branch_name}' not found\", file=sys.stderr)\n            return False\n    return True\n\n\ndef _check_sub_issues(\n    main_issue_number: int, expected_titles: List[str], headers: Dict[str, str]\n) -> bool:\n    \"\"\"Check if sub-issues are created and linked to main issue.\"\"\"\n    success, sub_issues = _get_github_api(\n        f\"issues/{main_issue_number}/sub_issues\", headers\n    )\n    if not success:\n        # If sub_issues endpoint doesn't exist, check for issues mentioning the main issue\n        success, all_issues = _get_github_api(\"issues?state=open&per_page=100\", headers)\n        if not success:\n            return False\n\n        sub_issues = []\n        for issue in all_issues:\n            body = issue.get(\"body\", \"\")\n            title = issue.get(\"title\", \"\")\n            # Check if issue references main issue or has expected title pattern\n            if f\"#{main_issue_number}\" in body or any(\n                expected_title in title for expected_title in expected_titles\n            ):\n                sub_issues.append(issue)\n\n    if not sub_issues or len(sub_issues) < 3:\n        print(\n            f\"Error: Expected 3 sub-issues linked to main issue #{main_issue_number}\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Check if sub-issues have expected titles\n    found_titles = [issue.get(\"title\", \"\") for issue in sub_issues]\n    for expected_title in expected_titles:\n        if not any(expected_title in title for title in found_titles):\n            print(\n                f\"Error: Sub-issue with title containing '{expected_title}' not found\",\n                file=sys.stderr,\n            )\n            return False\n\n    return True\n\n\ndef _check_issue_comments(issue_number: int, headers: Dict[str, str]) -> bool:\n    \"\"\"Check if main issue has at least 2 comments with file references.\"\"\"\n    success, comments = 
_get_github_api(f\"issues/{issue_number}/comments\", headers)\n    if not success or not comments:\n        print(f\"Error: No comments found on issue #{issue_number}\", file=sys.stderr)\n        return False\n\n    if len(comments) < 2:\n        print(\n            f\"Error: Expected at least 2 comments on issue #{issue_number}\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Check if comments reference specific files and commit\n    required_refs = [\n        \"verl/protocol.py\",\n        \"examples/config.yaml\",\n        \"0989315\",\n    ]\n    comment_text = \" \".join([comment.get(\"body\", \"\") for comment in comments])\n\n    for ref in required_refs:\n        if ref not in comment_text:\n            print(f\"Error: Comments missing reference to '{ref}'\", file=sys.stderr)\n            return False\n\n    return True\n\n\ndef _find_analysis_pr(headers: Dict[str, str]) -> Optional[Dict]:\n    \"\"\"Find the analysis PR with exact title from specific branch.\"\"\"\n    success, prs = _get_github_api(\"pulls?state=open&per_page=50\", headers)\n    if not success or not prs:\n        return None\n\n    expected_title = \"Performance Analysis: Protocol Changes Investigation\"\n    expected_head = \"investigate-protocol-changes\"\n\n    for pr in prs:\n        title = pr.get(\"title\", \"\")\n        head_ref = pr.get(\"head\", {}).get(\"ref\", \"\")\n\n        if title == expected_title and head_ref == expected_head:\n            return pr\n\n    return None\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the performance regression investigation workflow meets the\n    requirements described in description.md.\n    \"\"\"\n    # Get GitHub token\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"token 
{github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    print(\"Verifying performance regression investigation workflow completion...\")\n\n    # 1. Check main tracking issue exists with exact title and labels\n    print(\"1. Checking main tracking issue with required title and labels...\")\n    main_issue = _find_main_tracking_issue(headers)\n    if not main_issue:\n        print(\n            \"Error: Main tracking issue not found with exact title 'Performance Regression Analysis: Data Protocol Changes' and labels 'bug', 'performance', 'investigation'\",\n            file=sys.stderr,\n        )\n        return False\n\n    main_issue_number = main_issue.get(\"number\")\n    print(f\"Found main tracking issue #{main_issue_number}\")\n\n    # 2. Check that all 3 investigation branches exist\n    print(\"2. Checking investigation branches exist...\")\n    required_branches = [\n        \"investigate-protocol-changes\",\n        \"investigate-batch-processing\",\n        \"investigate-memory-usage\",\n    ]\n    if not _check_branches_exist(required_branches, headers):\n        return False\n\n    # 3. Check sub-issues are created and linked\n    print(\"3. Checking sub-issues are created and linked...\")\n    expected_sub_titles = [\n        \"Test Performance Impact: fix multi modal data oom\",\n        \"Test Performance Impact: upgrade vllm to 0.10\",\n        \"Test Performance Impact: non blocking false by default\",\n    ]\n    if not _check_sub_issues(main_issue_number, expected_sub_titles, headers):\n        return False\n\n    # 4. Check issue comments document file changes\n    print(\"4. Checking issue comments document file changes...\")\n    if not _check_issue_comments(main_issue_number, headers):\n        return False\n\n    # 5. Check analysis PR exists with exact title from correct branch\n    print(\"5. 
Checking analysis PR exists with exact title and branch...\")\n    analysis_pr = _find_analysis_pr(headers)\n    if not analysis_pr:\n        print(\n            \"Error: Analysis PR not found with title 'Performance Analysis: Protocol Changes Investigation' from branch 'investigate-protocol-changes'\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(f\"Found analysis PR #{analysis_pr.get('number')}\")\n\n    print(\"\\n✓ Task completed successfully!\")\n    print(\n        f\"Main tracking issue #{main_issue_number} created with proper labels and documentation\"\n    )\n    print(\"All 3 investigation branches created for different investigation tracks\")\n    print(\"3 sub-issues created and linked to main tracking issue\")\n    print(\"Issue comments document file changes with commit SHA references\")\n    print(f\"Analysis PR #{analysis_pr.get('number')} created from correct branch\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/easyr1/qwen3_issue_management/description.md",
    "content": "The EasyR1 repository has several Qwen3-related issues that were closed but need to be reopened for further investigation. Qwen3 is an important model variant that requires continued attention. I need you to find and reopen all closed issues related to Qwen3 and properly tag them for tracking.\n\n**Step 1: Find All Closed Qwen3 Issues**\nSearch for ALL closed issues that mention 'qwen3' (case-insensitive) in either the title or body. Make note of each issue number and title.\n\n**Step 2: Reopen Each Qwen3 Issue**\nFor every closed issue that contains 'qwen3' (regardless of when it was closed or any other factors), reopen it by changing its state from closed to open.\n\n**Step 3: Add Tracking Label**\nAfter reopening each issue, add the label `qwen3-related` to it. This will help track all Qwen3-related issues in the future.\n\n**Step 4: Create Summary Issue**\nCreate a new issue titled \"Reopened Qwen3 Issues Summary\" with the following content in the body:\n```\n# Qwen3 Issues Reopened\n\nThe following closed issues containing 'qwen3' have been reopened:\n\n[List each reopened issue as: - #NUMBER: TITLE]\n\nTotal issues reopened: [NUMBER]\n\nAll reopened issues have been tagged with the `qwen3-related` label for easy tracking.\n```\n\nAdd the label `qwen3-related` to this summary issue as well.\n\nThis straightforward workflow ensures all Qwen3-related closed issues are reopened and properly tagged for visibility."
  },
  {
    "path": "tasks/github/standard/easyr1/qwen3_issue_management/meta.json",
    "content": "{\n  \"task_id\": \"qwen3_issue_management\",\n  \"task_name\": \"Qwen3 Issue Management\",\n  \"category_id\": \"easyr1\",\n  \"category_name\": \"EasyR1\",\n  \"description\": \"Find and reopen all closed Qwen3-related issues with proper tagging for continued tracking and investigation.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"issue management\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/EasyR1\",\n    \"stateOriginalUrl\": \"https://github.com/hiyouga/EasyR1\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/easyr1/qwen3_issue_management/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nfrom dotenv import load_dotenv\n\nload_dotenv(\".mcp_env\")\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str]\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n    url = f\"https://api.github.com/repos/{github_org}/EasyR1/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _search_github_issues(\n    query: str, headers: Dict[str, str]\n) -> Tuple[bool, Optional[List]]:\n    \"\"\"Search GitHub issues using the search API.\"\"\"\n    url = f\"https://api.github.com/search/issues?q={query}&per_page=100\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            data = response.json()\n            return True, data.get(\"items\", [])\n        else:\n            print(f\"Search API error: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Search exception: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _check_qwen3_issues_reopened(headers: Dict[str, str]) -> Tuple[bool, List]:\n    \"\"\"Check if all Qwen3 issues have been reopened and tagged.\"\"\"\n    # Search for all issues mentioning qwen3 (both open and closed)\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n    success, all_qwen3_issues = _search_github_issues(\n        
f\"repo:{github_org}/EasyR1 qwen3\", headers\n    )\n\n    if not success or not all_qwen3_issues:\n        print(\"Error: Could not search for Qwen3 issues\", file=sys.stderr)\n        return False, []\n\n    reopened_issues = []\n    issues_not_reopened = []\n    issues_not_tagged = []\n\n    for issue in all_qwen3_issues:\n        issue_number = issue.get(\"number\")\n        issue_state = issue.get(\"state\")\n        issue_title = issue.get(\"title\", \"\")\n\n        # Check if the issue is open (should be reopened)\n        if issue_state == \"closed\":\n            issues_not_reopened.append(f\"#{issue_number}: {issue_title}\")\n            continue\n\n        # Check if issue has qwen3-related label\n        labels = [label.get(\"name\") for label in issue.get(\"labels\", [])]\n        if \"qwen3-related\" not in labels:\n            issues_not_tagged.append(f\"#{issue_number}: {issue_title}\")\n        else:\n            reopened_issues.append(issue)\n\n    # Report any issues not properly processed\n    if issues_not_reopened:\n        print(\"Error: The following Qwen3 issues are still closed:\", file=sys.stderr)\n        for issue in issues_not_reopened:\n            print(f\"  - {issue}\", file=sys.stderr)\n        return False, []\n\n    if issues_not_tagged:\n        print(\n            \"Error: The following reopened issues are missing 'qwen3-related' label:\",\n            file=sys.stderr,\n        )\n        for issue in issues_not_tagged:\n            print(f\"  - {issue}\", file=sys.stderr)\n        return False, reopened_issues\n\n    return True, reopened_issues\n\n\ndef _check_summary_issue(\n    headers: Dict[str, str], reopened_issues: List\n) -> Optional[Dict]:\n    \"\"\"Check if the summary issue exists with proper content.\"\"\"\n    success, issues = _get_github_api(\"issues?state=all\", headers)\n    if not success or not issues:\n        print(\"Error: Could not fetch issues for summary check\", file=sys.stderr)\n        return 
None\n\n    expected_title = \"Reopened Qwen3 Issues Summary\"\n\n    for issue in issues:\n        title = issue.get(\"title\", \"\")\n        if title == expected_title:\n            body = issue.get(\"body\", \"\")\n\n            # Check for required content\n            if \"# Qwen3 Issues Reopened\" not in body:\n                print(\"Error: Summary issue missing header\", file=sys.stderr)\n                return None\n\n            if (\n                \"The following closed issues containing 'qwen3' have been reopened:\"\n                not in body\n            ):\n                print(\"Error: Summary issue missing description\", file=sys.stderr)\n                return None\n\n            if \"Total issues reopened:\" not in body:\n                print(\"Error: Summary issue missing total count\", file=sys.stderr)\n                return None\n\n            if (\n                \"All reopened issues have been tagged with the `qwen3-related` label\"\n                not in body\n            ):\n                print(\"Error: Summary issue missing tagging note\", file=sys.stderr)\n                return None\n\n            # Check if all reopened issues are listed\n            for reopened_issue in reopened_issues:\n                issue_num = reopened_issue.get(\"number\")\n                if f\"#{issue_num}\" not in body:\n                    print(\n                        f\"Error: Summary issue missing reference to issue #{issue_num}\",\n                        file=sys.stderr,\n                    )\n                    return None\n\n            # Check if summary issue has the label\n            labels = [label.get(\"name\") for label in issue.get(\"labels\", [])]\n            if \"qwen3-related\" not in labels:\n                print(\n                    \"Error: Summary issue missing 'qwen3-related' label\",\n                    file=sys.stderr,\n                )\n                return None\n\n            return issue\n\n    print(\n      
  \"Error: Summary issue 'Reopened Qwen3 Issues Summary' not found\",\n        file=sys.stderr,\n    )\n    return None\n\n\ndef verify() -> bool:\n    \"\"\"\n    Verify that all Qwen3-related closed issues have been reopened and tagged.\n    \"\"\"\n    # Get GitHub token\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    print(\"Verifying Qwen3 issue reopening workflow...\")\n\n    # 1. Check if all Qwen3 issues have been reopened and tagged\n    print(\"1. Checking if Qwen3 issues are reopened and tagged...\")\n    all_reopened, reopened_issues = _check_qwen3_issues_reopened(headers)\n\n    if not all_reopened:\n        return False\n\n    if not reopened_issues:\n        print(\"Error: No Qwen3 issues found or reopened\", file=sys.stderr)\n        return False\n\n    # 2. Check if summary issue exists\n    print(\"2. Checking summary issue...\")\n    summary_issue = _check_summary_issue(headers, reopened_issues)\n    if not summary_issue:\n        return False\n\n    print(\"\\n✓ Qwen3 issue reopening workflow successfully completed!\")\n    print(f\"✓ Reopened Issues: {len(reopened_issues)} Qwen3-related issues reopened\")\n    print(\"✓ Tagging: All reopened issues tagged with 'qwen3-related' label\")\n    print(\n        f\"✓ Summary: Issue #{summary_issue.get('number')} created with complete list of reopened issues\"\n    )\n    print(\"\\nAll Qwen3-related closed issues have been reopened and properly tagged!\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/harmony/fix_conflict/description.md",
    "content": "I have some pull requests that won't merge due to conflicts. Can you help me fix the merge conflicts by creating the missing infrastructure?\n\n**Step 1: Find Conflicted PR**\nLook through the open pull requests and find the one that has `mergeable: false` and `mergeable_state: \"dirty\"`. Check what file it's trying to modify - it appears to be missing a file that the PR is trying to add or modify.\n\n**Step 2: Create Infrastructure PR**  \nCreate a new branch and PR to add the missing file that the conflicted PR needs. The PR must have:\n\n- **Title**: Must contain \"Add CI infrastructure\" and \"resolve conflicts\"\n- **Body**: Must include:\n  - Reference to the conflicted PR using \"Fixes #[PR_NUMBER]\" or \"Resolves #[PR_NUMBER]\" \n  - Explanation that this \"prepares infrastructure\" for the other PR\n  - Mention of \"missing .github directory\" and \"workflow conflicts\"\n- **File Content**: Extract the complete file content from the conflicted PR's changes and add it to main. This ensures the conflicted PR can merge cleanly without conflicts.\n\n**Step 3: Merge Infrastructure PR**\nMerge the infrastructure PR to main.\n\n**Step 4: Add Comment to Original PR**\nAdd a comment to the original conflicted PR that references the infrastructure PR you just created and merged. The comment must mention the infrastructure PR number using \"PR #[NUMBER]\" format.\n\n**Step 5: Merge Original PR**\nNow merge the original conflicted PR since it should be able to merge cleanly."
  },
  {
    "path": "tasks/github/standard/harmony/fix_conflict/meta.json",
    "content": "{\n  \"task_id\": \"fix_conflict\",\n  \"task_name\": \"Fix Conflict\",\n  \"category_id\": \"harmony\",\n  \"category_name\": \"Harmony\",\n  \"description\": \"Resolve merge conflicts by creating missing infrastructure and ensuring conflicted PRs can merge cleanly.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"pr workflows\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/harmony\",\n    \"stateOriginalUrl\": \"https://github.com/openai/harmony\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/harmony/fix_conflict/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, Optional, Tuple\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _check_ci_file_exists(\n    file_path: str, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> bool:\n    \"\"\"Check if CI file exists in main branch.\"\"\"\n    success, _ = _get_github_api(f\"contents/{file_path}?ref=main\", headers, org, repo)\n    return success\n\n\ndef _check_pr_comments(\n    pr_number: int,\n    infra_pr_number: int,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"harmony\",\n) -> bool:\n    \"\"\"Check if PR has a comment linking to the infrastructure PR using 'PR #[NUMBER]' format.\"\"\"\n    success, comments = _get_github_api(\n        f\"issues/{pr_number}/comments\", headers, org, repo\n    )\n    if not success or not comments:\n        return False\n\n    # Look for \"PR #123\" pattern (case insensitive)\n    import re\n\n    for comment in comments:\n        body = comment.get(\"body\", \"\")\n        if re.search(rf\"PR\\s*#{infra_pr_number}\", body, re.IGNORECASE):\n            return True\n    return False\n\n\ndef _find_infrastructure_pr(\n    headers: Dict[str, str], org: str, repo: str = 
\"harmony\"\n) -> Optional[Dict]:\n    \"\"\"Find the infrastructure PR by checking title and body content.\"\"\"\n    success, prs = _get_github_api(\"pulls?state=all&per_page=50\", headers, org, repo)\n    if success and prs:\n        for pr in prs:\n            title = pr.get(\"title\", \"\").lower()\n            body = pr.get(\"body\", \"\").lower()\n\n            # Check title contains required keywords\n            title_ok = \"add ci infrastructure\" in title and \"resolve conflicts\" in title\n\n            # Check body contains required elements\n            has_reference = \"fixes #\" in body or \"resolves #\" in body\n            has_prep_text = \"prepares infrastructure\" in body\n            has_github_text = \"missing .github directory\" in body\n            has_workflow_text = \"workflow conflicts\" in body\n\n            body_ok = (\n                has_reference\n                and has_prep_text\n                and has_github_text\n                and has_workflow_text\n            )\n\n            if title_ok and body_ok:\n                return pr\n    return None\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the merge conflict resolution workflow meets the\n    requirements described in description.md.\n    \"\"\"\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    print(\"Verifying merge conflict 
resolution workflow completion...\")\n\n    # 1. Check that CI infrastructure file exists in main (extracted from conflicted PR)\n    print(\"1. Checking CI infrastructure was added to main...\")\n    # Check for both CI.yml and ci.yml (case-insensitive)\n    ci_exists = _check_ci_file_exists(\".github/workflows/CI.yml\", headers, github_org)\n    if not ci_exists:\n        ci_exists = _check_ci_file_exists(\".github/workflows/ci.yml\", headers, github_org)\n    \n    if not ci_exists:\n        print(\"Error: Neither .github/workflows/CI.yml nor .github/workflows/ci.yml found in main\", file=sys.stderr)\n        return False\n\n    # 2. Find infrastructure PR with required title and body content\n    print(\"2. Finding infrastructure PR with required content...\")\n    infra_pr = _find_infrastructure_pr(headers, github_org)\n    if not infra_pr:\n        print(\n            \"Error: No infrastructure PR found with required title and body content\",\n            file=sys.stderr,\n        )\n        print(\n            \"Required title: 'Add CI infrastructure' and 'resolve conflicts'\",\n            file=sys.stderr,\n        )\n        print(\n            \"Required body: reference with 'Fixes #' or 'Resolves #', 'prepares infrastructure', 'missing .github directory', 'workflow conflicts'\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(f\"Found infrastructure PR #{infra_pr.get('number')}: {infra_pr.get('title')}\")\n\n    # 3. Check that infrastructure PR is merged\n    if not infra_pr.get(\"merged_at\"):\n        print(\n            f\"Error: Infrastructure PR #{infra_pr.get('number')} not merged yet\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 4. Check that PR #24 is merged\n    print(\"3. 
Checking that PR #24 is merged...\")\n    success, pr24 = _get_github_api(\"pulls/24\", headers, github_org)\n    if not success or not pr24:\n        print(\"Error: PR #24 not found\", file=sys.stderr)\n        return False\n\n    if not pr24.get(\"merged_at\"):\n        print(\"Error: PR #24 is not merged yet\", file=sys.stderr)\n        return False\n\n    # 5. Check that PR #24 has a comment linking to the infrastructure PR\n    print(\"4. Checking that PR #24 has comment linking to infrastructure PR...\")\n    if not _check_pr_comments(24, infra_pr.get(\"number\"), headers, github_org):\n        print(\n            f\"Error: PR #24 missing comment linking to infrastructure PR #{infra_pr.get('number')}\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"\\n✓ Task completed successfully!\")\n    print(\n        f\"Infrastructure PR #{infra_pr.get('number')} extracted content from PR #24 and resolved conflicts\"\n    )\n    print(\n        \"PR #24 is now merged cleanly and has a comment linking to the infrastructure PR\"\n    )\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/harmony/issue_pr_commit_workflow/description.md",
    "content": "I need you to implement a complete bug tracking and resolution workflow that demonstrates proper cross-referencing between issues, pull requests, and commits. Here's what you need to do:\n\n**Step 1: Create Issue for Race Condition Bug**\nCreate a new issue with:\n- Title containing: 'race condition', 'HarmonyEncoding', 'concurrent access'\n- Body must include:\n  - A \"## Problem\" heading describing threading issues\n  - A \"## Root Cause\" heading about file locking\n  - A \"## Expected Solution\" heading with bullet points\n  - References to issues #6 and #1\n  - Keywords: \"multiple threads\", \"tokenizer file downloads\", \"mutex-based file locking\"\n\n**Step 2: Create Feature Branch**\nCreate a new branch called 'fix/race-condition-tokenizer-loading' from main.\n\n**Step 3: Implement Thread-Safe Loading**\nOn the feature branch, create/update the file `src/concurrent_loading.rs` with:\n```rust\nuse std::sync::Mutex;\nuse std::sync::OnceLock;\n\n// Thread-safe tokenizer loading with file locks\nstatic DOWNLOAD_MUTEX: OnceLock<Mutex<()>> = OnceLock::new();\n\npub fn load_harmony_encoding_safe(name: &str) -> Result<HarmonyEncoding, HarmonyError> {\n    let _guard = DOWNLOAD_MUTEX.get_or_init(|| Mutex::new(())).lock().unwrap();\n    // Implementation for thread-safe loading\n    // Addresses race condition from issue #6\n    Ok(HarmonyEncoding::new())\n}\n\npub fn load_harmony_encoding_from_file(path: &str) -> Result<HarmonyEncoding, HarmonyError> {\n    // Offline loading API as requested in issue #1\n    HarmonyEncoding::from_file(path)\n}\n```\n\n**Step 4: Create Pull Request with Cross-References**\nCreate a pull request from 'fix/race-condition-tokenizer-loading' to 'main' with:\n- Title containing: 'Fix race condition', 'tokenizer loading', 'threading issues'\n- Body must include:\n  - A \"## Summary\" heading explaining the fix\n  - A \"## Changes\" heading with bullet points about mutex implementation\n  - A \"## Testing\" heading 
mentioning related issues\n  - \"Closes #[ISSUE_NUMBER]\" pattern linking to your created issue\n  - References to #1 and #6\n  - Keywords: \"thread-safe\", \"concurrent downloads\", \"offline loading API\"\n\n**Step 5: Add PR Review Comments**\nCreate a pending review and add a review comment to the PR with:\n- Technical analysis of the implementation approach\n- Discussion of thread safety mechanisms\n- Keywords that must be included: \"OnceLock\", \"mutex\", \"thread safety\", \"concurrent access\"\n- Reference to issue #1 and the offline loading capability\n- Explanation of how the solution prevents race conditions\nThen submit the review as a COMMENT type review.\n\n**Step 6: Update Issue with Implementation Details**\nAdd a comment to the original issue you created with:\n- Reference to the PR number using \"PR #[NUMBER]\" pattern\n- Technical details about the mutex-based solution\n- Keywords: \"std::sync::Mutex\", \"OnceLock\", \"thread-safe initialization\"\n- Mention of key implementation changes (DOWNLOAD_MUTEX, offline loading)\n- Reference back to issue #1 for offline loading requirement\n\n**Step 7: Close the Issue**\nClose the issue you created by updating its state to 'closed' with state_reason 'completed'."
  },
  {
    "path": "tasks/github/standard/harmony/issue_pr_commit_workflow/meta.json",
    "content": "{\n  \"task_id\": \"issue_pr_commit_workflow\",\n  \"task_name\": \"Issue Pr Commit Workflow\",\n  \"category_id\": \"harmony\",\n  \"category_name\": \"Harmony\",\n  \"description\": \"Implement complete bug tracking workflow demonstrating proper cross-referencing between issues, PRs, and commits for race condition fixes.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"issue management\",\n    \"pr workflows\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/harmony\",\n    \"stateOriginalUrl\": \"https://github.com/openai/harmony\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/harmony/issue_pr_commit_workflow/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nimport base64\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _check_branch_exists(\n    branch_name: str, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> bool:\n    \"\"\"Verify that a branch exists in the repository.\"\"\"\n    success, _ = _get_github_api(f\"branches/{branch_name}\", headers, org, repo)\n    return success\n\n\ndef _check_file_content(\n    branch: str,\n    file_path: str,\n    keywords: List[str],\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"harmony\",\n) -> bool:\n    \"\"\"Verify that a file exists in branch and contains required keywords.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={branch}\", headers, org, repo\n    )\n    if not success or not result:\n        return False\n\n    if keywords and result.get(\"content\"):\n        try:\n            content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n            return all(keyword in content for keyword in keywords)\n        except Exception as e:\n            print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n       
     return False\n\n    return True\n\n\ndef _find_issue_by_title(\n    title_substring: str, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> Optional[Dict]:\n    \"\"\"Find an issue by title substring and return the issue data.\"\"\"\n    # Check both open and closed issues\n    for state in [\"open\", \"closed\"]:\n        success, issues = _get_github_api(\n            f\"issues?state={state}&per_page=100\", headers, org, repo\n        )\n        if success and issues:\n            for issue in issues:\n                if title_substring.lower() in issue.get(\"title\", \"\").lower():\n                    return issue\n    return None\n\n\ndef _find_pr_by_title(\n    title_substring: str, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> Optional[Dict]:\n    \"\"\"Find a PR by title substring and return the PR data.\"\"\"\n    # Check both open and closed PRs\n    for state in [\"open\", \"closed\"]:\n        success, prs = _get_github_api(\n            f\"pulls?state={state}&per_page=100\", headers, org, repo\n        )\n        if success and prs:\n            for pr in prs:\n                if title_substring.lower() in pr.get(\"title\", \"\").lower():\n                    return pr\n    return None\n\n\ndef _check_issue_references(issue_body: str, reference_numbers: List[str]) -> bool:\n    \"\"\"Check if issue body contains references to specified issue numbers.\"\"\"\n    if not issue_body:\n        return False\n\n    return all(f\"#{ref}\" in issue_body for ref in reference_numbers)\n\n\ndef _check_pr_references(\n    pr_body: str, issue_number: int, reference_numbers: List[str]\n) -> bool:\n    \"\"\"Check if PR body contains proper references.\"\"\"\n    if not pr_body:\n        return False\n\n    # Check for \"Closes #X\" pattern\n    closes_pattern = (\n        f\"Closes #{issue_number}\" in pr_body or f\"closes #{issue_number}\" in pr_body\n    )\n\n    # Check for other references\n    refs_present = all(f\"#{ref}\" 
in pr_body for ref in reference_numbers)\n\n    return closes_pattern and refs_present\n\n\ndef _get_issue_comments(\n    issue_number: int, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> List[Dict]:\n    \"\"\"Get all comments for an issue.\"\"\"\n    success, comments = _get_github_api(\n        f\"issues/{issue_number}/comments\", headers, org, repo\n    )\n    if success and comments:\n        return comments\n    return []\n\n\ndef _get_pr_reviews(\n    pr_number: int, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> List[Dict]:\n    \"\"\"Get all reviews for a PR.\"\"\"\n    success, reviews = _get_github_api(f\"pulls/{pr_number}/reviews\", headers, org, repo)\n    if success and reviews:\n        return reviews\n    return []\n\n\ndef _check_issue_comment_references(\n    comments: List[Dict], pr_number: int, keywords: List[str]\n) -> bool:\n    \"\"\"Check if issue has a comment referencing the PR number with required technical keywords.\"\"\"\n    for comment in comments:\n        body = comment.get(\"body\", \"\")\n        has_pr_ref = (\n            f\"PR #{pr_number}\" in body\n            or f\"PR#{pr_number}\" in body\n            or f\"pr #{pr_number}\" in body.lower()\n        )\n        has_keywords = all(keyword.lower() in body.lower() for keyword in keywords)\n        if has_pr_ref and has_keywords:\n            return True\n    return False\n\n\ndef _check_title_keywords(title: str, required_keywords: List[str]) -> bool:\n    \"\"\"Check if title contains all required keywords.\"\"\"\n    return all(keyword.lower() in title.lower() for keyword in required_keywords)\n\n\ndef _check_headings_and_content(\n    body: str, headings: List[str], keywords: List[str]\n) -> bool:\n    \"\"\"Check if body contains required headings and keywords.\"\"\"\n    has_headings = all(heading in body for heading in headings)\n    has_keywords = all(keyword.lower() in body.lower() for keyword in keywords)\n    return has_headings and 
has_keywords\n\n\ndef _check_pr_review_content(reviews: List[Dict], keywords: List[str]) -> bool:\n    \"\"\"Check if PR has review comments containing required keywords.\"\"\"\n    for review in reviews:\n        body = review.get(\"body\", \"\")\n        if body and all(keyword.lower() in body.lower() for keyword in keywords):\n            return True\n    return False\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the issue-PR-commit workflow meets the\n    requirements described in description.md.\n    \"\"\"\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    # Configuration constants\n    BRANCH_NAME = \"fix/race-condition-tokenizer-loading\"\n    ISSUE_TITLE_SUBSTRING = \"race condition in HarmonyEncoding\"\n    PR_TITLE_SUBSTRING = \"Fix race condition in tokenizer loading\"\n\n    # File content checks\n    RUST_FILE_KEYWORDS = [\n        \"DOWNLOAD_MUTEX\",\n        \"OnceLock<Mutex<()>>\",\n        \"load_harmony_encoding_safe\",\n        \"load_harmony_encoding_from_file\",\n        \"Thread-safe tokenizer loading\",\n    ]\n\n    # Issue content requirements\n    ISSUE_TITLE_KEYWORDS = [\"race condition\", \"HarmonyEncoding\", \"concurrent access\"]\n    ISSUE_REFERENCE_NUMBERS = [\"6\", \"1\"]\n    ISSUE_HEADINGS = [\"## Problem\", \"## Root Cause\", \"## Expected Solution\"]\n    ISSUE_KEYWORDS = [\n        \"multiple threads\",\n        \"tokenizer file downloads\",\n        \"mutex-based file locking\",\n    ]\n\n    # PR content requirements\n    PR_TITLE_KEYWORDS = [\"Fix 
race condition\", \"tokenizer loading\", \"threading issues\"]\n    PR_REFERENCE_NUMBERS = [\"1\", \"6\"]\n    PR_HEADINGS = [\"## Summary\", \"## Changes\", \"## Testing\"]\n    PR_KEYWORDS = [\"thread-safe\", \"concurrent downloads\", \"offline loading API\"]\n\n    # Review comment requirements\n    REVIEW_KEYWORDS = [\"OnceLock\", \"mutex\", \"thread safety\", \"concurrent access\"]\n\n    # Issue comment requirements\n    ISSUE_COMMENT_KEYWORDS = [\n        \"std::sync::Mutex\",\n        \"OnceLock\",\n        \"thread-safe initialization\",\n        \"DOWNLOAD_MUTEX\",\n    ]\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    print(\"Verifying GitHub issue-PR-commit workflow completion...\")\n\n    # 1. Check that feature branch exists\n    print(\"1. Verifying feature branch exists...\")\n    if not _check_branch_exists(BRANCH_NAME, headers, github_org):\n        print(f\"Error: Branch '{BRANCH_NAME}' not found\", file=sys.stderr)\n        return False\n\n    # 2. Check that the Rust implementation file exists with required content\n    print(\"2. Verifying concurrent_loading.rs implementation...\")\n    if not _check_file_content(\n        BRANCH_NAME,\n        \"src/concurrent_loading.rs\",\n        RUST_FILE_KEYWORDS,\n        headers,\n        github_org,\n    ):\n        print(\n            \"Error: src/concurrent_loading.rs not found or missing required content\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 3. Find the created issue\n    print(\"3. 
Verifying issue creation and content...\")\n    issue = _find_issue_by_title(ISSUE_TITLE_SUBSTRING, headers, github_org)\n    if not issue:\n        print(\n            f\"Error: Issue with title containing '{ISSUE_TITLE_SUBSTRING}' not found\",\n            file=sys.stderr,\n        )\n        return False\n\n    issue_number = issue.get(\"number\")\n    issue_title = issue.get(\"title\", \"\")\n    issue_body = issue.get(\"body\", \"\")\n\n    # Check issue title keywords\n    if not _check_title_keywords(issue_title, ISSUE_TITLE_KEYWORDS):\n        print(\"Error: Issue title missing required keywords\", file=sys.stderr)\n        return False\n\n    # Check issue headings, content and references\n    if not _check_headings_and_content(issue_body, ISSUE_HEADINGS, ISSUE_KEYWORDS):\n        print(\"Error: Issue missing required headings or keywords\", file=sys.stderr)\n        return False\n\n    if not _check_issue_references(issue_body, ISSUE_REFERENCE_NUMBERS):\n        print(\n            \"Error: Issue does not reference required issues #6 and #1\", file=sys.stderr\n        )\n        return False\n\n    # 4. Find the created PR\n    print(\"4. 
Verifying pull request creation and content...\")\n    pr = _find_pr_by_title(PR_TITLE_SUBSTRING, headers, github_org)\n    if not pr:\n        print(\n            f\"Error: PR with title containing '{PR_TITLE_SUBSTRING}' not found\",\n            file=sys.stderr,\n        )\n        return False\n\n    pr_number = pr.get(\"number\")\n    pr_title = pr.get(\"title\", \"\")\n    pr_body = pr.get(\"body\", \"\")\n\n    # Check PR title keywords\n    if not _check_title_keywords(pr_title, PR_TITLE_KEYWORDS):\n        print(\"Error: PR title missing required keywords\", file=sys.stderr)\n        return False\n\n    # Check PR headings and content\n    if not _check_headings_and_content(pr_body, PR_HEADINGS, PR_KEYWORDS):\n        print(\"Error: PR missing required headings or keywords\", file=sys.stderr)\n        return False\n\n    # Check PR references\n    if not _check_pr_references(pr_body, issue_number, PR_REFERENCE_NUMBERS):\n        print(\n            f\"Error: PR does not properly reference issue #{issue_number} or issues #1, #6\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 5. Check PR review comments\n    print(\"5. Verifying PR review comments...\")\n    reviews = _get_pr_reviews(pr_number, headers, github_org)\n    if not _check_pr_review_content(reviews, REVIEW_KEYWORDS):\n        print(\n            \"Error: PR missing review comment with required technical keywords\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 6. Check issue comments for PR reference with technical keywords\n    print(\"6. 
Verifying issue comment referencing PR...\")\n    issue_comments = _get_issue_comments(issue_number, headers, github_org)\n    if not _check_issue_comment_references(\n        issue_comments, pr_number, ISSUE_COMMENT_KEYWORDS\n    ):\n        print(\n            f\"Error: Issue #{issue_number} missing comment referencing PR #{pr_number} with required technical keywords\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 7. Check issue is closed\n    print(\"7. Verifying issue closure...\")\n    if issue.get(\"state\") != \"closed\":\n        print(f\"Error: Issue #{issue_number} is not closed\", file=sys.stderr)\n        return False\n\n    print(\"\\n✓ All verification checks passed!\")\n    print(\"Issue-PR-commit workflow completed successfully:\")\n    print(f\"  - Issue #{issue_number}: {issue.get('title')}\")\n    print(f\"  - PR #{pr_number}: {pr.get('title')}\")\n    print(f\"  - Branch: {BRANCH_NAME}\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/harmony/issue_tagging_pr_closure/description.md",
    "content": "I need you to simulate a realistic development workflow where an enhancement issue is created, implementation is attempted via a pull request, but then the PR must be closed without merging due to technical constraints discovered during the implementation process.\n\n**Step 1: Create Enhancement Issue**\nCreate a new issue with:\n- Title containing: \"Upgrade JavaScript demo to use ESM imports\" and \"modern module system\"\n- Body must include:\n  - A \"## Problem\" heading describing CommonJS limitations\n  - A \"## Proposed Solution\" heading about ESM migration\n  - A \"## Benefits\" heading listing advantages\n  - Reference to issue #26 (which is about JavaScript demo issues)\n  - Keywords: \"CommonJS\", \"ESM imports\", \"module bundling\", \"modern JavaScript\"\n- Labels: Add \"enhancement\" label to the issue\n\n**Step 2: Create Feature Branch**\nCreate a new branch called 'feat/esm-migration-attempt' from main.\n\n**Step 3: Attempt ESM Implementation**\nOn the feature branch, update the file `javascript/demo/package.json` with:\n```json\n{\n  \"type\": \"module\",\n  \"scripts\": {\n    \"build\": \"webpack --mode production --entry ./src/main.js\"\n  },\n  \"dependencies\": {\n    \"@openai/harmony\": \"^0.1.0\",\n    \"webpack\": \"^5.0.0\"\n  }\n}\n```\n\nAlso create `javascript/demo/src/main.js` with:\n```javascript\n// ESM import attempt - fails due to harmony core requirements\nimport { HarmonyEncoding } from '@openai/harmony';\n\n// This breaks the existing CommonJS integration\n// harmony core requires specific CommonJS patterns\nexport const initHarmony = () => {\n    throw new Error(\"ESM migration incompatible with harmony core\");\n};\n```\n\n**Step 4: Create Pull Request**\nCreate a pull request from 'feat/esm-migration-attempt' to 'main' with:\n- Title containing: \"Upgrade JavaScript demo to ESM imports\" and \"modern modules\"\n- Body must include:\n  - A \"## Summary\" heading explaining the attempted migration\n  - A \"## 
Changes\" heading with bullet points about ESM implementation\n  - A \"## Issues Discovered\" heading describing technical problems found\n  - \"Addresses #[ISSUE_NUMBER]\" pattern linking to your created issue\n  - Keywords: \"ESM migration\", \"webpack configuration\", \"module compatibility\", \"breaking changes\"\n- Labels: Add \"enhancement\" and \"needs-investigation\" labels to the PR\n\n**Step 5: Investigate and Document Problems**\nAdd a comment to the PR explaining the technical barriers discovered. The comment must contain these exact keywords:\n- \"CommonJS required\"\n- \"breaking compatibility\" \n- \"build system constraints\"\n- \"core tokenization\"\n- \"approach is not viable\"\nAlso include technical analysis of harmony core's CommonJS dependencies and webpack configuration conflicts.\n\n**Step 6: Update Issue with Findings**\nAdd a comment to the original issue you created. The comment must contain these exact keywords:\n- \"technical constraints\"\n- \"CommonJS dependency\"\n- \"harmony core limitations\" \n- \"build system compatibility\"\n- \"not viable at this time\"\nAlso reference the PR number using \"PR #[NUMBER]\" pattern and provide detailed explanation of why ESM migration cannot proceed.\n\n**Step 7: Close PR Without Merging**\nClose the pull request without merging by updating its state to 'closed', and add a final comment. The comment must contain these exact keywords:\n- \"architectural limitations\"\n- \"future consideration\" \n- \"core refactoring required\"\n- \"cannot be merged\"\nAlso explain why the PR cannot be merged, what would need to change in the future, reference back to the issue, and add \"wontfix\" label to the PR.\n\n**Step 8: Close Issue**\nClose the original issue by updating its state to 'closed'. Add a final comment to the issue that must contain these exact keywords:\n- \"closing as not planned\"\n- \"architectural constraints\"\n- \"future implementation blocked\"\n- \"requires core redesign\""
  },
  {
    "path": "tasks/github/standard/harmony/issue_tagging_pr_closure/meta.json",
    "content": "{\n  \"task_id\": \"issue_tagging_pr_closure\",\n  \"task_name\": \"Issue Tagging Pr Closure\",\n  \"category_id\": \"harmony\",\n  \"category_name\": \"Harmony\",\n  \"description\": \"Simulate development workflow where enhancement PR is closed without merging due to technical constraints discovered during implementation.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"issue management\",\n    \"pr workflows\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/harmony\",\n    \"stateOriginalUrl\": \"https://github.com/openai/harmony\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/harmony/issue_tagging_pr_closure/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _check_branch_exists(\n    branch_name: str, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> bool:\n    \"\"\"Verify that a branch exists in the repository.\"\"\"\n    success, _ = _get_github_api(f\"branches/{branch_name}\", headers, org, repo)\n    return success\n\n\ndef _check_file_content(\n    branch: str,\n    file_path: str,\n    keywords: List[str],\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"harmony\",\n) -> bool:\n    \"\"\"Verify that a file exists in branch and contains required keywords.\"\"\"\n    import base64\n\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={branch}\", headers, org, repo\n    )\n    if not success or not result:\n        return False\n\n    if keywords and result.get(\"content\"):\n        try:\n            content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n            return all(keyword in content for keyword in keywords)\n        except Exception as e:\n            print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n 
           return False\n\n    return True\n\n\ndef _find_issue_by_title_keywords(\n    title_keywords: List[str], headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> Optional[Dict]:\n    \"\"\"Find an issue by title keywords and return the issue data.\"\"\"\n    for state in [\"open\", \"closed\"]:\n        success, issues = _get_github_api(\n            f\"issues?state={state}&per_page=100\", headers, org, repo\n        )\n        if success and issues:\n            for issue in issues:\n                title = issue.get(\"title\", \"\").lower()\n                if all(keyword.lower() in title for keyword in title_keywords):\n                    return issue\n    return None\n\n\ndef _find_pr_by_title_keywords(\n    title_keywords: List[str], headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> Optional[Dict]:\n    \"\"\"Find a PR by title keywords and return the PR data.\"\"\"\n    for state in [\"open\", \"closed\"]:\n        success, prs = _get_github_api(\n            f\"pulls?state={state}&per_page=100\", headers, org, repo\n        )\n        if success and prs:\n            for pr in prs:\n                title = pr.get(\"title\", \"\").lower()\n                if all(keyword.lower() in title for keyword in title_keywords):\n                    return pr\n    return None\n\n\ndef _check_labels(labels: List[Dict], required_labels: List[str]) -> bool:\n    \"\"\"Check if required labels are present.\"\"\"\n    label_names = [label.get(\"name\", \"\").lower() for label in labels]\n    return all(req_label.lower() in label_names for req_label in required_labels)\n\n\ndef _check_headings_and_keywords(\n    body: str, headings: List[str], keywords: List[str]\n) -> bool:\n    \"\"\"Check if body contains required headings and keywords.\"\"\"\n    if not body:\n        return False\n    has_headings = all(heading in body for heading in headings)\n    has_keywords = all(keyword.lower() in body.lower() for keyword in keywords)\n    return 
has_headings and has_keywords\n\n\ndef _check_issue_reference(body: str, issue_number: int) -> bool:\n    \"\"\"Check if body contains reference to the issue.\"\"\"\n    if not body:\n        return False\n    return f\"#{issue_number}\" in body or f\"Addresses #{issue_number}\" in body\n\n\ndef _get_issue_comments(\n    issue_number: int, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> List[Dict]:\n    \"\"\"Get all comments for an issue.\"\"\"\n    success, comments = _get_github_api(\n        f\"issues/{issue_number}/comments\", headers, org, repo\n    )\n    if success and comments:\n        return comments\n    return []\n\n\ndef _get_pr_comments(\n    pr_number: int, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> List[Dict]:\n    \"\"\"Get all comments for a PR.\"\"\"\n    success, comments = _get_github_api(\n        f\"issues/{pr_number}/comments\", headers, org, repo\n    )\n    if success and comments:\n        return comments\n    return []\n\n\ndef _check_pr_technical_comment(comments: List[Dict], keywords: List[str]) -> bool:\n    \"\"\"Check if PR has a comment with technical analysis containing required keywords.\"\"\"\n    for comment in comments:\n        body = comment.get(\"body\", \"\")\n        if body and all(keyword.lower() in body.lower() for keyword in keywords):\n            return True\n    return False\n\n\ndef _check_issue_comment_with_pr_ref(\n    comments: List[Dict], pr_number: int, keywords: List[str]\n) -> bool:\n    \"\"\"Check if issue has a comment referencing the PR with required keywords.\"\"\"\n    for comment in comments:\n        body = comment.get(\"body\", \"\")\n        has_pr_ref = (\n            f\"PR #{pr_number}\" in body\n            or f\"PR#{pr_number}\" in body\n            or f\"pr #{pr_number}\" in body.lower()\n        )\n        has_keywords = all(keyword.lower() in body.lower() for keyword in keywords)\n        if has_pr_ref and has_keywords:\n            return True\n    
return False\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the issue tagging and PR closure workflow meets the\n    requirements described in description.md.\n    \"\"\"\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    # Configuration constants\n    BRANCH_NAME = \"feat/esm-migration-attempt\"\n\n    # Issue requirements\n    ISSUE_TITLE_KEYWORDS = [\n        \"Upgrade JavaScript demo to use ESM imports\",\n        \"modern module system\",\n    ]\n    ISSUE_HEADINGS = [\"## Problem\", \"## Proposed Solution\", \"## Benefits\"]\n    ISSUE_KEYWORDS = [\"CommonJS\", \"ESM imports\", \"module bundling\", \"modern JavaScript\"]\n    ISSUE_LABELS = [\"enhancement\"]\n\n    # PR requirements\n    PR_TITLE_KEYWORDS = [\"Upgrade JavaScript demo to ESM imports\", \"modern modules\"]\n    PR_HEADINGS = [\"## Summary\", \"## Changes\", \"## Issues Discovered\"]\n    PR_KEYWORDS = [\n        \"ESM migration\",\n        \"webpack configuration\",\n        \"module compatibility\",\n        \"breaking changes\",\n    ]\n    PR_LABELS = [\"enhancement\", \"needs-investigation\", \"wontfix\"]\n\n    # File content requirements\n    PACKAGE_JSON_KEYWORDS = ['\"type\": \"module\"', \"webpack\", \"@openai/harmony\"]\n    MAIN_JS_KEYWORDS = [\n        \"import { HarmonyEncoding }\",\n        \"ESM import attempt\",\n        \"harmony core\",\n    ]\n\n    # Comment requirements\n    PR_TECHNICAL_KEYWORDS = [\n        \"CommonJS required\",\n        \"breaking compatibility\",\n        \"build system 
constraints\",\n        \"core tokenization\",\n        \"approach is not viable\",\n    ]\n    ISSUE_COMMENT_KEYWORDS = [\n        \"technical constraints\",\n        \"CommonJS dependency\",\n        \"harmony core limitations\",\n        \"build system compatibility\",\n        \"not viable at this time\",\n    ]\n    PR_CLOSURE_KEYWORDS = [\n        \"architectural limitations\",\n        \"future consideration\",\n        \"core refactoring required\",\n        \"cannot be merged\",\n    ]\n    ISSUE_CLOSURE_KEYWORDS = [\n        \"closing as not planned\",\n        \"architectural constraints\",\n        \"future implementation blocked\",\n        \"requires core redesign\",\n    ]\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    print(\"Verifying issue tagging and PR closure workflow completion...\")\n\n    # 1. Check that feature branch exists\n    print(\"1. Verifying feature branch exists...\")\n    if not _check_branch_exists(BRANCH_NAME, headers, github_org):\n        print(f\"Error: Branch '{BRANCH_NAME}' not found\", file=sys.stderr)\n        return False\n\n    # 2. Check that implementation files exist with required content\n    print(\"2. 
Verifying ESM implementation files...\")\n    if not _check_file_content(\n        BRANCH_NAME,\n        \"javascript/demo/package.json\",\n        PACKAGE_JSON_KEYWORDS,\n        headers,\n        github_org,\n    ):\n        print(\n            \"Error: javascript/demo/package.json not found or missing required content\",\n            file=sys.stderr,\n        )\n        return False\n\n    if not _check_file_content(\n        BRANCH_NAME,\n        \"javascript/demo/src/main.js\",\n        MAIN_JS_KEYWORDS,\n        headers,\n        github_org,\n    ):\n        print(\n            \"Error: javascript/demo/src/main.js not found or missing required content\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 3. Find the created issue\n    print(\"3. Verifying issue creation and content...\")\n    issue = _find_issue_by_title_keywords(ISSUE_TITLE_KEYWORDS, headers, github_org)\n    if not issue:\n        print(\n            \"Error: Issue with title containing required keywords not found\",\n            file=sys.stderr,\n        )\n        return False\n\n    issue_number = issue.get(\"number\")\n    issue_body = issue.get(\"body\", \"\")\n    issue_labels = issue.get(\"labels\", [])\n\n    # Check issue content\n    if not _check_headings_and_keywords(issue_body, ISSUE_HEADINGS, ISSUE_KEYWORDS):\n        print(\"Error: Issue missing required headings or keywords\", file=sys.stderr)\n        return False\n\n    # Check issue references #26\n    if \"#26\" not in issue_body:\n        print(\"Error: Issue does not reference issue #26\", file=sys.stderr)\n        return False\n\n    # Check issue labels\n    if not _check_labels(issue_labels, ISSUE_LABELS):\n        print(f\"Error: Issue missing required labels: {ISSUE_LABELS}\", file=sys.stderr)\n        return False\n\n    # 4. Find the created PR\n    print(\"4. 
Verifying pull request creation and content...\")\n    pr = _find_pr_by_title_keywords(PR_TITLE_KEYWORDS, headers, github_org)\n    if not pr:\n        print(\n            \"Error: PR with title containing required keywords not found\",\n            file=sys.stderr,\n        )\n        return False\n\n    pr_number = pr.get(\"number\")\n    pr_body = pr.get(\"body\", \"\")\n    pr_labels = pr.get(\"labels\", [])\n    pr_state = pr.get(\"state\")\n\n    # Check PR content\n    if not _check_headings_and_keywords(pr_body, PR_HEADINGS, PR_KEYWORDS):\n        print(\"Error: PR missing required headings or keywords\", file=sys.stderr)\n        return False\n\n    # Check PR references issue\n    if not _check_issue_reference(pr_body, issue_number):\n        print(f\"Error: PR does not reference issue #{issue_number}\", file=sys.stderr)\n        return False\n\n    # Check PR labels\n    if not _check_labels(pr_labels, PR_LABELS):\n        print(f\"Error: PR missing required labels: {PR_LABELS}\", file=sys.stderr)\n        return False\n\n    # 5. Check PR is closed (not merged)\n    print(\"5. Verifying PR is closed without merging...\")\n    if pr_state != \"closed\":\n        print(f\"Error: PR #{pr_number} is not closed\", file=sys.stderr)\n        return False\n\n    if pr.get(\"merged_at\"):\n        print(\n            f\"Error: PR #{pr_number} was merged (should be closed without merging)\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 6. Check PR technical analysis comment\n    print(\"6. Verifying PR technical analysis comment...\")\n    pr_comments = _get_pr_comments(pr_number, headers, github_org)\n    if not _check_pr_technical_comment(pr_comments, PR_TECHNICAL_KEYWORDS):\n        print(\n            \"Error: PR missing technical analysis comment with required keywords\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 7. Check issue comment with PR reference\n    print(\"7. 
Verifying issue comment referencing PR...\")\n    issue_comments = _get_issue_comments(issue_number, headers, github_org)\n    if not _check_issue_comment_with_pr_ref(\n        issue_comments, pr_number, ISSUE_COMMENT_KEYWORDS\n    ):\n        print(\n            f\"Error: Issue #{issue_number} missing comment referencing PR #{pr_number} with required keywords\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 8. Check PR closure comment with required keywords\n    print(\"8. Verifying PR closure comment...\")\n    pr_closure_comment_found = False\n    for comment in pr_comments:\n        body = comment.get(\"body\", \"\")\n        if body and all(\n            keyword.lower() in body.lower() for keyword in PR_CLOSURE_KEYWORDS\n        ):\n            pr_closure_comment_found = True\n            break\n\n    if not pr_closure_comment_found:\n        print(\n            \"Error: PR missing closure comment with required keywords\", file=sys.stderr\n        )\n        return False\n\n    # 9. Verify issue is closed\n    print(\"9. Verifying issue is closed...\")\n    if issue.get(\"state\") != \"closed\":\n        print(f\"Error: Issue #{issue_number} should be closed\", file=sys.stderr)\n        return False\n\n    # 10. Check issue closure comment with required keywords\n    print(\"10. 
Verifying issue closure comment...\")\n    issue_closure_comment_found = False\n    for comment in issue_comments:\n        body = comment.get(\"body\", \"\")\n        if body and all(\n            keyword.lower() in body.lower() for keyword in ISSUE_CLOSURE_KEYWORDS\n        ):\n            issue_closure_comment_found = True\n            break\n\n    if not issue_closure_comment_found:\n        print(\n            \"Error: Issue missing closure comment with required keywords\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"\\n✓ All verification checks passed!\")\n    print(\"Issue tagging and PR closure workflow completed successfully:\")\n    print(f\"  - Issue #{issue_number}: {issue.get('title')} (closed)\")\n    print(f\"  - PR #{pr_number}: {pr.get('title')} (closed without merging)\")\n    print(f\"  - Branch: {BRANCH_NAME}\")\n    print(\"  - All comments contain required keywords\")\n    print(\"  - Technical constraints properly documented and communicated\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/harmony/multi_branch_commit_aggregation/description.md",
    "content": "I need you to create a comprehensive commit history report by aggregating changes from multiple branches. Here's what you need to do:\n\n**Step 1: Create Analysis Branch**\nCreate a new branch called 'history-report-2025' from the main branch.\n\n**Step 2: Generate Branch Commits Report**\nIn the 'history-report-2025' branch, create a file called `BRANCH_COMMITS.json` that contains a JSON object with the following structure:\n- For each of these branches: ['pr/45-googlefan256-main', 'pr/25-neuralsorcerer-patch-1', 'pr/41-amirhosseinghanipour-fix-race-conditions-and-offline-api']\n- List the 3 most recent commits for each branch\n- Each commit must include: SHA, GitHub username, commit message, and files changed count\n- The JSON structure should be:\n```json\n{\n  \"pr/45-googlefan256-main\": [\n    {\n      \"sha\": \"commit_sha\",\n      \"author\": \"github_username\",\n      \"message\": \"commit message\",\n      \"files_changed\": number\n    }\n  ],\n  \"pr/25-neuralsorcerer-patch-1\": [...],\n  \"pr/41-amirhosseinghanipour-fix-race-conditions-and-offline-api\": [...]\n}\n```\n\n**Step 3: Create Cross-Branch Analysis**\nCreate a file `CROSS_BRANCH_ANALYSIS.md` that contains:\n- A section \"## Top Contributors\" listing the 3 contributors with the most commits on the main branch, sorted by commit count (format: \"github_username: X commits\")\n- Must include keywords: \"contributors\"\n\n**Step 4: Generate Merge Timeline**\nCreate a file `MERGE_TIMELINE.txt` that lists the 10 most recent merge commits from the main branch:\n- Format: `DATE | MERGE_COMMIT_MESSAGE | COMMIT_SHA`\n- List in reverse chronological order (newest first)\n- Only include actual merge commits (commits that have exactly 2 parent commits)\n- Note: While the commit messages reference PR numbers, those PRs no longer exist in the repository"
  },
  {
    "path": "tasks/github/standard/harmony/multi_branch_commit_aggregation/meta.json",
    "content": "{\n  \"task_id\": \"multi_branch_commit_aggregation\",\n  \"task_name\": \"Multi Branch Commit Aggregation\",\n  \"category_id\": \"harmony\",\n  \"category_name\": \"Harmony\",\n  \"description\": \"Generate comprehensive commit history report by aggregating changes from multiple branches with contributor analysis and merge timeline.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"repository analysis\",\n    \"release coordination\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/harmony\",\n    \"stateOriginalUrl\": \"https://github.com/openai/harmony\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/harmony/multi_branch_commit_aggregation/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, Optional, Tuple\nimport base64\nimport json\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/harmony/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _check_branch_exists(branch_name: str, headers: Dict[str, str], org: str) -> bool:\n    \"\"\"Verify that a branch exists in the repository.\"\"\"\n    success, _ = _get_github_api(f\"branches/{branch_name}\", headers, org)\n    return success\n\n\ndef _get_file_content(\n    branch: str, file_path: str, headers: Dict[str, str], org: str\n) -> Optional[str]:\n    \"\"\"Get the content of a file from a specific branch.\"\"\"\n    success, result = _get_github_api(f\"contents/{file_path}?ref={branch}\", headers, org)\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef _check_branch_commits_json(content: str) -> bool:\n    \"\"\"Verify BRANCH_COMMITS.json has correct structure and expected data.\"\"\"\n    expected_data = {\n        \"pr/45-googlefan256-main\": [\n            {\n                \"sha\": 
\"9fa3f54cf2a2501c7dcbf554d5fbdd0de619fdda\",\n                \"author\": \"googlefan256\",\n                \"message\": \"Update format.md\",\n                \"files_changed\": 1,\n            },\n            {\n                \"sha\": \"3efbf742533a375fc148d75513597e139329578b\",\n                \"author\": \"scott-oai\",\n                \"message\": \"Merge pull request #29 from axion66/improve-readme-and-checks\",\n                \"files_changed\": 1,\n            },\n            {\n                \"sha\": \"9d653a4c7382abc42d115014d195d9354e7ad357\",\n                \"author\": \"scott-oai\",\n                \"message\": \"Merge pull request #30 from Yuan-ManX/harmony-format\",\n                \"files_changed\": 1,\n            },\n        ],\n        \"pr/25-neuralsorcerer-patch-1\": [\n            {\n                \"sha\": \"c505a03e9c9a388a511b6125756097eee523742a\",\n                \"author\": \"neuralsorcerer\",\n                \"message\": \"fix: `meta_sep` token and add to registry\",\n                \"files_changed\": 1,\n            },\n            {\n                \"sha\": \"c044bf33f7e835ca6a723ccc97848de25dba5164\",\n                \"author\": \"neuralsorcerer\",\n                \"message\": \"fix: `meta_sep` token in `encoding.rs`\",\n                \"files_changed\": 1,\n            },\n            {\n                \"sha\": \"b255cbeb6274adbea774f26fd9590922ce8874ed\",\n                \"author\": \"scott-oai\",\n                \"message\": \"Merge pull request #18 from openai/dev/scl/better-ci\",\n                \"files_changed\": 6,\n            },\n        ],\n        \"pr/41-amirhosseinghanipour-fix-race-conditions-and-offline-api\": [\n            {\n                \"sha\": \"1dca6392934bf4e3c403b2ecc2104e8ff3f67f45\",\n                \"author\": \"amirhosseinghanipour\",\n                \"message\": \"fix race conditions and add offline tokenizer loading api\",\n                \"files_changed\": 8,\n            
},\n            {\n                \"sha\": \"9528c7b4a00a3307fd9685fc1328aee11c3d9c90\",\n                \"author\": \"scott-oai\",\n                \"message\": \"version bump\",\n                \"files_changed\": 2,\n            },\n            {\n                \"sha\": \"82b3afb9eb043343f322c937262cc50405e892c3\",\n                \"author\": \"scott-oai\",\n                \"message\": \"Merge pull request #26 from jordan-wu-97/jordan/fix-function-call-atomic-bool\",\n                \"files_changed\": 6,\n            },\n        ],\n    }\n\n    try:\n        data = json.loads(content)\n\n        # Check if all required branches are present\n        for branch in expected_data.keys():\n            if branch not in data:\n                print(\n                    f\"Missing branch {branch} in BRANCH_COMMITS.json\", file=sys.stderr\n                )\n                return False\n\n        # Verify the exact content matches expected data\n        for branch, expected_commits in expected_data.items():\n            actual_commits = data.get(branch, [])\n            if len(actual_commits) != 3:\n                print(\n                    f\"Branch {branch} should have exactly 3 commits, found {len(actual_commits)}\",\n                    file=sys.stderr,\n                )\n                return False\n\n            for i, expected_commit in enumerate(expected_commits):\n                if i >= len(actual_commits):\n                    print(\n                        f\"Missing commit {i + 1} for branch {branch}\", file=sys.stderr\n                    )\n                    return False\n\n                actual_commit = actual_commits[i]\n                for field in [\"sha\", \"author\", \"files_changed\"]:\n                    if actual_commit.get(field) != expected_commit.get(field):\n                        print(\n                            f\"Mismatch in {field} for commit {i + 1} in branch {branch}\",\n                            
file=sys.stderr,\n                        )\n                        print(\n                            f\"Expected: {expected_commit.get(field)}, Got: {actual_commit.get(field)}\",\n                            file=sys.stderr,\n                        )\n                        return False\n                \n                # For message field, use substring matching to be more flexible\n                expected_message = expected_commit.get(\"message\", \"\")\n                actual_message = actual_commit.get(\"message\", \"\")\n                if expected_message not in actual_message:\n                    print(\n                        f\"Mismatch in message for commit {i + 1} in branch {branch}\",\n                        file=sys.stderr,\n                    )\n                    print(\n                        f\"Expected: {expected_message}, Got: {actual_message}\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n        return True\n    except json.JSONDecodeError as e:\n        print(f\"Invalid JSON in BRANCH_COMMITS.json: {e}\", file=sys.stderr)\n        return False\n    except Exception as e:\n        print(f\"Error checking BRANCH_COMMITS.json: {e}\", file=sys.stderr)\n        return False\n\n\ndef _check_cross_branch_analysis(content: str) -> bool:\n    \"\"\"Verify CROSS_BRANCH_ANALYSIS.md contains required sections and data.\"\"\"\n    # Check for required section header\n    if \"## Top Contributors\" not in content:\n        print(\n            \"Missing section '## Top Contributors' in CROSS_BRANCH_ANALYSIS.md\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Check for required keyword\n    if \"contributors\" not in content.lower():\n        print(\n            \"Missing keyword 'contributors' in CROSS_BRANCH_ANALYSIS.md\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Verify the top 3 contributors with correct counts from main branch 
(order matters)\n    expected_contributors = [\n        \"scott-oai: 35 commits\",\n        \"egorsmkv: 4 commits\",\n        \"axion66: 2 commits\",\n    ]\n\n    for contributor in expected_contributors:\n        if contributor not in content:\n            print(\n                f\"Missing or incorrect contributor entry: {contributor}\",\n                file=sys.stderr,\n            )\n            return False\n\n    return True\n\n\ndef _check_merge_timeline(content: str) -> bool:\n    \"\"\"Verify MERGE_TIMELINE.txt has correct format and expected merge commits.\"\"\"\n    expected_timeline = [\n        \"2025-08-06 | Merge pull request #29 from axion66/improve-readme-and-checks | 3efbf742533a375fc148d75513597e139329578b\",\n        \"2025-08-06 | Merge pull request #30 from Yuan-ManX/harmony-format | 9d653a4c7382abc42d115014d195d9354e7ad357\",\n        \"2025-08-06 | Merge pull request #28 from dkqjrm/fix-typo-format-md | 161e5fe2a57c63e9f8353c4c5b8faa3c3854bb5f\",\n        \"2025-08-05 | Merge pull request #26 from jordan-wu-97/jordan/fix-function-call-atomic-bool | 82b3afb9eb043343f322c937262cc50405e892c3\",\n        \"2025-08-05 | Merge pull request #18 from openai/dev/scl/better-ci | b255cbeb6274adbea774f26fd9590922ce8874ed\",\n        \"2025-08-05 | Merge pull request #21 from Tialo/main | 058ef3257c24fb099aac7960c10ce51c8e55d9fe\",\n        \"2025-08-05 | Merge branch 'main' into dev/scl/better-ci | 6375a15ea1b0a486cbb1468964cf8f5800ff5a5c\",\n        \"2025-08-05 | Merge pull request #8 from RustedBytes/main | f6179119ca894eda4124c86d408c01fdbf5281f0\",\n        \"2025-08-05 | Merge branch 'main' into main | eb86106b6980790b94f5702dc510483c66027277\",\n        \"2025-08-05 | Merge pull request #17 from openai/dev/scl/add-docs-to-cargo | 64bca4cf327ebeafa0bbd0345650d86e2d02142f\",\n    ]\n\n    # Verify each expected timeline entry exists in the content\n    for i, expected_line in enumerate(expected_timeline):\n        if expected_line not in 
content:\n            print(f\"Missing expected timeline entry {i + 1} in MERGE_TIMELINE.txt\", file=sys.stderr)\n            print(f\"Expected: {expected_line}\", file=sys.stderr)\n            return False\n\n    return True\n\n\ndef verify_task() -> bool:\n    \"\"\"Verify the multi-branch commit aggregation task.\"\"\"\n    # Get GitHub token from environment\n    load_dotenv(\".mcp_env\")\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    # Get GitHub organization from environment\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # 1. Check if branch 'history-report-2025' exists\n    if not _check_branch_exists(\"history-report-2025\", headers, github_org):\n        print(\"Branch 'history-report-2025' does not exist\", file=sys.stderr)\n        return False\n    print(\"✓ Branch 'history-report-2025' exists\")\n\n    # 2. Check BRANCH_COMMITS.json\n    content = _get_file_content(\"history-report-2025\", \"BRANCH_COMMITS.json\", headers, github_org)\n    if not content:\n        print(\n            \"File 'BRANCH_COMMITS.json' not found in 'history-report-2025' branch\",\n            file=sys.stderr,\n        )\n        return False\n\n    if not _check_branch_commits_json(content):\n        return False\n    print(\"✓ BRANCH_COMMITS.json has correct structure and data\")\n\n    # 3. 
Check CROSS_BRANCH_ANALYSIS.md\n    content = _get_file_content(\n        \"history-report-2025\", \"CROSS_BRANCH_ANALYSIS.md\", headers, github_org\n    )\n    if not content:\n        print(\n            \"File 'CROSS_BRANCH_ANALYSIS.md' not found in 'history-report-2025' branch\",\n            file=sys.stderr,\n        )\n        return False\n\n    if not _check_cross_branch_analysis(content):\n        return False\n    print(\"✓ CROSS_BRANCH_ANALYSIS.md contains required sections and data\")\n\n    # 4. Check MERGE_TIMELINE.txt\n    content = _get_file_content(\"history-report-2025\", \"MERGE_TIMELINE.txt\", headers, github_org)\n    if not content:\n        print(\n            \"File 'MERGE_TIMELINE.txt' not found in 'history-report-2025' branch\",\n            file=sys.stderr,\n        )\n        return False\n\n    if not _check_merge_timeline(content):\n        return False\n    print(\"✓ MERGE_TIMELINE.txt has correct format and data\")\n\n\n    print(\"\\nAll verification checks passed! ✅\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify_task()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/harmony/release_management_workflow/description.md",
    "content": "I need help implementing a comprehensive release management workflow for this harmony repository. Here's what I need you to do:\n\n**Step 1: Analyze Current State** \nFirst, analyze the current open pull requests to understand what changes they contain and their impact on the codebase.\n\n**Step 2: Create Release Branch**\nCreate a release preparation branch called 'release-v1.1.0' from the current main branch.\n\n**Step 3: Apply Critical Bug Fixes**\nOn the release branch, apply the MetaSep token fix from PR #25 by creating/updating the file `src/encoding.rs` with the corrected content where FormattingToken::MetaSep maps to \"<|meta_sep|>\" instead of \"<|channel|>\".\n\nAlso create/update `src/registry.rs` to include the missing MetaSep and MetaEnd token registrations:\n```rust\n(FormattingToken::MetaSep, \"<|meta_sep|>\"),\n(FormattingToken::MetaEnd, \"<|meta_end|>\"),\n```\n\n**Step 4: Add Missing Utility File**\nFrom PR #26, create the missing shadcn utils file `demo/harmony-demo/src/lib/utils.ts` with content:\n```typescript\nimport { clsx, type ClassValue } from \"clsx\"\nimport { twMerge } from \"tailwind-merge\"\n\nexport function cn(...inputs: ClassValue[]) {\n  return twMerge(clsx(inputs))\n}\n```\n\nAnd create/update `.gitignore` to add:\n```\n# Avoid ignoring shadcn utils\n!demo/harmony-demo/src/lib\n```\n\n**Step 5: Version Update**\nUpdate the version number in `Cargo.toml`: Change the `version` field in the `[package]` section to `version = \"1.1.0\"`.\n\n**Step 6: Create Comprehensive Changelog**\nCreate a `CHANGELOG.md` file in the release branch with the following content:\n```markdown\n# Changelog\n\n## [1.1.0] - 2025-08-07\n\n### Added\n- Added missing shadcn utils.ts file for demo application\n- Enhanced gitignore rules to preserve shadcn utilities\n\n### Fixed\n- Fixed MetaSep token mapping bug (was incorrectly mapped to channel token)\n- Added missing MetaSep and MetaEnd token registrations in registry\n- Improved tokenizer 
registry functionality for meta formatting tokens\n\n### Changed\n- Updated version to 1.1.0 for new release cycle\n\n### Technical Details\n- MetaSep token now correctly maps to `<|meta_sep|>` instead of `<|channel|>`\n- Registry now properly recognizes MetaSep and MetaEnd formatting tokens\n- Demo application now includes required utility functions for UI components\n```\n\n**Step 7: Create Release Pull Request**\nCreate a pull request from 'release-v1.1.0' to 'main' with title \"Release v1.1.0 - Bug fixes and utility additions\" and a detailed description explaining all the integrated changes.\n\n**Step 8: Merge the Pull Request**\nAfter creating the PR, merge it into the main branch using the \"squash and merge\" method.\n\n**Step 9: Verification**\nEnsure the release branch contains at least 4 distinct commits before merging:\n1. MetaSep token fix commit\n2. Utility file addition commit  \n3. Version update commit\n4. Changelog addition commit"
  },
  {
    "path": "tasks/github/standard/harmony/release_management_workflow/meta.json",
    "content": "{\n  \"task_id\": \"release_management_workflow\",\n  \"task_name\": \"Release Management Workflow\",\n  \"category_id\": \"harmony\",\n  \"category_name\": \"Harmony\",\n  \"description\": \"Implement comprehensive release management workflow including bug fixes, version updates, changelog creation, and PR merging.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"release coordination\",\n    \"pr workflows\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/harmony\",\n    \"stateOriginalUrl\": \"https://github.com/openai/harmony\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/harmony/release_management_workflow/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nimport base64\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _check_branch_exists(\n    branch_name: str, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> bool:\n    \"\"\"Verify that a branch exists in the repository.\"\"\"\n    success, _ = _get_github_api(f\"branches/{branch_name}\", headers, org, repo)\n    return success\n\n\ndef _check_file_content(\n    branch: str,\n    file_path: str,\n    keywords: List[str],\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"harmony\",\n) -> bool:\n    \"\"\"Verify that a file exists in branch and contains required keywords.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={branch}\", headers, org, repo\n    )\n    if not success or not result:\n        return False\n\n    if keywords and result.get(\"content\"):\n        try:\n            content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n            return all(keyword in content for keyword in keywords)\n        except Exception as e:\n            print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n       
     return False\n\n    return True\n\n\ndef _check_specific_file_content(\n    branch: str,\n    file_path: str,\n    expected_content: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"harmony\",\n    min_length: int = 100,\n) -> bool:\n    \"\"\"Verify that a file contains specific exact content and has reasonable size.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={branch}\", headers, org, repo\n    )\n    if not success or not result:\n        return False\n\n    if result.get(\"content\"):\n        try:\n            content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n            # Check both that expected content exists and file has reasonable content\n            return expected_content in content and len(content) >= min_length\n        except Exception as e:\n            print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n            return False\n\n    return False\n\n\ndef _check_pr_merged(\n    title_substring: str,\n    base_branch: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"harmony\",\n) -> Tuple[bool, Optional[int]]:\n    \"\"\"Check if a PR with specified title was merged into base branch and return PR number.\"\"\"\n    # Check closed PRs to find merged ones\n    success, prs = _get_github_api(\n        \"pulls?state=closed&per_page=100\", headers, org, repo\n    )\n    if not success or not prs:\n        return False, None\n\n    for pr in prs:\n        title_match = title_substring.lower() in pr.get(\"title\", \"\").lower()\n        base_match = pr.get(\"base\", {}).get(\"ref\") == base_branch\n        is_merged = pr.get(\"merged_at\") is not None\n\n        if title_match and base_match and is_merged:\n            return True, pr.get(\"number\")\n\n    return False, None\n\n\ndef _check_pr_squash_merged(\n    pr_number: int, headers: Dict[str, str], org: str, repo: str = \"harmony\"\n) -> bool:\n    \"\"\"Check if a PR 
was merged using squash and merge method.\"\"\"\n    # Get the PR details\n    success, pr = _get_github_api(f\"pulls/{pr_number}\", headers, org, repo)\n    if not success or not pr:\n        return False\n\n    if not pr.get(\"merged_at\"):\n        return False\n\n    merge_commit_sha = pr.get(\"merge_commit_sha\")\n    if not merge_commit_sha:\n        return False\n\n    # Get the merge commit details\n    success, commit = _get_github_api(f\"commits/{merge_commit_sha}\", headers, org, repo)\n    if not success or not commit:\n        return False\n\n    # For squash and merge, the commit will have exactly one parent\n    # and the commit message typically includes the PR number\n    parents = commit.get(\"parents\", [])\n    commit_message = commit.get(\"commit\", {}).get(\"message\", \"\")\n\n    # Squash and merge commits have exactly 1 parent (the base branch)\n    # Regular merge commits have 2 parents (base and head branches)\n    if len(parents) == 1 and f\"#{pr_number}\" in commit_message:\n        return True\n\n    return False\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the release management workflow meets the\n    requirements described in description.md.\n    \"\"\"\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    # Configuration constants\n    RELEASE_BRANCH = \"release-v1.1.0\"\n\n    # Expected content checks with minimum file sizes to ensure files aren't just stubs\n    METASEP_FIX = 'FormattingToken::MetaSep => \"<|meta_sep|>\"'\n    REGISTRY_FIX = 
'(FormattingToken::MetaSep, \"<|meta_sep|>\")'\n    METAEND_FIX = '(FormattingToken::MetaEnd, \"<|meta_end|>\")'\n    UTILS_CONTENT = \"export function cn(...inputs: ClassValue[])\"\n    GITIGNORE_ADDITION = \"!demo/harmony-demo/src/lib\"\n    VERSION_110 = 'version = \"1.1.0\"'\n\n    CHANGELOG_KEYWORDS = [\n        \"## [1.1.0] - 2025-08-07\",\n        \"MetaSep token mapping bug\",\n        \"shadcn utils.ts file\",\n        \"Fixed MetaSep token\",\n        \"Registry now properly recognizes\",\n    ]\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    print(\"Verifying GitHub release management workflow completion...\")\n\n    # 1. Check release branch exists\n    print(\"1. Verifying release branch exists...\")\n    if not _check_branch_exists(RELEASE_BRANCH, headers, github_org):\n        print(f\"Error: Branch '{RELEASE_BRANCH}' not found\", file=sys.stderr)\n        return False\n\n    # 2. Check MetaSep fix in encoding.rs (with min content length to ensure file wasn't gutted)\n    print(\"2. Verifying MetaSep token fix in encoding.rs...\")\n    if not _check_specific_file_content(\n        \"main\", \"src/encoding.rs\", METASEP_FIX, headers, github_org, min_length=500\n    ):\n        print(\n            \"Error: MetaSep token fix not found in src/encoding.rs or file is too small\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 3. Check registry updates (both MetaSep and MetaEnd)\n    print(\"3. 
Verifying MetaSep and MetaEnd registry additions...\")\n    if not _check_specific_file_content(\n        \"main\", \"src/registry.rs\", REGISTRY_FIX, headers, github_org, min_length=500\n    ):\n        print(\n            \"Error: MetaSep registry fix not found in src/registry.rs or file is too small\",\n            file=sys.stderr,\n        )\n        return False\n    if not _check_specific_file_content(\n        \"main\", \"src/registry.rs\", METAEND_FIX, headers, github_org, min_length=500\n    ):\n        print(\n            \"Error: MetaEnd registry fix not found in src/registry.rs\", file=sys.stderr\n        )\n        return False\n\n    # 4. Check utils.ts file exists with correct content\n    print(\"4. Verifying shadcn utils.ts file...\")\n    if not _check_specific_file_content(\n        \"main\",\n        \"demo/harmony-demo/src/lib/utils.ts\",\n        UTILS_CONTENT,\n        headers,\n        github_org,\n        min_length=50,\n    ):\n        print(\"Error: utils.ts file not found or incorrect content\", file=sys.stderr)\n        return False\n\n    # 5. Check .gitignore update\n    print(\"5. Verifying .gitignore update...\")\n    if not _check_specific_file_content(\n        \"main\", \".gitignore\", GITIGNORE_ADDITION, headers, github_org, min_length=100\n    ):\n        print(\"Error: .gitignore update not found\", file=sys.stderr)\n        return False\n\n    # 6. Check version update in Cargo.toml only (pyproject.toml uses dynamic versioning)\n    print(\"6. Verifying version update in Cargo.toml...\")\n    if not _check_specific_file_content(\n        \"main\", \"Cargo.toml\", VERSION_110, headers, github_org, min_length=200\n    ):\n        print(\"Error: Version 1.1.0 not found in Cargo.toml\", file=sys.stderr)\n        return False\n\n    # 7. Check CHANGELOG.md exists with required content\n    print(\"7. 
Verifying CHANGELOG.md...\")\n    if not _check_file_content(\n        \"main\", \"CHANGELOG.md\", CHANGELOG_KEYWORDS, headers, github_org\n    ):\n        print(\n            \"Error: CHANGELOG.md not found or missing required content\", file=sys.stderr\n        )\n        return False\n\n    # 8. Check release PR was merged and get PR number\n    print(\"8. Verifying release pull request was merged...\")\n    pr_merged, pr_number = _check_pr_merged(\n        \"Release v1.1.0\", \"main\", headers, github_org\n    )\n    if not pr_merged:\n        print(\"Error: Release pull request not found or not merged\", file=sys.stderr)\n        return False\n\n    # 9. Check PR was merged using squash and merge\n    print(\"9. Verifying pull request was merged using 'squash and merge' method...\")\n    if pr_number and not _check_pr_squash_merged(pr_number, headers, github_org):\n        print(\n            f\"Error: Pull request #{pr_number} was not merged using 'squash and merge' method\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"\\n✓ All verification checks passed!\")\n    print(\"Release management workflow completed successfully.\")\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/deployment_status_workflow/description.md",
    "content": "I need you to create a Deployment Status workflow for this Node.js project. The project currently has no GitHub Actions workflows, so you'll be building a deployment-focused CI/CD workflow from scratch that responds to push events on the main branch. Here's what needs to be implemented:\n\n## Deployment Status Workflow\n\nCreate `.github/workflows/deployment-status.yml` that triggers on `push` to `main` branch with these sequential jobs:\n\n### 1. **pre-deployment** job (name: `pre-deployment`):\n   - Runs basic quality checks (lint and test)\n   - Creates deployment tracking issue with title: \"Deployment Tracking - [commit-sha]\"\n   - Adds labels: `deployment`, `in-progress`\n   - Captures previous commit SHA and package version information\n   - Posts comment containing \"Pre-deployment checks completed\"\n\n### 2. **rollback-preparation** job (name: `rollback-preparation`):\n   - Depends on: pre-deployment\n   - Creates comprehensive rollback artifacts including:\n     * Executable rollback script with proper error handling\n     * Configuration backups (package.json, package-lock.json, environment templates)\n     * Dependency verification script for compatibility checking\n     * Detailed rollback documentation with step-by-step instructions\n     * Compressed rollback package with SHA256 checksums\n   - Uploads rollback artifacts to GitHub Actions with 30-day retention\n   - Posts comment on deployment issue that MUST contain the following verifiable elements:\n     * Title: \"🔄 Rollback Plan Ready\"\n     * Previous commit SHA (format: \"Previous Commit: [sha]\")\n     * Current commit SHA (format: \"Current Commit: [sha]\")\n     * Package version (format: \"Package Version: [version]\")\n     * Artifact name (format: \"Artifact: rollback-package-[commit-sha]\")\n     * At least 5 checkmarks (✅) indicating completed rollback components\n     * Quick rollback command section with bash code block\n     * Script verification status: 
\"Rollback script created: true\"\n     * Backup verification status: \"Configuration backup: true\"\n     * Artifact checksum (format: \"SHA256: [checksum-value]\")\n\n### 3. **post-deployment** job (name: `post-deployment`):\n   - Depends on: rollback-preparation\n   - Removes `in-progress` label and adds `completed` label\n   - Posts final comment containing \"Deployment Completed Successfully\" with rollback artifact details\n   - Closes the deployment tracking issue\n\n## Implementation Requirements:\n\n**Step 1: Create Feature Branch**\nCreate a new branch called `deployment-status-workflow` from main.\n\n**Step 2: Implement the Workflow**\nCreate `.github/workflows/deployment-status.yml` with proper YAML syntax:\n- Trigger only on push to main branch\n- Sequential job execution: pre-deployment → rollback-preparation → post-deployment\n- Use github-script actions for issue management\n- Avoid identifier conflicts in github-script actions (don't redeclare 'github')\n- Include proper error handling and script validation\n- Implement comprehensive rollback artifact creation and verification\n- Use proper fetch-depth for accessing commit history\n- Include artifact upload/download capabilities with checksums\n\n**Step 3: Create and Merge Pull Request**\nCreate a comprehensive pull request and merge it to main:\n- Title: \"Implement Deployment Status Workflow\"\n- Detailed description of the workflow and its purpose\n- Merge the pull request to main branch to trigger the deployment workflow"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/deployment_status_workflow/meta.json",
    "content": "{\n  \"task_id\": \"deployment_status_workflow\",\n  \"task_name\": \"Deployment Status Workflow\",\n  \"category_id\": \"mcpmark-cicd\",\n  \"category_name\": \"MCPMark CI/CD\",\n  \"description\": \"Create deployment status workflow with pre-deployment checks, rollback preparation, and comprehensive issue tracking for deployments.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"ci/cd automation\",\n    \"workflow automation\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/mcpmark-cicd\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/deployment_status_workflow/verify.py",
    "content": "import sys\nimport os\nimport requests\nimport time\nfrom typing import Dict, List, Optional, Tuple\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{owner}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _search_github_issues(\n    query: str, headers: Dict[str, str]\n) -> Tuple[bool, Optional[List]]:\n    \"\"\"Search GitHub issues using the search API.\"\"\"\n    url = f\"https://api.github.com/search/issues?q={query}&per_page=100\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            data = response.json()\n            return True, data.get(\"items\", [])\n        else:\n            print(f\"Search API error: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Search exception: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _wait_for_workflow_completion(\n    headers: Dict[str, str], owner: str, repo: str, max_wait: int = 90\n) -> bool:\n    \"\"\"Wait for GitHub Actions workflows to complete processing.\"\"\"\n    print(\"⏳ Waiting for deployment status workflows to complete...\")\n\n    start_time = time.time()\n    no_workflow_check_count = 0\n\n    while time.time() - start_time < max_wait:\n        try:\n  
          # Check workflow runs for deployment-status.yml\n            success, response = _get_github_api(\n                \"actions/workflows/deployment-status.yml/runs?per_page=10\",\n                headers,\n                owner,\n                repo,\n            )\n\n            if success and response:\n                runs = response.get(\"workflow_runs\", [])\n                if len(runs) > 0:\n                    # Check status of recent runs\n                    running_count = 0\n                    completed_count = 0\n                    failed_count = 0\n\n                    for run in runs[:3]:  # Check recent runs\n                        status = run[\"status\"]\n                        conclusion = run.get(\"conclusion\")\n\n                        if status == \"completed\":\n                            completed_count += 1\n                            if conclusion == \"failure\":\n                                failed_count += 1\n                        elif status in [\"in_progress\", \"queued\"]:\n                            running_count += 1\n\n                    print(\n                        f\"   Status: {completed_count} completed, {running_count} running/queued\"\n                    )\n\n                    # Wait until NO workflows are running\n                    if running_count == 0:\n                        if failed_count > 0:\n                            print(\n                                f\"⚠️ Warning: {failed_count} workflow runs failed, but continuing verification...\"\n                            )\n\n                        print(\n                            f\"✅ All workflows completed. 
Found {completed_count} completed runs.\"\n                        )\n                        # Additional wait to ensure all processing is done\n                        print(\n                            \"⏳ Additional wait for deployment processing to complete...\"\n                        )\n                        time.sleep(5)\n                        return True\n                else:\n                    # No workflow runs found\n                    no_workflow_check_count += 1\n                    if no_workflow_check_count == 1:\n                        print(\n                            \"   No workflow runs found yet, waiting 5 seconds and checking once more...\"\n                        )\n                        time.sleep(5)\n                        continue\n                    elif no_workflow_check_count >= 2:\n                        print(\n                            \"⚠️ No workflow runs detected after 2 checks. Workflow may not have been triggered.\"\n                        )\n                        print(\"   Continuing with verification...\")\n                        return False\n\n            print(f\"⏳ Still waiting... 
({int(time.time() - start_time)}s elapsed)\")\n            time.sleep(5)\n\n        except Exception as e:\n            print(f\"⚠️ Error checking workflow status: {e}\")\n            time.sleep(5)\n\n    print(f\"⚠️ Workflow completion wait timed out after {max_wait}s\")\n    return False\n\n\ndef _verify_workflow_runs(\n    headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, List[str], Optional[Dict]]:\n    \"\"\"Verify that the deployment status workflow runs have the correct jobs.\"\"\"\n    print(\"\\n⚙️ Verifying deployment status workflow runs...\")\n    errors = []\n\n    # Get the most recent workflow run\n    success, response = _get_github_api(\n        \"actions/workflows/deployment-status.yml/runs?per_page=5\",\n        headers,\n        owner,\n        repo,\n    )\n\n    if not success or not response:\n        return False, [\"Failed to fetch workflow runs\"], None\n\n    runs = response.get(\"workflow_runs\", [])\n    if not runs:\n        return False, [\"No workflow runs found for deployment-status.yml\"], None\n\n    # Find the most recent successful run\n    latest_successful_run = None\n    for run in runs:\n        if run.get(\"conclusion\") == \"success\":\n            latest_successful_run = run\n            break\n\n    if not latest_successful_run:\n        return False, [\"No successful workflow runs found\"], None\n\n    run_id = latest_successful_run[\"id\"]\n    print(f\"   Found successful workflow run #{run_id}\")\n\n    # Get jobs for this run\n    success, jobs_response = _get_github_api(\n        f\"actions/runs/{run_id}/jobs\", headers, owner, repo\n    )\n\n    if not success:\n        return False, [\"Failed to fetch workflow jobs\"], None\n\n    jobs = jobs_response.get(\"jobs\", [])\n    expected_jobs = [\"pre-deployment\", \"rollback-preparation\", \"post-deployment\"]\n\n    found_jobs = [job[\"name\"] for job in jobs]\n    missing_jobs = [job for job in expected_jobs if job not in found_jobs]\n\n    if 
missing_jobs:\n        errors.append(f\"Missing jobs: {missing_jobs}. Found: {found_jobs}\")\n    else:\n        print(f\"   ✅ All 3 required jobs found: {found_jobs}\")\n\n    # Verify all jobs succeeded\n    failed_jobs = [job[\"name\"] for job in jobs if job[\"conclusion\"] != \"success\"]\n    if failed_jobs:\n        errors.append(f\"Failed jobs: {failed_jobs}\")\n    else:\n        print(\"   ✅ All jobs completed successfully\")\n\n    # Verify sequential execution (each job should start after the previous one)\n    if len(jobs) >= 3:\n        job_times = {}\n        for job in jobs:\n            if job[\"name\"] in expected_jobs and job[\"started_at\"]:\n                job_times[job[\"name\"]] = job[\"started_at\"]\n\n        if len(job_times) >= 3:\n            # Check that jobs ran in correct sequence\n            import datetime\n\n            times = {\n                name: datetime.datetime.fromisoformat(time.replace(\"Z\", \"+00:00\"))\n                for name, time in job_times.items()\n            }\n\n            # pre-deployment should start first\n            # rollback-preparation should start after pre-deployment\n            # post-deployment should start after rollback-preparation\n            if all(job in times for job in expected_jobs):\n                if (\n                    times[\"rollback-preparation\"] <= times[\"pre-deployment\"]\n                    or times[\"post-deployment\"] <= times[\"rollback-preparation\"]\n                ):\n                    errors.append(\"Jobs did not run in correct sequential order\")\n                else:\n                    print(\"   ✅ Jobs ran in correct sequential order\")\n            else:\n                errors.append(\n                    \"Not enough job timing data to verify sequential execution\"\n                )\n\n    return len(errors) == 0, errors, latest_successful_run\n\n\ndef _verify_deployment_issue(\n    run_data: Dict, headers: Dict[str, str], owner: str, repo: str\n) 
-> Tuple[bool, List[str]]:\n    \"\"\"Verify that a deployment tracking issue was created and closed properly.\"\"\"\n    print(\"\\n📋 Verifying deployment tracking issue...\")\n    errors = []\n\n    # Extract commit SHA from the workflow run\n    head_sha = run_data.get(\"head_sha\")\n    if not head_sha:\n        return False, [\"Could not determine head SHA from workflow run\"]\n\n    short_sha = head_sha[:7]\n    expected_title = f\"Deployment Tracking - {short_sha}\"\n\n    # Search for the deployment tracking issue\n    success, issues = _search_github_issues(\n        f'repo:{owner}/{repo} \"{expected_title}\" is:issue', headers\n    )\n\n    if not success:\n        return False, [\"Failed to search for deployment tracking issue\"]\n\n    # Find the exact issue\n    deployment_issue = None\n    for issue in issues:\n        if issue.get(\"title\") == expected_title:\n            deployment_issue = issue\n            break\n\n    if not deployment_issue:\n        return False, [f\"Deployment tracking issue '{expected_title}' not found\"]\n\n    issue_number = deployment_issue[\"number\"]\n    print(f\"   Found deployment tracking issue #{issue_number}: {expected_title}\")\n\n    # Check that issue is closed\n    if deployment_issue.get(\"state\") != \"closed\":\n        errors.append(\n            f\"Deployment issue #{issue_number} is not closed (state: {deployment_issue.get('state')})\"\n        )\n    else:\n        print(f\"   ✅ Deployment issue #{issue_number} is closed\")\n\n    # Check required labels\n    expected_labels = [\"deployment\", \"completed\"]\n    actual_labels = [label[\"name\"] for label in deployment_issue.get(\"labels\", [])]\n    missing_labels = [label for label in expected_labels if label not in actual_labels]\n\n    if missing_labels:\n        errors.append(\n            f\"Missing labels on deployment issue: {missing_labels}. 
Found: {actual_labels}\"\n        )\n    else:\n        print(f\"   ✅ Required labels found: {expected_labels}\")\n\n    # Get issue comments to verify GitHub Actions bot comments\n    success, comments = _get_github_api(\n        f\"issues/{issue_number}/comments\", headers, owner, repo\n    )\n\n    if not success:\n        errors.append(\"Failed to get deployment issue comments\")\n        return len(errors) == 0, errors\n\n    # Filter for GitHub Actions bot comments only\n    bot_comments = [\n        comment\n        for comment in comments\n        if comment.get(\"user\", {}).get(\"login\") == \"github-actions[bot]\"\n    ]\n\n    if not bot_comments:\n        errors.append(\"No comments found from GitHub Actions bot\")\n        return len(errors) == 0, errors\n\n    print(f\"   Found {len(bot_comments)} comment(s) from GitHub Actions bot\")\n\n    # Get all bot comment bodies\n    bot_comment_bodies = [comment.get(\"body\", \"\") for comment in bot_comments]\n    all_bot_comments = \" \".join(bot_comment_bodies)\n\n    # Check for required GitHub Actions bot comment indicators\n    required_comment_indicators = [\n        \"Pre-deployment checks completed\",\n        \"🔄 Rollback Plan Ready\",\n        \"Deployment Completed Successfully\",\n    ]\n\n    for indicator in required_comment_indicators:\n        if indicator not in all_bot_comments:\n            errors.append(\n                f\"Missing required GitHub Actions bot comment indicator: '{indicator}'\"\n            )\n        else:\n            print(f\"   ✅ Found GitHub Actions bot comment indicator: '{indicator}'\")\n\n    # Find and verify the rollback plan comment from GitHub Actions bot\n    rollback_comment = None\n    for comment in bot_comments:\n        if \"🔄 Rollback Plan Ready\" in comment.get(\"body\", \"\"):\n            rollback_comment = comment.get(\"body\", \"\")\n            break\n\n    if rollback_comment:\n        print(\"   ✅ Found rollback plan comment from GitHub Actions 
bot\")\n\n        # Check for required rollback plan elements\n        required_elements = [\n            \"**Previous Commit**:\",\n            \"**Current Commit**:\",\n            \"**Package Version**:\",\n            \"✅ Executable rollback script created\",\n            \"✅ Configuration backups saved\",\n            \"✅ Dependency verification script prepared\",\n            \"✅ Comprehensive rollback documentation generated\",\n            \"✅ Compressed rollback package created\",\n            \"**SHA256**:\",\n            \"**Artifact**:\",\n            \"Quick Rollback Commands\",\n        ]\n\n        for element in required_elements:\n            if element not in rollback_comment:\n                errors.append(f\"Missing element in rollback plan: '{element}'\")\n            else:\n                print(f\"   ✅ Found rollback plan element: '{element}'\")\n\n        # Verify commit SHAs in rollback comment\n        if f\"**Current Commit**: {head_sha}\" in rollback_comment:\n            print(f\"   ✅ Current commit SHA verified: {head_sha}\")\n        else:\n            errors.append(\n                f\"Current commit SHA {head_sha} not found in rollback comment\"\n            )\n\n        # Extract and verify previous commit SHA\n        if \"**Previous Commit**:\" in rollback_comment:\n            import re\n\n            prev_sha_match = re.search(\n                r\"\\*\\*Previous Commit\\*\\*:\\s*([a-f0-9]{40})\", rollback_comment\n            )\n            if prev_sha_match:\n                prev_sha = prev_sha_match.group(1)\n                print(f\"   ✅ Previous commit SHA found: {prev_sha}\")\n\n                # Verify it's a valid 40-character SHA\n                if len(prev_sha) != 40:\n                    errors.append(\n                        f\"Previous commit SHA has invalid length: {len(prev_sha)}\"\n                    )\n            else:\n                errors.append(\n                    \"Previous commit SHA format not 
found in rollback comment\"\n                )\n        else:\n            errors.append(\"Previous commit SHA not found in rollback comment\")\n\n        # Verify SHA256 checksum is present\n        sha256_match = re.search(r\"\\*\\*SHA256\\*\\*:\\s*([a-f0-9]{64})\", rollback_comment)\n        if sha256_match:\n            sha256_value = sha256_match.group(1)\n            print(f\"   ✅ SHA256 checksum found: {sha256_value[:16]}...\")\n        else:\n            errors.append(\n                \"SHA256 checksum not found or invalid format in rollback comment\"\n            )\n\n    else:\n        errors.append(\"Rollback plan comment not found from GitHub Actions bot\")\n\n    return len(errors) == 0, errors\n\n\ndef verify() -> bool:\n    \"\"\"\n    Verify that the deployment status workflow automation is working correctly.\n    \"\"\"\n    # Load environment variables\n    load_dotenv(\".mcp_env\")\n\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    # Get GitHub organization\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    # Repository configuration\n    owner = github_org\n    repo = \"mcpmark-cicd\"\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    print(\"🔍 Starting Deployment Status Workflow Verification\")\n    print(\"=\" * 60)\n\n    # Wait for workflows to complete\n    workflows_completed = _wait_for_workflow_completion(headers, owner, repo)\n    if not workflows_completed:\n        print(\n            \"⚠️ Warning: Workflows may still be running. Continuing with verification...\"\n        )\n\n    # Verify workflow runs and jobs\n    all_passed = True\n\n    # 1. 
Verify workflow runs have correct jobs\n    runs_ok, runs_errors, run_data = _verify_workflow_runs(headers, owner, repo)\n    if not runs_ok:\n        all_passed = False\n        print(\"❌ Workflow Runs Verification Failed:\")\n        for error in runs_errors:\n            print(f\"   - {error}\")\n    else:\n        print(\"✅ Workflow Runs Verification Passed\")\n\n        # 2. Verify deployment issue if workflow runs passed\n        if run_data:\n            issue_ok, issue_errors = _verify_deployment_issue(\n                run_data, headers, owner, repo\n            )\n            if not issue_ok:\n                all_passed = False\n                print(\"❌ Deployment Issue Verification Failed:\")\n                for error in issue_errors:\n                    print(f\"   - {error}\")\n            else:\n                print(\"✅ Deployment Issue Verification Passed\")\n\n    print(\"\\n\" + \"=\" * 60)\n    if all_passed:\n        print(\"🎉 All Deployment Status Workflow verifications PASSED!\")\n        print(\"\\n📋 Summary:\")\n        print(\n            \"   ✅ Workflow runs with correct 3 sequential jobs: pre-deployment, rollback-preparation, post-deployment\"\n        )\n        print(\"   ✅ Deployment tracking issue created and closed with proper labels\")\n        print(\"   ✅ Issue contains rollback plan with all required elements\")\n        print(\"   ✅ Previous and current commit SHAs are correctly tracked\")\n        print(\"   ✅ All workflow automation comments are present\")\n        print(\n            \"\\n🤖 The GitHub Actions deployment status workflow is working correctly!\"\n        )\n    else:\n        print(\"❌ Deployment Status Workflow verification FAILED!\")\n        print(\"   Some components did not meet the expected automation requirements.\")\n\n    return all_passed\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/issue_management_workflow/description.md",
    "content": "I need you to create an intelligent Issue Management automation workflow for this Node.js project. The project currently has no GitHub Actions workflows, so you'll be building an issue-focused automation system from scratch that responds to issue events. Here's what needs to be implemented:\n\n## Issue Management Workflow\n\nCreate `.github/workflows/issue-automation.yml` that triggers on `issues` events (opened, labeled) with these jobs:\n\n### 1. **issue-triage** job:\n   - Auto-assigns category labels based on keywords in **issue title** (case-insensitive):\n     - Title contains \"bug\" → adds `bug` label\n     - Title contains \"epic\" → adds `epic` label  \n     - Title contains \"maintenance\" → adds `maintenance` label\n   - Auto-assigns priority labels based on keywords in **issue title OR body** (case-insensitive, highest priority wins if multiple keywords found):\n     - \"critical\", \"urgent\", \"production\", \"outage\" → `priority-critical`\n     - \"important\", \"high\", \"blocking\" → `priority-high` \n     - \"medium\", \"normal\" → `priority-medium` (default if no priority keywords found)\n     - \"low\", \"nice-to-have\", \"minor\" → `priority-low`\n   - All issues get `needs-triage` label initially\n\n### 2. **task-breakdown** job:\n   - For issues with a title containing \"Epic\", create exactly 4 sub-issues with the pattern: \"[SUBTASK] [Original Title] - Task N: [Task Name]\"\n   - Task names: 1. Requirements Analysis, 2. Design and Architecture, 3. Implementation, 4. Testing and Documentation\n   - Links sub-issues to parent using \"Related to #[parent-number]\" in sub-issue body\n   - Updates parent issue body with \"## Epic Tasks\" checklist linking to sub-issue numbers\n   - All sub-issues get `enhancement` and `needs-review` labels\n\n### 3. 
**auto-response** job:\n   - Checks if the issue author is creating their first issue in this repository (not first on GitHub globally, but first in this specific repo)\n   - If first issue in repo: adds `first-time-contributor` label and posts welcome message\n   - Posts different responses based on issue type:\n     - `bug` issues: comment must contain \"Bug Report Guidelines\"\n     - `epic` issues: comment must contain \"Feature Request Process\"  \n     - `maintenance` issues: comment must contain \"Maintenance Guidelines\"\n   - Sets milestone \"v1.0.0\" for `priority-high` and `priority-critical` issues\n   - Changes status from `needs-triage` to `needs-review` after response\n\n## Label Management Requirements\n\nThe system must create and manage these specific labels:\n\n### Category Labels:\n- `bug` - Something isn't working\n- `enhancement` - New feature or request  \n- `epic` - Large feature requiring multiple sub-tasks\n- `maintenance` - Maintenance and housekeeping tasks\n\n### Priority Labels:\n- `priority-critical` - Critical priority issue\n- `priority-high` - High priority issue  \n- `priority-medium` - Medium priority issue\n- `priority-low` - Low priority issue\n\n### Status Labels:\n- `needs-triage` - Needs to be reviewed by maintainers\n- `needs-review` - Awaiting review from maintainers\n- `first-time-contributor` - Issue created by first-time contributor\n\n## Implementation Requirements:\n\n**Step 1: Create Feature Branch**\nCreate a new branch called `issue-management-workflow` from main.\n\n**Step 2: Create Supporting Files**\nCreate these additional files on the new branch:\n- `.github/ISSUE_TEMPLATE/bug_report.md` - Bug report template\n- `.github/ISSUE_TEMPLATE/feature_request.md` - Feature request template\n- `.github/ISSUE_TEMPLATE/maintenance_report.md` - Maintenance report template\n\n\n**Step 3: Implement the Workflow**  \nCreate `.github/workflows/issue-automation.yml` with proper YAML syntax.  
\nInclude:  \n- Appropriate triggers for issues events  \n- Job dependencies where needed  \n- Error handling and graceful fallbacks  \n- Avoid identifier conflicts in github-script actions (don't redeclare 'github')\n\n**Step 4: Create and Merge Pull Request**\nCreate a comprehensive pull request and merge it to main:\n- Title: \"Implement Issue Management Automation Workflow\"\n- Detailed description of the workflow and its purpose\n- Include all workflow files and templates created\n- Merge the pull request to main branch\n\n**Step 5: Test the Workflow**\nCreate test issues to demonstrate the issue automation workflow:\n\n1. **Bug Issue**: \"Bug: Login form validation not working\"\n   - Expected: `bug`, `priority-high`, `needs-triage`→`needs-review`, milestone \"v1.0.0\"\n   - Auto-response comment must contain \"Bug Report Guidelines\"\n\n2. **Epic Issue**: \"Epic: Redesign user dashboard interface\"\n   - Expected: `epic`, `priority-high`, `needs-triage`→`needs-review`, milestone \"v1.0.0\"\n   - Must create 4 sub-issues with `enhancement` and `needs-review` labels\n   - Parent updated with \"## Epic Tasks\" checklist, sub-issues linked with \"Related to #[parent-number]\"\n   - Auto-response comment must contain \"Feature Request Process\"\n\n3. **Maintenance Issue**: \"Weekly maintenance cleanup and refactor\"  \n   - Expected: `maintenance`, `priority-medium`, `needs-triage`→`needs-review`, no milestone\n   - Auto-response comment must contain \"Maintenance Guidelines\""
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/issue_management_workflow/meta.json",
    "content": "{\n  \"task_id\": \"issue_management_workflow\",\n  \"task_name\": \"Issue Management Workflow\",\n  \"category_id\": \"mcpmark-cicd\",\n  \"category_name\": \"MCPMark CI/CD\",\n  \"description\": \"Build intelligent issue management automation with auto-triage, task breakdown for epics, and first-time contributor handling.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"issue management\",\n    \"workflow automation\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/mcpmark-cicd\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/issue_management_workflow/verify.py",
    "content": "import sys\nimport os\nimport requests\nimport time\nfrom typing import Dict, List, Optional, Tuple\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{owner}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _search_github_issues(\n    query: str, headers: Dict[str, str]\n) -> Tuple[bool, Optional[List]]:\n    \"\"\"Search GitHub issues using the search API.\"\"\"\n    url = f\"https://api.github.com/search/issues?q={query}&per_page=100\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            data = response.json()\n            return True, data.get(\"items\", [])\n        else:\n            print(f\"Search API error: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Search exception: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _wait_for_workflow_completion(\n    headers: Dict[str, str], owner: str, repo: str, max_wait: int = 90\n) -> bool:\n    \"\"\"Wait for GitHub Actions workflows to complete processing.\"\"\"\n    print(\"⏳ Waiting for GitHub Actions workflows to complete...\")\n\n    start_time = time.time()\n    expected_runs = 3  # We created 3 test issues\n    no_workflow_check_count = 0\n\n    while 
time.time() - start_time < max_wait:\n        try:\n            # Check workflow runs\n            success, response = _get_github_api(\n                \"actions/workflows/issue-automation.yml/runs?per_page=20\",\n                headers,\n                owner,\n                repo,\n            )\n\n            if success and response:\n                runs = response.get(\"workflow_runs\", [])\n                if len(runs) >= expected_runs:\n                    # Check status of recent runs\n                    recent_runs = runs[:expected_runs]\n\n                    running_count = 0\n                    completed_count = 0\n                    failed_count = 0\n\n                    for run in recent_runs:\n                        status = run[\"status\"]\n                        conclusion = run.get(\"conclusion\")\n\n                        if status == \"completed\":\n                            completed_count += 1\n                            if conclusion == \"failure\":\n                                failed_count += 1\n                        elif status in [\"in_progress\", \"queued\"]:\n                            running_count += 1\n\n                    print(\n                        f\"   Status: {completed_count} completed, {running_count} running/queued\"\n                    )\n\n                    # Wait until NO workflows are running and we have enough completed runs\n                    if running_count == 0 and completed_count >= expected_runs:\n                        if failed_count > 0:\n                            print(\n                                f\"⚠️ Warning: {failed_count} workflow runs failed, but continuing verification...\"\n                            )\n\n                        print(\n                            f\"✅ All workflows completed. 
Found {completed_count} completed runs.\"\n                        )\n                        # Additional wait to ensure all issue processing is done\n                        print(\"⏳ Additional wait for issue processing to complete...\")\n                        time.sleep(5)\n                        return True\n                elif len(runs) == 0:\n                    # No workflow runs found\n                    no_workflow_check_count += 1\n                    if no_workflow_check_count == 1:\n                        print(\n                            \"   No workflow runs found yet, waiting 5 seconds and checking once more...\"\n                        )\n                        time.sleep(5)\n                        continue\n                    elif no_workflow_check_count >= 2:\n                        print(\n                            \"⚠️ No workflow runs detected after 2 checks. Workflow may not have been triggered.\"\n                        )\n                        print(\"   Continuing with verification...\")\n                        return False\n                else:\n                    print(\n                        f\"   Waiting for workflow runs... Found {len(runs)}, expected {expected_runs}\"\n                    )\n\n            print(f\"⏳ Still waiting... 
({int(time.time() - start_time)}s elapsed)\")\n            time.sleep(5)\n\n        except Exception as e:\n            print(f\"⚠️ Error checking workflow status: {e}\")\n            time.sleep(5)\n\n    print(f\"⚠️ Workflow completion wait timed out after {max_wait}s\")\n    return False\n\n\ndef _find_issue_by_title(\n    title: str, headers: Dict[str, str], owner: str, repo: str\n) -> Optional[Dict]:\n    \"\"\"Find an issue by exact title match.\"\"\"\n    success, issues = _search_github_issues(\n        f'repo:{owner}/{repo} \"{title}\" is:issue', headers\n    )\n\n    if success and issues:\n        for issue in issues:\n            if issue.get(\"title\") == title:\n                return issue\n    return None\n\n\ndef _check_issue_labels(\n    issue: Dict, expected_labels: List[str]\n) -> Tuple[bool, List[str]]:\n    \"\"\"Check if issue has the expected labels.\"\"\"\n    actual_labels = [label[\"name\"] for label in issue.get(\"labels\", [])]\n    missing_labels = [label for label in expected_labels if label not in actual_labels]\n\n    if missing_labels:\n        return False, [f\"Missing labels: {missing_labels}. Found: {actual_labels}\"]\n    return True, []\n\n\ndef _check_issue_milestone(\n    issue: Dict, expected_milestone: str\n) -> Tuple[bool, List[str]]:\n    \"\"\"Check if issue has the expected milestone.\"\"\"\n    milestone = issue.get(\"milestone\")\n    if not milestone:\n        if expected_milestone:\n            return False, [f\"No milestone found. Expected: {expected_milestone}\"]\n        return True, []\n\n    if milestone.get(\"title\") != expected_milestone:\n        return False, [\n            f\"Wrong milestone: {milestone.get('title')}. 
Expected: {expected_milestone}\"\n        ]\n\n    return True, []\n\n\ndef _check_issue_comments(\n    issue_number: int,\n    expected_content: str,\n    headers: Dict[str, str],\n    owner: str,\n    repo: str,\n) -> Tuple[bool, List[str]]:\n    \"\"\"Check if issue has a comment containing expected content.\"\"\"\n    success, comments = _get_github_api(\n        f\"issues/{issue_number}/comments\", headers, owner, repo\n    )\n\n    if not success:\n        return False, [\"Failed to get issue comments\"]\n\n    if not comments:\n        return False, [f\"No comments found. Expected comment with: {expected_content}\"]\n\n    for comment in comments:\n        if expected_content in comment.get(\"body\", \"\"):\n            return True, []\n\n    return False, [f\"Expected content '{expected_content}' not found in comments\"]\n\n\ndef _find_epic_sub_issues(\n    parent_issue_number: int, headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[List[Dict], List[str]]:\n    \"\"\"Find sub-issues created for an epic.\"\"\"\n    # Search for each expected sub-task by exact title\n    expected_subtasks = [\n        \"[SUBTASK] Epic: Redesign user dashboard interface - Task 1: Requirements Analysis\",\n        \"[SUBTASK] Epic: Redesign user dashboard interface - Task 2: Design and Architecture\",\n        \"[SUBTASK] Epic: Redesign user dashboard interface - Task 3: Implementation\",\n        \"[SUBTASK] Epic: Redesign user dashboard interface - Task 4: Testing and Documentation\",\n    ]\n\n    subtasks = []\n    errors = []\n\n    for expected_title in expected_subtasks:\n        # Search for exact title\n        success, issues = _search_github_issues(\n            f'repo:{owner}/{repo} \"{expected_title}\" is:issue', headers\n        )\n\n        if not success:\n            errors.append(f\"Failed to search for sub-issue: {expected_title}\")\n            continue\n\n        # Find exact match\n        found = False\n        for issue in issues:\n            if 
issue.get(\"title\") == expected_title:\n                # Verify it references the parent issue\n                body = issue.get(\"body\", \"\")\n                if (\n                    f\"#{parent_issue_number}\" in body\n                    or f\"Related to #{parent_issue_number}\" in body\n                ):\n                    subtasks.append(issue)\n                    found = True\n                    break\n\n        if not found:\n            errors.append(\n                f\"Sub-issue not found or doesn't reference parent: {expected_title}\"\n            )\n\n    return subtasks, errors\n\n\ndef _check_epic_checklist(\n    issue: Dict, subtask_numbers: List[int]\n) -> Tuple[bool, List[str]]:\n    \"\"\"Check if epic issue has the Epic Tasks checklist with correct issue references.\"\"\"\n    body = issue.get(\"body\", \"\")\n    errors = []\n\n    if \"## Epic Tasks\" not in body:\n        return False, [\"Epic Tasks section not found in issue body\"]\n\n    # Check that all subtask issue numbers are referenced in checkbox format\n    for number in subtask_numbers:\n        # Check for checkbox format: - [ ] #number\n        if f\"- [ ] #{number}\" not in body:\n            errors.append(\n                f\"Sub-issue #{number} not found in Epic Tasks checklist format (expected: '- [ ] #{number}')\"\n            )\n\n    # Also verify the expected task names are present\n    expected_tasks = [\n        \"Requirements Analysis\",\n        \"Design and Architecture\",\n        \"Implementation\",\n        \"Testing and Documentation\",\n    ]\n\n    for task in expected_tasks:\n        if task not in body:\n            errors.append(f\"Task name '{task}' not found in Epic Tasks section\")\n\n    if errors:\n        return False, errors\n\n    return True, []\n\n\ndef _verify_bug_issue(\n    headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, List[str]]:\n    \"\"\"Verify the bug issue requirements.\"\"\"\n    print(\"\\n🐛 Verifying Bug 
Issue...\")\n    errors = []\n\n    # Find bug issue\n    bug_issue = _find_issue_by_title(\n        \"Bug: Login form validation not working\", headers, owner, repo\n    )\n    if not bug_issue:\n        return False, [\"Bug issue 'Bug: Login form validation not working' not found\"]\n\n    issue_number = bug_issue[\"number\"]\n    print(f\"   Found bug issue #{issue_number}\")\n\n    # Check labels (including first-time-contributor since it's the first issue)\n    expected_labels = [\"bug\", \"priority-high\", \"needs-review\", \"first-time-contributor\"]\n    labels_ok, label_errors = _check_issue_labels(bug_issue, expected_labels)\n    if not labels_ok:\n        errors.extend(label_errors)\n    else:\n        print(f\"   ✅ Labels verified: {expected_labels}\")\n\n    # Check milestone\n    milestone_ok, milestone_errors = _check_issue_milestone(bug_issue, \"v1.0.0\")\n    if not milestone_ok:\n        errors.extend(milestone_errors)\n    else:\n        print(\"   ✅ Milestone verified: v1.0.0\")\n\n    # Check comment\n    comment_ok, comment_errors = _check_issue_comments(\n        issue_number, \"Bug Report Guidelines\", headers, owner, repo\n    )\n    if not comment_ok:\n        errors.extend(comment_errors)\n    else:\n        print(\"   ✅ Bug Report Guidelines comment found\")\n\n    return len(errors) == 0, errors\n\n\ndef _verify_epic_issue(\n    headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, List[str]]:\n    \"\"\"Verify the epic issue requirements.\"\"\"\n    print(\"\\n🚀 Verifying Epic Issue...\")\n    errors = []\n\n    # Find epic issue\n    epic_issue = _find_issue_by_title(\n        \"Epic: Redesign user dashboard interface\", headers, owner, repo\n    )\n    if not epic_issue:\n        return False, [\"Epic issue 'Epic: Redesign user dashboard interface' not found\"]\n\n    issue_number = epic_issue[\"number\"]\n    print(f\"   Found epic issue #{issue_number}\")\n\n    # Check labels\n    expected_labels = [\"epic\", 
\"priority-high\", \"needs-review\"]\n    labels_ok, label_errors = _check_issue_labels(epic_issue, expected_labels)\n    if not labels_ok:\n        errors.extend(label_errors)\n    else:\n        print(f\"   ✅ Labels verified: {expected_labels}\")\n\n    # Check milestone\n    milestone_ok, milestone_errors = _check_issue_milestone(epic_issue, \"v1.0.0\")\n    if not milestone_ok:\n        errors.extend(milestone_errors)\n    else:\n        print(\"   ✅ Milestone verified: v1.0.0\")\n\n    # Check comment\n    comment_ok, comment_errors = _check_issue_comments(\n        issue_number, \"Feature Request Process\", headers, owner, repo\n    )\n    if not comment_ok:\n        errors.extend(comment_errors)\n    else:\n        print(\"   ✅ Feature Request Process comment found\")\n\n    # Find and verify sub-issues\n    sub_issues, sub_errors = _find_epic_sub_issues(issue_number, headers, owner, repo)\n    if sub_errors:\n        errors.extend(sub_errors)\n    elif len(sub_issues) != 4:\n        errors.append(f\"Expected 4 sub-issues, found {len(sub_issues)}\")\n    else:\n        print(f\"   ✅ Found {len(sub_issues)} sub-issues\")\n\n        # Collect sub-issue numbers for checklist verification\n        subtask_numbers = []\n\n        # Verify each sub-issue has correct labels and link to parent\n        for sub_issue in sub_issues:\n            sub_number = sub_issue[\"number\"]\n            subtask_numbers.append(sub_number)\n\n            # Check labels\n            sub_labels = [label[\"name\"] for label in sub_issue.get(\"labels\", [])]\n            expected_sub_labels = [\"enhancement\", \"needs-review\"]\n\n            missing_sub_labels = [\n                label for label in expected_sub_labels if label not in sub_labels\n            ]\n            if missing_sub_labels:\n                errors.append(\n                    f\"Sub-issue #{sub_number} missing labels: {missing_sub_labels}\"\n                )\n\n            # Verify parent reference in body\n    
        sub_body = sub_issue.get(\"body\", \"\")\n            if (\n                f\"#{issue_number}\" not in sub_body\n                and f\"Related to #{issue_number}\" not in sub_body\n            ):\n                errors.append(\n                    f\"Sub-issue #{sub_number} doesn't reference parent issue #{issue_number}\"\n                )\n\n        if not errors:\n            print(\n                \"   ✅ All 4 sub-tasks created with correct labels and parent references\"\n            )\n\n        # Check Epic Tasks checklist with correct issue numbers\n        checklist_ok, checklist_errors = _check_epic_checklist(\n            epic_issue, subtask_numbers\n        )\n        if not checklist_ok:\n            errors.extend(checklist_errors)\n        else:\n            print(\n                f\"   ✅ Epic Tasks checklist verified with correct issue references: {subtask_numbers}\"\n            )\n\n    return len(errors) == 0, errors\n\n\ndef _verify_maintenance_issue(\n    headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, List[str]]:\n    \"\"\"Verify the maintenance issue requirements.\"\"\"\n    print(\"\\n🔧 Verifying Maintenance Issue...\")\n    errors = []\n\n    # Find maintenance issue\n    maintenance_issue = _find_issue_by_title(\n        \"Weekly maintenance cleanup and refactor\", headers, owner, repo\n    )\n    if not maintenance_issue:\n        return False, [\n            \"Maintenance issue 'Weekly maintenance cleanup and refactor' not found\"\n        ]\n\n    issue_number = maintenance_issue[\"number\"]\n    print(f\"   Found maintenance issue #{issue_number}\")\n\n    # Check labels\n    expected_labels = [\"maintenance\", \"priority-medium\", \"needs-review\"]\n    labels_ok, label_errors = _check_issue_labels(maintenance_issue, expected_labels)\n    if not labels_ok:\n        errors.extend(label_errors)\n    else:\n        print(f\"   ✅ Labels verified: {expected_labels}\")\n\n    # Check NO milestone (maintenance 
issues shouldn't get v1.0.0)\n    milestone_ok, milestone_errors = _check_issue_milestone(maintenance_issue, None)\n    if not milestone_ok:\n        errors.extend(milestone_errors)\n    else:\n        print(\"   ✅ No milestone assigned (correct for maintenance issue)\")\n\n    # Check comment\n    comment_ok, comment_errors = _check_issue_comments(\n        issue_number, \"Maintenance Guidelines\", headers, owner, repo\n    )\n    if not comment_ok:\n        errors.extend(comment_errors)\n    else:\n        print(\"   ✅ Maintenance Guidelines comment found\")\n\n    return len(errors) == 0, errors\n\n\ndef verify() -> bool:\n    \"\"\"\n    Verify that the issue management workflow automation is working correctly.\n    \"\"\"\n    # Load environment variables\n    load_dotenv(\".mcp_env\")\n\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    # Get GitHub organization\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    # Repository configuration\n    owner = github_org\n    repo = \"mcpmark-cicd\"\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    print(\"🔍 Starting Issue Management Workflow Verification\")\n    print(\"=\" * 60)\n\n    # Wait for workflows to complete\n    workflows_completed = _wait_for_workflow_completion(headers, owner, repo)\n    if not workflows_completed:\n        print(\n            \"⚠️ Warning: Workflows may still be running. Continuing with verification...\"\n        )\n\n    # Verify each test issue\n    all_passed = True\n\n    # 1. 
Verify bug issue\n    bug_ok, bug_errors = _verify_bug_issue(headers, owner, repo)\n    if not bug_ok:\n        all_passed = False\n        print(\"❌ Bug Issue Verification Failed:\")\n        for error in bug_errors:\n            print(f\"   - {error}\")\n    else:\n        print(\"✅ Bug Issue Verification Passed\")\n\n    # 2. Verify epic issue\n    epic_ok, epic_errors = _verify_epic_issue(headers, owner, repo)\n    if not epic_ok:\n        all_passed = False\n        print(\"❌ Epic Issue Verification Failed:\")\n        for error in epic_errors:\n            print(f\"   - {error}\")\n    else:\n        print(\"✅ Epic Issue Verification Passed\")\n\n    # 3. Verify maintenance issue\n    maintenance_ok, maintenance_errors = _verify_maintenance_issue(headers, owner, repo)\n    if not maintenance_ok:\n        all_passed = False\n        print(\"❌ Maintenance Issue Verification Failed:\")\n        for error in maintenance_errors:\n            print(f\"   - {error}\")\n    else:\n        print(\"✅ Maintenance Issue Verification Passed\")\n\n    print(\"\\n\" + \"=\" * 60)\n    if all_passed:\n        print(\"🎉 All Issue Management Workflow verifications PASSED!\")\n        print(\"\\n📋 Summary:\")\n        print(\n            \"   ✅ Bug issue: labels (including first-time-contributor), milestone, and auto-response verified\"\n        )\n        print(\n            \"   ✅ Epic issue: labels, milestone, 4 sub-issues with checklist, and correct issue references verified\"\n        )\n        print(\n            \"   ✅ Maintenance issue: labels, no milestone, and auto-response verified\"\n        )\n        print(\"\\n🤖 The GitHub Actions workflow automation is working correctly!\")\n    else:\n        print(\"❌ Issue Management Workflow verification FAILED!\")\n        print(\"   Some issues did not meet the expected automation requirements.\")\n\n    return all_passed\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/linting_ci_workflow/description.md",
    "content": "I need you to set up a proper linting workflow for our CI pipeline to ensure code quality standards are enforced on all pull requests. Here's what you need to do:\n\n**Step 1: Create Linting Configuration Branch**\nCreate a new branch called 'ci/add-eslint-workflow' from the main branch.\n\n**Step 2: Create ESLint Configuration**\nOn the new branch, create the file `.eslintrc.json` in the repository root with:\n```json\n{\n  \"env\": {\n    \"browser\": true,\n    \"es2021\": true,\n    \"node\": true\n  },\n  \"extends\": [\n    \"eslint:recommended\"\n  ],\n  \"parserOptions\": {\n    \"ecmaVersion\": 12,\n    \"sourceType\": \"module\"\n  },\n  \"rules\": {\n    \"no-unused-vars\": \"error\",\n    \"no-console\": \"warn\",\n    \"semi\": [\"error\", \"always\"],\n    \"quotes\": [\"error\", \"single\"]\n  },\n  \"ignorePatterns\": [\"node_modules/\", \"dist/\", \"build/\"]\n}\n```\n\n**Step 3: Create GitHub Actions Linting Workflow**\nCreate the file `.github/workflows/lint.yml` with:\n- Workflow name: \"Code Linting\"\n- Triggers on: push to main, pull_request events\n- Uses ubuntu-latest runner\n- Sets up Node.js version 18 using actions/setup-node\n- Installs dependencies with npm ci\n- Installs ESLint globally\n- Runs ESLint on all JavaScript files in src/ directories\n- Fails the workflow if linting errors are found\n\n**Step 4: Create a File That Will Fail Linting**\nCreate the file `src/example.js` with intentional linting violations that will cause the CI check to fail.\n\n**Step 5: Create Pull Request**\nCommit all the changes (ESLint config, workflow file, and example file with linting errors) in a single commit, then create a pull request from 'ci/add-eslint-workflow' to 'main' with:\n- Title: \"Add ESLint workflow for code quality enforcement\"\n- Body must include:\n  - A \"## Summary\" heading describing the linting setup\n  - A \"## Changes\" heading listing the files added\n  - A \"## Testing\" heading explaining how to test the 
workflow\n  - Mention that the PR intentionally includes linting errors to demonstrate the workflow\n\n**Step 6: Fix Linting Errors and Update PR**\nFix the linting errors in `src/example.js` and push the fix as a single additional commit on the same branch, so that the PR contains exactly two commits (the initial commit that fails linting and this fix) and the CI check passes on the latest commit.\n\n"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/linting_ci_workflow/meta.json",
    "content": "{\n  \"task_id\": \"linting_ci_workflow\",\n  \"task_name\": \"Linting Ci Workflow\",\n  \"category_id\": \"mcpmark-cicd\",\n  \"category_name\": \"MCPMark CI/CD\",\n  \"description\": \"Set up ESLint workflow for code quality enforcement on all pull requests with proper CI integration.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"ci/cd automation\",\n    \"pr workflows\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/mcpmark-cicd\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/linting_ci_workflow/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, List, Optional, Tuple\nimport base64\nfrom dotenv import load_dotenv\nimport time\nimport json\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"claude-code\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _check_branch_exists(\n    branch_name: str, headers: Dict[str, str], org: str, repo: str = \"mcpmark-cicd\"\n) -> bool:\n    \"\"\"Verify that a branch exists in the repository.\"\"\"\n    success, _ = _get_github_api(f\"branches/{branch_name}\", headers, org, repo)\n    return success\n\n\ndef _get_file_content(\n    file_path: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"claude-code\",\n    ref: str = \"main\",\n) -> Optional[str]:\n    \"\"\"Get the content of a file from the repository.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={ref}\", headers, org, repo\n    )\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef _find_pr_by_title_keyword(\n    keyword: str, headers: Dict[str, str], org: 
str, repo: str = \"mcpmark-cicd\"\n) -> Optional[Dict]:\n    \"\"\"Find a PR by title keyword and return the PR data.\"\"\"\n    for state in [\"open\", \"closed\"]:\n        success, prs = _get_github_api(\n            f\"pulls?state={state}&per_page=100\", headers, org, repo\n        )\n        if success and prs:\n            for pr in prs:\n                if keyword.lower() in pr.get(\"title\", \"\").lower():\n                    return pr\n    return None\n\n\ndef _get_workflow_runs_for_pr(\n    pr_number: int, headers: Dict[str, str], org: str, repo: str = \"mcpmark-cicd\"\n) -> List[Dict]:\n    \"\"\"Get workflow runs for a specific PR.\"\"\"\n    success, runs = _get_github_api(\n        \"actions/runs?event=pull_request&per_page=100\", headers, org, repo\n    )\n    if not success or not runs:\n        return []\n\n    pr_runs = []\n    for run in runs.get(\"workflow_runs\", []):\n        # Check if this run is associated with our PR\n        for pr in run.get(\"pull_requests\", []):\n            if pr.get(\"number\") == pr_number:\n                pr_runs.append(run)\n                break\n\n    return pr_runs\n\n\ndef _get_pr_commits(\n    pr_number: int, headers: Dict[str, str], org: str, repo: str = \"mcpmark-cicd\"\n) -> List[Dict]:\n    \"\"\"Get commits for a specific PR.\"\"\"\n    success, commits = _get_github_api(f\"pulls/{pr_number}/commits\", headers, org, repo)\n    if not success or not commits:\n        return []\n    return commits\n\n\ndef _get_workflow_runs_for_commit(\n    commit_sha: str, headers: Dict[str, str], org: str, repo: str = \"mcpmark-cicd\"\n) -> List[Dict]:\n    \"\"\"Get workflow runs for a specific commit.\"\"\"\n    success, runs = _get_github_api(\n        f\"actions/runs?head_sha={commit_sha}&per_page=100\", headers, org, repo\n    )\n    if not success or not runs:\n        return []\n    return runs.get(\"workflow_runs\", [])\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the ESLint CI 
workflow setup\n    meets the requirements described in description.md.\n    \"\"\"\n    # Configuration constants\n    BRANCH_NAME = \"ci/add-eslint-workflow\"\n    PR_KEYWORD = \"eslint workflow\"\n\n    # Expected files and their content checks\n    ESLINT_CONFIG_PATH = \".eslintrc.json\"\n    WORKFLOW_PATH = \".github/workflows/lint.yml\"\n    EXAMPLE_FILE_PATH = \"src/example.js\"\n\n    # Expected workflow content keywords\n    WORKFLOW_KEYWORDS = [\n        \"Code Linting\",\n        \"ubuntu-latest\",\n        \"actions/setup-node\",\n        \"npm ci\",\n        \"eslint\",\n        \"src/\",\n    ]\n\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    print(\"Verifying ESLint CI workflow setup...\")\n\n    # 1. Check that branch exists\n    print(\"1. Verifying CI branch exists...\")\n    if not _check_branch_exists(BRANCH_NAME, headers, github_org):\n        print(f\"Error: Branch '{BRANCH_NAME}' not found\", file=sys.stderr)\n        return False\n    print(\"✓ CI branch created\")\n\n    # 2. Check ESLint configuration file\n    print(\"2. 
Verifying .eslintrc.json...\")\n    eslint_content = _get_file_content(\n        ESLINT_CONFIG_PATH, headers, github_org, \"mcpmark-cicd\", BRANCH_NAME\n    )\n    if not eslint_content:\n        print(\"Error: .eslintrc.json not found\", file=sys.stderr)\n        return False\n\n    # Validate ESLint config is valid JSON and contains required rules\n    try:\n        eslint_config = json.loads(eslint_content)\n        rules = eslint_config.get(\"rules\", {})\n\n        required_rules = [\"no-unused-vars\", \"semi\", \"quotes\"]\n        missing_rules = [rule for rule in required_rules if rule not in rules]\n        if missing_rules:\n            print(\n                f\"Error: .eslintrc.json missing rules: {missing_rules}\", file=sys.stderr\n            )\n            return False\n\n    except json.JSONDecodeError:\n        print(\"Error: .eslintrc.json is not valid JSON\", file=sys.stderr)\n        return False\n\n    print(\"✓ ESLint configuration created with proper rules\")\n\n    # 3. Check GitHub Actions workflow file\n    print(\"3. Verifying .github/workflows/lint.yml...\")\n    workflow_content = _get_file_content(\n        WORKFLOW_PATH, headers, github_org, \"mcpmark-cicd\", BRANCH_NAME\n    )\n    if not workflow_content:\n        print(\"Error: .github/workflows/lint.yml not found\", file=sys.stderr)\n        return False\n\n    # Check workflow contains required keywords\n    missing_keywords = [kw for kw in WORKFLOW_KEYWORDS if kw not in workflow_content]\n    if missing_keywords:\n        print(f\"Error: Workflow missing keywords: {missing_keywords}\", file=sys.stderr)\n        return False\n\n    # Check trigger configuration\n    if \"pull_request\" not in workflow_content or \"push\" not in workflow_content:\n        print(\"Error: Workflow missing proper triggers\", file=sys.stderr)\n        return False\n\n    print(\"✓ GitHub Actions workflow created with proper configuration\")\n\n    # 4. 
Check example file with linting errors initially exists\n    print(\"4. Verifying src/example.js...\")\n    example_content = _get_file_content(\n        EXAMPLE_FILE_PATH, headers, github_org, \"mcpmark-cicd\", BRANCH_NAME\n    )\n    if not example_content:\n        print(\"Error: src/example.js not found\", file=sys.stderr)\n        return False\n\n    print(\"✓ Example file created\")\n\n    # 5. Find and verify the linting PR\n    print(\"5. Verifying linting pull request...\")\n    lint_pr = _find_pr_by_title_keyword(PR_KEYWORD, headers, github_org)\n    if not lint_pr:\n        # Try alternative keywords\n        lint_pr = _find_pr_by_title_keyword(\"eslint\", headers, github_org)\n\n    if not lint_pr:\n        print(\"Error: Linting PR not found\", file=sys.stderr)\n        return False\n\n    pr_body = lint_pr.get(\"body\", \"\")\n    pr_number = lint_pr.get(\"number\")\n\n    # Check PR body sections\n    required_sections = [\"## Summary\", \"## Changes\", \"## Testing\"]\n    missing_sections = [\n        section for section in required_sections if section not in pr_body\n    ]\n    if missing_sections:\n        print(\n            f\"Error: Linting PR missing sections: {missing_sections}\", file=sys.stderr\n        )\n        return False\n\n    print(\"✓ Linting PR created with proper structure\")\n\n    # 6. Check workflow runs and status changes\n    print(\"6. 
Verifying workflow execution and status...\")\n\n    # First get the commits for this PR\n    commits = _get_pr_commits(pr_number, headers, github_org)\n    if len(commits) != 2:\n        print(\n            f\"Error: Expected exactly 2 commits, found {len(commits)}\", file=sys.stderr\n        )\n        return False\n\n    print(\"✓ Found exactly 2 commits as expected\")\n\n    # Sort commits chronologically (oldest first)\n    commits.sort(key=lambda x: x.get(\"commit\", {}).get(\"author\", {}).get(\"date\", \"\"))\n\n    first_commit_sha = commits[0].get(\"sha\")\n    second_commit_sha = commits[1].get(\"sha\")\n\n    print(f\"First commit (should fail): {first_commit_sha[:7]}\")\n    print(f\"Second commit (should pass): {second_commit_sha[:7]}\")\n\n    # Wait for workflows on both commits to complete\n    print(\"Waiting for workflow completion on first commit...\")\n    first_commit_runs = []\n    second_commit_runs = []\n\n    start_time = time.time()\n    timeout = 90\n    no_workflow_check_count = 0\n\n    while time.time() - start_time < timeout:\n        first_commit_runs = _get_workflow_runs_for_commit(\n            first_commit_sha, headers, github_org\n        )\n        second_commit_runs = _get_workflow_runs_for_commit(\n            second_commit_sha, headers, github_org\n        )\n\n        # Check if any workflows exist\n        if not first_commit_runs and not second_commit_runs:\n            no_workflow_check_count += 1\n            if no_workflow_check_count == 1:\n                print(\n                    \"No workflow runs found yet, waiting 5 seconds and checking once more...\"\n                )\n                time.sleep(5)\n                continue\n            elif no_workflow_check_count >= 2:\n                print(\n                    \"⚠️ No workflow runs detected after 2 checks. 
Workflows may not have been triggered.\"\n                )\n                print(\"   Continuing with verification...\")\n                break\n\n        # Check if workflows are completed\n        first_completed = any(\n            run.get(\"status\") == \"completed\" for run in first_commit_runs\n        )\n        second_completed = any(\n            run.get(\"status\") == \"completed\" for run in second_commit_runs\n        )\n\n        if first_completed and second_completed:\n            break\n\n        print(\"Waiting for workflows to complete...\")\n        time.sleep(10)\n\n    # Verify first commit workflow failed\n    first_commit_status = None\n    for run in first_commit_runs:\n        if run.get(\"status\") == \"completed\":\n            conclusion = run.get(\"conclusion\")\n            if conclusion in [\"failure\", \"cancelled\"]:\n                first_commit_status = \"failed\"\n                print(\"✓ First commit workflow failed as expected\")\n                break\n            elif conclusion == \"success\":\n                first_commit_status = \"passed\"\n                break\n\n    if first_commit_status != \"failed\":\n        print(\n            \"Error: First commit workflow should have failed due to linting errors\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Verify second commit workflow succeeded\n    second_commit_status = None\n    for run in second_commit_runs:\n        if run.get(\"status\") == \"completed\":\n            conclusion = run.get(\"conclusion\")\n            if conclusion == \"success\":\n                second_commit_status = \"passed\"\n                print(\"✓ Second commit workflow passed as expected\")\n                break\n            elif conclusion in [\"failure\", \"cancelled\"]:\n                second_commit_status = \"failed\"\n                break\n\n    if second_commit_status != \"passed\":\n        print(\n            \"Error: Second commit workflow should have 
passed after fixing linting errors\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\n        \"✓ Workflow status sequence verified: first commit failed → second commit passed\"\n    )\n\n    # 7. Verify the final state shows clean code\n    print(\"7. Verifying final file state...\")\n    final_example_content = _get_file_content(\n        EXAMPLE_FILE_PATH, headers, github_org, \"mcpmark-cicd\", BRANCH_NAME\n    )\n\n    if final_example_content:\n        # Check that obvious linting errors are fixed\n        if (\n            \"unusedVariable\" in final_example_content\n            or 'console.log(\"Hello World\")' in final_example_content\n        ):\n            print(\n                \"Warning: Example file may still contain linting errors\",\n                file=sys.stderr,\n            )\n        else:\n            print(\"✓ Linting errors appear to be fixed\")\n\n    print(\"\\n✅ All verification checks passed!\")\n    print(\"ESLint CI workflow setup completed successfully:\")\n    print(f\"  - Linting PR #{pr_number}\")\n    print(f\"  - Branch: {BRANCH_NAME}\")\n    print(\n        \"  - Files created: .eslintrc.json, .github/workflows/lint.yml, src/example.js\"\n    )\n    print(\"  - Workflow configured for pull_request and push triggers\")\n    print(\n        f\"  - Total workflow runs found: {len(first_commit_runs) + len(second_commit_runs)}\"\n    )\n    print(\n        f\"  - First commit runs: {len(first_commit_runs)}, Second commit runs: {len(second_commit_runs)}\"\n    )\n\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/pr_automation_workflow/description.md",
    "content": "I need you to create a comprehensive Pull Request automation workflow for this Node.js project. The project currently has no GitHub Actions workflows, so you'll be building a PR-focused CI/CD workflow from scratch that responds to pull request events. Here's what needs to be implemented:\n\n## Pull Request Automation Workflow\n\nCreate `.github/workflows/pr-automation.yml` that triggers on `pull_request` events (opened, synchronize, reopened) with these jobs:\n\n### 1. **code-quality** job (name: `code-quality`):\n  - Runs ESLint checks using `npm run lint`\n  - Runs Prettier formatting checks\n  - Posts code quality results as PR comment (must include keywords: \"Code Quality Report\", \"ESLint\", \"Prettier\")\n\n### 2. **testing-suite** job (name: `testing-suite`):\n  - Runs full test suite with `npm test`\n  - Generates test coverage report\n  - Posts coverage summary as PR comment (must include keywords: \"Test Coverage Report\")\n  - Uploads coverage artifacts\n\n### 3. **security-scan** job (name: `security-scan`):\n  - Runs dependency vulnerability checks\n  - Scans for secrets in code changes\n  - Creates security report as PR comment (must include keywords: \"Security Scan Report\", \"Vulnerabilities\", \"Dependencies\")\n\n### 4. 
**build-validation** job (name: `build-validation`):\n  - Attempts to build the application\n  - Validates all endpoints are accessible\n  - Creates deployment preview artifacts\n  - Posts build status as PR comment (must include keywords: \"Build Validation\")\n\n**IMPORTANT: All four jobs must run in parallel.**\n\n## Implementation Requirements:\n\n**Step 1: Create Feature Branch**\nCreate a new branch called `pr-automation-workflow` from main.\n\n**Step 2: Create the Workflow**\nCreate `.github/workflows/pr-automation.yml` with proper YAML syntax:\n- Appropriate triggers for pull_request events\n- All four jobs configured to run in parallel\n- Avoid identifier conflicts in github-script actions\n\n**Step 3: Create and Merge Pull Request**\nCreate a comprehensive pull request and merge it to main:\n- Title: \"Implement Pull Request Automation Workflow\"\n- Detailed description of the workflow and its purpose\n- Merge the pull request to main branch\n\n## Important Notes:\n\n- **All jobs MUST run in parallel**\n- Ensure your PR satisfies ALL required checks\n- The workflow should handle edge cases, have proper error recovery, and provide clear logging"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/pr_automation_workflow/meta.json",
    "content": "{\n  \"task_id\": \"pr_automation_workflow\",\n  \"task_name\": \"Pr Automation Workflow\",\n  \"category_id\": \"mcpmark-cicd\",\n  \"category_name\": \"MCPMark CI/CD\",\n  \"description\": \"Create comprehensive PR automation with parallel jobs for code quality, testing, security scanning, and build validation.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"pr workflows\",\n    \"ci/cd automation\",\n    \"workflow automation\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/mcpmark-cicd\",\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/github/standard/mcpmark-cicd/pr_automation_workflow/verify.py",
    "content": "import sys\nimport os\nimport requests\nimport time\nfrom typing import Dict, List, Optional, Tuple\nfrom dotenv import load_dotenv\nimport base64\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{owner}/{repo}/{endpoint}\"\n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _post_github_api(\n    endpoint: str, headers: Dict[str, str], owner: str, repo: str, data: Dict\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a POST request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{owner}/{repo}/{endpoint}\"\n    try:\n        response = requests.post(url, headers=headers, json=data)\n        if response.status_code in [200, 201]:\n            return True, response.json()\n        else:\n            print(\n                f\"API error for {endpoint}: {response.status_code} - {response.text}\",\n                file=sys.stderr,\n            )\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _patch_github_api(\n    endpoint: str, headers: Dict[str, str], owner: str, repo: str, data: Dict\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a PATCH request to GitHub API and return (success, response).\"\"\"\n    url = 
f\"https://api.github.com/repos/{owner}/{repo}/{endpoint}\"\n    try:\n        response = requests.patch(url, headers=headers, json=data)\n        if response.status_code == 200:\n            return True, response.json()\n        else:\n            print(\n                f\"API error for {endpoint}: {response.status_code} - {response.text}\",\n                file=sys.stderr,\n            )\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _get_file_content(\n    file_path: str,\n    headers: Dict[str, str],\n    owner: str,\n    repo: str,\n    ref: str = \"main\",\n) -> Optional[str]:\n    \"\"\"Get the content of a file from the repository.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={ref}\", headers, owner, repo\n    )\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef _find_pr_by_title(\n    title: str, headers: Dict[str, str], owner: str, repo: str\n) -> Optional[Dict]:\n    \"\"\"Find a PR by exact title match.\"\"\"\n    for state in [\"closed\", \"open\"]:\n        success, prs = _get_github_api(\n            f\"pulls?state={state}&per_page=100\", headers, owner, repo\n        )\n        if success and prs:\n            for pr in prs:\n                if pr.get(\"title\") == title:\n                    return pr\n    return None\n\n\ndef _wait_for_workflow_completion(\n    headers: Dict[str, str],\n    owner: str,\n    repo: str,\n    workflow_file: str,\n    max_wait: int = 90,\n) -> bool:\n    \"\"\"Wait for GitHub Actions workflows to complete processing.\"\"\"\n    print(f\"⏳ Waiting for {workflow_file} workflows to complete...\")\n\n    
start_time = time.time()\n    no_workflow_check_count = 0\n\n    while time.time() - start_time < max_wait:\n        try:\n            success, response = _get_github_api(\n                f\"actions/workflows/{workflow_file}/runs?per_page=10\",\n                headers,\n                owner,\n                repo,\n            )\n\n            if success and response:\n                runs = response.get(\"workflow_runs\", [])\n                if len(runs) > 0:\n                    running_count = 0\n                    completed_count = 0\n\n                    for run in runs[:5]:  # Check recent runs\n                        status = run[\"status\"]\n                        if status == \"completed\":\n                            completed_count += 1\n                        elif status in [\"in_progress\", \"queued\"]:\n                            running_count += 1\n\n                    print(\n                        f\"   Status: {completed_count} completed, {running_count} running/queued\"\n                    )\n\n                    if running_count == 0:\n                        print(f\"✅ All {workflow_file} workflows completed.\")\n                        return True\n                else:\n                    # No workflow runs found\n                    no_workflow_check_count += 1\n                    if no_workflow_check_count == 1:\n                        print(\n                            \"   No workflow runs found yet, waiting 5 seconds and checking once more...\"\n                        )\n                        time.sleep(5)\n                        continue\n                    elif no_workflow_check_count >= 2:\n                        print(\n                            f\"⚠️ No workflow runs detected after 2 checks. 
{workflow_file} may not have been triggered.\"\n                        )\n                        print(\"   Continuing with verification...\")\n                        return False\n\n            print(f\"⏳ Still waiting... ({int(time.time() - start_time)}s elapsed)\")\n            time.sleep(10)\n\n        except Exception as e:\n            print(f\"⚠️ Error checking workflow status: {e}\")\n            time.sleep(10)\n\n    print(f\"⚠️ Workflow completion wait timed out after {max_wait}s\")\n    return False\n\n\ndef _verify_workflow_file(\n    headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, List[str]]:\n    \"\"\"Verify that the workflow file exists and has correct content.\"\"\"\n    print(\"\\n📄 Verifying workflow file...\")\n    errors = []\n\n    workflow_content = _get_file_content(\n        \".github/workflows/pr-automation.yml\", headers, owner, repo\n    )\n\n    if not workflow_content:\n        return False, [\n            \"Workflow file .github/workflows/pr-automation.yml not found in main branch\"\n        ]\n\n    print(\"   ✅ Workflow file exists in main branch\")\n\n    # Verify required components\n    required_events = [\"opened\", \"synchronize\", \"reopened\"]\n    required_jobs = [\n        \"code-quality\",\n        \"testing-suite\",\n        \"security-scan\",\n        \"build-validation\",\n    ]\n\n    if \"pull_request:\" not in workflow_content:\n        errors.append(\"Workflow missing pull_request trigger\")\n    else:\n        print(\"   ✅ Pull request trigger found\")\n\n    for event in required_events:\n        if event not in workflow_content:\n            errors.append(f\"Missing event trigger: {event}\")\n\n    if not errors:\n        print(f\"   ✅ Required events found: {required_events}\")\n\n    for job in required_jobs:\n        if f\"{job}:\" not in workflow_content:\n            errors.append(f\"Missing job: {job}\")\n\n    if not errors:\n        print(f\"   ✅ All 4 required jobs found: 
{required_jobs}\")\n\n    return len(errors) == 0, errors\n\n\ndef _verify_main_pr_merged(\n    headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, List[str], Optional[Dict]]:\n    \"\"\"Verify that the main PR implementing the workflow was merged.\"\"\"\n    print(\"\\n🔍 Verifying main PR was merged...\")\n    errors = []\n\n    pr = _find_pr_by_title(\n        \"Implement Pull Request Automation Workflow\", headers, owner, repo\n    )\n\n    if not pr:\n        return (\n            False,\n            [\"Main PR 'Implement Pull Request Automation Workflow' not found\"],\n            None,\n        )\n\n    pr_number = pr[\"number\"]\n    print(f\"   Found PR #{pr_number}\")\n\n    if not pr.get(\"merged_at\", False):\n        errors.append(f\"PR #{pr_number} was not merged\")\n    else:\n        print(f\"   ✅ PR #{pr_number} was merged\")\n\n    if pr.get(\"head\", {}).get(\"ref\") != \"pr-automation-workflow\":\n        errors.append(f\"PR #{pr_number} was not from pr-automation-workflow branch\")\n    else:\n        print(\"   ✅ PR was from pr-automation-workflow branch\")\n\n    if pr.get(\"base\", {}).get(\"ref\") != \"main\":\n        errors.append(f\"PR #{pr_number} was not merged to main branch\")\n    else:\n        print(\"   ✅ PR was merged to main branch\")\n\n    return len(errors) == 0, errors, pr\n\n\ndef _verify_workflow_runs(\n    pr_data: Dict, headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, List[str]]:\n    \"\"\"Verify that workflow runs occurred for the PR and all 4 jobs ran in parallel.\"\"\"\n    print(\"\\n⚙️ Verifying workflow runs...\")\n    errors = []\n\n    pr_number = pr_data[\"number\"]\n\n    # Get workflow runs for the PR\n    success, runs_response = _get_github_api(\n        \"actions/runs?event=pull_request&per_page=50\", headers, owner, repo\n    )\n\n    if not success:\n        return False, [\"Failed to fetch workflow runs\"]\n\n    pr_runs = []\n    pr_head_sha = pr_data.get(\"head\", 
{}).get(\"sha\")\n\n    for run in runs_response.get(\"workflow_runs\", []):\n        # Method 1: Check if this run is associated with the PR's head SHA\n        if pr_head_sha and run.get(\"head_sha\") == pr_head_sha:\n            pr_runs.append(run)\n            continue\n\n        # Method 2: Check pull_requests field (may be empty for merged PRs)\n        for pr in run.get(\"pull_requests\", []):\n            if pr.get(\"number\") == pr_number:\n                pr_runs.append(run)\n                break\n\n    if not pr_runs:\n        # Try alternative approach: get runs by head branch\n        pr_head_ref = pr_data.get(\"head\", {}).get(\"ref\")\n        if pr_head_ref:\n            success, branch_runs = _get_github_api(\n                f\"actions/runs?branch={pr_head_ref}&per_page=50\", headers, owner, repo\n            )\n            if success:\n                pr_runs = branch_runs.get(\"workflow_runs\", [])\n\n    if not pr_runs:\n        return False, [\n            f\"No workflow runs found for PR #{pr_number} (head_sha: {pr_head_sha})\"\n        ]\n\n    print(f\"   Found {len(pr_runs)} workflow run(s) for PR #{pr_number}\")\n\n    # Check the most recent run\n    latest_run = pr_runs[0]  # GitHub returns runs in descending order by creation time\n    run_id = latest_run[\"id\"]\n\n    if latest_run[\"conclusion\"] != \"success\":\n        errors.append(\n            f\"Latest workflow run {run_id} did not succeed (conclusion: {latest_run['conclusion']})\"\n        )\n    else:\n        print(f\"   ✅ Latest workflow run {run_id} succeeded\")\n\n    # Get jobs for this run\n    success, jobs_response = _get_github_api(\n        f\"actions/runs/{run_id}/jobs\", headers, owner, repo\n    )\n\n    if not success:\n        return False, [\"Failed to fetch workflow jobs\"]\n\n    jobs = jobs_response.get(\"jobs\", [])\n    expected_jobs = [\n        \"code-quality\",\n        \"testing-suite\",\n        \"security-scan\",\n        \"build-validation\",\n   
 ]\n\n    found_jobs = [job[\"name\"] for job in jobs]\n    missing_jobs = [job for job in expected_jobs if job not in found_jobs]\n\n    if missing_jobs:\n        errors.append(f\"Missing jobs: {missing_jobs}. Found: {found_jobs}\")\n    else:\n        print(f\"   ✅ All 4 required jobs found: {found_jobs}\")\n\n    # Verify all jobs succeeded\n    failed_jobs = [job[\"name\"] for job in jobs if job[\"conclusion\"] != \"success\"]\n    if failed_jobs:\n        errors.append(f\"Failed jobs: {failed_jobs}\")\n    else:\n        print(\"   ✅ All jobs completed successfully\")\n\n    # Verify jobs ran in parallel (started around the same time)\n    if len(jobs) >= 4:\n        start_times = [job[\"started_at\"] for job in jobs if job[\"started_at\"]]\n        if len(start_times) >= 4:\n            # Check if all jobs started within 2 minutes of each other\n            import datetime\n\n            start_dt = [\n                datetime.datetime.fromisoformat(t.replace(\"Z\", \"+00:00\"))\n                for t in start_times\n            ]\n            time_diff = max(start_dt) - min(start_dt)\n            if time_diff.total_seconds() > 120:  # 2 minutes\n                errors.append(\n                    f\"Jobs did not run in parallel (time span: {time_diff.total_seconds()}s)\"\n                )\n            else:\n                print(\"   ✅ Jobs ran in parallel\")\n        else:\n            errors.append(\"Not enough job start times to verify parallel execution\")\n\n    return len(errors) == 0, errors\n\n\ndef _verify_pr_comments(\n    pr_data: Dict, headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, List[str]]:\n    \"\"\"Verify that PR has required automation comments from GitHub Actions bot.\"\"\"\n    print(\"\\n💬 Verifying PR comments...\")\n    errors = []\n\n    pr_number = pr_data[\"number\"]\n\n    success, comments = _get_github_api(\n        f\"issues/{pr_number}/comments\", headers, owner, repo\n    )\n\n    if not success:\n        
return False, [\"Failed to fetch PR comments\"]\n\n    # Filter for GitHub Actions bot comments only\n    bot_comments = [\n        comment\n        for comment in comments\n        if comment.get(\"user\", {}).get(\"login\") == \"github-actions[bot]\"\n    ]\n\n    if not bot_comments:\n        return False, [\"No comments found from GitHub Actions bot\"]\n\n    print(f\"   Found {len(bot_comments)} comment(s) from GitHub Actions bot\")\n\n    # Get all bot comment bodies\n    bot_comment_bodies = [comment.get(\"body\", \"\") for comment in bot_comments]\n\n    # Define required automation reports with their keywords\n    required_reports = [\n        {\n            \"name\": \"Code Quality Report\",\n            \"main_keywords\": [\"Code Quality Report\"],\n            \"sub_keywords\": [\"ESLint\", \"Prettier\"],\n            \"found\": False,\n        },\n        {\n            \"name\": \"Test Coverage Report\",\n            \"main_keywords\": [\"Test Coverage Report\"],\n            \"sub_keywords\": [],\n            \"found\": False,\n        },\n        {\n            \"name\": \"Security Scan Report\",\n            \"main_keywords\": [\"Security Scan Report\"],\n            \"sub_keywords\": [\"Vulnerabilities\", \"Dependencies\"],\n            \"found\": False,\n        },\n        {\n            \"name\": \"Build Validation Report\",\n            \"main_keywords\": [\"Build Validation\"],\n            \"sub_keywords\": [],\n            \"found\": False,\n        },\n    ]\n\n    # Check each bot comment for the required reports\n    for comment_body in bot_comment_bodies:\n        for report in required_reports:\n            # Check if this comment contains any of the main keywords for this report\n            if any(keyword in comment_body for keyword in report[\"main_keywords\"]):\n                if not report[\"found\"]:  # Only mark as found once\n                    report[\"found\"] = True\n                    print(f\"   ✅ Found 
{report['name']}\")\n\n                    # Verify sub-keywords are present in this specific comment\n                    for sub_keyword in report[\"sub_keywords\"]:\n                        if sub_keyword not in comment_body:\n                            errors.append(\n                                f\"Missing sub-keyword '{sub_keyword}' in {report['name']}\"\n                            )\n                        else:\n                            print(\n                                f\"   ✅ Found sub-keyword '{sub_keyword}' in {report['name']}\"\n                            )\n\n    # Check if all required reports were found\n    for report in required_reports:\n        if not report[\"found\"]:\n            errors.append(f\"Missing {report['name']} from GitHub Actions bot\")\n\n    # Verify we have exactly 4 automation reports\n    found_reports = sum(1 for report in required_reports if report[\"found\"])\n    if found_reports != 4:\n        errors.append(f\"Expected 4 automation reports, but found {found_reports}\")\n    else:\n        print(\"   ✅ All 4 required automation reports found from GitHub Actions bot\")\n\n    return len(errors) == 0, errors\n\n\ndef _create_test_pr(\n    title: str,\n    branch: str,\n    content: str,\n    file_path: str,\n    headers: Dict[str, str],\n    owner: str,\n    repo: str,\n) -> Optional[int]:\n    \"\"\"Create a test PR with specific content designed to fail a check.\"\"\"\n    print(f\"   Creating test PR: {title}\")\n\n    # Create branch\n    success, main_ref = _get_github_api(\"git/ref/heads/main\", headers, owner, repo)\n    if not success:\n        print(\"   ❌ Failed to get main branch reference\")\n        return None\n\n    main_sha = main_ref[\"object\"][\"sha\"]\n\n    branch_data = {\"ref\": f\"refs/heads/{branch}\", \"sha\": main_sha}\n\n    success, _ = _post_github_api(\"git/refs\", headers, owner, repo, branch_data)\n    if not success:\n        # Branch might already exist, try to delete and 
recreate\n        print(f\"   Branch {branch} already exists, trying to delete and recreate...\")\n        import requests\n\n        # Force delete existing branch\n        delete_url = (\n            f\"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch}\"\n        )\n        delete_response = requests.delete(delete_url, headers=headers)\n\n        if delete_response.status_code == 204:\n            print(f\"   Successfully deleted existing branch {branch}\")\n            # Wait a moment for deletion to complete\n            import time\n\n            time.sleep(2)\n\n            # Try creating again\n            success, _ = _post_github_api(\"git/refs\", headers, owner, repo, branch_data)\n            if not success:\n                print(f\"   ❌ Failed to create branch {branch} after cleanup\")\n                return None\n            else:\n                print(f\"   ✅ Successfully created branch {branch} after cleanup\")\n        else:\n            print(\n                f\"   ❌ Failed to delete existing branch {branch}: {delete_response.status_code}\"\n            )\n            return None\n\n    # Create or update file\n    file_content = base64.b64encode(content.encode()).decode()\n\n    file_data = {\n        \"message\": f\"Test commit for {title}\",\n        \"content\": file_content,\n        \"branch\": branch,\n    }\n\n    # Check if file exists in main branch first\n    success, file_info = _get_github_api(\n        f\"contents/{file_path}?ref=main\", headers, owner, repo\n    )\n    if success and file_info:\n        # File exists, need SHA for update\n        file_data[\"sha\"] = file_info[\"sha\"]\n        print(f\"   File {file_path} exists, updating with SHA\")\n    else:\n        print(f\"   Creating new file {file_path}\")\n\n    # Use PUT method for file creation/update\n    url = f\"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}\"\n    try:\n        import requests\n\n        response = 
requests.put(url, headers=headers, json=file_data)\n        if response.status_code in [200, 201]:\n            print(f\"   ✅ Successfully created/updated file {file_path}\")\n        else:\n            print(\n                f\"   ❌ Failed to create/update file {file_path}: {response.status_code} - {response.text}\"\n            )\n            return None\n    except Exception as e:\n        print(f\"   ❌ Exception creating file {file_path}: {e}\")\n        return None\n\n    # Create PR\n    pr_data = {\n        \"title\": title,\n        \"head\": branch,\n        \"base\": \"main\",\n        \"body\": f\"Test PR to validate that {title.split(':')[1].strip()} check fails correctly.\",\n    }\n\n    success, pr_response = _post_github_api(\"pulls\", headers, owner, repo, pr_data)\n    if not success:\n        print(\"   ❌ Failed to create PR\")\n        return None\n\n    pr_number = pr_response[\"number\"]\n    print(f\"   ✅ Created test PR #{pr_number}\")\n    return pr_number\n\n\ndef _close_pr(pr_number: int, headers: Dict[str, str], owner: str, repo: str) -> bool:\n    \"\"\"Close a PR.\"\"\"\n    success, _ = _patch_github_api(\n        f\"pulls/{pr_number}\", headers, owner, repo, {\"state\": \"closed\"}\n    )\n    return success\n\n\ndef _run_unit_tests(\n    headers: Dict[str, str], owner: str, repo: str\n) -> Tuple[bool, List[str]]:\n    \"\"\"Create test PRs to verify workflow correctly fails on bad code.\"\"\"\n    print(\"\\n🧪 Running unit tests with failing PRs...\")\n    errors = []\n    created_prs = []\n\n    test_cases = [\n        {\n            \"title\": \"Test: Code Quality Failure\",\n            \"branch\": \"test-code-quality-fail\",\n            \"file_path\": \"src/lint-fail-test.js\",\n            \"content\": \"// This file contains intentional ESLint violations\\nvar unused_variable = 'this will trigger unused-vars rule'\\nconsole.log('missing semicolon - will trigger semi rule')\\nconst   badly_spaced   =   'too many 
spaces'\\nif(true){console.log('missing spaces around braces')}\\nfunction unusedFunction() { return 'unused'; }\\neeval('alert(\\\"dangerous eval\\\")');\\nwith (Math) { var x = cos(3 * PI) + sin(LN10) }\\nvar a = 1; var a = 2; // redeclared variable\",\n            \"expected_failure\": \"code-quality\",\n        },\n        {\n            \"title\": \"Test: Testing Suite Failure\",\n            \"branch\": \"test-testing-fail\",\n            \"file_path\": \"tests/fail-test.test.js\",\n            \"content\": \"const request = require('supertest');\\n\\ndescribe('Intentional Test Failures', () => {\\n  test('This test should always fail', () => {\\n    expect(2 + 2).toBe(5); // Intentionally wrong\\n  });\\n  \\n  test('Another failing test', () => {\\n    expect(true).toBe(false); // Intentionally wrong\\n  });\\n  \\n  test('Math failure', () => {\\n    expect(Math.max(1, 2, 3)).toBe(1); // Intentionally wrong\\n  });\\n});\",\n            \"expected_failure\": \"testing-suite\",\n        },\n        {\n            \"title\": \"Test: Security Scan Failure\",\n            \"branch\": \"test-security-fail\",\n            \"file_path\": \"src/security-fail-test.js\",\n            \"content\": \"// This file contains patterns that should trigger secret detection\\nconst hardcodedPassword = 'admin123password';\\nconst fakeApiKey = 'sk_test_' + 'fake123key456here789';\\nconst awsLikeKey = 'AKIA' + 'FAKEKEY7EXAMPLE';\\nconst dbPassword = 'password' + '=' + 'supersecret123';\\nconst tokenPattern = 'token' + '=' + 'ghp_1234567890abcdef';\\n\\n// These patterns should trigger secret detection\\nconsole.log('Password:', hardcodedPassword);\\nconsole.log('API Key:', fakeApiKey);\\nconsole.log('AWS Key:', awsLikeKey);\\nconsole.log('DB Password:', dbPassword);\\nconsole.log('Token:', tokenPattern);\\n\\nmodule.exports = {\\n  password: hardcodedPassword,\\n  apiKey: fakeApiKey\\n};\",\n            \"expected_failure\": \"security-scan\",\n        },\n        {\n           
 \"title\": \"Test: Build Validation Failure\",\n            \"branch\": \"test-build-fail\",\n            \"file_path\": \"src/build-fail-test.js\",\n            \"content\": \"// This file will cause build/startup failures\\nconst express = require('express');\\nconst nonExistentModule = require('this-module-does-not-exist-anywhere');\\nconst anotherMissing = require('@fake/missing-package');\\n\\n// This will cause runtime errors during startup\\nconst app = express();\\n\\n// Define a route that will cause issues\\napp.get('/test', (req, res) => {\\n  // Try to use non-existent modules\\n  nonExistentModule.doSomething();\\n  anotherMissing.initialize();\\n  res.send('This should never work');\\n});\\n\\n// Override the listen method to always fail\\nconst originalListen = app.listen;\\napp.listen = function(port, callback) {\\n  console.log('Attempting to start server...');\\n  // This will crash during build validation\\n  throw new Error('Intentional build failure for testing');\\n};\\n\\nmodule.exports = app;\",\n            \"expected_failure\": \"build-validation\",\n        },\n    ]\n\n    for test_case in test_cases:\n        pr_number = _create_test_pr(\n            test_case[\"title\"],\n            test_case[\"branch\"],\n            test_case[\"content\"],\n            test_case[\"file_path\"],\n            headers,\n            owner,\n            repo,\n        )\n\n        if pr_number:\n            created_prs.append(pr_number)\n        else:\n            errors.append(f\"Failed to create test PR: {test_case['title']}\")\n\n    if created_prs:\n        print(f\"   Created {len(created_prs)} test PRs, waiting for workflows...\")\n\n        # Wait a bit for workflows to start\n        time.sleep(5)\n\n        # Wait for workflows to complete\n        _wait_for_workflow_completion(\n            headers, owner, repo, \"pr-automation.yml\", max_wait=90\n        )\n\n        # Verify each test PR failed appropriately\n        for i, pr_number in 
enumerate(created_prs):\n            test_case = test_cases[i]\n            print(\n                f\"   Checking test PR #{pr_number} ({test_case['expected_failure']} failure)...\"\n            )\n\n            # Get workflow runs for this PR\n            success, runs_response = _get_github_api(\n                \"actions/runs?event=pull_request&per_page=20\", headers, owner, repo\n            )\n\n            if success:\n                pr_runs = []\n                for run in runs_response.get(\"workflow_runs\", []):\n                    # Check pull_requests field\n                    for pr in run.get(\"pull_requests\", []):\n                        if pr.get(\"number\") == pr_number:\n                            pr_runs.append(run)\n                            break\n\n                # If no runs found via pull_requests, try matching by branch\n                if not pr_runs:\n                    branch_name = test_case[\"branch\"]\n                    for run in runs_response.get(\"workflow_runs\", []):\n                        if run.get(\"head_branch\") == branch_name:\n                            pr_runs.append(run)\n\n                if pr_runs:\n                    latest_run = pr_runs[0]\n                    if latest_run[\"conclusion\"] != \"failure\":\n                        errors.append(\n                            f\"Test PR #{pr_number} should have failed but got: {latest_run['conclusion']}\"\n                        )\n                    else:\n                        print(f\"   ✅ Test PR #{pr_number} correctly failed\")\n                else:\n                    errors.append(f\"No workflow runs found for test PR #{pr_number}\")\n\n        # Clean up test PRs and branches\n        print(\"   Cleaning up test PRs and branches...\")\n        for i, pr_number in enumerate(created_prs):\n            if _close_pr(pr_number, headers, owner, repo):\n                print(f\"   ✅ Closed test PR #{pr_number}\")\n            else:\n              
  print(f\"   ⚠️ Failed to close test PR #{pr_number}\")\n\n            # Delete test branch\n            branch_name = test_cases[i][\"branch\"]\n            import requests\n\n            url = f\"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch_name}\"\n            response = requests.delete(url, headers=headers)\n            if response.status_code == 204:\n                print(f\"   ✅ Deleted test branch {branch_name}\")\n            else:\n                print(f\"   ⚠️ Failed to delete test branch {branch_name}\")\n\n    return len(errors) == 0, errors\n\n\ndef verify() -> bool:\n    \"\"\"\n    Verify that the PR automation workflow is working correctly.\n    \"\"\"\n    load_dotenv(\".mcp_env\")\n\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    owner = github_org\n    repo = \"mcpmark-cicd\"\n\n    headers = {\n        \"Authorization\": f\"token {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    print(\"🔍 Starting PR Automation Workflow Verification\")\n    print(\"=\" * 60)\n\n    all_passed = True\n\n    # 1. Verify workflow file exists\n    workflow_ok, workflow_errors = _verify_workflow_file(headers, owner, repo)\n    if not workflow_ok:\n        all_passed = False\n        print(\"❌ Workflow File Verification Failed:\")\n        for error in workflow_errors:\n            print(f\"   - {error}\")\n    else:\n        print(\"✅ Workflow File Verification Passed\")\n\n    # 2. 
Verify main PR was merged\n    pr_ok, pr_errors, pr_data = _verify_main_pr_merged(headers, owner, repo)\n    if not pr_ok:\n        all_passed = False\n        print(\"❌ Main PR Verification Failed:\")\n        for error in pr_errors:\n            print(f\"   - {error}\")\n    else:\n        print(\"✅ Main PR Verification Passed\")\n\n    # 3. Verify workflow runs (only if PR verification passed)\n    if pr_ok and pr_data:\n        runs_ok, runs_errors = _verify_workflow_runs(pr_data, headers, owner, repo)\n        if not runs_ok:\n            all_passed = False\n            print(\"❌ Workflow Runs Verification Failed:\")\n            for error in runs_errors:\n                print(f\"   - {error}\")\n        else:\n            print(\"✅ Workflow Runs Verification Passed\")\n\n        # 4. Verify PR comments\n        comments_ok, comments_errors = _verify_pr_comments(\n            pr_data, headers, owner, repo\n        )\n        if not comments_ok:\n            all_passed = False\n            print(\"❌ PR Comments Verification Failed:\")\n            for error in comments_errors:\n                print(f\"   - {error}\")\n        else:\n            print(\"✅ PR Comments Verification Passed\")\n\n    # 5. 
Run unit tests with failing PRs\n    tests_ok, tests_errors = _run_unit_tests(headers, owner, repo)\n    if not tests_ok:\n        all_passed = False\n        print(\"❌ Unit Tests Failed:\")\n        for error in tests_errors:\n            print(f\"   - {error}\")\n    else:\n        print(\"✅ Unit Tests Passed\")\n\n    print(\"\\n\" + \"=\" * 60)\n    if all_passed:\n        print(\"🎉 All PR Automation Workflow verifications PASSED!\")\n        print(\"\\n📋 Summary:\")\n        print(\"   ✅ Workflow file exists with correct triggers and 4 parallel jobs\")\n        print(\"   ✅ Main PR was merged from pr-automation-workflow to main\")\n        print(\"   ✅ Workflow runs show all 4 jobs executed in parallel and succeeded\")\n        print(\"   ✅ PR comments contain required automation reports\")\n        print(\"   ✅ Unit tests confirmed workflow correctly fails on problematic code\")\n        print(\"\\n🤖 The GitHub Actions PR automation workflow is working correctly!\")\n    else:\n        print(\"❌ PR Automation Workflow verification FAILED!\")\n        print(\"   Some components did not meet the expected automation requirements.\")\n\n    return all_passed\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/missing-semester/assign_contributor_labels/description.md",
    "content": "Assign assignees for each open issue and open PR by adding labels instead of using direct assignees. Only contributors who appeared in the past 100 commits are considered. First, collect all such contributors and identify the most frequent author among them. For each open issue or PR, assign using labels according to the following rules:\n\t•\tIf the comments mention an author with @username, add a label in the format assigned-username.\n\t•\tIf multiple authors are mentioned, add labels in the same format for all of them.\n\t•\tIf no authors are mentioned in the comments, add a label for the most frequent contributor from the past 100 commits, using the format assigned-username."
  },
  {
    "path": "tasks/github/standard/missing-semester/assign_contributor_labels/meta.json",
    "content": "{\n  \"task_id\": \"assign_contributor_labels\",\n  \"task_name\": \"Assign Contributor Labels\",\n  \"category_id\": \"missing-semester\",\n  \"category_name\": \"Missing Semester\",\n  \"description\": \"Assign labels to open issues and PRs based on contributors mentioned in comments or the most frequent contributor from past 100 commits, using assigned-username format.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"issue management\",\n    \"label automation\",\n    \"contributor analysis\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/missing-semester\",\n    \"stateOriginalUrl\": \"https://github.com/missing-semester/missing-semester\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/missing-semester/assign_contributor_labels/verify.py",
    "content": "import sys\nimport os\nimport requests\nfrom typing import Dict, Optional, Tuple, List\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"missing-semester\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    \n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _get_issue_labels(\n    issue_number: int,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"missing-semester\"\n) -> Optional[List[str]]:\n    \"\"\"Get labels for a specific issue/PR.\"\"\"\n    success, result = _get_github_api(f\"issues/{issue_number}\", headers, org, repo)\n    if not success or not result:\n        return None\n    \n    labels = result.get(\"labels\", [])\n    return [label[\"name\"] for label in labels]\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the labels were assigned correctly to issues and PRs.\n    \"\"\"\n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", 
file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    print(\"Verifying contributor labels assignment task completion...\")\n\n    # Expected labels configuration\n    expected_labels = {\n        # Issues\n        9: [\"assigned-jonhoo\", \"assigned-anishathalye\"],  # Issue #9\n        14: [\"assigned-jonhoo\", \"assigned-anishathalye\"],  # Issue #14\n        15: [\"assigned-anishathalye\"],  # Issue #15\n        # PRs\n        21: [\"assigned-anishathalye\"],  # PR #21\n        22: [\"assigned-anishathalye\"],  # PR #22\n        23: [\"assigned-anishathalye\"],  # PR #23\n        24: [\"assigned-anishathalye\"],  # PR #24\n    }\n\n    all_passed = True\n\n    for item_number, expected in expected_labels.items():\n        item_type = \"Issue\" if item_number in [9, 14, 15] else \"PR\"\n        print(f\"\\nChecking {item_type} #{item_number}...\")\n        \n        labels = _get_issue_labels(item_number, headers, github_org, \"missing-semester\")\n        \n        if labels is None:\n            print(f\"  ❌ Failed to retrieve {item_type} #{item_number}\", file=sys.stderr)\n            all_passed = False\n            continue\n        \n        # Sort both lists for comparison\n        labels_sorted = sorted(labels)\n        expected_sorted = sorted(expected)\n        \n        if labels_sorted == expected_sorted:\n            print(f\"  ✅ {item_type} #{item_number} has correct labels: {labels_sorted}\")\n        else:\n            print(f\"  ❌ {item_type} #{item_number} has incorrect labels\", file=sys.stderr)\n            print(f\"     Expected: {expected_sorted}\", file=sys.stderr)\n            print(f\"     Found: {labels_sorted}\", file=sys.stderr)\n            all_passed = False\n\n    if all_passed:\n        print(\"\\n✅ All verification checks passed!\")\n        print(\"Contributor labels assignment task completed 
successfully:\")\n        print(\"  - Issues #9 and #14 have both 'assigned-jonhoo' and 'assigned-anishathalye' labels\")\n        print(\"  - Issue #15 and all 4 open PRs have 'assigned-anishathalye' label\")\n    else:\n        print(\"\\n❌ Some verification checks failed\", file=sys.stderr)\n\n    return all_passed\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)"
  },
  {
    "path": "tasks/github/standard/missing-semester/find_legacy_name/description.md",
    "content": "I remember that a long time ago, *The Missing Semester of Your CS Education* had a different name and domain. There should be some related commit history. Please find the old name and domain and create an **ANSWER.md** file with them, formatted as:\n\n[title](url)\n\nThen push the file to the `master` branch."
  },
  {
    "path": "tasks/github/standard/missing-semester/find_legacy_name/meta.json",
    "content": "{\n  \"task_id\": \"find_legacy_name\",\n  \"task_name\": \"Find Legacy Name\",\n  \"category_id\": \"missing-semester\",\n  \"category_name\": \"Missing Semester\",\n  \"description\": \"Find the old name and domain of The Missing Semester course from commit history and document the findings.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"repository analysis\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/missing-semester\",\n    \"stateOriginalUrl\": \"https://github.com/missing-semester/missing-semester\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/missing-semester/find_legacy_name/verify.py",
    "content": "import sys\nimport os\nimport requests\nimport base64\nfrom typing import Dict, Optional, Tuple\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"missing-semester\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    \n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _get_file_content(\n    file_path: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"missing-semester\",\n    ref: str = \"master\",\n) -> Optional[str]:\n    \"\"\"Get the content of a file from the repository.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={ref}\", headers, org, repo\n    )\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the legacy name finding task was completed correctly.\n    Checks for ANSWER.md file in master branch with the correct content.\n    \"\"\"\n    # Expected answer content (accept both with and without trailing slash)\n    EXPECTED_CONTENTS = {\n        \"[Hacker Tools](https://hacker-tools.github.io)\",\n        \"[Hacker 
Tools](https://hacker-tools.github.io/)\",\n    }\n    \n    # Load environment variables from .mcp_env\n    load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    print(\"Verifying legacy name finding task completion...\")\n\n    # 1. Check that ANSWER.md exists in master branch\n    print(\"1. Checking ANSWER.md exists in master branch...\")\n    answer_content = _get_file_content(\"ANSWER.md\", headers, github_org, \"missing-semester\", \"master\")\n    \n    if not answer_content:\n        print(\"Error: ANSWER.md not found in master branch\", file=sys.stderr)\n        return False\n\n    print(\"✓ ANSWER.md found in master branch\")\n\n    # 2. Check that the content matches expected answer\n    print(\"2. 
Verifying ANSWER.md content...\")\n    answer_content = answer_content.strip()\n    \n    if answer_content not in EXPECTED_CONTENTS:\n        print(f\"Error: ANSWER.md content does not match expected answer(s)\", file=sys.stderr)\n        print(f\"Expected one of: {sorted(EXPECTED_CONTENTS)}\", file=sys.stderr)\n        print(f\"Found: {answer_content}\", file=sys.stderr)\n        return False\n\n    print(\"✓ ANSWER.md contains correct legacy name and URL\")\n\n    print(\"\\n✅ All verification checks passed!\")\n    print(\"Legacy name finding task completed successfully:\")\n    print(f\"  - ANSWER.md created in master branch\")\n    print(f\"  - Content accepted: {answer_content}\")\n\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/github/standard/missing-semester/find_salient_file/description.md",
    "content": "I want to know which file has been modified most frequently in the past 100 commits. However, I don't want to consider files related to GitHub Actions.\nPlease find the file and create an ANSWER.md, then write the file name in it."
  },
  {
    "path": "tasks/github/standard/missing-semester/find_salient_file/meta.json",
    "content": "{\n  \"task_id\": \"find_salient_file\",\n  \"task_name\": \"Find Salient File\",\n  \"category_id\": \"missing-semester\",\n  \"category_name\": \"Missing Semester\",\n  \"description\": \"Identify the most frequently modified file in the past 100 commits, excluding GitHub Actions related files, and create an ANSWER.md with the file name.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"commit analysis\",\n    \"file tracking\",\n    \"git history\"\n  ],\n  \"mcp\": [\n    \"github\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://github.com/mcpmark-source/missing-semester\",\n    \"stateOriginalUrl\": \"https://github.com/missing-semester/missing-semester\"\n  }\n}"
  },
  {
    "path": "tasks/github/standard/missing-semester/find_salient_file/verify.py",
    "content": "import sys\nimport os\nimport requests\nimport base64\nfrom typing import Dict, Optional, Tuple\nfrom dotenv import load_dotenv\n\n\ndef _get_github_api(\n    endpoint: str, headers: Dict[str, str], org: str, repo: str = \"missing-semester\"\n) -> Tuple[bool, Optional[Dict]]:\n    \"\"\"Make a GET request to GitHub API and return (success, response).\"\"\"\n    url = f\"https://api.github.com/repos/{org}/{repo}/{endpoint}\"\n    \n    try:\n        response = requests.get(url, headers=headers)\n        if response.status_code == 200:\n            return True, response.json()\n        elif response.status_code == 404:\n            return False, None\n        else:\n            print(f\"API error for {endpoint}: {response.status_code}\", file=sys.stderr)\n            return False, None\n    except Exception as e:\n        print(f\"Exception for {endpoint}: {e}\", file=sys.stderr)\n        return False, None\n\n\ndef _get_file_content(\n    file_path: str,\n    headers: Dict[str, str],\n    org: str,\n    repo: str = \"missing-semester\",\n    ref: str = \"master\",\n) -> Optional[str]:\n    \"\"\"Get the content of a file from the repository.\"\"\"\n    success, result = _get_github_api(\n        f\"contents/{file_path}?ref={ref}\", headers, org, repo\n    )\n    if not success or not result:\n        return None\n\n    try:\n        content = base64.b64decode(result.get(\"content\", \"\")).decode(\"utf-8\")\n        return content\n    except Exception as e:\n        print(f\"Content decode error for {file_path}: {e}\", file=sys.stderr)\n        return None\n\n\ndef verify() -> bool:\n    \"\"\"\n    Programmatically verify that the most frequently modified file was identified correctly.\n    Checks for ANSWER.md file in master branch with the correct content.\n    \"\"\"\n    # Expected answer content (excluding GitHub Actions files)\n    EXPECTED_CONTENT = \"index.md\"\n    \n    # Load environment variables from .mcp_env\n    
load_dotenv(\".mcp_env\")\n\n    # Get GitHub token and org\n    github_token = os.environ.get(\"MCP_GITHUB_TOKEN\")\n    github_org = os.environ.get(\"GITHUB_EVAL_ORG\")\n\n    if not github_token:\n        print(\"Error: MCP_GITHUB_TOKEN environment variable not set\", file=sys.stderr)\n        return False\n\n    if not github_org:\n        print(\"Error: GITHUB_EVAL_ORG environment variable not set\", file=sys.stderr)\n        return False\n\n    headers = {\n        \"Authorization\": f\"Bearer {github_token}\",\n        \"Accept\": \"application/vnd.github.v3+json\",\n    }\n\n    # Run verification checks\n    print(\"Verifying salient file identification task completion...\")\n\n    # 1. Check that ANSWER.md exists in master branch\n    print(\"1. Checking ANSWER.md exists in master branch...\")\n    answer_content = _get_file_content(\"ANSWER.md\", headers, github_org, \"missing-semester\", \"master\")\n    \n    if not answer_content:\n        print(\"Error: ANSWER.md not found in master branch\", file=sys.stderr)\n        return False\n\n    print(\"✅ ANSWER.md found in master branch\")\n\n    # 2. Check that the content matches expected answer\n    print(\"2. Verifying ANSWER.md content...\")\n    answer_content = answer_content.strip()\n    \n    if answer_content != EXPECTED_CONTENT:\n        print(f\"Error: ANSWER.md content does not match expected answer\", file=sys.stderr)\n        print(f\"Expected: {EXPECTED_CONTENT}\", file=sys.stderr)\n        print(f\"Found: {answer_content}\", file=sys.stderr)\n        return False\n\n    print(\"✅ ANSWER.md contains correct filename\")\n\n    print(\"\\n✅ All verification checks passed!\")\n    print(\"Salient file identification task completed successfully:\")\n    print(f\"  - ANSWER.md created in master branch\")\n    print(f\"  - Content: {EXPECTED_CONTENT}\")\n\n    return True\n\n\nif __name__ == \"__main__\":\n    success = verify()\n    sys.exit(0 if success else 1)"
  },
  {
    "path": "tasks/notion/easy/.gitkeep",
    "content": ""
  },
  {
    "path": "tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/description.md",
    "content": "Find the page named \"Computer Science Student Dashboard\" and extend the **Code Snippets** section with Go content.\n\n**Task Requirements:**\n1. Add a bold paragraph that contains exactly the text `Go` to mark the start of the Go snippets.\n2. Directly under that heading, add three code blocks configured with `language` set to **go**:\n   a. **Basic Go program** – Caption must be `Basic Go program` and the code content must be exactly:\n   ```go\n   package main\n\n   import \"fmt\"\n\n   func main() {\n       fmt.Println(\"Hello, World!\")\n   }\n   ```\n   b. **For loop in Go** – Caption must be `For loop in Go` and the code content must be exactly:\n   ```go\n   for i := 0; i < 5; i++ {\n       fmt.Println(i)\n   }\n   ```\n   c. **Function definition in Go** – Caption must be `Function definition in Go` and the code content must be exactly:\n   ```go\n   func add(a, b int) int {\n       return a + b\n   }\n   ```\n"
  },
  {
    "path": "tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/meta.json",
    "content": "{\n  \"task_id\": \"simple__code_snippets_go\",\n  \"task_name\": \"Simple Code Snippets Go\",\n  \"category_id\": \"computer_science_student_dashboard\",\n  \"category_name\": \"Computer Science Student Dashboard\",\n  \"description\": \"Add a new Go column to the Code Snippets section between Python and JavaScript columns.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"content organization\",\n    \"visual formatting\",\n    \"template population\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Computer-Science-Student-Dashboard-23e81626b6d78083b787d3c832b02ef4\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/computer-science-student-dashboard\"\n  }\n}\n"
  },
  {
    "path": "tasks/notion/easy/computer_science_student_dashboard/simple__code_snippets_go/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n# Expected code blocks (language=go)\nEXPECTED_CODE_BLOCKS = [\n    {\n        \"caption\": \"Basic Go program\",\n        \"code\": (\n            'package main\\n\\nimport \"fmt\"\\n\\nfunc main() {\\n    fmt.Println(\"Hello, World!\")\\n}'\n        ),\n    },\n    {\n        \"caption\": \"For loop in Go\",\n        \"code\": (\"for i := 0; i < 5; i++ {\\n    fmt.Println(i)\\n}\"),\n    },\n    {\n        \"caption\": \"Function definition in Go\",\n        \"code\": (\"func add(a, b int) int {\\n    return a + b\\n}\"),\n    },\n]\n\nHEADER_TEXT = \"Go\"\n\n\ndef _normalize(text: str) -> str:\n    \"\"\"Remove trailing spaces on each line and strip leading/trailing blank lines.\"\"\"\n    return \"\\n\".join(line.rstrip() for line in text.strip().splitlines())\n\n\ndef _find_page(notion: Client, main_id: str | None) -> str | None:\n    \"\"\"Return a page_id to verify against or None if not found.\"\"\"\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Computer Science Student Dashboard\")\n    return page_id\n\n\ndef _has_bold_header_text(block, text: str) -> bool:\n    \"\"\"Generic bold header/paragraph check for a given text.\"\"\"\n    block_type = block.get(\"type\")\n    if block_type not in {\"paragraph\", \"heading_1\", \"heading_2\", \"heading_3\"}:\n        return False\n    rich_text_list = block.get(block_type, {}).get(\"rich_text\", [])\n    if not rich_text_list:\n        return False\n    plain = \"\".join(rt.get(\"plain_text\", \"\") for rt in rich_text_list).strip()\n    if plain != text:\n        return False\n    return any(rt.get(\"annotations\", {}).get(\"bold\", False) for rt in 
rich_text_list)\n\n\ndef _collect_code_blocks(blocks):\n    \"\"\"Return list of (code_content, caption) tuples for code blocks with language 'go'.\"\"\"\n    collected = []\n    for block in blocks:\n        if block.get(\"type\") != \"code\":\n            continue\n        code_data = block.get(\"code\", {})\n        if code_data.get(\"language\") != \"go\":\n            continue\n        code_plain = \"\".join(\n            rt.get(\"plain_text\", \"\") for rt in code_data.get(\"rich_text\", [])\n        )\n        caption_plain = \"\".join(\n            rt.get(\"plain_text\", \"\") for rt in code_data.get(\"caption\", [])\n        )\n        collected.append((code_plain, caption_plain))\n    return collected\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    page_id = _find_page(notion, main_id)\n    if not page_id:\n        print(\"Error: Target page not found.\", file=sys.stderr)\n        return False\n\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    # Verify header\n    header_ok = any(_has_bold_header_text(b, HEADER_TEXT) for b in all_blocks)\n    if not header_ok:\n        print(\"Failure: Bold header 'Go' not found.\", file=sys.stderr)\n        return False\n\n    # Verify code blocks\n    code_blocks_found = _collect_code_blocks(all_blocks)\n\n    remaining = EXPECTED_CODE_BLOCKS.copy()\n    for code, caption in code_blocks_found:\n        norm_code = _normalize(code)\n        for expected in remaining:\n            if (\n                _normalize(expected[\"code\"]) == norm_code\n                and expected[\"caption\"] == caption\n            ):\n                remaining.remove(expected)\n                break\n    if remaining:\n        missing = \", \".join(exp[\"caption\"] for exp in remaining)\n        print(\n            f\"Failure: Missing or incorrect Go code blocks: {missing}\", file=sys.stderr\n        )\n        return False\n\n    print(\n        \"Success: Verified Go header and 
required Go code blocks.\"\n    )\n    return True\n\n\ndef main():\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    sys.exit(0 if verify(notion, main_id) else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/description.md",
    "content": "Create a new study-session entry on the **Computer Science Student Dashboard** page.\n\n1. Locate the ☑️ Habit tracker section of the page.\n2. **Insert a new date mention** for `2025-01-29` immediately **after the existing `2022-09-02` items but before the divider block** that follows them. Match the formatting of the existing dates (bold text with a Notion date mention).\n"
  },
  {
    "path": "tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/meta.json",
    "content": "{\n  \"task_id\": \"simple__study_session_tracker\",\n  \"task_name\": \"Simple Study Session Tracker\",\n  \"category_id\": \"computer_science_student_dashboard\",\n  \"category_name\": \"Computer Science Student Dashboard\",\n  \"description\": \"Create a new study-session entry in the Habit tracker section with four unchecked to-do items.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"content organization\",\n    \"visual formatting\",\n    \"status tracking\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Computer-Science-Student-Dashboard-23e81626b6d78083b787d3c832b02ef4\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/computer-science-student-dashboard\"\n  }\n}\n"
  },
  {
    "path": "tasks/notion/easy/computer_science_student_dashboard/simple__study_session_tracker/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    \"\"\"Verify that the new study-session entry for 2025-01-29 was added correctly.\n\n    The script checks that:\n    1. A bold date-mention with start=2025-01-29 exists.\n    2. The mention sits after the 2022-09-02 section but before the divider that originally\n       followed that section.\n    \"\"\"\n\n    # ---------------------------------------------------------------------\n    # Locate the main page -------------------------------------------------\n    # ---------------------------------------------------------------------\n    page_id: str | None = None\n\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Computer Science Student Dashboard\")\n\n    if not page_id:\n        print(\n            \"Error: Page 'Computer Science Student Dashboard' not found.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # ---------------------------------------------------------------------\n    # Fetch all blocks under the page (flattened order) --------------------\n    # ---------------------------------------------------------------------\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    # ---------------------------------------------------------------------\n    # Locate reference blocks ---------------------------------------------\n    # ---------------------------------------------------------------------\n    TARGET_DATE = \"2025-01-29\"\n    PREVIOUS_DATE = \"2022-09-02\"\n\n    index_previous_date: int | None = None\n    index_new_date: int | None = None\n    index_divider_after_previous: int | None = 
None\n\n    for idx, block in enumerate(all_blocks):\n        # Divider detection (we care only about the first divider that appears after\n        # the 2022-09-02 block)\n        if block.get(\"type\") == \"divider\":\n            if index_previous_date is not None and index_divider_after_previous is None:\n                index_divider_after_previous = idx\n\n        # We only need to inspect paragraph blocks that contain a date mention\n        if block.get(\"type\") != \"paragraph\":\n            continue\n\n        rich_text_list = block[\"paragraph\"].get(\"rich_text\", [])\n        for rt in rich_text_list:\n            if (\n                rt.get(\"type\") != \"mention\"\n                or rt.get(\"mention\", {}).get(\"type\") != \"date\"\n            ):\n                continue\n\n            date_start = rt[\"mention\"][\"date\"].get(\"start\")\n\n            if date_start == PREVIOUS_DATE and index_previous_date is None:\n                index_previous_date = idx\n\n            if date_start == TARGET_DATE and index_new_date is None:\n                index_new_date = idx\n                # (1) Verify bold annotation\n                if not rt.get(\"annotations\", {}).get(\"bold\", False):\n                    print(\n                        \"Error: The 2025-01-29 date mention is not bold.\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n    # Ensure all reference indices were found\n    if index_previous_date is None:\n        print(\"Error: Could not locate the 2022-09-02 date section.\", file=sys.stderr)\n        return False\n    if index_divider_after_previous is None:\n        print(\n            \"Error: Could not locate the divider that follows the 2022-09-02 section.\",\n            file=sys.stderr,\n        )\n        return False\n    if index_new_date is None:\n        print(\n            \"Error: Could not locate the new 2025-01-29 date mention.\", file=sys.stderr\n        )\n       
 return False\n\n    # (2) Verify ordering\n    if not (index_previous_date < index_new_date < index_divider_after_previous):\n        print(\n            \"Error: The 2025-01-29 section is positioned incorrectly.\", file=sys.stderr\n        )\n        return False\n\n    # ---------------------------------------------------------------------\n    # Success --------------------------------------------------------------\n    # ---------------------------------------------------------------------\n    print(\"Success: Date mention for 2025-01-29 added in the correct position.\")\n    return True\n\n\n# -------------------------------------------------------------------------\n# Command-line entry-point -------------------------------------------------\n# -------------------------------------------------------------------------\n\n\ndef main() -> None:\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/description.md",
    "content": "Please migrate expiring assets out of the **IT Inventory** database using the simplified checklist below. Your changes will be verified automatically, so match the details exactly.\n\n---\nTask Steps\n1. Inside the **IT Trouble Shooting Hub** page, locate the database named **IT Inventory**.\n2. Collect every page in **IT Inventory** whose **Status** is **Expired** or **To be returned**.\n3. Create a **new full-page database** under the same hub titled **IT Asset Retirement Queue** with exactly these properties (names and types must match):\n   • Serial – title  \n   • Status – select  \n   • Expiration date – date\n4. For every item gathered in step 2, create a page in **IT Asset Retirement Queue**, copy over the Serial, Status, and Expiration date values, then archive the original inventory page once the copy is made.\n"
  },
  {
    "path": "tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/meta.json",
    "content": "{\n  \"task_id\": \"simple__asset_retirement_migration\",\n  \"task_name\": \"Simple Asset Retirement Migration\",\n  \"category_id\": \"it_trouble_shooting_hub\",\n  \"category_name\": \"IT Trouble Shooting Hub\",\n  \"description\": \"Restructure the IT Inventory database by migrating expired assets to a new IT Asset Retirement Queue database.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"database manipulation\",\n    \"automated migration\",\n    \"conditional filtering\",\n    \"data aggregation\",\n    \"report generation\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/It-Trouble-Shooting-Hub-23e81626b6d78020aba7eb65ae1cc2d5\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/it-trouble-shooting-hub\"\n  }\n}\n"
  },
  {
    "path": "tasks/notion/easy/it_trouble_shooting_hub/simple__asset_retirement_migration/verify.py",
    "content": "import sys\nfrom typing import Dict\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef _get_database(root_page_id: str, notion: Client, name: str) -> str | None:\n    \"\"\"Helper that finds a child database by title inside a page.\"\"\"\n    return notion_utils.find_database_in_block(notion, root_page_id, name)\n\n\ndef _check_property(props: Dict, name: str, expected_type: str) -> bool:\n    if name not in props:\n        print(f\"Error: Property '{name}' missing in database.\", file=sys.stderr)\n        return False\n    if props[name][\"type\"] != expected_type:\n        print(\n            f\"Error: Property '{name}' expected type '{expected_type}', found '{props[name]['type']}'.\",\n            file=sys.stderr,\n        )\n        return False\n    return True\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    \"\"\"Verifies that the IT Asset Retirement Queue was created and populated correctly.\"\"\"\n\n    # -------------------------------------------------------------------------\n    # Resolve the root IT Trouble Shooting Hub page\n    # -------------------------------------------------------------------------\n    root_page_id = None\n    if main_id:\n        found_id, obj_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if found_id and obj_type == \"page\":\n            root_page_id = found_id\n\n    if not root_page_id:\n        root_page_id = notion_utils.find_page(notion, \"IT Trouble Shooting Hub\")\n    if not root_page_id:\n        print(\n            \"Error: Could not locate the 'IT Trouble Shooting Hub' page.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # -------------------------------------------------------------------------\n    # Locate the original and new databases\n    # -------------------------------------------------------------------------\n    inventory_db_id = _get_database(root_page_id, notion, \"IT 
Inventory\")\n    if not inventory_db_id:\n        print(\"Error: 'IT Inventory' database not found.\", file=sys.stderr)\n        return False\n\n    retirement_db_id = _get_database(root_page_id, notion, \"IT Asset Retirement Queue\")\n    if not retirement_db_id:\n        print(\"Error: 'IT Asset Retirement Queue' database not found.\", file=sys.stderr)\n        return False\n\n    # -------------------------------------------------------------------------\n    # Validate schema of the retirement queue database\n    # -------------------------------------------------------------------------\n    retirement_db = notion.databases.retrieve(database_id=retirement_db_id)\n    r_props = retirement_db[\"properties\"]\n\n    required_schema = {\n        \"Serial\": \"title\",\n        \"Status\": \"select\",\n        \"Expiration date\": \"date\",\n    }\n\n    for pname, ptype in required_schema.items():\n        if not _check_property(r_props, pname, ptype):\n            return False\n\n    # -------------------------------------------------------------------------\n    # Validate that inventory items are moved & archived\n    # -------------------------------------------------------------------------\n    expired_filter = {\n        \"property\": \"Status\",\n        \"select\": {\"equals\": \"Expired\"},\n    }\n    to_return_filter = {\n        \"property\": \"Status\",\n        \"select\": {\"equals\": \"To be returned\"},\n    }\n    compound_filter = {\"or\": [expired_filter, to_return_filter]}\n\n    # Query for any *active* items that still match these statuses\n    remaining_items = notion.databases.query(\n        database_id=inventory_db_id,\n        filter=compound_filter,\n        archived=False,\n    ).get(\"results\", [])\n\n    if remaining_items:\n        print(\n            f\"Error: {len(remaining_items)} 'Expired' / 'To be returned' items still present in IT Inventory.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # There 
should be at least one entry in the retirement queue\n    retirement_pages = notion.databases.query(database_id=retirement_db_id).get(\n        \"results\", []\n    )\n    expected_serials = {\"65XYQ/GB\", \"36x10PIQ\"}\n    if len(retirement_pages) != len(expected_serials):\n        print(\n            f\"Error: Expected {len(expected_serials)} retirement pages, found {len(retirement_pages)}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    serials_seen = set()\n    for page in retirement_pages:\n        props = page[\"properties\"]\n        # Collect Serial title\n        title_rich = props.get(\"Serial\", {}).get(\"title\", [])\n        serial_val = \"\".join([t.get(\"plain_text\", \"\") for t in title_rich]).strip()\n        serials_seen.add(serial_val)\n\n    if serials_seen != expected_serials:\n        print(\n            f\"Error: Serial values mismatch. Expected {sorted(expected_serials)}, found {sorted(serials_seen)}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"Success: All verification criteria satisfied.\")\n    return True\n\n\ndef main():\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/description.md",
    "content": "Go to Japan Travel Planner, open the Travel Itinerary database, and remove the itinerary items in OSAKA after 6 PM (excluding 6 PM) on Day 1 and Day 2."
  },
  {
    "path": "tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/meta.json",
    "content": "{\n  \"task_id\": \"simple__remove_osaka_itinerary\",\n  \"task_name\": \"Simple Remove Osaka Itinerary\",\n  \"category_id\": \"japan_travel_planner\",\n  \"category_name\": \"Japan Travel Planner\",\n  \"description\": \"Remove the itinerary items in Osaka after 6 PM from Day 1 and Day 2 travel schedules.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"conditional filtering\",\n    \"automated migration\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Japan-Travel-Planner-23181626b6d781c4b6bedb12786b5abe\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/japantravelplanner101\"\n  }\n}\n"
  },
  {
    "path": "tasks/notion/easy/japan_travel_planner/simple__remove_osaka_itinerary/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\ndef get_page_title(page_result):\n    \"\"\"Extract title from a page result\"\"\"\n    properties = page_result.get('properties', {})\n    name_property = properties.get('Name', {})\n    if name_property.get('type') == 'title':\n        title_array = name_property.get('title', [])\n        if title_array and len(title_array) > 0:\n            return title_array[0].get('plain_text', '')\n    return ''\n\ndef get_page_time(page_result):\n    \"\"\"Extract time from Notes field\"\"\"\n    properties = page_result.get('properties', {})\n    notes_property = properties.get('Notes', {})\n    if notes_property.get('type') == 'rich_text':\n        rich_text_array = notes_property.get('rich_text', [])\n        if rich_text_array and len(rich_text_array) > 0:\n            notes_text = rich_text_array[0].get('plain_text', '')\n            return notes_text.strip()\n    return ''\n\ndef get_page_group(page_result):\n    \"\"\"Extract group/location from page\"\"\"\n    properties = page_result.get('properties', {})\n    group_property = properties.get('Group', {})\n    if group_property.get('type') == 'select':\n        select = group_property.get('select')\n        if select:\n            return select.get('name', '')\n    return ''\n\ndef get_page_day(page_result):\n    \"\"\"Extract day from page\"\"\"\n    properties = page_result.get('properties', {})\n    day_property = properties.get('Day', {})\n    if day_property.get('type') == 'select':\n        select = day_property.get('select')\n        if select:\n            return select.get('name', '')\n    return ''\n\ndef parse_time_to_minutes(time_str):\n    \"\"\"Convert time string to minutes for comparison\n    Returns None if time cannot be parsed\"\"\"\n    if not time_str:\n        return None\n    \n    # Clean the time string\n    time_str = time_str.strip().upper()\n    \n    # Remove any text after the time 
(e.g., \"7:30 PM\\n\" -> \"7:30 PM\")\n    time_str = time_str.split('\\n')[0].strip()\n    \n    # Extract time components\n    try:\n        if 'PM' in time_str:\n            time_part = time_str.replace('PM', '').strip()\n            if ':' in time_part:\n                hours, minutes = time_part.split(':')\n                hours = int(hours)\n                minutes = int(minutes)\n            else:\n                hours = int(time_part)\n                minutes = 0\n            # Convert PM hours (add 12 for PM times except 12 PM)\n            if hours != 12:\n                hours += 12\n            return hours * 60 + minutes\n        elif 'AM' in time_str:\n            time_part = time_str.replace('AM', '').strip()\n            if ':' in time_part:\n                hours, minutes = time_part.split(':')\n                hours = int(hours)\n                minutes = int(minutes)\n            else:\n                hours = int(time_part)\n                minutes = 0\n            # Handle 12 AM (midnight)\n            if hours == 12:\n                hours = 0\n            return hours * 60 + minutes\n    except:\n        return None\n    \n    return None\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that all OSAKA events after 6PM have been removed from Day 1 and Day 2 in the Japan Travel Planner.\n    \n    Expected items that should be deleted (all in OSAKA, after 6PM, on Day 1 or Day 2):\n    1. Rikuro's Namba Main Branch - 7 PM (Day 1)\n    2. Shin Sekai \"New World\" - 8 PM (Day 2)\n    3. Katsudon Chiyomatsu - 7:30 PM (Day 2)\n    4. 
Ebisubashi Bridge - 9 PM (Day 1)\n    \n    Note: Kuromon Ichiba Market at 6 PM should NOT be deleted (it's at 6PM, not after)\n    Items after 6PM on other days (Day 3-8) should NOT be deleted\n    \"\"\"\n    \n    # Step 1: Find the main Japan Travel Planner page\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not found_id or object_type != 'page':\n            print(\"Error: Japan Travel Planner page not found.\", file=sys.stderr)\n            return False\n    else:\n        # Try to find the page by searching\n        found_id = notion_utils.find_page(notion, \"Japan Travel Planner\")\n        if not found_id:\n            print(\"Error: Japan Travel Planner page not found.\", file=sys.stderr)\n            return False\n    \n    print(f\"Found Japan Travel Planner page: {found_id}\")\n    \n    # Step 2: Find the Travel Itinerary database\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id)\n    travel_itinerary_db_id = None\n    \n    for block in all_blocks:\n        if block and block.get(\"type\") == \"child_database\":\n            title = block.get(\"child_database\", {}).get(\"title\", \"\")\n            if \"Travel Itinerary\" in title:\n                travel_itinerary_db_id = block.get(\"id\")\n                print(f\"Found Travel Itinerary database: {travel_itinerary_db_id}\")\n                break\n    \n    if not travel_itinerary_db_id:\n        print(\"Error: Travel Itinerary database not found\", file=sys.stderr)\n        return False\n    \n    # Step 3: Query the database for OSAKA items on Day 1 and Day 2\n    try:\n        query_result = notion.databases.query(\n            database_id=travel_itinerary_db_id,\n            filter={\n                \"and\": [\n                    {\"property\": \"Group\", \"select\": {\"equals\": \"Osaka\"}},\n                    {\"or\": [\n                        {\"property\": \"Day\", \"select\": 
{\"equals\": \"Day 1\"}},\n                        {\"property\": \"Day\", \"select\": {\"equals\": \"Day 2\"}}\n                    ]}\n                ]\n            }\n        )\n    except Exception as e:\n        print(f\"Error querying Travel Itinerary database: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 4: Check for items that should have been deleted\n    six_pm_minutes = 18 * 60  # 6 PM in minutes (18:00)\n    \n    # Expected deleted items (4 specific items after 6 PM on Day 1 and Day 2)\n    expected_deleted = {\n        \"Rikuro's Namba Main Branch\": {\"time\": \"7 PM\", \"day\": \"Day 1\", \"found\": False},\n        \"Shin Sekai \\\"New World\\\"\": {\"time\": \"8 PM\", \"day\": \"Day 2\", \"found\": False},\n        \"Katsudon Chiyomatsu\": {\"time\": \"7:30 PM\", \"day\": \"Day 2\", \"found\": False},\n        \"Ebisubashi Bridge\": {\"time\": \"9 PM\", \"day\": \"Day 1\", \"found\": False}\n    }\n    \n    # Items that should remain (at or before 6 PM)\n    expected_remaining = {\n        \"Kuromon Ichiba Market\": {\"time\": \"6 PM\", \"found\": False}\n    }\n    \n    osaka_items_after_6pm = []\n    osaka_items_at_or_before_6pm = []\n    \n    # Debug: Show total query results\n    print(f\"Debug: Found {len(query_result.get('results', []))} total OSAKA items on Day 1 and Day 2\")\n    \n    # Process all OSAKA items on Day 1 and Day 2\n    for page in query_result.get('results', []):\n        page_title = get_page_title(page).strip()\n        page_time = get_page_time(page)\n        page_group = get_page_group(page)\n        page_day = get_page_day(page)\n        \n        if page_group != \"Osaka\":\n            continue\n        \n        # Parse time to check if after 6 PM\n        time_minutes = parse_time_to_minutes(page_time)\n        \n        if time_minutes is not None and time_minutes > six_pm_minutes:\n            osaka_items_after_6pm.append({\n                \"title\": page_title,\n                \"time\": 
page_time,\n                \"day\": page_day,\n                \"id\": page.get('id')\n            })\n            \n            # Check if this is one of the expected deleted items\n            for expected_title, expected_info in expected_deleted.items():\n                # Clean up the titles for comparison\n                clean_page_title = page_title.strip().lower()\n                clean_expected_title = expected_title.strip().lower()\n                \n                # Check for \"Rikuro's\" or \"Rikuro's\" (different apostrophe types)\n                if \"rikuro\" in clean_page_title and \"rikuro\" in clean_expected_title:\n                    title_match = True\n                elif clean_page_title == clean_expected_title:\n                    title_match = True\n                elif clean_expected_title in clean_page_title or clean_page_title in clean_expected_title:\n                    title_match = True\n                else:\n                    title_match = False\n                    \n                if title_match and page_day == expected_info[\"day\"]:\n                    print(f\"Debug: Found '{page_title}' on {page_day} at {page_time} - matches expected '{expected_title}'\")\n                    expected_deleted[expected_title][\"found\"] = True\n                \n        elif time_minutes is not None and time_minutes <= six_pm_minutes:\n            osaka_items_at_or_before_6pm.append({\n                \"title\": page_title,\n                \"time\": page_time,\n                \"day\": page_day,\n                \"id\": page.get('id')\n            })\n            \n            # Check if this is one of the expected remaining items\n            for expected_title in expected_remaining:\n                if expected_title.lower() in page_title.lower() or page_title.lower() in expected_title.lower():\n                    expected_remaining[expected_title][\"found\"] = True\n    \n    # Step 5: Verify results\n    print(f\"\\nVerification 
Summary:\")\n    print(f\"=\" * 50)\n    \n    all_passed = True\n    \n    # Check that the 4 expected items after 6 PM have been deleted\n    print(\"\\n4 Items that should be deleted (after 6 PM on Day 1 and Day 2):\")\n    for item_name, item_info in expected_deleted.items():\n        if item_info[\"found\"]:\n            # If found = True, it means the item still exists (was not deleted)\n            print(f\"✗ {item_name} ({item_info['day']}, {item_info['time']}) - Still exists, should be deleted\", file=sys.stderr)\n            all_passed = False\n        else:\n            # If found = False, it means the item was deleted correctly\n            print(f\"✓ {item_name} ({item_info['day']}, {item_info['time']}) - Correctly deleted\")\n    \n    \n    # Check that items at or before 6 PM remain\n    print(\"\\nItems that should remain (at or before 6 PM on Day 1 and Day 2):\")\n    for item_name, item_info in expected_remaining.items():\n        if item_info[\"found\"]:\n            print(f\"✓ {item_name} ({item_info['time']}) - Correctly retained\")\n        else:\n            print(f\"✗ {item_name} ({item_info['time']}) - Missing, should not be deleted\", file=sys.stderr)\n            all_passed = False\n    \n    # Report any items after 6 PM that still exist\n    if osaka_items_after_6pm:\n        print(f\"\\n✗ Found {len(osaka_items_after_6pm)} OSAKA item(s) after 6 PM on Day 1/Day 2:\", file=sys.stderr)\n        for item in osaka_items_after_6pm:\n            print(f\"  - {item['title']} at {item['time']} ({item['day']})\", file=sys.stderr)\n    else:\n        print(f\"\\n✓ No OSAKA items found after 6 PM on Day 1/Day 2 (all correctly deleted)\")\n    \n    # Report count summary\n    print(f\"\\nCount Summary:\")\n    print(f\"- OSAKA items after 6 PM on Day 1/Day 2 found: {len(osaka_items_after_6pm)} (should be 0)\")\n    print(f\"- OSAKA items at/before 6 PM on Day 1/Day 2 found: {len(osaka_items_at_or_before_6pm)}\")\n    print(f\"- Expected deletions 
verified: {sum(1 for item in expected_deleted.values() if not item['found'])}/4\")\n    \n    return all_passed\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    \n    if verify(notion, main_id):\n        print(\"\\nVerification passed: All 4 required OSAKA events after 6 PM on Day 1 and Day 2 have been removed\")\n        sys.exit(0)\n    else:\n        print(\"\\nVerification failed: Some OSAKA events after 6 PM on Day 1/Day 2 still exist\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/notion/easy/online_resume/simple__skills_development_tracker/description.md",
    "content": "Create a comprehensive skills audit system by performing the following tasks:\n\n**Task Requirements:**\n1. Create a new database named \"Skills Development Tracker\" as a child database in the main resume page with the following properties:\n   - Name (title property)\n   - Current Skill (relation to Skills database)\n   - Current Proficiency (rollup from related skill's \"Skill Level\" property)\n   - Target Proficiency (number property with format \"percent\")\n   - Gap (formula: Target Proficiency - Current Proficiency)\n   - Learning Resources (rich text property)\n   - Progress Notes (rich text property)\n\n2. Populate the Skills Development Tracker database with entries for all skills that have a proficiency level below 70% (0.7):\n   - For each qualifying skill, create an entry with:\n     - Name: \"[Skill Name] Development Plan\"\n     - Link to the corresponding skill in Skills database\n     - Target Proficiency: Set to Current + 25% (capped at 95%)\n     - Learning Resources: \"Online courses and practice projects\"\n     - Progress Notes: \"Initial assessment completed\"\n"
  },
  {
    "path": "tasks/notion/easy/online_resume/simple__skills_development_tracker/meta.json",
    "content": "{\n  \"task_id\": \"simple__skills_development_tracker\",\n  \"task_name\": \"Simple Skills Development Tracker\",\n  \"category_id\": \"online_resume\",\n  \"category_name\": \"Online Resume\",\n  \"description\": \"Create a comprehensive skills audit system with development tracking for skills below 70% proficiency.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"database manipulation\",\n    \"cross-reference linking\",\n    \"conditional filtering\",\n    \"data aggregation\",\n    \"template population\",\n    \"visual formatting\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Online-Resume-23181626b6d781159faaeb5eadaf612e\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/online-resume\"\n  }\n}\n"
  },
  {
    "path": "tasks/notion/easy/online_resume/simple__skills_development_tracker/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the Skills Development Tracker database was created correctly.\n    \"\"\"\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"New Online Resume\")\n    if not page_id:\n        print(\"Error: Page 'New Online Resume' not found.\", file=sys.stderr)\n        return False\n\n    # Step 1: Verify Skills Development Tracker database exists\n    tracker_db_id = notion_utils.find_database_in_block(\n        notion, page_id, \"Skills Development Tracker\"\n    )\n    if not tracker_db_id:\n        print(\n            \"Error: Database 'Skills Development Tracker' not found.\", file=sys.stderr\n        )\n        return False\n\n    # Step 2: Verify database schema\n    try:\n        db_info = notion.databases.retrieve(database_id=tracker_db_id)\n        properties = db_info.get(\"properties\", {})\n\n        # Check required properties\n        required_props = {\n            \"Name\": \"title\",\n            \"Current Skill\": \"relation\",\n            \"Current Proficiency\": \"rollup\",\n            \"Target Proficiency\": \"number\",\n            \"Gap\": \"formula\",\n            \"Learning Resources\": \"rich_text\",\n            \"Progress Notes\": \"rich_text\",\n        }\n\n        for prop_name, expected_type in required_props.items():\n            if prop_name not in properties:\n                print(\n                    f\"Error: Property '{prop_name}' not found in database.\",\n                    file=sys.stderr,\n                )\n                return False\n            if properties[prop_name][\"type\"] != 
expected_type:\n                print(\n                    f\"Error: Property '{prop_name}' has incorrect type. Expected '{expected_type}', got '{properties[prop_name]['type']}'.\",\n                    file=sys.stderr,\n                )\n                return False\n\n        # Verify Target Proficiency is percent format\n        if (\n            properties[\"Target Proficiency\"].get(\"number\", {}).get(\"format\")\n            != \"percent\"\n        ):\n            print(\n                \"Error: Target Proficiency should have 'percent' format.\",\n                file=sys.stderr,\n            )\n            return False\n\n    except Exception as e:\n        print(f\"Error retrieving database info: {e}\", file=sys.stderr)\n        return False\n\n    # Step 3: Get Skills database to check entries\n    skills_db_id = notion_utils.find_database_in_block(notion, page_id, \"Skills\")\n    if not skills_db_id:\n        print(\"Error: Skills database not found.\", file=sys.stderr)\n        return False\n\n    # Get all skills with proficiency < 70%\n    skills_below_70 = []\n    try:\n        skills_results = notion.databases.query(database_id=skills_db_id).get(\n            \"results\", []\n        )\n        for skill in skills_results:\n            skill_level = (\n                skill.get(\"properties\", {}).get(\"Skill Level\", {}).get(\"number\", 1.0)\n            )\n            if skill_level < 0.7:\n                skill_name = (\n                    skill.get(\"properties\", {}).get(\"Skill\", {}).get(\"title\", [])\n                )\n                if skill_name:\n                    skill_name_text = skill_name[0].get(\"text\", {}).get(\"content\", \"\")\n                    skills_below_70.append(\n                        {\n                            \"name\": skill_name_text,\n                            \"id\": skill[\"id\"],\n                            \"level\": skill_level,\n                        }\n                    )\n    except 
Exception as e:\n        print(f\"Error querying Skills database: {e}\", file=sys.stderr)\n        return False\n\n    if not skills_below_70:\n        print(\"Warning: No skills found with proficiency below 70%.\", file=sys.stderr)\n        # This might be OK if all skills are above 70%\n\n    # Step 4: Verify entries in Skills Development Tracker\n    try:\n        tracker_results = notion.databases.query(database_id=tracker_db_id).get(\n            \"results\", []\n        )\n\n        # Check that we have entries for skills below 70%\n        if len(skills_below_70) > 0 and len(tracker_results) == 0:\n            print(\n                \"Error: No entries found in Skills Development Tracker database.\",\n                file=sys.stderr,\n            )\n            return False\n\n        # Verify each entry\n        for entry in tracker_results:\n            props = entry.get(\"properties\", {})\n\n            # Check name format\n            name_prop = props.get(\"Name\", {}).get(\"title\", [])\n            if not name_prop:\n                print(\"Error: Entry missing Name property.\", file=sys.stderr)\n                return False\n            name_text = name_prop[0].get(\"text\", {}).get(\"content\", \"\")\n            if not name_text.endswith(\" Development Plan\"):\n                print(\n                    f\"Error: Entry name '{name_text}' doesn't follow expected format.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Check relation to Skills database\n            skill_relation = props.get(\"Current Skill\", {}).get(\"relation\", [])\n            if not skill_relation:\n                print(\n                    f\"Error: Entry '{name_text}' missing Current Skill relation.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Check Target Proficiency (should be set)\n            target_prof = props.get(\"Target Proficiency\", 
{}).get(\"number\")\n            if target_prof is None:\n                print(\n                    f\"Error: Entry '{name_text}' missing Target Proficiency.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Check Learning Resources\n            learning_resources = props.get(\"Learning Resources\", {}).get(\n                \"rich_text\", []\n            )\n            if not learning_resources:\n                print(\n                    f\"Error: Entry '{name_text}' missing Learning Resources.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Check Progress Notes\n            progress_notes = props.get(\"Progress Notes\", {}).get(\"rich_text\", [])\n            if not progress_notes:\n                print(\n                    f\"Error: Entry '{name_text}' missing Progress Notes.\",\n                    file=sys.stderr,\n                )\n                return False\n\n    except Exception as e:\n        print(f\"Error querying Skills Development Tracker: {e}\", file=sys.stderr)\n        return False\n\n    print(\"Success: Skills Development Tracker database verified successfully.\")\n    return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/easy/python_roadmap/simple__expert_level_lessons/description.md",
    "content": "# Task: Expert Level Learning Path (Simplified)\n\n## Objective\nExtend the Python Roadmap with a new Expert Level chapter, create a bridge lesson, and add two expert lessons that build on existing material.\n\n## Requirements\n\n### 1. Add the Expert Level chapter\n- **Database**: Chapters\n- **Name**: `Expert Level`\n- **Icon**: 🟣 (purple circle emoji)\n- Make sure it is linked into the roadmap alongside the existing chapters.\n\n### 2. Create the bridge lesson\nCreate a lesson that connects advanced material to the new chapter:\n- **Title**: `Advanced Foundations Review`\n- **Status**: Done\n- **Chapter**: Link it to `Expert Level`\n- **Parent item**: Link to the lesson whose title contains \"Control\" (e.g., \"Control Flow\")\n- **Sub-items**: Include links to the lessons containing \"Decorators\" and \"Calling API\"\n\n### 3. Add two expert lessons\nAdd the following entries to the Steps database:\n\n| Lesson Title | Status | Chapter | Parent item | Date |\n|--------------|--------|---------|-------------|------|\n| `Metaprogramming and AST Manipulation` | To Do | Expert Level | Advanced Foundations Review | 2025-09-15 |\n| `Async Concurrency Patterns` | To Do | Expert Level | Calling API | 2025-09-20 |\n\nThe lessons must inherit the correct chapter link, parent relationship, and due date as shown above.\n"
  },
  {
    "path": "tasks/notion/easy/python_roadmap/simple__expert_level_lessons/meta.json",
    "content": "{\n  \"task_id\": \"expert_level_lessons\",\n  \"task_name\": \"Expert Level Lessons\",\n  \"category_id\": \"python_roadmap\",\n  \"category_name\": \"Python Roadmap\",\n  \"description\": \"Create an Expert Level chapter, a bridge lesson, and two expert-level lessons linked to the correct chapter and parent items.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"database manipulation\",\n    \"cross-reference linking\",\n    \"conditional filtering\",\n    \"status tracking\",\n    \"template population\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Python-Roadmap-25281626b6d78012bf2bce1fa8711f4d\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/python-roadmap\"\n  }\n}\n"
  },
  {
    "path": "tasks/notion/easy/python_roadmap/simple__expert_level_lessons/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\nTARGET_PAGE_TITLE = \"Python Roadmap\"\nCHAPTER_NAME = \"Expert Level\"\nCHAPTER_ICON = \"🟣\"\nBRIDGE_TITLE = \"Advanced Foundations Review\"\nREQUIRED_SUBITEM_TITLES = [\"Decorators\", \"Calling API\"]\n\nLESSON_REQUIREMENTS = [\n    {\n        \"title\": \"Metaprogramming and AST Manipulation\",\n        \"status\": \"To Do\",\n        \"date\": \"2025-09-15\",\n        \"parent_title\": BRIDGE_TITLE,\n    },\n    {\n        \"title\": \"Async Concurrency Patterns\",\n        \"status\": \"To Do\",\n        \"date\": \"2025-09-20\",\n        \"parent_title\": \"Calling API\",\n    },\n]\n\n\ndef _get_database_ids(notion: Client, page_id: str) -> tuple[str | None, str | None]:\n    \"\"\"Return the block IDs for the Chapters and Steps databases on the page.\"\"\"\n    chapters_db_id = None\n    steps_db_id = None\n\n    blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n    for block in blocks:\n        if block.get(\"type\") != \"child_database\":\n            continue\n        title = block.get(\"child_database\", {}).get(\"title\", \"\")\n        if \"Chapters\" in title and not chapters_db_id:\n            chapters_db_id = block[\"id\"]\n        elif \"Steps\" in title and not steps_db_id:\n            steps_db_id = block[\"id\"]\n\n    return chapters_db_id, steps_db_id\n\n\ndef _query_step_by_title(notion: Client, database_id: str, title: str, *, exact: bool = True):\n    \"\"\"Return the first step entry matching the given title pattern.\"\"\"\n    title_filter = {\"equals\": title} if exact else {\"contains\": title}\n    response = notion.databases.query(\n        database_id=database_id,\n        filter={\"property\": \"Lessons\", \"title\": title_filter},\n        page_size=5,\n    )\n    results = response.get(\"results\", [])\n    return results[0] if results else None\n\n\ndef verify(notion: Client, main_id: str | None = None) -> 
bool:\n    \"\"\"Verify the simplified Expert Level learning path setup.\"\"\"\n    # Resolve the roadmap page.\n    if main_id:\n        page_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not page_id or object_type != \"page\":\n            print(\"Error: Python Roadmap page not found.\", file=sys.stderr)\n            return False\n    else:\n        page_id = notion_utils.find_page(notion, TARGET_PAGE_TITLE)\n        if not page_id:\n            print(\"Error: Python Roadmap page not found.\", file=sys.stderr)\n            return False\n\n    # Locate the Chapters and Steps databases.\n    chapters_db_id, steps_db_id = _get_database_ids(notion, page_id)\n    if not chapters_db_id:\n        print(\"Error: Chapters database not found on the page.\", file=sys.stderr)\n        return False\n    if not steps_db_id:\n        print(\"Error: Steps database not found on the page.\", file=sys.stderr)\n        return False\n\n    # Ensure the Expert Level chapter exists with the purple icon.\n    try:\n        chapter_resp = notion.databases.query(\n            database_id=chapters_db_id,\n            filter={\"property\": \"Name\", \"title\": {\"equals\": CHAPTER_NAME}},\n            page_size=1,\n        )\n    except Exception as exc:\n        print(f\"Error querying Chapters database: {exc}\", file=sys.stderr)\n        return False\n\n    results = chapter_resp.get(\"results\", [])\n    if not results:\n        print(\"Error: Expert Level chapter not found.\", file=sys.stderr)\n        return False\n\n    expert_chapter = results[0]\n    expert_chapter_id = expert_chapter[\"id\"]\n    icon = expert_chapter.get(\"icon\") or {}\n    if icon.get(\"type\") != \"emoji\" or icon.get(\"emoji\") != CHAPTER_ICON:\n        print(\"Error: Expert Level chapter must use the purple circle emoji icon.\", file=sys.stderr)\n        return False\n\n    print(\"✓ Expert Level chapter exists with the correct icon.\")\n\n    # Locate prerequisite 
lessons (Control Flow, Decorators, Calling API).\n    control_lesson = _query_step_by_title(notion, steps_db_id, \"Control\", exact=False)\n    if not control_lesson:\n        print(\"Error: Could not find a lesson containing 'Control' in its title.\", file=sys.stderr)\n        return False\n    control_lesson_id = control_lesson[\"id\"]\n\n    prerequisite_ids = {}\n    for title in REQUIRED_SUBITEM_TITLES:\n        lesson = _query_step_by_title(notion, steps_db_id, title, exact=False)\n        if not lesson:\n            print(f\"Error: Required lesson containing '{title}' not found.\", file=sys.stderr)\n            return False\n        prerequisite_ids[title] = lesson[\"id\"]\n\n    # Verify the bridge lesson.\n    bridge_lesson = _query_step_by_title(notion, steps_db_id, BRIDGE_TITLE, exact=True)\n    if not bridge_lesson:\n        print(\"Error: Advanced Foundations Review lesson not found.\", file=sys.stderr)\n        return False\n\n    status = (bridge_lesson[\"properties\"].get(\"Status\", {}).get(\"status\") or {}).get(\"name\")\n    if status != \"Done\":\n        print(\"Error: Advanced Foundations Review must have status 'Done'.\", file=sys.stderr)\n        return False\n\n    # Ensure chapter relation includes Expert Level.\n    chapter_rel = bridge_lesson[\"properties\"].get(\"Chapters\", {}).get(\"relation\", [])\n    if not any(rel[\"id\"] == expert_chapter_id for rel in chapter_rel):\n        print(\"Error: Advanced Foundations Review must link to the Expert Level chapter.\", file=sys.stderr)\n        return False\n\n    # Parent item should be the control lesson.\n    parent_rel = bridge_lesson[\"properties\"].get(\"Parent item\", {}).get(\"relation\", [])\n    if not parent_rel or parent_rel[0][\"id\"] != control_lesson_id:\n        print(\"Error: Advanced Foundations Review should use the control lesson as its Parent item.\", file=sys.stderr)\n        return False\n\n    # Sub-items must include the required lessons.\n    sub_rel = 
bridge_lesson[\"properties\"].get(\"Sub-item\", {}).get(\"relation\", [])\n    sub_ids = {rel[\"id\"] for rel in sub_rel}\n    missing = [title for title, rel_id in prerequisite_ids.items() if rel_id not in sub_ids]\n    if missing:\n        print(\n            f\"Error: Advanced Foundations Review must include these lessons as sub-items: {', '.join(missing)}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"✓ Bridge lesson configured with the correct status, chapter, parent, and sub-items.\")\n\n    # Verify the two expert lessons.\n    overall_success = True\n    for spec in LESSON_REQUIREMENTS:\n        lesson = _query_step_by_title(notion, steps_db_id, spec[\"title\"], exact=True)\n        if not lesson:\n            print(f\"Error: Lesson '{spec['title']}' not found.\", file=sys.stderr)\n            overall_success = False\n            continue\n\n        lesson_ok = True\n\n        # Status check.\n        status_name = (lesson[\"properties\"].get(\"Status\", {}).get(\"status\") or {}).get(\"name\")\n        if status_name != spec[\"status\"]:\n            print(\n                f\"Error: Lesson '{spec['title']}' should have status '{spec['status']}', found '{status_name}'.\",\n                file=sys.stderr,\n            )\n            lesson_ok = False\n\n        # Chapter relation check.\n        lesson_chapters = lesson[\"properties\"].get(\"Chapters\", {}).get(\"relation\", [])\n        if not any(rel[\"id\"] == expert_chapter_id for rel in lesson_chapters):\n            print(f\"Error: Lesson '{spec['title']}' must link to the Expert Level chapter.\", file=sys.stderr)\n            lesson_ok = False\n\n        # Parent relation check.\n        parent_title = spec[\"parent_title\"]\n        if parent_title == BRIDGE_TITLE:\n            expected_parent_id = bridge_lesson[\"id\"]\n        else:\n            expected_parent_id = prerequisite_ids.get(parent_title)\n\n        parent_relation = 
lesson[\"properties\"].get(\"Parent item\", {}).get(\"relation\", [])\n        if not expected_parent_id:\n            print(\n                f\"Error: Could not resolve expected parent '{parent_title}' for lesson '{spec['title']}'.\",\n                file=sys.stderr,\n            )\n            lesson_ok = False\n        else:\n            if not parent_relation or parent_relation[0][\"id\"] != expected_parent_id:\n                print(\n                    f\"Error: Lesson '{spec['title']}' should have '{parent_title}' as its Parent item.\",\n                    file=sys.stderr,\n                )\n                lesson_ok = False\n        # Date check.\n        date_prop = lesson[\"properties\"].get(\"Date\", {}).get(\"date\") or {}\n        if date_prop.get(\"start\") != spec[\"date\"]:\n            print(\n                f\"Error: Lesson '{spec['title']}' should use date {spec['date']}, found {date_prop.get('start')}.\",\n                file=sys.stderr,\n            )\n            lesson_ok = False\n\n        if lesson_ok:\n            print(f\"✓ Lesson '{spec['title']}' has the expected properties.\")\n        else:\n            overall_success = False\n\n    if not overall_success:\n        return False\n\n    print(\"Success: Expert Level chapter, bridge lesson, and expert lessons configured correctly.\")\n    return True\n\n\ndef main() -> None:\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/easy/self_assessment/simple__faq_column_layout/description.md",
    "content": "Navigate to the \"Self Assessment\" page and reorganize the FAQ toggle content to make it easier to scan.\n\n**Task Requirements:**\n1. Add a column list with two columns inside the FAQ toggle.\n2. Move the first two existing Q&A pairs from the FAQ into the left column.\n3. Move the third existing Q&A pair into the right column, keeping the original heading/paragraph formatting.\n"
  },
  {
    "path": "tasks/notion/easy/self_assessment/simple__faq_column_layout/meta.json",
    "content": "{\n  \"task_id\": \"simple__faq_column_layout\",\n  \"task_name\": \"Simple FAQ Column Layout\",\n  \"category_id\": \"self_assessment\",\n  \"category_name\": \"Self Assessment\",\n  \"description\": \"Reorganize the FAQ section content into a two-column layout with balanced Q&A pairs.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"content organization\",\n    \"visual formatting\",\n    \"template population\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Self-Assessment-24381626b6d780fe9f56c2ba14ea042d\",\n    \"stateOriginalUrl\": \"https://painted-tennis-ebc.notion.site/Self-Assessment-24381626b6d780fe9f56c2ba14ea042d\"\n  }\n}\n"
  },
  {
    "path": "tasks/notion/easy/self_assessment/simple__faq_column_layout/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the FAQ toggle has been properly reorganized with a column list.\n    \"\"\"\n    # Start from main_id if provided\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        # Try to find the Self Assessment page\n        page_id = notion_utils.find_page(notion, \"Self Assessment\")\n\n    if not page_id:\n        print(\"Error: Self Assessment page not found.\", file=sys.stderr)\n        return False\n\n    # Get all blocks recursively from the page\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    # Find the FAQ toggle block\n    faq_toggle_block = None\n    faq_toggle_id = None\n    for block in all_blocks:\n        if block.get(\"type\") == \"toggle\":\n            block_text = notion_utils.get_block_plain_text(block)\n            if \"FAQ\" in block_text:\n                faq_toggle_block = block\n                faq_toggle_id = block.get(\"id\")\n                print(f\"Found FAQ toggle block: {block_text}\")\n                break\n\n    if not faq_toggle_block:\n        print(\"Error: FAQ toggle block not found.\", file=sys.stderr)\n        return False\n\n    # Find column_list inside the FAQ toggle\n    column_list_block = None\n    for block in all_blocks:\n        if (\n            block.get(\"type\") == \"column_list\"\n            and block.get(\"parent\", {}).get(\"block_id\") == faq_toggle_id\n        ):\n            column_list_block = block\n            break\n\n    if not column_list_block:\n        print(\"Error: No column_list found inside FAQ toggle.\", file=sys.stderr)\n        return False\n\n    # Check that there 
are no Q&A pairs directly under FAQ toggle (outside column_list)\n    direct_faq_children = []\n    for block in all_blocks:\n        if block.get(\"parent\", {}).get(\"block_id\") == faq_toggle_id and block.get(\n            \"id\"\n        ) != column_list_block.get(\"id\"):\n            direct_faq_children.append(block)\n\n    # Check if any of these are heading_3 or paragraph blocks (Q&A content)\n    for block in direct_faq_children:\n        if block.get(\"type\") in [\"heading_3\", \"paragraph\"]:\n            print(\n                f\"Error: Found Q&A content outside column_list: {notion_utils.get_block_plain_text(block)[:50]}...\",\n                file=sys.stderr,\n            )\n            return False\n\n    # Find the two columns\n    columns = []\n    column_list_id = column_list_block.get(\"id\")\n    for block in all_blocks:\n        if (\n            block.get(\"type\") == \"column\"\n            and block.get(\"parent\", {}).get(\"block_id\") == column_list_id\n        ):\n            columns.append(block)\n\n    if len(columns) != 2:\n        print(f\"Error: Expected 2 columns, found {len(columns)}.\", file=sys.stderr)\n        return False\n\n    # Count Q&A pairs in each column\n    qa_counts = []\n    total_pairs = 0\n\n    for i, column in enumerate(columns[:2]):\n        column_id = column.get(\"id\")\n\n        column_blocks = [\n            block\n            for block in all_blocks\n            if block.get(\"parent\", {}).get(\"block_id\") == column_id\n        ]\n\n        qa_pairs = 0\n        j = 0\n        while j < len(column_blocks):\n            if (\n                column_blocks[j].get(\"type\") == \"heading_3\"\n                and j + 1 < len(column_blocks)\n                and column_blocks[j + 1].get(\"type\") == \"paragraph\"\n            ):\n                qa_pairs += 1\n                j += 2\n            else:\n                j += 1\n\n        qa_counts.append(qa_pairs)\n        total_pairs += qa_pairs\n        
print(f\"Column {i + 1}: Found {qa_pairs} Q&A pairs\")\n\n    if qa_counts[0] < 2:\n        print(\n            f\"Error: Left column should contain at least 2 Q&A pairs, found {qa_counts[0]}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    if qa_counts[1] < 1:\n        print(\n            f\"Error: Right column should contain at least 1 Q&A pair, found {qa_counts[1]}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    if total_pairs < 3:\n        print(\n            f\"Error: Expected at least 3 total Q&A pairs across both columns, found {total_pairs}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\n        \"Success: FAQ toggle organized with two columns holding the existing Q&A pairs (two on the left, one on the right).\"\n    )\n    return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/easy/standard_operating_procedure/simple__section_organization/description.md",
    "content": "# Task: Reorganize Standard Operating Procedure Page Sections\n\n## Objective\nModify the structure of the Standard Operating Procedure page in Notion by updating the order of two sections.\n\n## Requirements\n- Navigate to the Standard Operating Procedure page\n- Swap the positions of the \"Terminologies\" and \"Roles & responsibilities\" sections\n- Preserve all content within each section exactly as is\n- Maintain the original formatting and structure of each section\n"
  },
  {
    "path": "tasks/notion/easy/standard_operating_procedure/simple__section_organization/meta.json",
    "content": "{\n  \"task_id\": \"simple__section_organization\",\n  \"task_name\": \"Simple Section Organization\",\n  \"category_id\": \"standard_operating_procedure\",\n  \"category_name\": \"Standard Operating Procedure\",\n  \"description\": \"Reorganize the Standard Operating Procedure page by swapping sections and creating a column layout.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"content organization\",\n    \"cross-reference linking\",\n    \"visual formatting\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Standard-Operating-Procedure-24381626b6d780a8b678f9e62ae5b152\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/standard-operating-procedure\"\n  }\n}\n"
  },
  {
    "path": "tasks/notion/easy/standard_operating_procedure/simple__section_organization/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\nTARGET_PAGE_TITLE = \"Standard Operating Procedure\"\nROLES_HEADING = \"Roles & responsibilities\"\nTERMINOLOGIES_HEADING = \"Terminologies\"\n\n\ndef _find_heading_indices(blocks: list[dict]) -> tuple[int | None, int | None]:\n    \"\"\"Return the indices of the target headings within the flattened block list.\"\"\"\n    roles_index = None\n    terminologies_index = None\n\n    for index, block in enumerate(blocks):\n        if block.get(\"type\") != \"heading_2\":\n            continue\n        rich_text = block.get(\"heading_2\", {}).get(\"rich_text\", [])\n        if not rich_text:\n            continue\n        heading_text = rich_text[0].get(\"text\", {}).get(\"content\", \"\")\n        if heading_text == ROLES_HEADING and roles_index is None:\n            roles_index = index\n        elif heading_text == TERMINOLOGIES_HEADING and terminologies_index is None:\n            terminologies_index = index\n\n    return roles_index, terminologies_index\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    \"\"\"Ensure the Roles & responsibilities section appears before Terminologies.\"\"\"\n    # Resolve page id.\n    if main_id:\n        page_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not page_id or object_type != \"page\":\n            print(\"Error: Standard Operating Procedure page not found.\", file=sys.stderr)\n            return False\n    else:\n        page_id = notion_utils.find_page(notion, TARGET_PAGE_TITLE)\n        if not page_id:\n            print(\"Error: Standard Operating Procedure page not found.\", file=sys.stderr)\n            return False\n\n    # Fetch all blocks (flattened order from top to bottom).\n    blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n    roles_index, terminologies_index = _find_heading_indices(blocks)\n\n    if roles_index is None:\n       
 print(\"Error: 'Roles & responsibilities' section not found.\", file=sys.stderr)\n        return False\n    if terminologies_index is None:\n        print(\"Error: 'Terminologies' section not found.\", file=sys.stderr)\n        return False\n\n    if roles_index >= terminologies_index:\n        print(\n            \"Error: Sections are not swapped. 'Roles & responsibilities' should appear before 'Terminologies'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"Success: Section order updated so 'Roles & responsibilities' precedes 'Terminologies'.\")\n    return True\n\n\ndef main() -> None:\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/easy/team_projects/simple__swap_tasks/description.md",
    "content": "Go to the Team Projects page, find the person responsible for the most tasks (10 in total) and the person responsible for the fewest tasks (3 in total), then swap their assigned tasks."
  },
  {
    "path": "tasks/notion/easy/team_projects/simple__swap_tasks/meta.json",
    "content": "{\n  \"task_id\": \"simple__swap_tasks\",\n  \"task_name\": \"Simple Swap Tasks\",\n  \"category_id\": \"team_projects\",\n  \"category_name\": \"Team Projects\",\n  \"description\": \"Find the person responsible for the most and fewest tasks, then swap their assigned tasks.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data aggregation\",\n    \"automated migration\",\n    \"conditional filtering\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Team-Projects-24e81626b6d7809c982fdb7a25825898\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/gantt-chart\"\n  }\n}\n"
  },
  {
    "path": "tasks/notion/easy/team_projects/simple__swap_tasks/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the task assignees have been swapped correctly.\n    Checks:\n    1. \"Develop a plan for promotion\" and \"Evaluate different third-party services\" have swapped assignees\n    2. The person with most tasks and person with least tasks have swapped all their tasks\n    \"\"\"\n    # Step 1: Find the Team Projects page\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not found_id or object_type != 'page':\n            print(\"Error: Team Projects page not found.\", file=sys.stderr)\n            return False\n    else:\n        # Try to find the page by searching\n        found_id = notion_utils.find_page(notion, \"Team Projects\")\n        if not found_id:\n            print(\"Error: Team Projects page not found.\", file=sys.stderr)\n            return False\n    \n    # Get all blocks from the page to find database references\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id)\n    \n    # Find Tasks database ID from the page\n    tasks_db_id = None\n    \n    for block in all_blocks:\n        if block and block.get(\"type\") == \"child_database\":\n            db_title = block.get(\"child_database\", {}).get(\"title\", \"\")\n            if \"Tasks\" in db_title:\n                tasks_db_id = block[\"id\"]\n                break\n    \n    if not tasks_db_id:\n        print(\"Error: Tasks database not found.\", file=sys.stderr)\n        return False\n    \n    print(\"\\n📋 Starting verification...\")\n    \n    # Step 2: Query all tasks to analyze assignees\n    \n    try:\n        all_tasks_response = notion.databases.query(\n            database_id=tasks_db_id,\n            page_size=100\n        )\n        \n        if not all_tasks_response.get(\"results\"):\n            
print(\"Error: No tasks found in Tasks database.\", file=sys.stderr)\n            return False\n        \n        tasks = all_tasks_response[\"results\"]\n        \n    except Exception as e:\n        print(f\"Error querying Tasks database: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 3: Check specific tasks have swapped assignees\n    \n    develop_plan_task = None\n    evaluate_services_task = None\n    \n    for task in tasks:\n        task_name = task[\"properties\"][\"Name\"][\"title\"][0][\"text\"][\"content\"]\n        if task_name == \"Develop a plan for promotion\":\n            develop_plan_task = task\n        elif task_name == \"Evaluate different third-party services\":\n            evaluate_services_task = task\n    \n    if not develop_plan_task or not evaluate_services_task:\n        print(\"Error: Could not find both required tasks.\", file=sys.stderr)\n        return False\n    \n    # Get assignees for these tasks\n    develop_plan_assignees = develop_plan_task[\"properties\"][\"Assigned\"][\"people\"]\n    evaluate_services_assignees = evaluate_services_task[\"properties\"][\"Assigned\"][\"people\"]\n    \n    if not develop_plan_assignees or not evaluate_services_assignees:\n        print(\"Error: Tasks don't have assignees.\", file=sys.stderr)\n        return False\n    \n    develop_plan_assignee_id = develop_plan_assignees[0][\"id\"]\n    evaluate_services_assignee_id = evaluate_services_assignees[0][\"id\"]\n    \n    # These should be different (swapped)\n    if develop_plan_assignee_id == evaluate_services_assignee_id:\n        print(\"Error: Tasks should have different assignees after swap.\", file=sys.stderr)\n        return False\n    \n    # Step 4: Count tasks per person\n    \n    task_counts = {}\n    unassigned_count = 0\n    \n    for task in tasks:\n        assignees = task[\"properties\"][\"Assigned\"][\"people\"]\n        if assignees:\n            assignee_id = assignees[0][\"id\"]\n            if 
assignee_id not in task_counts:\n                task_counts[assignee_id] = []\n            task_counts[assignee_id].append(task[\"properties\"][\"Name\"][\"title\"][0][\"text\"][\"content\"])\n        else:\n            unassigned_count += 1\n    \n    # Sort by task count\n    sorted_assignees = sorted(task_counts.items(), key=lambda x: len(x[1]))\n    \n    if len(sorted_assignees) < 2:\n        print(\"Error: Need at least 2 people with tasks to verify swap.\", file=sys.stderr)\n        return False\n    \n    # Get person with least and most tasks\n    person_with_least = sorted_assignees[0]\n    person_with_most = sorted_assignees[-1]\n    \n    least_id, least_tasks = person_with_least\n    most_id, most_tasks = person_with_most\n    \n    # Step 5: Verify the swap pattern\n    \n    # Original distribution (before swap):\n    # - 5ac96c02-49a4-4320-8de6-b663ba83126b had 3 tasks (least)\n    # - ac7a3bd0-c111-4464-8f45-8a857a1abc8a had 10 tasks (most)\n    \n    # After complete swap, we expect:\n    # - 5ac96c02-49a4-4320-8de6-b663ba83126b should have 10 tasks\n    # - ac7a3bd0-c111-4464-8f45-8a857a1abc8a should have 3 tasks\n    \n    original_least_id = \"5ac96c02-49a4-4320-8de6-b663ba83126b\"\n    original_most_id = \"ac7a3bd0-c111-4464-8f45-8a857a1abc8a\"\n    \n    # Check if the swap has been completed\n    swap_completed = False\n    for assignee_id, assignee_tasks in task_counts.items():\n        if assignee_id == original_least_id and len(assignee_tasks) == 10:\n            # Person who had 3 now has 10\n            for other_id, other_tasks in task_counts.items():\n                if other_id == original_most_id and len(other_tasks) == 3:\n                    # Person who had 10 now has 3\n                    swap_completed = True\n                    break\n    \n    # Step 6: Summary\n    print(f\"\\n📊 Task Distribution:\")\n    print(f\"  • Total tasks: {len(tasks)}\")\n    print(f\"  • Assigned tasks: {len(tasks) - unassigned_count}\")\n    
print(f\"  • Unassigned tasks: {unassigned_count}\")\n    print(f\"  • People with tasks: {len(task_counts)}\")\n    print(f\"\\n  Task counts by person:\")\n    for assignee_id, assignee_tasks in sorted_assignees:\n        print(f\"    - {assignee_id[:8]}...: {len(assignee_tasks)} tasks\")\n    \n    # Step 7: Final verification\n    print(\"\\n🔍 Verification Results:\")\n    \n    # Check that the swap has created a significant difference\n    if len(most_tasks) - len(least_tasks) < 5:\n        print(f\"Warning: Difference between most and least is only {len(most_tasks) - len(least_tasks)} tasks\", file=sys.stderr)\n    \n    # Verify specific expected outcomes\n    verification_passed = True\n    \n    # Check 1: Specific tasks have been swapped\n    specific_tasks_swapped = develop_plan_assignee_id != evaluate_services_assignee_id\n    if specific_tasks_swapped:\n        print(\"  ✓ Specific tasks have been swapped\")\n    else:\n        print(\"  ✗ Specific tasks were not swapped\", file=sys.stderr)\n        verification_passed = False\n    \n    # Check 2: Task distribution shows a complete swap\n    if swap_completed:\n        print(\"  ✓ Complete task swap verified (3↔10 tasks)\")\n    else:\n        # Show actual distribution for debugging\n        person1_tasks = len(task_counts.get(original_least_id, []))\n        person2_tasks = len(task_counts.get(original_most_id, []))\n        print(f\"  ✗ Swap incomplete! 
Expected 5ac96c02→10 tasks, ac7a3bd0→3 tasks\", file=sys.stderr)\n        print(f\"    Actual: 5ac96c02→{person1_tasks} tasks, ac7a3bd0→{person2_tasks} tasks\", file=sys.stderr)\n        verification_passed = False\n    \n    # Check 3: Total task count is preserved\n    total_assigned_tasks = sum(len(tasks) for _, tasks in task_counts.items())\n    expected_total = len(tasks) - unassigned_count\n    \n    if total_assigned_tasks == expected_total:\n        print(f\"  ✓ Total task count preserved ({total_assigned_tasks} assigned)\")\n    else:\n        print(f\"  ✗ Task count mismatch: {total_assigned_tasks} vs {expected_total} expected\", file=sys.stderr)\n        verification_passed = False\n    \n    if verification_passed:\n        print(\"\\n✅ All verification checks passed!\")\n        return True\n    else:\n        print(\"\\n❌ Verification failed\", file=sys.stderr)\n        return False\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/notion/easy/toronto_guide/simple__change_color/description.md",
    "content": "Open the **Toronto Guide** page and refresh the colors of the tags in the **Food** database.\n\n## Requirements\n1. Find and open the Toronto Guide page in Notion.\n2. Locate the *Food* database on that page.\n3. Update every tag in the Food database that is currently pink so that it uses a different color of your choice (any non-pink color is fine).\n4. Do not modify callouts or tags in the other databases.\n"
  },
  {
    "path": "tasks/notion/easy/toronto_guide/simple__change_color/meta.json",
    "content": "{\n  \"task_id\": \"simple__change_color\",\n  \"task_name\": \"Simple Change Color\",\n  \"category_id\": \"toronto_guide\",\n  \"category_name\": \"Toronto Guide\",\n  \"description\": \"Navigate to the Toronto Guide page and change all pink-colored elements to different colors.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"visual formatting\",\n    \"conditional filtering\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Toronto-Guide-25281626b6d7802caa7cc394647e901c\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/conquering-toronto-a-destination-guide\"\n  }\n}\n"
  },
  {
    "path": "tasks/notion/easy/toronto_guide/simple__change_color/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\nTARGET_PAGE_TITLE = \"Toronto Guide\"\nFOOD_DATABASE_KEYWORD = \"Food\"\nTARGET_TAG_NAMES = [\n    \"Middle Eastern\",\n    \"Jamaican\",\n    \"Indian\",\n]\n\n\ndef _get_food_database_id(notion: Client, page_id: str) -> str | None:\n    \"\"\"Return the block ID of the Food database shown on the target page.\"\"\"\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n    for block in all_blocks:\n        if not block or block.get(\"type\") != \"child_database\":\n            continue\n        title = block.get(\"child_database\", {}).get(\"title\", \"\")\n        if FOOD_DATABASE_KEYWORD.lower() in title.lower():\n            return block.get(\"id\")\n    return None\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    \"\"\"Check that all target tags in the Food database are no longer pink.\"\"\"\n    # Resolve the Toronto Guide page ID.\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not found_id or object_type != \"page\":\n            print(\"Error: Toronto Guide page not found.\", file=sys.stderr)\n            return False\n        page_id = found_id\n    else:\n        page_id = notion_utils.find_page(notion, TARGET_PAGE_TITLE)\n        if not page_id:\n            print(\"Error: Toronto Guide page not found.\", file=sys.stderr)\n            return False\n\n    # Locate the Food database block.\n    food_db_id = _get_food_database_id(notion, page_id)\n    if not food_db_id:\n        print(\"Error: Food database not found on the Toronto Guide page.\", file=sys.stderr)\n        return False\n\n    # Fetch database definition and inspect tag options.\n    try:\n        db_info = notion.databases.retrieve(database_id=food_db_id)\n    except Exception as exc:\n        print(f\"Error: Unable to retrieve Food database ({exc}).\", file=sys.stderr)\n  
      return False\n\n    tags_property = db_info.get(\"properties\", {}).get(\"Tags\", {})\n    if tags_property.get(\"type\") != \"multi_select\":\n        print(\"Error: Food database does not have a multi-select Tags property.\", file=sys.stderr)\n        return False\n\n    options = tags_property.get(\"multi_select\", {}).get(\"options\", [])\n    remaining_targets = set(TARGET_TAG_NAMES)\n    failures = False\n\n    for option in options:\n        tag_name = option.get(\"name\", \"\").strip()\n        if tag_name not in remaining_targets:\n            continue\n\n        remaining_targets.discard(tag_name)\n        color = option.get(\"color\")\n        if color == \"pink\":\n            print(f\"Error: Tag '{tag_name}' in Food database is still pink.\", file=sys.stderr)\n            failures = True\n        else:\n            print(f\"✓ Tag '{tag_name}' color updated to '{color}'.\")\n\n    if remaining_targets:\n        print(\n            f\"Error: Food tags not found (expected to exist): {sorted(remaining_targets)}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    if failures:\n        return False\n\n    print(\"Success: All Food database tags are now non-pink.\")\n    return True\n\n\ndef main() -> None:\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/company_in_a_box/employee_onboarding/description.md",
    "content": "Build an integrated **Employee Onboarding** system for the existing **Company In A Box** page.\n\n**Task Requirements:**\n1. Create a new **database** titled **Employee Onboarding Checklist** with the following properties *exactly*:\n   • **Employee Name** – title  \n   • **Start Date** – date  \n   • **Department** – select (options: Product, Marketing, Sales, HR, Engineering)  \n\n   Populate this database with **3** sample new-hire pages covering three different departments. Every property in each entry must be filled.\n\n2. Under the top-level page **Company In A Box**, create a new child page titled **Onboarding Hub** containing, in order:\n   1) The **Employee Onboarding Checklist** database embedded at the top.  \n   2) A section headed **Benefits Overview** that includes linked mentions (@-mentions or link-to-page blocks) to **≥ 3** distinct benefit-policy pages from the **Company Wiki** (for example *Benefits policy*, *Vacation Policy*, *Corporate travel*).  \n   3) A section headed **30-Day Timeline** that presents a numbered list with **7** steps covering the first 30 days. **Each step must reference (via @-mention) an existing page or database**.  \n   4) A section headed **Feedback Form** that provides **≥ 3** to-do items for new hires to check off."
  },
  {
    "path": "tasks/notion/standard/company_in_a_box/employee_onboarding/meta.json",
    "content": "{\n  \"task_id\": \"employee_onboarding\",\n  \"task_name\": \"Employee Onboarding\",\n  \"category_id\": \"company_in_a_box\",\n  \"category_name\": \"Company In A Box\",\n  \"description\": \"Build an integrated Employee Onboarding system for the existing Company In A Box page with a checklist database, onboarding hub, and feedback form.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"database manipulation\",\n    \"template population\",\n    \"cross-reference linking\",\n    \"status tracking\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Company-In-A-Box-23d81626b6d7800098f3d0e64a706cd8\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/company-in-a-box\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/company_in_a_box/employee_onboarding/verify.py",
    "content": "import sys\nfrom typing import Dict, Set\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef _check_db_schema(db_props: Dict[str, Dict], required: Dict[str, str]) -> bool:\n    \"\"\"Return True if every required property exists with the correct type.\"\"\"\n    for prop_name, expected_type in required.items():\n        if prop_name not in db_props:\n            print(\n                f\"Error: Property '{prop_name}' missing from database.\", file=sys.stderr\n            )\n            return False\n        actual_type = db_props[prop_name][\"type\"]\n        if actual_type != expected_type:\n            print(\n                f\"Error: Property '{prop_name}' has type '{actual_type}', expected '{expected_type}'.\",\n                file=sys.stderr,\n            )\n            return False\n    return True\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:  # noqa: C901\n    \"\"\"Programmatically verify the onboarding system described in description.md.\"\"\"\n\n    DB_TITLE = \"Employee Onboarding Checklist\"\n    HUB_PAGE_TITLE = \"Onboarding Hub\"\n    DEPARTMENT_OPTIONS: Set[str] = {\n        \"Product\",\n        \"Marketing\",\n        \"Sales\",\n        \"HR\",\n        \"Engineering\",\n    }\n    REQUIRED_DB_PROPERTIES = {\n        \"Employee Name\": \"title\",\n        \"Start Date\": \"date\",\n        \"Department\": \"select\",\n    }\n\n    # 1. 
Locate onboarding database\n    db_id = notion_utils.find_database(notion, DB_TITLE)\n    if not db_id:\n        print(f\"Error: Database '{DB_TITLE}' not found.\", file=sys.stderr)\n        return False\n\n    try:\n        db_obj = notion.databases.retrieve(database_id=db_id)\n    except Exception as exc:\n        print(f\"Error retrieving database: {exc}\", file=sys.stderr)\n        return False\n\n    db_props = db_obj.get(\"properties\", {})\n    if not _check_db_schema(db_props, REQUIRED_DB_PROPERTIES):\n        return False\n\n    # Extra: validate select options\n    dept_options = {opt[\"name\"] for opt in db_props[\"Department\"][\"select\"][\"options\"]}\n    if not DEPARTMENT_OPTIONS.issubset(dept_options):\n        print(\n            f\"Error: Department select options must include {sorted(DEPARTMENT_OPTIONS)}. Current: {sorted(dept_options)}\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Check there are at least 3 entries in the database\n    try:\n        db_pages = notion.databases.query(database_id=db_id).get(\"results\", [])\n    except Exception as exc:\n        print(f\"Error querying database: {exc}\", file=sys.stderr)\n        return False\n    if len(db_pages) < 3:\n        print(\n            \"Error: Less than 3 onboarding entries found in the database.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 2. Locate Onboarding Hub page\n    hub_page_id = notion_utils.find_page(notion, HUB_PAGE_TITLE)\n    if not hub_page_id:\n        print(f\"Error: Page '{HUB_PAGE_TITLE}' not found.\", file=sys.stderr)\n        return False\n\n    # 3. 
Ensure the onboarding database is embedded in the hub page\n    embedded_db_id = notion_utils.find_database_in_block(notion, hub_page_id, DB_TITLE)\n    if embedded_db_id != db_id:\n        print(\n            \"Error: The Employee Onboarding Checklist database is not embedded in the Onboarding Hub page.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 4. Analyse blocks within the hub page for linked mentions, timeline, and feedback form\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, hub_page_id)\n\n    seen_link_targets: Set[str] = set()\n    numbered_list_count = 0\n    todo_count = 0\n\n    for blk in all_blocks:\n        blk_type = blk.get(\"type\")\n\n        # Direct link-to-page blocks\n        if blk_type == \"link_to_page\":\n            info = blk.get(\"link_to_page\", {})\n            target_id = info.get(\"page_id\") or info.get(\"database_id\")\n            if target_id:\n                seen_link_targets.add(target_id)\n            continue\n\n        # Rich-text mentions inside content blocks\n        if blk_type in {\n            \"paragraph\",\n            \"numbered_list_item\",\n            \"bulleted_list_item\",\n            \"to_do\",\n        }:\n            content = blk.get(blk_type, {})\n            for rt in content.get(\"rich_text\", []):\n                if rt.get(\"type\") == \"mention\":\n                    mention = rt.get(\"mention\", {})\n                    if mention.get(\"type\") in {\"page\", \"database\"}:\n                        target_id = mention.get(\"page\", {}).get(\"id\") or mention.get(\n                            \"database\", {}\n                        ).get(\"id\")\n                        if target_id:\n                            seen_link_targets.add(target_id)\n\n        # Count numbered list items\n        if blk_type == \"numbered_list_item\":\n            numbered_list_count += 1\n\n        # Count to-do items in Feedback Form\n        if blk_type == 
\"to_do\":\n            todo_count += 1\n\n    if len(seen_link_targets) < 3:\n        print(\n            \"Error: Fewer than 3 linked mentions to benefit policy pages found in the Benefits Overview section.\",\n            file=sys.stderr,\n        )\n        return False\n\n    if numbered_list_count < 7:\n        print(\n            \"Error: Numbered list contains fewer than 7 steps in the 30-Day Timeline section.\",\n            file=sys.stderr,\n        )\n        return False\n\n    if todo_count < 3:\n        print(\n            \"Error: Feedback Form section contains fewer than 3 to-do items.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\n        \"Success: Verified Employee Onboarding Checklist database, Onboarding Hub page, and all required sections.\"\n    )\n    return True\n\n\ndef main():\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/company_in_a_box/goals_restructure/description.md",
    "content": "Please restructure the **Current Goals** section on my **Company In A Box** page as follows:\n\n1. **Add a new goal heading** — create a new `heading_3` block titled:\n   \n   `🔄 Digital Transformation Initiative`\n\n2. **Convert all four goal headings to toggles** — the three existing goals\n   * ⚙️ Expand Operations to LATAM  \n   * 🛠️ Push for Enterprise  \n   * 🩶 Boost Employee Engagement  \n   * 🔄 Digital Transformation Initiative  \n\n3. **Move descriptions inside the toggles** — every paragraph or list that originally sat directly under a goal heading should become a **child block** of that heading after it is made toggleable.\n\n4. **Preserve content & order** — apart from the changes above, do **not** modify the text, formatting, or order of existing goal descriptions.\n\nThe end result should be a clean **Current Goals** section containing four toggleable goal headings, each with its corresponding details tucked inside."
  },
  {
    "path": "tasks/notion/standard/company_in_a_box/goals_restructure/meta.json",
    "content": "{\n  \"task_id\": \"goals_restructure\",\n  \"task_name\": \"Goals Restructure\",\n  \"category_id\": \"company_in_a_box\",\n  \"category_name\": \"Company In A Box\",\n  \"description\": \"Restructure the Current Goals section on the Company In A Box page by adding a new goal heading and converting all goal headings to toggles with content inside.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content organization\",\n    \"visual formatting\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Company-In-A-Box-23d81626b6d7800098f3d0e64a706cd8\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/company-in-a-box\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/company_in_a_box/goals_restructure/verify.py",
    "content": "import sys\nfrom typing import List\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n# Expected new goal heading text (including emoji)\nNEW_GOAL_HEADING = \"🔄 Digital Transformation Initiative\"\n\n# Section title to look for\nGOALS_SECTION_TITLE = \"Current Goals\"\n\n\ndef _plain(block) -> str:\n    \"\"\"Return concatenated plain text of a block.\"\"\"\n    return notion_utils.get_block_plain_text(block)\n\n\n# Some Notion rich-text strings may include non-breaking spaces (\\xa0) after emoji.\n# Normalize them to plain spaces so text matching is robust.\ndef _normalize_string(s: str) -> str:\n    return s.replace(\"\\xa0\", \" \")\n\n\ndef _is_heading(block) -> bool:\n    return block.get(\"type\") in [\"heading_1\", \"heading_2\", \"heading_3\"]\n\n\ndef _is_toggle(block) -> bool:\n    \"\"\"Determine whether a block is a toggle (standard toggle block or toggle-able heading).\"\"\"\n    btype = block.get(\"type\")\n    # In our scenario, goal blocks are headings (usually heading_3) marked as toggleable.\n    if btype in [\"heading_1\", \"heading_2\", \"heading_3\"]:\n        heading_data = block.get(btype, {})\n        return heading_data.get(\"is_toggleable\", False)\n    # Some Notion pages may contain classic toggle blocks (type == \"toggle\"). They are\n    # not expected in this task, but keeping this check allows broader compatibility.\n    return btype == \"toggle\"\n\n\ndef _get_children(notion: Client, block_id: str) -> List[dict]:\n    \"\"\"Retrieve **direct** children of a block (no pagination handling needed for small test pages).\"\"\"\n    try:\n        return notion.blocks.children.list(block_id=block_id).get(\"results\", [])\n    except Exception:\n        return []\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"Verifies that the Company in a Box page has been updated per the task requirements.\"\"\"\n    # 1. 
Locate the main page\n    page_id = None\n    if main_id:\n        found_id, obj_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if found_id and obj_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        # Try a few case variations just in case\n        for title in [\n            \"Company In A Box\",\n        ]:\n            page_id = notion_utils.find_page(notion, title)\n            if page_id:\n                break\n\n    if not page_id:\n        print(\"Error: Could not find the 'Company in a Box' page.\", file=sys.stderr)\n        return False\n\n    # 2. Recursively locate the \"Current Goals\" heading and collect its sibling blocks that\n    #     constitute the section.\n\n    def _fetch_children(bid: str) -> List[dict]:\n        try:\n            return notion.blocks.children.list(block_id=bid).get(\"results\", [])\n        except Exception:\n            return []\n\n    goals_section_blocks: List[dict] = []\n\n    # Breadth-first traversal to find the heading\n    queue = [page_id]\n    found_parent = None\n    found_index = None\n\n    while queue and found_parent is None:\n        parent_id = queue.pop(0)\n        children = _fetch_children(parent_id)\n        for idx, child in enumerate(children):\n            if (\n                _is_heading(child)\n                and GOALS_SECTION_TITLE.lower()\n                in _normalize_string(_plain(child)).lower()\n            ):\n                found_parent = parent_id\n                found_index = idx\n                break\n        # enqueue grandchildren for further search\n        for ch in children:\n            if ch.get(\"has_children\"):\n                queue.append(ch[\"id\"])\n\n    if found_parent is None:\n        print(\n            \"Error: Could not find the 'Current Goals' heading anywhere in the page.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Retrieve siblings once more to get the final list and 
slice after heading.\n    siblings = _fetch_children(found_parent)\n    if found_index is None or found_index >= len(siblings):\n        print(\n            \"Error: Internal logic issue when locating Current Goals section.\",\n            file=sys.stderr,\n        )\n        return False\n\n    goals_section_blocks = siblings[found_index + 1 :]\n\n    if not goals_section_blocks:\n        print(\"Error: 'Current Goals' section appears to be empty.\", file=sys.stderr)\n        return False\n\n    # 3. Identify toggle blocks that represent goals\n    toggle_blocks = [b for b in goals_section_blocks if _is_toggle(b)]\n\n    if len(toggle_blocks) != 4:\n        print(\n            f\"Error: Expected 4 toggle blocks for goals, found {len(toggle_blocks)}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 4. Ensure the new goal heading exists among the toggles\n    found_new_goal = False\n    for tb in toggle_blocks:\n        if (\n            _normalize_string(NEW_GOAL_HEADING).lower()\n            in _normalize_string(_plain(tb)).lower()\n        ):\n            found_new_goal = True\n            break\n    if not found_new_goal:\n        print(\n            f\"Error: Did not find a toggle block with heading '{NEW_GOAL_HEADING}'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 5. 
Validate that each toggle has at least one child paragraph/description\n    for tb in toggle_blocks:\n        if (\n            _normalize_string(NEW_GOAL_HEADING).lower()\n            in _normalize_string(_plain(tb)).lower()\n        ):\n            # Skip checking the new goal itself, as it does not have a description yet.\n            continue\n        if not tb.get(\"has_children\", False):\n            print(\n                f\"Error: Toggle '{_normalize_string(_plain(tb))}' has no child blocks (description not moved).\",\n                file=sys.stderr,\n            )\n            return False\n        children = _get_children(notion, tb[\"id\"])\n        # Ensure there is at least one content child (paragraph, list item, etc.)\n        content_types = {\n            \"paragraph\",\n            \"bulleted_list_item\",\n            \"numbered_list_item\",\n            \"to_do\",\n            \"callout\",\n            \"quote\",\n        }\n        if not any(c.get(\"type\") in content_types for c in children):\n            print(\n                f\"Error: Toggle '{_normalize_string(_plain(tb))}' seems to lack any description/content inside it.\",\n                file=sys.stderr,\n            )\n            return False\n\n    # 6. 
Confirm that there are **no** residual heading_3 blocks (non-toggle) for the goals\n    non_toggle_headings = [\n        b\n        for b in goals_section_blocks\n        if b.get(\"type\") == \"heading_3\" and not _is_toggle(b)\n    ]\n    if non_toggle_headings:\n        titles = [_normalize_string(_plain(b)) for b in non_toggle_headings]\n        print(\n            f\"Error: Found heading_3 blocks that were not converted to toggles: {titles}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\n        \"Success: Verified goal restructuring with new toggle blocks and descriptions.\"\n    )\n    return True\n\n\ndef main():\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/company_in_a_box/quarterly_review_dashboard/description.md",
    "content": "Create a quarterly business review dashboard in Notion based on the existing **Company In A Box** workspace.\n\n**Task Requirements:**\n1. Inside the **Company Wiki** page you will find a sub-page named **Company Goals**. Extract every departmental objective listed under the four departments — **Product**, **Marketing**, **Sales**, and **HR**.\n2. Under the top-level page **Company In A Box**, create a new child page titled **Q4 2024 Business Review Dashboard**.\n3. Inside that new page build the following structure (all parts must exist):\n   1. A single **callout** block near the top that summarises progress toward the three *Current Goals* shown on the main page:\n      • *LATAM expansion*  • *Enterprise push*  • *Employee engagement*  \n      (All three phrases must appear in the callout text.)\n   2. Four separate **section headings** (any heading level) – one for each department (**Product**, **Marketing**, **Sales**, **Human Resources**) – placed below the callout.  Under each heading list that department’s objectives in a progress-tracking format (e.g. to-dos, check-box list). Each objective from the **Company Goals** page must appear at least once.\n   3. Add a **database** named **Action Items** with the following properties *exactly*:\n      • **Task Name** – title\n      • **Department** – select (options: Product, Marketing, Sales, HR)\n      • **Priority** – select (options: High, Medium, Low)\n      • **Status** – status\n      Populate this database with **≥ 5** action-item pages derived from the departmental objectives, making sure every field in each entry is filled:\n       • **Task Name** & **Department** must correctly correspond to the underlying objective/department.\n       • **Priority** and **Status** can be any allowed value, but they must **not** be left empty.\n4. Keep the overall visual style consistent with the existing wiki (use headings, dividers, etc.)."
  },
  {
    "path": "tasks/notion/standard/company_in_a_box/quarterly_review_dashboard/meta.json",
    "content": "{\n  \"task_id\": \"quarterly_review_dashboard\",\n  \"task_name\": \"Quarterly Review Dashboard\",\n  \"category_id\": \"company_in_a_box\",\n  \"category_name\": \"Company In A Box\",\n  \"description\": \"Create a quarterly business review dashboard in Notion based on the existing Company In A Box workspace with department objectives and action items database.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"database manipulation\",\n    \"data aggregation\",\n    \"report generation\",\n    \"status tracking\",\n    \"template population\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Company-In-A-Box-23d81626b6d7800098f3d0e64a706cd8\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/company-in-a-box\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/company_in_a_box/quarterly_review_dashboard/verify.py",
    "content": "import sys\nfrom typing import List\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef _contains_keywords(text: str, keywords: List[str]) -> bool:\n    lowered = text.lower()\n    return all(kw.lower() in lowered for kw in keywords)\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"Programmatically verify that the dashboard page and its contents meet the\n    requirements described in description.md.\n    \"\"\"\n    DASHBOARD_TITLE = \"Q4 2024 Business Review Dashboard\"\n    PARENT_PAGE_TITLE = \"Company In A Box\"\n    CALL_OUT_KEYWORDS = [\"latam\", \"enterprise\", \"employee engagement\"]\n    DEPARTMENTS = [\"Product\", \"Marketing\", \"Sales\", \"Human Resources\"]\n    REQUIRED_DB_PROPERTIES = {\n        \"Task Name\": \"title\",\n        \"Department\": \"select\",\n        \"Priority\": \"select\",\n        \"Status\": \"status\",\n    }\n    PRIORITY_OPTIONS = {\"High\", \"Medium\", \"Low\"}\n\n    # 1. Locate the dashboard page\n    page_id = None\n    if main_id:\n        found_id, obj_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if found_id and obj_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, DASHBOARD_TITLE)\n\n    if not page_id:\n        print(f\"Error: Page '{DASHBOARD_TITLE}' not found.\", file=sys.stderr)\n        return False\n\n    # Optional: ensure it is a child of Company In A Box\n    try:\n        page_obj = notion.pages.retrieve(page_id=page_id)\n        parent_id = page_obj.get(\"parent\", {}).get(\"page_id\")\n        if parent_id:\n            parent_page = notion.pages.retrieve(page_id=parent_id)\n            parent_title_rt = (\n                parent_page.get(\"properties\", {}).get(\"title\", {}).get(\"title\", [])\n            )\n            parent_title = (\n                parent_title_rt[0].get(\"plain_text\") if parent_title_rt else None\n            
)\n            if parent_title != PARENT_PAGE_TITLE:\n                print(\n                    f\"Error: Dashboard page is not a direct child of '{PARENT_PAGE_TITLE}'.\",\n                    file=sys.stderr,\n                )\n                return False\n    except Exception:\n        pass  # parent check is best-effort only\n\n    # 2. Verify callout with keywords\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n    callout_ok = False\n    for block in all_blocks:\n        if block.get(\"type\") == \"callout\":\n            callout_text = notion_utils.get_block_plain_text(block)\n            if _contains_keywords(callout_text, CALL_OUT_KEYWORDS):\n                callout_ok = True\n                break\n    if not callout_ok:\n        print(\n            \"Error: No callout found that includes all three Current Goal keywords (LATAM, Enterprise, Employee engagement).\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 3. Verify department section headings\n    found_depts = set()\n    for block in all_blocks:\n        if block.get(\"type\") in {\"heading_1\", \"heading_2\", \"heading_3\"}:\n            heading_text = notion_utils.get_block_plain_text(block)\n            for dept in DEPARTMENTS:\n                if dept.lower() in heading_text.lower():\n                    found_depts.add(dept)\n    if set(DEPARTMENTS) != found_depts:\n        missing = set(DEPARTMENTS) - found_depts\n        print(\n            f\"Error: Missing department headings: {', '.join(missing)}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 4. 
Verify Action Items database exists and has correct schema\n    db_id = notion_utils.find_database_in_block(notion, page_id, \"Action Items\")\n    if not db_id:\n        print(\n            \"Error: Database 'Action Items' not found on the dashboard.\",\n            file=sys.stderr,\n        )\n        return False\n\n    try:\n        db = notion.databases.retrieve(database_id=db_id)\n    except Exception as exc:\n        print(f\"Error: Unable to retrieve database: {exc}\", file=sys.stderr)\n        return False\n\n    db_props = db.get(\"properties\", {})\n    for prop_name, expected_type in REQUIRED_DB_PROPERTIES.items():\n        if prop_name not in db_props:\n            print(\n                f\"Error: Property '{prop_name}' missing from database.\", file=sys.stderr\n            )\n            return False\n        actual_type = db_props[prop_name][\"type\"]\n        if isinstance(expected_type, list):\n            if actual_type not in expected_type:\n                print(\n                    f\"Error: Property '{prop_name}' has type '{actual_type}', expected one of {expected_type}.\",\n                    file=sys.stderr,\n                )\n                return False\n        else:\n            if actual_type != expected_type:\n                print(\n                    f\"Error: Property '{prop_name}' has type '{actual_type}', expected '{expected_type}'.\",\n                    file=sys.stderr,\n                )\n                return False\n        # Extra check for Priority options\n        if prop_name == \"Priority\":\n            options = {opt[\"name\"] for opt in db_props[prop_name][\"select\"][\"options\"]}\n            if not PRIORITY_OPTIONS.issubset(options):\n                print(\n                    f\"Error: Priority property options must include High/Medium/Low. Current options: {options}\",\n                    file=sys.stderr,\n                )\n                return False\n\n    # 5. 
Verify at least 5 action items exist\n    try:\n        pages = notion.databases.query(database_id=db_id).get(\"results\", [])\n    except Exception as exc:\n        print(f\"Error querying database pages: {exc}\", file=sys.stderr)\n        return False\n\n    if len(pages) < 5:\n        print(\"Error: Database contains fewer than 5 action items.\", file=sys.stderr)\n        return False\n\n    # Optional: Verify Department values valid\n    for page in pages:\n        props = page.get(\"properties\", {})\n\n        # Task Name must be non-empty\n        title_rt = props.get(\"Task Name\", {}).get(\"title\", [])\n        task_name = title_rt[0].get(\"plain_text\") if title_rt else \"\"\n        if not task_name.strip():\n            print(\n                f\"Error: Action item '{page.get('id')}' is missing a Task Name.\",\n                file=sys.stderr,\n            )\n            return False\n\n        # Department must be valid\n        dept_select = props.get(\"Department\", {}).get(\"select\", {}).get(\"name\")\n        if not dept_select or dept_select not in DEPARTMENTS:\n            print(\n                f\"Error: Action item '{page.get('id')}' has invalid or missing Department value.\",\n                file=sys.stderr,\n            )\n            return False\n\n        # Priority and Status must be set (any value)\n        priority_val = props.get(\"Priority\", {}).get(\"select\", {}).get(\"name\")\n        status_val = props.get(\"Status\", {}).get(\"status\", {}).get(\"name\")\n        if not priority_val or not status_val:\n            print(\n                f\"Error: Action item '{page.get('id')}' must have both Priority and Status set.\",\n                file=sys.stderr,\n            )\n            return False\n\n    print(\n        \"Success: Verified Business Review Dashboard, departmental sections, callout, and Action Items database with ≥5 entries.\"\n    )\n    return True\n\n\ndef main():\n    notion = 
notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/computer_science_student_dashboard/code_snippets_go/description.md",
    "content": "Find the page named \"Computer Science Student Dashboard\" and add a new Go column to the \"Code Snippets\" section.\n\n**Task Requirements:**\n1. In the \"Code Snippets\" section, create (or locate) a column dedicated to the Go programming language. **This column must appear between the existing Python and JavaScript columns** within the same column list.\n2. At the top of the Go column, add a bold paragraph that contains exactly the text `Go`.\n3. Under the header paragraph, add three code-block blocks configured with `language` set to **go**:\n   a. **Basic Go program** – Caption must be `Basic Go program` and the code content must be exactly:\n   ```go\n   package main\n\n   import \"fmt\"\n\n   func main() {\n       fmt.Println(\"Hello, World!\")\n   }\n   ```\n   b. **For loop in Go** – Caption must be `For loop in Go` and the code content must be exactly:\n   ```go\n   for i := 0; i < 5; i++ {\n       fmt.Println(i)\n   }\n   ```\n   c. **Function definition in Go** – Caption must be `Function definition in Go` and the code content must be exactly:\n   ```go\n   func add(a, b int) int {\n       return a + b\n   }\n   ```"
  },
  {
    "path": "tasks/notion/standard/computer_science_student_dashboard/code_snippets_go/meta.json",
    "content": "{\n  \"task_id\": \"code_snippets_go\",\n  \"task_name\": \"Code Snippets Go\",\n  \"category_id\": \"computer_science_student_dashboard\",\n  \"category_name\": \"Computer Science Student Dashboard\",\n  \"description\": \"Add a new Go column to the Code Snippets section between Python and JavaScript columns.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content organization\",\n    \"visual formatting\",\n    \"template population\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Computer-Science-Student-Dashboard-23e81626b6d78083b787d3c832b02ef4\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/computer-science-student-dashboard\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/computer_science_student_dashboard/code_snippets_go/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n# Expected code blocks (language=go)\nEXPECTED_CODE_BLOCKS = [\n    {\n        \"caption\": \"Basic Go program\",\n        \"code\": (\n            'package main\\n\\nimport \"fmt\"\\n\\nfunc main() {\\n    fmt.Println(\"Hello, World!\")\\n}'\n        ),\n    },\n    {\n        \"caption\": \"For loop in Go\",\n        \"code\": (\"for i := 0; i < 5; i++ {\\n    fmt.Println(i)\\n}\"),\n    },\n    {\n        \"caption\": \"Function definition in Go\",\n        \"code\": (\"func add(a, b int) int {\\n    return a + b\\n}\"),\n    },\n]\n\nHEADER_TEXT = \"Go\"\n\n\ndef _normalize(text: str) -> str:\n    \"\"\"Remove trailing spaces on each line and strip leading/trailing blank lines.\"\"\"\n    return \"\\n\".join(line.rstrip() for line in text.strip().splitlines())\n\n\ndef _find_page(notion: Client, main_id: str | None) -> str | None:\n    \"\"\"Return a page_id to verify against or None if not found.\"\"\"\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Computer Science Student Dashboard\")\n    return page_id\n\n\ndef _has_bold_header_text(block, text: str) -> bool:\n    \"\"\"Generic bold header/paragraph check for a given text.\"\"\"\n    block_type = block.get(\"type\")\n    if block_type not in {\"paragraph\", \"heading_1\", \"heading_2\", \"heading_3\"}:\n        return False\n    rich_text_list = block.get(block_type, {}).get(\"rich_text\", [])\n    if not rich_text_list:\n        return False\n    plain = \"\".join(rt.get(\"plain_text\", \"\") for rt in rich_text_list).strip()\n    if plain != text:\n        return False\n    return any(rt.get(\"annotations\", {}).get(\"bold\", False) for rt in 
rich_text_list)\n\n\ndef _go_column_order_correct(notion: Client, page_id: str) -> bool:\n    \"\"\"Return True if there exists a column list where Python → Go → JavaScript order holds.\"\"\"\n    # Gather all blocks once (flat list) to locate column_list blocks\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n    column_list_ids = [\n        blk[\"id\"] for blk in all_blocks if blk.get(\"type\") == \"column_list\"\n    ]\n\n    for cl_id in column_list_ids:\n        # Retrieve columns in explicit order\n        columns = notion.blocks.children.list(block_id=cl_id).get(\"results\", [])\n        header_to_idx: dict[str, int] = {}\n        for idx, col in enumerate(columns):\n            # Recursively inspect blocks within this column\n            col_blocks = notion_utils.get_all_blocks_recursively(notion, col[\"id\"])\n            for blk in col_blocks:\n                if _has_bold_header_text(blk, \"Python\"):\n                    header_to_idx.setdefault(\"Python\", idx)\n                elif _has_bold_header_text(blk, \"Go\"):\n                    header_to_idx.setdefault(\"Go\", idx)\n                elif _has_bold_header_text(blk, \"JavaScript\"):\n                    header_to_idx.setdefault(\"JavaScript\", idx)\n            # Short-circuit if all three found within current traversal\n            if len(header_to_idx) == 3:\n                break\n\n        if (\n            \"Python\" in header_to_idx\n            and \"Go\" in header_to_idx\n            and \"JavaScript\" in header_to_idx\n            and header_to_idx[\"Python\"]\n            < header_to_idx[\"Go\"]\n            < header_to_idx[\"JavaScript\"]\n        ):\n            return True\n    return False\n\n\ndef _collect_code_blocks(blocks):\n    \"\"\"Return list of (code_content, caption) tuples for code blocks with language 'go'.\"\"\"\n    collected = []\n    for block in blocks:\n        if block.get(\"type\") != \"code\":\n            continue\n        code_data 
= block.get(\"code\", {})\n        if code_data.get(\"language\") != \"go\":\n            continue\n        code_plain = \"\".join(\n            rt.get(\"plain_text\", \"\") for rt in code_data.get(\"rich_text\", [])\n        )\n        caption_plain = \"\".join(\n            rt.get(\"plain_text\", \"\") for rt in code_data.get(\"caption\", [])\n        )\n        collected.append((code_plain, caption_plain))\n    return collected\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    page_id = _find_page(notion, main_id)\n    if not page_id:\n        print(\"Error: Target page not found.\", file=sys.stderr)\n        return False\n\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    # Verify header\n    header_ok = any(_has_bold_header_text(b, HEADER_TEXT) for b in all_blocks)\n    if not header_ok:\n        print(\"Failure: Bold header 'Go' not found.\", file=sys.stderr)\n        return False\n\n    # Verify code blocks\n    code_blocks_found = _collect_code_blocks(all_blocks)\n\n    remaining = EXPECTED_CODE_BLOCKS.copy()\n    for code, caption in code_blocks_found:\n        norm_code = _normalize(code)\n        for expected in remaining:\n            if (\n                _normalize(expected[\"code\"]) == norm_code\n                and expected[\"caption\"] == caption\n            ):\n                remaining.remove(expected)\n                break\n    if remaining:\n        missing = \", \".join(exp[\"caption\"] for exp in remaining)\n        print(\n            f\"Failure: Missing or incorrect Go code blocks: {missing}\", file=sys.stderr\n        )\n        return False\n\n    # Verify column order Python → Go → JavaScript\n    if not _go_column_order_correct(notion, page_id):\n        print(\n            \"Failure: Go column is not positioned between Python and JavaScript.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\n        \"Success: Verified Go column with required code 
blocks and correct positioning.\"\n    )\n    return True\n\n\ndef main():\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    sys.exit(0 if verify(notion, main_id) else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/computer_science_student_dashboard/courses_internships_relation/description.md",
    "content": "Your goal is to connect the `Courses` and `Internship search` databases inside the **Computer Science Student Dashboard** page and populate them with sample data that can be verified automatically.\n\n**Task Requirements:**\n\n1. In the **Courses** database, add a new **relation** property named **Related Internships** that points to the **Internship search** database.\n2. Ensure the relation is **bidirectional** by adding a relation property in the **Internship search** database named **Relevant Courses** that points back to the **Courses** database.\n3. Create **exactly three** new pages in the **Courses** database with realistic computer-science course data.  Each course page must include **all** of the following properties and values:\n   • **Code** (text) – unique codes `CS301`, `CS302`, and `CS303` respectively  \n   • **Name** (text) – pick appropriate names (e.g., *Computer Networks*, *Operating Systems*, *Machine Learning*)  \n   • **Credit** (number) – any positive integer  \n   • **Status** (status) – choose from `Planned`, `In Progress`, or `Completed`  \n   • **Related Internships** (relation) – link to at least one internship created in step4.\n4. Create **exactly two** new pages in the **Internship search** database with complete application information.  Each internship page must include **all** of the following properties and values:\n   • **Company** (text) – `OpenAI` and `Google` respectively  \n   • **Role** (text) – `Machine Learning Intern` and `Software Engineering Intern`  \n   • **Status** (status) – set to `Interested`  \n   • **Relevant Courses** (relation) – link to one or more of the courses created in step3.\n5. 
Every course created in step3 must be linked to at least one internship from step4 **and** every internship must be linked back to at least one course.\n\nThe task is considered complete when the relation properties exist, the specified course and internship pages are present with the exact values above, and the relations correctly connect the two databases in both directions."
  },
  {
    "path": "tasks/notion/standard/computer_science_student_dashboard/courses_internships_relation/meta.json",
    "content": "{\n  \"task_id\": \"courses_internships_relation\",\n  \"task_name\": \"Courses Internships Relation\",\n  \"category_id\": \"computer_science_student_dashboard\",\n  \"category_name\": \"Computer Science Student Dashboard\",\n  \"description\": \"Connect the Courses and Internship search databases with bidirectional relations and populate with sample data.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"database manipulation\",\n    \"cross-reference linking\",\n    \"template population\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Computer-Science-Student-Dashboard-23e81626b6d78083b787d3c832b02ef4\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/computer-science-student-dashboard\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/computer_science_student_dashboard/courses_internships_relation/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n# ---------------------------------------------------------------------------\n# Constants -----------------------------------------------------------------\n# ---------------------------------------------------------------------------\nMAIN_PAGE_TITLE = \"Computer Science Student Dashboard\"\nCOURSES_DB_TITLE = \"Courses\"\nINTERNSHIP_DB_TITLE = \"Internship search\"\n\nCOURSE_CODES = {\"CS301\", \"CS302\", \"CS303\"}\nCOURSE_RELATION_NAME = \"Related Internships\"\nINTERNSHIP_RELATION_NAME = \"Relevant Courses\"\n\nINTERNSHIP_COMPANIES = {\"OpenAI\", \"Google\"}\n\n# ---------------------------------------------------------------------------\n# Helper functions -----------------------------------------------------------\n# ---------------------------------------------------------------------------\n\n\ndef _locate_main_page(notion: Client, main_id: str | None) -> str | None:\n    \"\"\"Return the page_id of the dashboard page or None if not found.\"\"\"\n    page_id = None\n    if main_id:\n        found_id, obj_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if found_id and obj_type == \"page\":\n            page_id = found_id\n    if not page_id:\n        page_id = notion_utils.find_page(notion, MAIN_PAGE_TITLE)\n    return page_id\n\n\ndef _locate_database(notion: Client, parent_page_id: str, db_title: str) -> str | None:\n    \"\"\"Recursively search for a child database by title and return its id.\"\"\"\n    return notion_utils.find_database_in_block(notion, parent_page_id, db_title)\n\n\n# ---------------------------------------------------------------------------\n# Verification logic ---------------------------------------------------------\n# ---------------------------------------------------------------------------\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    \"\"\"Verify completion of the Courses 
↔ Internship relation task.\"\"\"\n    # ------------------------------------------------------------------\n    # Locate main page and databases -----------------------------------\n    # ------------------------------------------------------------------\n    page_id = _locate_main_page(notion, main_id)\n    if not page_id:\n        print(f\"Error: Page '{MAIN_PAGE_TITLE}' not found.\", file=sys.stderr)\n        return False\n\n    courses_db_id = _locate_database(notion, page_id, COURSES_DB_TITLE)\n    internships_db_id = _locate_database(notion, page_id, INTERNSHIP_DB_TITLE)\n\n    if not courses_db_id:\n        print(f\"Error: Database '{COURSES_DB_TITLE}' not found.\", file=sys.stderr)\n        return False\n    if not internships_db_id:\n        print(f\"Error: Database '{INTERNSHIP_DB_TITLE}' not found.\", file=sys.stderr)\n        return False\n\n    # ------------------------------------------------------------------\n    # Validate relation properties -------------------------------------\n    # ------------------------------------------------------------------\n    courses_db_obj = notion.databases.retrieve(database_id=courses_db_id)\n    internships_db_obj = notion.databases.retrieve(database_id=internships_db_id)\n\n    courses_props = courses_db_obj.get(\"properties\", {})\n    internships_props = internships_db_obj.get(\"properties\", {})\n\n    # Courses → Internships relation\n    if COURSE_RELATION_NAME not in courses_props:\n        print(\n            f\"Error: Property '{COURSE_RELATION_NAME}' missing in Courses database.\",\n            file=sys.stderr,\n        )\n        return False\n    course_rel_prop = courses_props[COURSE_RELATION_NAME]\n    if (\n        course_rel_prop.get(\"type\") != \"relation\"\n        or course_rel_prop[\"relation\"].get(\"database_id\") != internships_db_id\n    ):\n        print(\n            \"Error: Courses relation property is not configured correctly.\",\n            file=sys.stderr,\n        )\n        
return False\n\n    # Internships → Courses relation\n    if INTERNSHIP_RELATION_NAME not in internships_props:\n        print(\n            f\"Error: Property '{INTERNSHIP_RELATION_NAME}' missing in Internship search database.\",\n            file=sys.stderr,\n        )\n        return False\n    intern_rel_prop = internships_props[INTERNSHIP_RELATION_NAME]\n    if (\n        intern_rel_prop.get(\"type\") != \"relation\"\n        or intern_rel_prop[\"relation\"].get(\"database_id\") != courses_db_id\n    ):\n        print(\n            \"Error: Internship relation property is not configured correctly.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # ------------------------------------------------------------------\n    # Validate course pages --------------------------------------------\n    # ------------------------------------------------------------------\n    course_pages = notion.databases.query(database_id=courses_db_id).get(\"results\", [])\n\n    valid_course_count = 0\n    course_page_id_set = set()\n    internship_ids_seen: set[str] = set()\n\n    for page in course_pages:\n        props = page.get(\"properties\", {})\n        code_rts = props.get(\"Code\", {}).get(\"rich_text\", [])\n        code_val = \"\".join(rt.get(\"plain_text\", \"\") for rt in code_rts).strip()\n        if code_val not in COURSE_CODES:\n            continue  # not one of the new course entries we care about\n\n        # Check required scalar props\n        title_rts = props.get(\"Name\", {}).get(\"title\", [])\n        name_ok = bool(\"\".join(rt.get(\"plain_text\", \"\") for rt in title_rts).strip())\n        credits_ok = props.get(\"Credit\", {}).get(\"number\") is not None\n        status_name = props.get(\"Status\", {}).get(\"status\", {}).get(\"name\", \"\")\n        status_allowed = {\"planned\", \"in progress\", \"completed\"}\n        status_ok = status_name.lower() in status_allowed\n\n        # Relation must point to at least one internship\n 
       relations = props.get(COURSE_RELATION_NAME, {}).get(\"relation\", [])\n        if not (name_ok and credits_ok and status_ok and relations):\n            print(\n                f\"Error: Course '{code_val}' is missing required property values or relations, or wrong values.\",\n                file=sys.stderr,\n            )\n            return False\n\n        # Collect IDs for further mutual check\n        course_page_id_set.add(page[\"id\"])\n        internship_ids_seen.update(rel[\"id\"] for rel in relations)\n        valid_course_count += 1\n\n    if valid_course_count != 3:\n        print(\n            f\"Error: Expected exactly 3 new course pages with codes {COURSE_CODES}, found {valid_course_count}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # ------------------------------------------------------------------\n    # Validate internship pages ----------------------------------------\n    # ------------------------------------------------------------------\n    internship_pages = notion.databases.query(database_id=internships_db_id).get(\n        \"results\", []\n    )\n\n    valid_intern_count = 0\n    internship_page_ids = set()\n    course_ids_seen_from_intern: set[str] = set()\n\n    for page in internship_pages:\n        props = page.get(\"properties\", {})\n        company_rts = props.get(\"Company\", {}).get(\"rich_text\", [])\n        company = \"\".join(rt.get(\"plain_text\", \"\") for rt in company_rts).strip()\n        if company not in INTERNSHIP_COMPANIES:\n            continue  # not one of the two new internships\n\n        role_rts = props.get(\"Role\", {}).get(\"title\", [])\n        role_ok = bool(\"\".join(rt.get(\"plain_text\", \"\") for rt in role_rts).strip())\n        status_name = props.get(\"Status\", {}).get(\"status\", {}).get(\"name\", \"\")\n        status_ok = status_name.lower() == \"interested\"\n        relations = props.get(INTERNSHIP_RELATION_NAME, {}).get(\"relation\", [])\n\n        if 
not (role_ok and status_ok and relations):\n            print(\n                f\"Error: Internship at '{company}' is missing required property values or relations, or wrong values.\",\n                file=sys.stderr,\n            )\n            return False\n\n        internship_page_ids.add(page[\"id\"])\n        course_ids_seen_from_intern.update(rel[\"id\"] for rel in relations)\n        valid_intern_count += 1\n\n    if valid_intern_count != 2:\n        print(\n            f\"Error: Expected exactly 2 new internship pages for companies {INTERNSHIP_COMPANIES}, found {valid_intern_count}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # ------------------------------------------------------------------\n    # Mutual relation consistency --------------------------------------\n    # ------------------------------------------------------------------\n    # Each relation from courses should point to one of the two internships identified\n    if not internship_ids_seen.issubset(internship_page_ids):\n        print(\n            \"Error: Some course relations point to pages outside the expected internships.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Each relation from internships should point back to the three course pages identified\n    if not course_ids_seen_from_intern.issubset(course_page_id_set):\n        print(\n            \"Error: Some internship relations point to pages outside the expected courses.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\n        \"Success: Verified bidirectional relations, course and internship entries as required.\"\n    )\n    return True\n\n\n# ---------------------------------------------------------------------------\n# CLI entry-point -----------------------------------------------------------\n# ---------------------------------------------------------------------------\n\n\ndef main() -> None:\n    notion = 
notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    sys.exit(0 if verify(notion, main_id) else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/computer_science_student_dashboard/study_session_tracker/description.md",
    "content": "Your goal is to create a new study-session entry in the **Computer Science Student Dashboard** page.\n\n1. Locate the ☑️ Habit tracker section of the page.\n2. **Insert a new date section** immediately **after the existing `2022-09-02` to-do items but *before* the divider block** that follows them. Make sure the new date has proper formatting with a date mention and bold styling like the existing dates, and all to-do items should be unchecked initially. The new section should be inserted right after the 2022-09-02 to-do items but before the divider.\n3. Directly **beneath** this new date mention, add **exactly four unchecked to-do blocks** with the following plain text (including the leading emoji on each line):\n   • 🧠 Review algorithms for technical interview\n   • 📚 Study database systems chapter 7\n   • ⚡ Practice system design problems\n   • 🎯 Complete data structures assignment"
  },
  {
    "path": "tasks/notion/standard/computer_science_student_dashboard/study_session_tracker/meta.json",
    "content": "{\n  \"task_id\": \"study_session_tracker\",\n  \"task_name\": \"Study Session Tracker\",\n  \"category_id\": \"computer_science_student_dashboard\",\n  \"category_name\": \"Computer Science Student Dashboard\",\n  \"description\": \"Create a new study-session entry in the Habit tracker section with four unchecked to-do items.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content organization\",\n    \"visual formatting\",\n    \"status tracking\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Computer-Science-Student-Dashboard-23e81626b6d78083b787d3c832b02ef4\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/computer-science-student-dashboard\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/computer_science_student_dashboard/study_session_tracker/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\nfrom typing import Dict\n\n\ndef _normalize_string(s: str) -> str:\n    \"\"\"Replace non-breaking space with regular space for safe comparison.\"\"\"\n    return s.replace(\"\\xa0\", \" \")\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    \"\"\"Verify that the new study-session entry for 2025-01-29 was added correctly.\n\n    The script checks that:\n    1. A bold date-mention with start=2025-01-29 exists.\n    2. The mention sits after the 2022-09-02 section but before the divider that originally\n       followed that section.\n    3. Exactly four specified to-do items follow the new date mention and they are all unchecked.\n    \"\"\"\n\n    # ---------------------------------------------------------------------\n    # Locate the main page -------------------------------------------------\n    # ---------------------------------------------------------------------\n    page_id: str | None = None\n\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Computer Science Student Dashboard\")\n\n    if not page_id:\n        print(\n            \"Error: Page 'Computer Science Student Dashboard' not found.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # ---------------------------------------------------------------------\n    # Fetch all blocks under the page (flattened order) --------------------\n    # ---------------------------------------------------------------------\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    # ---------------------------------------------------------------------\n    # Locate reference blocks 
---------------------------------------------\n    # ---------------------------------------------------------------------\n    TARGET_DATE = \"2025-01-29\"\n    PREVIOUS_DATE = \"2022-09-02\"\n\n    index_previous_date: int | None = None\n    index_new_date: int | None = None\n    index_divider_after_previous: int | None = None\n\n    for idx, block in enumerate(all_blocks):\n        # Divider detection (we care only about the first divider that appears after\n        # the 2022-09-02 block)\n        if block.get(\"type\") == \"divider\":\n            if index_previous_date is not None and index_divider_after_previous is None:\n                index_divider_after_previous = idx\n\n        # We only need to inspect paragraph blocks that contain a date mention\n        if block.get(\"type\") != \"paragraph\":\n            continue\n\n        rich_text_list = block[\"paragraph\"].get(\"rich_text\", [])\n        for rt in rich_text_list:\n            if (\n                rt.get(\"type\") != \"mention\"\n                or rt.get(\"mention\", {}).get(\"type\") != \"date\"\n            ):\n                continue\n\n            date_start = rt[\"mention\"][\"date\"].get(\"start\")\n\n            if date_start == PREVIOUS_DATE and index_previous_date is None:\n                index_previous_date = idx\n\n            if date_start == TARGET_DATE and index_new_date is None:\n                index_new_date = idx\n                # (1) Verify bold annotation\n                if not rt.get(\"annotations\", {}).get(\"bold\", False):\n                    print(\n                        \"Error: The 2025-01-29 date mention is not bold.\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n    # Ensure all reference indices were found\n    if index_previous_date is None:\n        print(\"Error: Could not locate the 2022-09-02 date section.\", file=sys.stderr)\n        return False\n    if index_divider_after_previous is None:\n 
       print(\n            \"Error: Could not locate the divider that follows the 2022-09-02 section.\",\n            file=sys.stderr,\n        )\n        return False\n    if index_new_date is None:\n        print(\n            \"Error: Could not locate the new 2025-01-29 date mention.\", file=sys.stderr\n        )\n        return False\n\n    # (2) Verify ordering\n    if not (index_previous_date < index_new_date < index_divider_after_previous):\n        print(\n            \"Error: The 2025-01-29 section is positioned incorrectly.\", file=sys.stderr\n        )\n        return False\n\n    # ---------------------------------------------------------------------\n    # Verify to-do items under the new date section ------------------------\n    # ---------------------------------------------------------------------\n    expected_texts = [\n        \"🧠 Review algorithms for technical interview\",\n        \"📚 Study database systems chapter 7\",\n        \"⚡ Practice system design problems\",\n        \"🎯 Complete data structures assignment\",\n    ]\n    expected_todos: Dict[str, bool] = {\n        _normalize_string(t): False for t in expected_texts\n    }\n\n    # Look through the blocks that lie between the new date mention and the divider\n    for block in all_blocks[index_new_date + 1 : index_divider_after_previous]:\n        if block.get(\"type\") != \"to_do\":\n            # Any non to-do block inside this range indicates mis-placement.\n            # We simply ignore it – correctness is determined by presence of required to-dos.\n            continue\n\n        plain_text = notion_utils.get_block_plain_text(block).strip()\n        plain_text_norm = _normalize_string(plain_text)\n        if plain_text_norm in expected_todos:\n            # (3a) Verify the to-do is unchecked\n            if block[\"to_do\"].get(\"checked\", False):\n                print(f\"Error: To-do '{plain_text}' is checked.\", file=sys.stderr)\n                return False\n            
expected_todos[plain_text_norm] = True\n\n    missing_items = [text for text, found in expected_todos.items() if not found]\n    if missing_items:\n        print(f\"Error: Missing to-do items: {missing_items}\", file=sys.stderr)\n        return False\n\n    # ---------------------------------------------------------------------\n    # Success --------------------------------------------------------------\n    # ---------------------------------------------------------------------\n    print(\"Success: Study session for 2025-01-29 added correctly.\")\n    return True\n\n\n# -------------------------------------------------------------------------\n# Command-line entry-point -------------------------------------------------\n# -------------------------------------------------------------------------\n\n\ndef main() -> None:\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/it_trouble_shooting_hub/asset_retirement_migration/description.md",
    "content": "Please restructure the **IT Inventory** database as described below. Your automation will be checked by an automated script, so follow every detail exactly.\n\n---\nTask Steps\n1. Inside the **IT Trouble Shooting Hub** page, locate the database named **IT Inventory**.\n2. Query this database and collect every page whose **Status** property is **Expired** or **To be returned**.\n3. Create a **new full-page database** directly under the same IT Trouble Shooting Hub page called **IT Asset Retirement Queue**.\n4. Configure this new database so that it contains **exactly** the following properties (spellings and types must match):\n   • Serial – title  \n   • Tags – multi_select  \n   • Status – select  \n   • Vendor – select  \n   • Expiration date – date  \n   • Retirement Reason – select with option set { **Expired License**, **Hardware Obsolete**, **Security Risk**, **User Offboarding** }\n5. For every inventory item gathered in step2:\n   a. Create a corresponding page in **IT Asset Retirement Queue** and copy over the values of the Serial, Tags, Status, Vendor and Expiration date properties.  \n   b. Set **Retirement Reason** to one of the four options above (choose the most appropriate).  \n   c. Archive the original inventory page **after** the new page has been created.\n6. After all items are migrated:\n   a. Update the **description** of the **IT Asset Retirement Queue** database so it is **exactly** `AUTO-GENERATED MIGRATION COMPLETED` (no additional text).\n   b. Create a new page under **IT Trouble Shooting Hub** titled **Retirement Migration Log**. Inside this page, add a **callout block** whose text follows the exact pattern:\n\n      `Successfully migrated <N> assets to the retirement queue on 2025-03-24.`\n\n      • `<N>` is the total number of items moved."
  },
  {
    "path": "tasks/notion/standard/it_trouble_shooting_hub/asset_retirement_migration/meta.json",
    "content": "{\n  \"task_id\": \"asset_retirement_migration\",\n  \"task_name\": \"Asset Retirement Migration\",\n  \"category_id\": \"it_trouble_shooting_hub\",\n  \"category_name\": \"IT Trouble Shooting Hub\",\n  \"description\": \"Restructure the IT Inventory database by migrating expired assets to a new IT Asset Retirement Queue database.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"database manipulation\",\n    \"automated migration\",\n    \"conditional filtering\",\n    \"data aggregation\",\n    \"report generation\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/It-Trouble-Shooting-Hub-23e81626b6d78020aba7eb65ae1cc2d5\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/it-trouble-shooting-hub\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/it_trouble_shooting_hub/asset_retirement_migration/verify.py",
    "content": "import sys\nfrom typing import Dict, Set\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef _get_database(root_page_id: str, notion: Client, name: str) -> str | None:\n    \"\"\"Helper that finds a child database by title inside a page.\"\"\"\n    return notion_utils.find_database_in_block(notion, root_page_id, name)\n\n\ndef _check_property(props: Dict, name: str, expected_type: str) -> bool:\n    if name not in props:\n        print(f\"Error: Property '{name}' missing in database.\", file=sys.stderr)\n        return False\n    if props[name][\"type\"] != expected_type:\n        print(\n            f\"Error: Property '{name}' expected type '{expected_type}', found '{props[name]['type']}'.\",\n            file=sys.stderr,\n        )\n        return False\n    return True\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    \"\"\"Verifies that the IT Asset Retirement Queue was created and populated correctly.\"\"\"\n\n    # -------------------------------------------------------------------------\n    # Resolve the root IT Trouble Shooting Hub page\n    # -------------------------------------------------------------------------\n    root_page_id = None\n    if main_id:\n        found_id, obj_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if found_id and obj_type == \"page\":\n            root_page_id = found_id\n\n    if not root_page_id:\n        root_page_id = notion_utils.find_page(notion, \"IT Trouble Shooting Hub\")\n    if not root_page_id:\n        print(\n            \"Error: Could not locate the 'IT Trouble Shooting Hub' page.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # -------------------------------------------------------------------------\n    # Locate the original and new databases\n    # -------------------------------------------------------------------------\n    inventory_db_id = _get_database(root_page_id, notion, \"IT 
Inventory\")\n    if not inventory_db_id:\n        print(\"Error: 'IT Inventory' database not found.\", file=sys.stderr)\n        return False\n\n    retirement_db_id = _get_database(root_page_id, notion, \"IT Asset Retirement Queue\")\n    if not retirement_db_id:\n        print(\"Error: 'IT Asset Retirement Queue' database not found.\", file=sys.stderr)\n        return False\n\n    # -------------------------------------------------------------------------\n    # Validate schema of the retirement queue database\n    # -------------------------------------------------------------------------\n    retirement_db = notion.databases.retrieve(database_id=retirement_db_id)\n    r_props = retirement_db[\"properties\"]\n\n    required_schema = {\n        \"Serial\": \"title\",\n        \"Tags\": \"multi_select\",\n        \"Status\": \"select\",\n        \"Vendor\": \"select\",\n        \"Expiration date\": \"date\",\n        \"Retirement Reason\": \"select\",\n    }\n\n    for pname, ptype in required_schema.items():\n        if not _check_property(r_props, pname, ptype):\n            return False\n\n    # Check Retirement Reason options\n    expected_reason_options: Set[str] = {\n        \"Expired License\",\n        \"Hardware Obsolete\",\n        \"Security Risk\",\n        \"User Offboarding\",\n    }\n    actual_options = {\n        opt[\"name\"] for opt in r_props[\"Retirement Reason\"][\"select\"][\"options\"]\n    }\n    if actual_options != expected_reason_options:\n        print(\n            \"Error: 'Retirement Reason' select options mismatch.\\n\"\n            f\"Expected: {sorted(expected_reason_options)}\\n\"\n            f\"Found: {sorted(actual_options)}\",\n            file=sys.stderr,\n        )\n        return False\n\n    # ---------------------------------------------------------------\n    # Validate database description starts with required phrase\n    # ---------------------------------------------------------------\n    desc_rich = 
retirement_db.get(\"description\", [])\n    desc_text = \"\".join([t.get(\"plain_text\", \"\") for t in desc_rich])\n    required_desc = \"AUTO-GENERATED MIGRATION COMPLETED\"\n    if desc_text.strip() != required_desc:\n        print(\n            f\"Error: Retirement database description must be exactly '{required_desc}'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # -------------------------------------------------------------------------\n    # Validate that inventory items are moved & archived\n    # -------------------------------------------------------------------------\n    expired_filter = {\n        \"property\": \"Status\",\n        \"select\": {\"equals\": \"Expired\"},\n    }\n    to_return_filter = {\n        \"property\": \"Status\",\n        \"select\": {\"equals\": \"To be returned\"},\n    }\n    compound_filter = {\"or\": [expired_filter, to_return_filter]}\n\n    # Query for any *active* items that still match these statuses\n    remaining_items = notion.databases.query(\n        database_id=inventory_db_id,\n        filter=compound_filter,\n        archived=False,\n    ).get(\"results\", [])\n\n    if remaining_items:\n        print(\n            f\"Error: {len(remaining_items)} 'Expired' / 'To be returned' items still present in IT Inventory.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # There should be at least one entry in the retirement queue\n    retirement_pages = notion.databases.query(database_id=retirement_db_id).get(\n        \"results\", []\n    )\n    expected_serials = {\"65XYQ/GB\", \"36x10PIQ\"}\n    if len(retirement_pages) != len(expected_serials):\n        print(\n            f\"Error: Expected {len(expected_serials)} retirement pages, found {len(retirement_pages)}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Each retirement page must have a Retirement Reason\n    serials_seen = set()\n    for page in retirement_pages:\n        props = 
page[\"properties\"]\n        reason = props.get(\"Retirement Reason\", {}).get(\"select\", {})\n        if not reason or reason.get(\"name\") not in expected_reason_options:\n            print(\n                f\"Error: Page {page['id']} missing valid 'Retirement Reason'.\",\n                file=sys.stderr,\n            )\n            return False\n\n        # Collect Serial title\n        title_rich = props.get(\"Serial\", {}).get(\"title\", [])\n        serial_val = \"\".join([t.get(\"plain_text\", \"\") for t in title_rich]).strip()\n        serials_seen.add(serial_val)\n\n    if serials_seen != expected_serials:\n        print(\n            f\"Error: Serial values mismatch. Expected {sorted(expected_serials)}, found {sorted(serials_seen)}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # -----------------------------------------------------------------\n    # Verify the migration log page and callout block contents\n    # -----------------------------------------------------------------\n    log_page_title = \"Retirement Migration Log\"\n    log_page_id = notion_utils.find_page(notion, log_page_title)\n    if not log_page_id:\n        print(f\"Error: Page '{log_page_title}' not found.\", file=sys.stderr)\n        return False\n\n    # Search for a callout block with required pattern\n    import re\n\n    callout_pattern = re.compile(\n        r\"Successfully migrated (\\d+) assets to the retirement queue on 2025-03-24\\.\"\n    )\n    blocks = notion_utils.get_all_blocks_recursively(notion, log_page_id)\n    match_found = False\n    for blk in blocks:\n        if blk.get(\"type\") == \"callout\":\n            text = notion_utils.get_block_plain_text(blk)\n            m = callout_pattern.search(text)\n            if m:\n                migrated_num = int(m.group(1))\n                if migrated_num == len(expected_serials):\n                    match_found = True\n                else:\n                    print(\n                    
    f\"Error: Callout reports {migrated_num} assets, but {len(retirement_pages)} retirement pages found.\",\n                        file=sys.stderr,\n                    )\n                    return False\n                break\n    if not match_found:\n        print(\n            \"Error: Required callout block not found in migration log page.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"Success: All verification criteria satisfied.\")\n    return True\n\n\ndef main():\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/it_trouble_shooting_hub/security_audit_ticket/description.md",
    "content": "Please help me create a comprehensive security audit ticket based on the data already stored in the **IT Trouble Shooting Hub** page.\n\nYour automation should:\n\n1. In the **IT Inventory** database, find every item whose **Expiration date** is **before 2023-07-15**.\n2. In the **IT FAQs** database, look up any FAQ entries that have the **\"Security\"** tag.\n3. **Create a new page** inside the **IT Requests** database with **exact title**:\n   \n   `Quarterly Security Audit - Expired Assets Review`\n4. Set its **Priority** property to **High**.\n5. Set its **Due** property to **2023-06-22**.\n6. In the page body, add a bullet-list block that enumerates **each expired inventory item**. **Each bullet item must follow this exact text format (including the dashes):**\n\n   `<Serial> - <Tag> - <Recommendation>`\n\n   • `<Serial>` is the item’s Serial value.\n   • `<Tag>` is the first tag assigned to the inventory item (e.g., \"Laptop\").\n   • `<Recommendation>` is a brief action you suggest based on the security FAQ entry (any text is acceptable).\n\n   Example (do **not** copy):\n   `ABC123 - Laptop - Renew warranty and enable disk encryption`"
  },
  {
    "path": "tasks/notion/standard/it_trouble_shooting_hub/security_audit_ticket/meta.json",
    "content": "{\n  \"task_id\": \"security_audit_ticket\",\n  \"task_name\": \"Security Audit Ticket\",\n  \"category_id\": \"it_trouble_shooting_hub\",\n  \"category_name\": \"IT Trouble Shooting Hub\",\n  \"description\": \"Create a comprehensive security audit ticket based on expired inventory items and security FAQ entries.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"conditional filtering\",\n    \"database manipulation\",\n    \"data aggregation\",\n    \"report generation\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/It-Trouble-Shooting-Hub-23e81626b6d78020aba7eb65ae1cc2d5\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/it-trouble-shooting-hub\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/it_trouble_shooting_hub/security_audit_ticket/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\nimport re\n\n\ndef _get_title_text(page_properties: dict) -> str:\n    \"\"\"Extract the plain text of the first title property from a page.\"\"\"\n    for prop in page_properties.values():\n        if prop.get(\"type\") == \"title\":\n            title_rich = prop.get(\"title\", [])\n            if title_rich:\n                return title_rich[0].get(\"plain_text\")\n    return \"\"\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    \"\"\"Verify that the automation created the expected security audit ticket.\"\"\"\n\n    # ----------------------------------------------------------------------------------\n    # Locate the root page (IT Trouble Shooting Hub) either via main_id or by title.\n    # ----------------------------------------------------------------------------------\n    root_page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            root_page_id = found_id\n\n    if not root_page_id:\n        root_page_id = notion_utils.find_page(notion, \"IT Trouble Shooting Hub\")\n    if not root_page_id:\n        print(\n            \"Error: Could not locate the 'IT Trouble Shooting Hub' page.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # ----------------------------------------------------------------------------------\n    # Find the IT Requests database under the root page.\n    # ----------------------------------------------------------------------------------\n    requests_db_id = notion_utils.find_database_in_block(\n        notion, root_page_id, \"IT Requests\"\n    )\n    if not requests_db_id:\n        print(\n            \"Error: 'IT Requests' database not found in the workspace.\", file=sys.stderr\n        )\n        return False\n\n    # 
----------------------------------------------------------------------------------\n    # Search for the expected ticket inside the IT Requests database.\n    # ----------------------------------------------------------------------------------\n    expected_title = \"Quarterly Security Audit - Expired Assets Review\"\n    results = notion.databases.query(database_id=requests_db_id).get(\"results\", [])\n\n    target_page = None\n    for page in results:\n        title_text = _get_title_text(page.get(\"properties\", {}))\n        if title_text == expected_title:\n            target_page = page\n            break\n\n    if not target_page:\n        print(\n            f\"Failure: Ticket with title '{expected_title}' was not found in 'IT Requests' database.\",\n            file=sys.stderr,\n        )\n        return False\n\n    props = target_page.get(\"properties\", {})\n\n    # ----------------------------------------------------------------------------------\n    # Validate Priority property.\n    # ----------------------------------------------------------------------------------\n    priority_value = props.get(\"Priority\", {}).get(\"select\", {}).get(\"name\")\n    if priority_value != \"High\":\n        print(\n            f\"Failure: Expected Priority 'High', found '{priority_value}'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # ----------------------------------------------------------------------------------\n    # Validate Due date property.\n    # ----------------------------------------------------------------------------------\n    due_date_start = props.get(\"Due\", {}).get(\"date\", {}).get(\"start\")\n    expected_due_iso = \"2023-06-22\"\n    if not due_date_start or not due_date_start.startswith(expected_due_iso):\n        print(\n            f\"Failure: Expected Due date '{expected_due_iso}', found '{due_date_start}'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # 
----------------------------------------------------------------------------------\n    # Validate the bulleted list contains the correct expired items in required format.\n    # ----------------------------------------------------------------------------------\n    page_id = target_page[\"id\"]\n    blocks = notion.blocks.children.list(block_id=page_id).get(\"results\", [])\n    bullet_texts = [\n        notion_utils.get_block_plain_text(b)\n        for b in blocks\n        if b.get(\"type\") == \"bulleted_list_item\"\n    ]\n\n    expected_items = {\n        \"192371-8910/54\": \"Computer Accessory\",\n        \"32x11PIP\": \"Computer Accessory\",\n        \"76x87PCY\": \"Laptop\",\n        \"36x10PIQ\": \"Computer Accessory\",\n        \"65XYQ/GB\": \"License\",\n    }\n\n    if len(bullet_texts) != len(expected_items):\n        print(\n            f\"Failure: Expected {len(expected_items)} bullet items, found {len(bullet_texts)}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    bullet_pattern = re.compile(r\"^\\s*(.*?)\\s+-\\s+(.*?)\\s+-\\s+(.+?)\\s*$\")\n    matched = set()\n    for text in bullet_texts:\n        m = bullet_pattern.match(text)\n        if not m:\n            print(\n                f\"Failure: Bullet item '{text}' does not follow '<Serial> - <Tag> - <Recommendation>' format.\",\n                file=sys.stderr,\n            )\n            return False\n        serial, tag, advice = m.group(1).strip(), m.group(2).strip(), m.group(3).strip()\n        if serial not in expected_items:\n            print(\n                f\"Failure: Unexpected Serial '{serial}' found in bullet list.\",\n                file=sys.stderr,\n            )\n            return False\n        if expected_items[serial] != tag:\n            print(\n                f\"Failure: Serial '{serial}' expected tag '{expected_items[serial]}', found '{tag}'.\",\n                file=sys.stderr,\n            )\n            return False\n        if not advice:\n 
           print(\n                f\"Failure: Bullet item for Serial '{serial}' is missing a recommendation/advice.\",\n                file=sys.stderr,\n            )\n            return False\n        matched.add(serial)\n\n    if len(matched) != len(expected_items):\n        missing = set(expected_items.keys()) - matched\n        print(\n            f\"Failure: Missing bullet items for serials: {', '.join(missing)}.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"Success: All verification criteria satisfied.\")\n    return True\n\n\ndef main():\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/it_trouble_shooting_hub/verification_expired_update/description.md",
    "content": "**Task Overview**\n\nMy IT knowledge base contains pages whose verification status has expired:\n\n**Task Requirements**\n1. Locate the database named **\"IT Homepage\"** inside the main page **\"It Trouble Shooting Hub\"**.\n2. Within that database, find every page (except for **\"It Inventory\"**) where the **Verification** property state contains `expired`.\n3. For **each** expired page:\n   • Insert a **callout block** at the very top (as the first child block) whose rich-text content is:\n     `VERIFICATION EXPIRED - This page needs review and re-verification`\n   • Set the callout’s icon to ⚠️.\n   • Set the callout’s colour to `red_background`.\n4. Create a new entry in the **\"IT Requests\"** database with:\n   • Title (property **Task name**) **exactly** `Batch Verification Update Required`.\n   • **Priority** set to `High`.\n   • **Status** set to `In progress`.\n   • In the page body add a **bulleted list** where each bullet is a **mention** of the page processed in step 3 (i.e., use the Notion mention object linking to that page)."
  },
  {
    "path": "tasks/notion/standard/it_trouble_shooting_hub/verification_expired_update/meta.json",
    "content": "{\n  \"task_id\": \"verification_expired_update\",\n  \"task_name\": \"Verification Expired Update\",\n  \"category_id\": \"it_trouble_shooting_hub\",\n  \"category_name\": \"IT Trouble Shooting Hub\",\n  \"description\": \"Update pages with expired verification status by adding warning callouts and creating a batch update request.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"conditional filtering\",\n    \"visual formatting\",\n    \"database manipulation\",\n    \"cross-reference linking\",\n    \"status tracking\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/It-Trouble-Shooting-Hub-23e81626b6d78020aba7eb65ae1cc2d5\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/it-trouble-shooting-hub\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/it_trouble_shooting_hub/verification_expired_update/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\nCALL_OUT_TEXT = \"VERIFICATION EXPIRED - This page needs review and re-verification\"\nCALL_OUT_ICON = \"⚠️\"\nCALL_OUT_COLOR = \"red_background\"\nIT_HOMEPAGE_DB_TITLE = \"IT Homepage\"\nIT_REQUESTS_DB_TITLE = \"IT Requests\"\nREQUEST_TITLE = \"Batch Verification Update Required\"\nPRIORITY_HIGH = \"High\"\nSTATUS_IN_PROGRESS = \"In progress\"\n\n\ndef _get_main_page_id(notion: Client, main_id: str | None) -> str | None:\n    \"\"\"Resolve the main page id starting from CLI arg or by title search.\"\"\"\n    if main_id:\n        found_id, obj_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if found_id and obj_type == \"page\":\n            return found_id\n    # Fallback to title search (case-insensitive)\n    return notion_utils.find_page(notion, \"It Trouble Shooting Hub\")\n\n\ndef _fetch_database_id(\n    notion: Client, parent_page_id: str, db_title: str\n) -> str | None:\n    \"\"\"Locate a child database by title inside a given page.\"\"\"\n    return notion_utils.find_database_in_block(notion, parent_page_id, db_title)\n\n\ndef _expired_pages(notion: Client, db_id: str) -> list[dict]:\n    \"\"\"Return list of page objects with Verification.state == 'expired'.\"\"\"\n    # Query all pages (API max 100 per call). 
If many pages expected, iterate.\n    results = notion.databases.query(database_id=db_id).get(\"results\", [])\n    expired = []\n    for page in results:\n        verification_prop = page.get(\"properties\", {}).get(\"Verification\", {})\n        state = verification_prop.get(\"verification\", {}).get(\"state\")\n        # Skip the IT Inventory database entry\n        title_prop = page.get(\"properties\", {}).get(\"Page\", {}).get(\"title\", [])\n        title_text = title_prop[0].get(\"plain_text\") if title_prop else \"\"\n        if title_text.strip().lower() == \"it inventory\":\n            continue\n\n        if state and \"expired\" in state.lower():\n            expired.append(page)\n    return expired\n\n\ndef _check_callout_present(notion: Client, page_id: str) -> bool:\n    \"\"\"Verify the specified callout is the first child block of the page.\"\"\"\n    children = notion.blocks.children.list(block_id=page_id, page_size=1).get(\n        \"results\", []\n    )\n    if not children:\n        return False\n    first_block = children[0]\n    if first_block.get(\"type\") != \"callout\":\n        return False\n    data = first_block.get(\"callout\", {})\n    # Check color\n    if data.get(\"color\") != CALL_OUT_COLOR:\n        return False\n\n    # Check icon\n    icon = data.get(\"icon\", {})\n    if icon.get(\"type\") != \"emoji\" or icon.get(\"emoji\") != CALL_OUT_ICON:\n        return False\n\n    # Check text content (callout rich text plain text)\n    plain_text = notion_utils.get_block_plain_text(first_block)\n    return CALL_OUT_TEXT in plain_text\n\n\ndef _find_request_page(notion: Client, db_id: str) -> dict | None:\n    \"\"\"Find the IT Request page with the expected title.\"\"\"\n    # Use a simple search inside database\n    res = notion.databases.query(\n        database_id=db_id,\n        filter={\"property\": \"Task name\", \"title\": {\"equals\": REQUEST_TITLE}},\n    ).get(\"results\", [])\n    return res[0] if res else None\n\n\ndef 
_check_request_properties(page: dict) -> bool:\n    props = page.get(\"properties\", {})\n    priority = props.get(\"Priority\", {}).get(\"select\", {}).get(\"name\")\n    status = (\n        props.get(\"Status\", {}).get(\"status\", {}).get(\"name\")\n        if props.get(\"Status\", {}).get(\"status\")\n        else props.get(\"Status\", {}).get(\"select\", {}).get(\"name\")\n    )\n    return priority == PRIORITY_HIGH and status == STATUS_IN_PROGRESS\n\n\ndef _request_page_contains_mentions(\n    notion: Client, request_page_id: str, expected_page_ids: list[str]\n) -> bool:\n    children = notion.blocks.children.list(block_id=request_page_id, page_size=100).get(\n        \"results\", []\n    )\n    bullet_blocks = [b for b in children if b.get(\"type\") == \"bulleted_list_item\"]\n    mentioned_ids: set[str] = set()\n    for block in bullet_blocks:\n        rich_text = block.get(\"bulleted_list_item\", {}).get(\"rich_text\", [])\n        for rt in rich_text:\n            if rt.get(\"type\") == \"mention\":\n                mention = rt.get(\"mention\", {})\n                if mention.get(\"type\") == \"page\":\n                    mentioned_ids.add(mention.get(\"page\", {}).get(\"id\"))\n    if len(mentioned_ids) < len(expected_page_ids):\n        return False\n    return all(pid in mentioned_ids for pid in expected_page_ids)\n\n\ndef verify(notion: Client, main_id: str | None = None) -> bool:\n    main_page_id = _get_main_page_id(notion, main_id)\n    if not main_page_id:\n        print(\n            \"Error: Could not locate the main page 'It Trouble Shooting Hub'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Locate required databases\n    it_home_db_id = _fetch_database_id(notion, main_page_id, IT_HOMEPAGE_DB_TITLE)\n    it_req_db_id = _fetch_database_id(notion, main_page_id, IT_REQUESTS_DB_TITLE)\n    if not all([it_home_db_id, it_req_db_id]):\n        print(\n            \"Error: Required databases not found under the main 
page.\", file=sys.stderr\n        )\n        return False\n\n    # Identify expired pages\n    expired_pages = _expired_pages(notion, it_home_db_id)\n    if not expired_pages:\n        print(\n            \"Failure: No expired pages found; expected at least one for this task.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Verify callout on each expired page\n    for pg in expired_pages:\n        pid = pg[\"id\"]\n        if not _check_callout_present(notion, pid):\n            print(\n                f\"Failure: Callout missing or incorrect on page {pid}.\", file=sys.stderr\n            )\n            return False\n\n    # Verify IT Request entry\n    request_page = _find_request_page(notion, it_req_db_id)\n    if not request_page:\n        print(\n            \"Failure: IT Request 'Batch Verification Update Required' not found.\",\n            file=sys.stderr,\n        )\n        return False\n    if not _check_request_properties(request_page):\n        print(\"Failure: Priority or Status incorrect on IT Request.\", file=sys.stderr)\n        return False\n\n    # Verify bullet list in IT Request body\n    expired_titles = []\n    for p in expired_pages:\n        title_prop = p.get(\"properties\", {}).get(\"Page\", {}).get(\"title\", [])\n        title_text = title_prop[0].get(\"plain_text\") if title_prop else None\n        if title_text:\n            expired_titles.append(title_text)\n    expected_page_ids = [p[\"id\"] for p in expired_pages]\n    if not _request_page_contains_mentions(\n        notion, request_page[\"id\"], expected_page_ids\n    ):\n        print(\n            \"Failure: IT Request body does not contain mentions for all affected pages.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\"Success: All verification checks passed.\")\n    return True\n\n\ndef main():\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, 
main_id):\n        sys.exit(0)\n    sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/daily_itinerary_overview/description.md",
    "content": "Create a comprehensive daily itinerary overview page to help organize my Japan travel plans. I need you to create a new page called 'Daily Itinerary Overview' as a child of the main Japan Travel Planner page.\n\n**Task Requirements:**\n1. Create a new page titled 'Daily Itinerary Overview' as a child page of the main Japan Travel Planner page\n2. Query the Travel Itinerary database to retrieve all activities\n3. Structure the page with the following specific format:\n   - Add a heading_1 block with text \"📅 Daily Itinerary Overview\"\n   - Add a heading_2 block with text \"📊 Trip Summary\"\n   - Under Trip Summary, add a paragraph listing the total number of visited activities\n   - Create heading_2 blocks for \"🌅 Day 1\", \"🌆 Day 2\", and \"🌃 Day 3\"\n   - Under each day heading, list the activities scheduled for that day in to do list\n   - Each activity (use To-do list) should show: Activity Name - City (if available), for example, \"Osaka Castle - Osaka\". Check it if it's visited.\n4. The summary paragraph must contain the exact text \"Total activities visited (from Day 1 to Day 3): [NUMBER]\" where [NUMBER] is the actual count.\n5. Ensure all headings use the exact emoji and text format specified above"
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/daily_itinerary_overview/meta.json",
    "content": "{\n  \"task_id\": \"daily_itinerary_overview\",\n  \"task_name\": \"Daily Itinerary Overview\",\n  \"category_id\": \"japan_travel_planner\",\n  \"category_name\": \"Japan Travel Planner\",\n  \"description\": \"Create a comprehensive daily itinerary overview page to organize Japan travel plans with structured day-by-day activities.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"database manipulation\",\n    \"data aggregation\",\n    \"report generation\",\n    \"visual formatting\",\n    \"status tracking\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Japan-Travel-Planner-23181626b6d781c4b6bedb12786b5abe\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/daily_itinerary_overview/verify.py",
    "content": "import sys\nimport re\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify_todo_database_correspondence(all_blocks, activities_by_day, _):\n    \"\"\"\n    Verify that to-do items in the overview page correspond exactly to database activities.\n    \"\"\"\n    # Extract to-do items organized by day from the overview page\n    todos_by_day = {\"Day 1\": [], \"Day 2\": [], \"Day 3\": []}\n    current_day = None\n    checked_todos_count = 0\n\n    for block in all_blocks:\n        block_type = block.get(\"type\")\n        block_text = notion_utils.get_block_plain_text(block)\n\n        # Track which day section we're in\n        if block_type == \"heading_2\":\n            if \"🌅 Day 1\" in block_text:\n                current_day = \"Day 1\"\n            elif \"🌆 Day 2\" in block_text:\n                current_day = \"Day 2\"\n            elif \"🌃 Day 3\" in block_text:\n                current_day = \"Day 3\"\n            else:\n                current_day = None  # Reset for non-day headings\n\n        # Collect to-do items under day headings\n        elif block_type == \"to_do\" and current_day:\n            to_do_data = block.get(\"to_do\", {})\n            is_checked = to_do_data.get(\"checked\", False)\n\n            if is_checked:\n                checked_todos_count += 1\n\n            todos_by_day[current_day].append(\n                {\"text\": block_text, \"checked\": is_checked}\n            )\n\n    # Verify each day's activities match\n    for day in [\"Day 1\", \"Day 2\", \"Day 3\"]:\n        db_activities = activities_by_day[day]\n        page_todos = todos_by_day[day]\n\n        # Check if counts match\n        if len(db_activities) != len(page_todos):\n            print(\n                f\"Error: {day} activity count mismatch. 
Database has {len(db_activities)} activities, page has {len(page_todos)} to-dos.\",\n                file=sys.stderr,\n            )\n            return False\n\n        # Verify each database activity has corresponding to-do\n        for db_activity in db_activities:\n            expected_format = f\"{db_activity['name']}\"\n            if db_activity[\"city\"]:\n                expected_format += f\" - {db_activity['city']}\"\n\n            # Find matching to-do item\n            matching_todo = None\n            for todo in page_todos:\n                if (\n                    expected_format in todo[\"text\"]\n                    or db_activity[\"name\"] in todo[\"text\"]\n                ):\n                    matching_todo = todo\n                    break\n\n            if not matching_todo:\n                print(\n                    f\"Error: {day} - Database activity '{expected_format}' not found in to-do list.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Verify checked status matches visited status\n            if db_activity[\"visited\"] != matching_todo[\"checked\"]:\n                status_desc = \"checked\" if db_activity[\"visited\"] else \"unchecked\"\n                actual_desc = \"checked\" if matching_todo[\"checked\"] else \"unchecked\"\n                print(\n                    f\"Error: {day} - Activity '{db_activity['name']}' should be {status_desc} but is {actual_desc}.\",\n                    file=sys.stderr,\n                )\n                return False\n\n    # Verify summary count matches checked to-dos\n    for block in all_blocks:\n        if block.get(\"type\") == \"paragraph\":\n            block_text = notion_utils.get_block_plain_text(block)\n            if \"Total activities visited (from Day 1 to Day 3): 8\" in block_text:\n                print(\n                    f\"Success: Daily Itinerary Overview page created with correct structure. 
All {checked_todos_count} visited activities match database.\"\n                )\n                return True\n\n    print(\n        f\"Error: Summary shows incorrect visited activity count. Expected: {checked_todos_count} (based on checked to-do items)\",\n        file=sys.stderr,\n    )\n    return False\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the Daily Itinerary Overview page has been created correctly.\n    \"\"\"\n    # Find the main Japan Travel Planner page\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Japan Travel Planner\")\n    if not page_id:\n        print(\"Error: Main 'Japan Travel Planner' page not found.\", file=sys.stderr)\n        return False\n\n    # Find the Daily Itinerary Overview child page\n    overview_page_id = None\n    try:\n        # Get all child pages of the main page\n        response = notion.search(\n            query=\"Daily Itinerary Overview\",\n            filter={\"property\": \"object\", \"value\": \"page\"},\n        )\n\n        for result in response.get(\"results\", []):\n            # Check if this page is a child of the main page\n            parent = result.get(\"parent\", {})\n            if parent.get(\"type\") == \"page_id\" and parent.get(\"page_id\") == page_id:\n                overview_page_id = result[\"id\"]\n                break\n\n        if not overview_page_id:\n            # Alternative method: check page title directly\n            for result in response.get(\"results\", []):\n                title_list = (\n                    result.get(\"properties\", {}).get(\"title\", {}).get(\"title\", [])\n                )\n                for title_obj in title_list:\n                    if \"Daily 
Itinerary Overview\" in title_obj.get(\"plain_text\", \"\"):\n                        overview_page_id = result[\"id\"]\n                        break\n                if overview_page_id:\n                    break\n\n    except Exception as e:\n        print(\n            f\"Error searching for Daily Itinerary Overview page: {e}\", file=sys.stderr\n        )\n        return False\n\n    if not overview_page_id:\n        print(\n            \"Error: 'Daily Itinerary Overview' page not found as child of main page.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Get all blocks from the overview page\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, overview_page_id)\n\n    # Required content to verify - must appear in this exact order\n    required_headings_sequence = [\n        (\"📅 Daily Itinerary Overview\", \"heading_1\"),\n        (\"📊 Trip Summary\", \"heading_2\"),\n        (\"🌅 Day 1\", \"heading_2\"),\n        (\"🌆 Day 2\", \"heading_2\"),\n        (\"🌃 Day 3\", \"heading_2\"),\n    ]\n\n    found_headings_in_order = []\n    found_summary = False\n    summary_has_correct_format = False\n    found_todo_items = False\n\n    # Check each block and track heading sequence\n    for block in all_blocks:\n        block_text = notion_utils.get_block_plain_text(block)\n        block_type = block.get(\"type\")\n\n        # Check for required headings in sequence\n        for heading_text, expected_type in required_headings_sequence:\n            if heading_text in block_text and block_type == expected_type:\n                found_headings_in_order.append((heading_text, expected_type))\n\n        # Check for trip summary paragraph\n        if (\n            block_type == \"paragraph\"\n            and \"Total activities visited (from Day 1 to Day 3):\" in block_text\n        ):\n            found_summary = True\n            # Check if the format is correct (contains a number)\n            if re.search(\n                
r\"Total activities visited \\(from Day 1 to Day 3\\):\\s*\\d+\", block_text\n            ):\n                summary_has_correct_format = True\n\n        # Check for to-do list items (activities under day headings)\n        if block_type == \"to_do\":\n            found_todo_items = True\n            # Check if to-do items follow the format \"Activity Name - City\"\n            if \" - \" in block_text:\n                # Format appears to be correct (contains dash separator)\n                pass\n\n    # Verify all required headings are found in correct sequence\n    if len(found_headings_in_order) != len(required_headings_sequence):\n        missing_headings = []\n        for heading_text, heading_type in required_headings_sequence:\n            if (heading_text, heading_type) not in found_headings_in_order:\n                missing_headings.append(f\"{heading_text} ({heading_type})\")\n        print(f\"Error: Missing required headings: {missing_headings}\", file=sys.stderr)\n        return False\n\n    # Verify headings appear in correct order\n    for i, (found_heading, found_type) in enumerate(found_headings_in_order):\n        expected_heading, expected_type = required_headings_sequence[i]\n        if found_heading != expected_heading or found_type != expected_type:\n            print(\n                f\"Error: Headings not in correct order. 
Expected '{expected_heading}' ({expected_type}) at position {i + 1}, but found '{found_heading}' ({found_type})\",\n                file=sys.stderr,\n            )\n            return False\n\n    # Verify trip summary exists and has correct format\n    if not found_summary:\n        print(\n            \"Error: Trip summary paragraph with 'Total activities visite' not found.\",\n            file=sys.stderr,\n        )\n        return False\n\n    if not summary_has_correct_format:\n        print(\n            \"Error: Trip summary does not have correct format 'Total activities visited: [NUMBER]'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Verify to-do list items exist (activities should be in to-do format)\n    if not found_todo_items:\n        print(\n            \"Error: No to-do list items found. Activities should be listed as to-do items under day headings.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Additional verification: Check if Travel Itinerary database exists and has data\n    try:\n        itinerary_db_id = notion_utils.find_database_in_block(\n            notion, page_id, \"Travel Itinerary\"\n        )\n        if not itinerary_db_id:\n            itinerary_db_id = notion_utils.find_database(notion, \"Travel Itinerary\")\n\n        if itinerary_db_id:\n            # Query the database to get all activities\n            db_response = notion.databases.query(database_id=itinerary_db_id)\n            db_activities = db_response.get(\"results\", [])\n\n            # Organize database activities by day\n            activities_by_day = {\"Day 1\": [], \"Day 2\": [], \"Day 3\": []}\n            visited_count = 0\n\n            for result in db_activities:\n                properties = result.get(\"properties\", {})\n\n                # Extract activity info\n                activity_info = {\"name\": \"\", \"city\": \"\", \"visited\": False, \"day\": None}\n\n                for prop_name, 
prop_value in properties.items():\n                    prop_type = prop_value.get(\"type\")\n\n                    # Get activity name (usually from title property)\n                    if prop_type == \"title\" and prop_value.get(\"title\"):\n                        activity_info[\"name\"] = prop_value[\"title\"][0][\"plain_text\"]\n\n                    # Get city info\n                    elif \"city\" in prop_name.lower() and prop_type in [\n                        \"rich_text\",\n                        \"select\",\n                    ]:\n                        if prop_type == \"rich_text\" and prop_value.get(\"rich_text\"):\n                            activity_info[\"city\"] = prop_value[\"rich_text\"][0][\n                                \"plain_text\"\n                            ]\n                        elif prop_type == \"select\" and prop_value.get(\"select\"):\n                            activity_info[\"city\"] = prop_value[\"select\"][\"name\"]\n\n                    # Get visited status\n                    elif prop_type == \"checkbox\":\n                        if prop_value.get(\"checkbox\"):\n                            activity_info[\"visited\"] = True\n                            visited_count += 1\n\n                    # Get day info\n                    elif \"day\" in prop_name.lower() and prop_type in [\n                        \"select\",\n                        \"rich_text\",\n                    ]:\n                        if prop_type == \"select\" and prop_value.get(\"select\"):\n                            day_value = prop_value[\"select\"][\"name\"]\n                            if day_value in activities_by_day:\n                                activity_info[\"day\"] = day_value\n                        elif prop_type == \"rich_text\" and prop_value.get(\"rich_text\"):\n                            day_value = prop_value[\"rich_text\"][0][\"plain_text\"]\n                            if day_value in activities_by_day:\n          
                      activity_info[\"day\"] = day_value\n\n                # Add to appropriate day if day is specified\n                if activity_info[\"day\"] and activity_info[\"name\"]:\n                    activities_by_day[activity_info[\"day\"]].append(activity_info)\n\n            # Now verify to-do items match database activities\n            return verify_todo_database_correspondence(\n                all_blocks, activities_by_day, visited_count\n            )\n        else:\n            print(\n                \"Warning: Travel Itinerary database not found, using to-do items for count verification.\"\n            )\n            # Count checked to-do items in the overview page even without database\n            checked_todos_count = 0\n            for block in all_blocks:\n                if block.get(\"type\") == \"to_do\":\n                    to_do_data = block.get(\"to_do\", {})\n                    if to_do_data.get(\"checked\", False):\n                        checked_todos_count += 1\n\n            # Verify the summary shows the correct visited count based on checked to-dos\n            for block in all_blocks:\n                if block.get(\"type\") == \"paragraph\":\n                    block_text = notion_utils.get_block_plain_text(block)\n                    if f\"Total activities visited: {checked_todos_count}\" in block_text:\n                        print(\n                            f\"Success: Daily Itinerary Overview page created with correct structure and {checked_todos_count} visited activities.\"\n                        )\n                        return True\n\n            print(\n                f\"Error: Summary shows incorrect visited activity count. 
Expected: {checked_todos_count} (based on checked to-do items)\",\n                file=sys.stderr,\n            )\n            return False\n\n    except Exception as e:\n        print(f\"Warning: Could not verify activity count: {e}\")\n        print(\"Success: Daily Itinerary Overview page created with correct structure.\")\n        return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/packing_progress_summary/description.md",
    "content": "I'm preparing for my Japan trip and need to organize my packing list. Please help me:\n\n**Step 1: Update Items in the Packing List Database**\nIn the Clothes category, all items have already been packed except for the hat. After this, check the `SIM Card` entry and the `Wallet` entry.\n\n**Step 2: Create Packing Progress Summary**\nAfter updating the items, create a new section in the main Japan Travel Planner page immediately after the \"Packing List 💼\" heading. This section should contain:\n\n1. A paragraph block with the bold text \"**Packing Progress Summary**\"\n2. Followed by bullet list items showing statistics for each category in the format:\n   - \"Category: X/Y packed\" (where X is packed items, Y is total items), for example: \"Shoes: 2/10 packed\"\n   - ..."
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/packing_progress_summary/meta.json",
    "content": "{\n  \"task_id\": \"packing_progress_summary\",\n  \"task_name\": \"Packing Progress Summary\",\n  \"category_id\": \"japan_travel_planner\",\n  \"category_name\": \"Japan Travel Planner\",\n  \"description\": \"Update packing list items and create a progress summary section showing statistics for each category.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"database manipulation\",\n    \"data aggregation\",\n    \"report generation\",\n    \"status tracking\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Japan-Travel-Planner-23181626b6d781c4b6bedb12786b5abe\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/japantravelplanner101\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/packing_progress_summary/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that:\n    1. All Clothes items except hat are marked as packed\n    2. SIM Card and Wallet entries are checked\n    3. Packing Progress Summary section is created with statistics\n    \"\"\"\n    # Find the main Japan Travel Planner page\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Japan Travel Planner\")\n    if not page_id:\n        print(\"Error: Page 'Japan Travel Planner' not found.\", file=sys.stderr)\n        return False\n\n    # Find the Packing List database\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n    packing_list_db_id = None\n    packing_list_heading_id = None\n\n    for i, block in enumerate(all_blocks):\n        # Find the Packing List heading\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = notion_utils.get_block_plain_text(block)\n            if \"Packing List\" in heading_text and \"💼\" in heading_text:\n                packing_list_heading_id = block[\"id\"]\n                # Look for the database after this heading\n                for j in range(i + 1, len(all_blocks)):\n                    if all_blocks[j].get(\"type\") == \"child_database\":\n                        packing_list_db_id = all_blocks[j][\"id\"]\n                        break\n                break\n\n    if not packing_list_db_id:\n        print(\"Error: Packing List database not found.\", file=sys.stderr)\n        return False\n\n    # Query the database for all items\n    try:\n        db_items = notion.databases.query(database_id=packing_list_db_id)\n\n        # Track 
items for verification\n        clothes_items = []\n        sim_card_found = False\n        sim_card_packed = False\n        wallet_found = False\n        wallet_packed = False\n\n        # Process all items\n        for page in db_items.get(\"results\", []):\n            props = page.get(\"properties\", {})\n\n            # Get item name\n            name_prop = props.get(\"Name\", {})\n            if name_prop.get(\"type\") == \"title\":\n                name = \"\".join(\n                    [t.get(\"plain_text\", \"\") for t in name_prop.get(\"title\", [])]\n                )\n            else:\n                continue\n\n            # Get type (multi_select)\n            type_prop = props.get(\"Type\", {})\n            types = []\n            if type_prop.get(\"type\") == \"multi_select\":\n                types = [\n                    opt.get(\"name\", \"\") for opt in type_prop.get(\"multi_select\", [])\n                ]\n\n            # Get packed status\n            packed_prop = props.get(\"Packed\", {})\n            packed = False\n            if packed_prop.get(\"type\") == \"checkbox\":\n                packed = packed_prop.get(\"checkbox\", False)\n\n            # Check specific items\n            if name == \"SIM Card\":\n                sim_card_found = True\n                sim_card_packed = packed\n            elif name == \"Wallet\":\n                wallet_found = True\n                wallet_packed = packed\n\n            # Track Clothes items\n            if \"Clothes\" in types:\n                clothes_items.append(\n                    {\"name\": name, \"packed\": packed, \"is_hat\": \"hat\" in name.lower()}\n                )\n\n        # Verify Clothes items (all packed except hat)\n        for item in clothes_items:\n            if item[\"is_hat\"]:\n                if item[\"packed\"]:\n                    print(\n                        \"Error: Hat should not be packed but is marked as packed.\",\n                        
file=sys.stderr,\n                    )\n                    return False\n            else:\n                if not item[\"packed\"]:\n                    print(\n                        f\"Error: Clothes item '{item['name']}' should be packed but is not.\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n        print(\"Success: All Clothes items are correctly marked (packed except hat).\")\n\n        # Verify SIM Card and Wallet\n        if not sim_card_found:\n            print(\"Error: SIM Card entry not found.\", file=sys.stderr)\n            return False\n        if not sim_card_packed:\n            print(\"Error: SIM Card entry is not checked (packed).\", file=sys.stderr)\n            return False\n\n        if not wallet_found:\n            print(\"Error: Wallet entry not found.\", file=sys.stderr)\n            return False\n        if not wallet_packed:\n            print(\"Error: Wallet entry is not checked (packed).\", file=sys.stderr)\n            return False\n\n        print(\"Success: SIM Card and Wallet entries are checked.\")\n\n    except Exception as e:\n        print(f\"Error querying Packing List database: {e}\", file=sys.stderr)\n        return False\n\n    # Expected ground truth statistics\n    expected_stats = {\n        \"Clothes\": {\"packed\": 12, \"total\": 13},\n        \"Electronics\": {\"packed\": 1, \"total\": 10},\n        \"Essentials\": {\"packed\": 1, \"total\": 12},\n        \"Miscellaneous\": {\"packed\": 0, \"total\": 10},\n        \"Shoes\": {\"packed\": 0, \"total\": 2},\n        \"Toiletries\": {\"packed\": 0, \"total\": 19},\n    }\n\n    # Verify Packing Progress Summary section\n    # Re-fetch blocks to get updated content\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    # Find the Packing List heading again and check blocks after it\n    packing_heading_index = None\n    for i, block in enumerate(all_blocks):\n        if 
block.get(\"id\") == packing_list_heading_id:\n            packing_heading_index = i\n            break\n\n    summary_found = False\n    statistics_verified = True\n    found_statistics = {}\n\n    if packing_heading_index is not None:\n        # Look for summary in the next few blocks\n        for i in range(\n            packing_heading_index + 1, min(packing_heading_index + 15, len(all_blocks))\n        ):\n            block = all_blocks[i]\n            block_text = notion_utils.get_block_plain_text(block)\n\n            # Check for \"Packing Progress Summary\" paragraph\n            if \"Packing Progress Summary\" in block_text:\n                summary_found = True\n                # Check if it's bold\n                if block.get(\"type\") == \"paragraph\":\n                    rich_text_list = block.get(\"paragraph\", {}).get(\"rich_text\", [])\n                    for text_obj in rich_text_list:\n                        if \"Packing Progress Summary\" in text_obj.get(\"text\", {}).get(\n                            \"content\", \"\"\n                        ):\n                            if not text_obj.get(\"annotations\", {}).get(\"bold\", False):\n                                print(\n                                    \"Error: 'Packing Progress Summary' text is not bold.\",\n                                    file=sys.stderr,\n                                )\n                                return False\n\n            # Check for statistics bullet points in format \"Category: X/Y packed\"\n            if (\n                block.get(\"type\") == \"bulleted_list_item\"\n                and \":\" in block_text\n                and \"/\" in block_text\n                and \"packed\" in block_text\n            ):\n                # Parse the statistic line\n                # Expected format: \"Category: X/Y packed\"\n                try:\n                    parts = block_text.split(\":\")\n                    if len(parts) >= 2:\n                   
     category = parts[0].strip()\n                        stats_part = parts[1].strip()\n\n                        # Extract X/Y from \"X/Y packed\"\n                        if \"/\" in stats_part and \"packed\" in stats_part:\n                            nums = stats_part.split(\"packed\")[0].strip()\n                            if \"/\" in nums:\n                                x_str, y_str = nums.split(\"/\")\n                                x = int(x_str.strip())\n                                y = int(y_str.strip())\n                                found_statistics[category] = {\"packed\": x, \"total\": y}\n                except:\n                    pass  # Continue if parsing fails\n\n    if not summary_found:\n        print(\n            \"Error: 'Packing Progress Summary' section not found after Packing List heading.\",\n            file=sys.stderr,\n        )\n        return False\n\n    if not found_statistics:\n        print(\n            \"Error: No valid packing statistics bullet points found in format 'Category: X/Y packed'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Verify the statistics match the expected values\n    for category, stats in expected_stats.items():\n        if category not in found_statistics:\n            print(\n                f\"Error: Category '{category}' missing from Packing Progress Summary.\",\n                file=sys.stderr,\n            )\n            statistics_verified = False\n        else:\n            found = found_statistics[category]\n            if found[\"packed\"] != stats[\"packed\"] or found[\"total\"] != stats[\"total\"]:\n                print(\n                    f\"Error: Statistics mismatch for '{category}': expected {stats['packed']}/{stats['total']} packed, found {found['packed']}/{found['total']} packed.\",\n                    file=sys.stderr,\n                )\n                statistics_verified = False\n\n    # Check for extra categories in summary that don't 
exist in expected\n    for category in found_statistics:\n        if category not in expected_stats:\n            print(\n                f\"Error: Unexpected category '{category}' in summary.\", file=sys.stderr\n            )\n            statistics_verified = False\n\n    if not statistics_verified:\n        return False\n\n    print(\"Success: Packing Progress Summary section created with correct statistics.\")\n    # print(f\"Verified statistics: {', '.join(f'{k}: {v['packed']}/{v['total']} packed' for k, v in expected_stats.items())}\")\n\n    return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/remove_osaka_itinerary/description.md",
    "content": "Go to Japan Travel Planner and remove the itinerary items in OSAKA scheduled after 6 PM (excluding 6 PM itself) on Day 1 and Day 2."
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/remove_osaka_itinerary/meta.json",
    "content": "{\n  \"task_id\": \"remove_osaka_itinerary\",\n  \"task_name\": \"Remove Osaka Itinerary\",\n  \"category_id\": \"japan_travel_planner\",\n  \"category_name\": \"Japan Travel Planner\",\n  \"description\": \"Remove the itinerary items in Osaka after 6 PM from Day 1 and Day 2 travel schedules.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"conditional filtering\",\n    \"automated migration\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Japan-Travel-Planner-23181626b6d781c4b6bedb12786b5abe\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/japantravelplanner101\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/remove_osaka_itinerary/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\ndef get_page_title(page_result):\n    \"\"\"Extract title from a page result\"\"\"\n    properties = page_result.get('properties', {})\n    name_property = properties.get('Name', {})\n    if name_property.get('type') == 'title':\n        title_array = name_property.get('title', [])\n        if title_array and len(title_array) > 0:\n            return title_array[0].get('plain_text', '')\n    return ''\n\ndef get_page_time(page_result):\n    \"\"\"Extract time from Notes field\"\"\"\n    properties = page_result.get('properties', {})\n    notes_property = properties.get('Notes', {})\n    if notes_property.get('type') == 'rich_text':\n        rich_text_array = notes_property.get('rich_text', [])\n        if rich_text_array and len(rich_text_array) > 0:\n            notes_text = rich_text_array[0].get('plain_text', '')\n            return notes_text.strip()\n    return ''\n\ndef get_page_group(page_result):\n    \"\"\"Extract group/location from page\"\"\"\n    properties = page_result.get('properties', {})\n    group_property = properties.get('Group', {})\n    if group_property.get('type') == 'select':\n        select = group_property.get('select')\n        if select:\n            return select.get('name', '')\n    return ''\n\ndef get_page_day(page_result):\n    \"\"\"Extract day from page\"\"\"\n    properties = page_result.get('properties', {})\n    day_property = properties.get('Day', {})\n    if day_property.get('type') == 'select':\n        select = day_property.get('select')\n        if select:\n            return select.get('name', '')\n    return ''\n\ndef parse_time_to_minutes(time_str):\n    \"\"\"Convert time string to minutes for comparison\n    Returns None if time cannot be parsed\"\"\"\n    if not time_str:\n        return None\n    \n    # Clean the time string\n    time_str = time_str.strip().upper()\n    \n    # Remove any text after the time 
(e.g., \"7:30 PM\\n\" -> \"7:30 PM\")\n    time_str = time_str.split('\\n')[0].strip()\n    \n    # Extract time components\n    try:\n        if 'PM' in time_str:\n            time_part = time_str.replace('PM', '').strip()\n            if ':' in time_part:\n                hours, minutes = time_part.split(':')\n                hours = int(hours)\n                minutes = int(minutes)\n            else:\n                hours = int(time_part)\n                minutes = 0\n            # Convert PM hours (add 12 for PM times except 12 PM)\n            if hours != 12:\n                hours += 12\n            return hours * 60 + minutes\n        elif 'AM' in time_str:\n            time_part = time_str.replace('AM', '').strip()\n            if ':' in time_part:\n                hours, minutes = time_part.split(':')\n                hours = int(hours)\n                minutes = int(minutes)\n            else:\n                hours = int(time_part)\n                minutes = 0\n            # Handle 12 AM (midnight)\n            if hours == 12:\n                hours = 0\n            return hours * 60 + minutes\n    except:\n        return None\n    \n    return None\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that all OSAKA events after 6PM have been removed from Day 1 and Day 2 in the Japan Travel Planner.\n    \n    Expected items that should be deleted (all in OSAKA, after 6PM, on Day 1 or Day 2):\n    1. Rikuro's Namba Main Branch - 7 PM (Day 1)\n    2. Shin Sekai \"New World\" - 8 PM (Day 2)\n    3. Katsudon Chiyomatsu - 7:30 PM (Day 2)\n    4. 
Ebisubashi Bridge - 9 PM (Day 1)\n    \n    Note: Kuromon Ichiba Market at 6 PM should NOT be deleted (it's at 6PM, not after)\n    Items after 6PM on other days (Day 3-8) should NOT be deleted\n    \"\"\"\n    \n    # Step 1: Find the main Japan Travel Planner page\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not found_id or object_type != 'page':\n            print(\"Error: Japan Travel Planner page not found.\", file=sys.stderr)\n            return False\n    else:\n        # Try to find the page by searching\n        found_id = notion_utils.find_page(notion, \"Japan Travel Planner\")\n        if not found_id:\n            print(\"Error: Japan Travel Planner page not found.\", file=sys.stderr)\n            return False\n    \n    print(f\"Found Japan Travel Planner page: {found_id}\")\n    \n    # Step 2: Find the Travel Itinerary database\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id)\n    travel_itinerary_db_id = None\n    \n    for block in all_blocks:\n        if block and block.get(\"type\") == \"child_database\":\n            title = block.get(\"child_database\", {}).get(\"title\", \"\")\n            if \"Travel Itinerary\" in title:\n                travel_itinerary_db_id = block.get(\"id\")\n                print(f\"Found Travel Itinerary database: {travel_itinerary_db_id}\")\n                break\n    \n    if not travel_itinerary_db_id:\n        print(\"Error: Travel Itinerary database not found\", file=sys.stderr)\n        return False\n    \n    # Step 3: Query the database for OSAKA items on Day 1 and Day 2\n    try:\n        query_result = notion.databases.query(\n            database_id=travel_itinerary_db_id,\n            filter={\n                \"and\": [\n                    {\"property\": \"Group\", \"select\": {\"equals\": \"Osaka\"}},\n                    {\"or\": [\n                        {\"property\": \"Day\", \"select\": 
{\"equals\": \"Day 1\"}},\n                        {\"property\": \"Day\", \"select\": {\"equals\": \"Day 2\"}}\n                    ]}\n                ]\n            }\n        )\n    except Exception as e:\n        print(f\"Error querying Travel Itinerary database: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 4: Check for items that should have been deleted\n    six_pm_minutes = 18 * 60  # 6 PM in minutes (18:00)\n    \n    # Expected deleted items (4 specific items after 6 PM on Day 1 and Day 2)\n    expected_deleted = {\n        \"Rikuro's Namba Main Branch\": {\"time\": \"7 PM\", \"day\": \"Day 1\", \"found\": False},\n        \"Shin Sekai \\\"New World\\\"\": {\"time\": \"8 PM\", \"day\": \"Day 2\", \"found\": False},\n        \"Katsudon Chiyomatsu\": {\"time\": \"7:30 PM\", \"day\": \"Day 2\", \"found\": False},\n        \"Ebisubashi Bridge\": {\"time\": \"9 PM\", \"day\": \"Day 1\", \"found\": False}\n    }\n    \n    # Items that should remain (at or before 6 PM)\n    expected_remaining = {\n        \"Kuromon Ichiba Market\": {\"time\": \"6 PM\", \"found\": False}\n    }\n    \n    osaka_items_after_6pm = []\n    osaka_items_at_or_before_6pm = []\n    \n    # Debug: Show total query results\n    print(f\"Debug: Found {len(query_result.get('results', []))} total OSAKA items on Day 1 and Day 2\")\n    \n    # Process all OSAKA items on Day 1 and Day 2\n    for page in query_result.get('results', []):\n        page_title = get_page_title(page).strip()\n        page_time = get_page_time(page)\n        page_group = get_page_group(page)\n        page_day = get_page_day(page)\n        \n        if page_group != \"Osaka\":\n            continue\n        \n        # Parse time to check if after 6 PM\n        time_minutes = parse_time_to_minutes(page_time)\n        \n        if time_minutes is not None and time_minutes > six_pm_minutes:\n            osaka_items_after_6pm.append({\n                \"title\": page_title,\n                \"time\": 
page_time,\n                \"day\": page_day,\n                \"id\": page.get('id')\n            })\n            \n            # Check if this is one of the expected deleted items\n            for expected_title, expected_info in expected_deleted.items():\n                # Clean up the titles for comparison\n                clean_page_title = page_title.strip().lower()\n                clean_expected_title = expected_title.strip().lower()\n                \n                # Check for \"Rikuro's\" or \"Rikuro's\" (different apostrophe types)\n                if \"rikuro\" in clean_page_title and \"rikuro\" in clean_expected_title:\n                    title_match = True\n                elif clean_page_title == clean_expected_title:\n                    title_match = True\n                elif clean_expected_title in clean_page_title or clean_page_title in clean_expected_title:\n                    title_match = True\n                else:\n                    title_match = False\n                    \n                if title_match and page_day == expected_info[\"day\"]:\n                    print(f\"Debug: Found '{page_title}' on {page_day} at {page_time} - matches expected '{expected_title}'\")\n                    expected_deleted[expected_title][\"found\"] = True\n                \n        elif time_minutes is not None and time_minutes <= six_pm_minutes:\n            osaka_items_at_or_before_6pm.append({\n                \"title\": page_title,\n                \"time\": page_time,\n                \"day\": page_day,\n                \"id\": page.get('id')\n            })\n            \n            # Check if this is one of the expected remaining items\n            for expected_title in expected_remaining:\n                if expected_title.lower() in page_title.lower() or page_title.lower() in expected_title.lower():\n                    expected_remaining[expected_title][\"found\"] = True\n    \n    # Step 5: Verify results\n    print(f\"\\nVerification 
Summary:\")\n    print(f\"=\" * 50)\n    \n    all_passed = True\n    \n    # Check that the 4 expected items after 6 PM have been deleted\n    print(\"\\n4 Items that should be deleted (after 6 PM on Day 1 and Day 2):\")\n    for item_name, item_info in expected_deleted.items():\n        if item_info[\"found\"]:\n            # If found = True, it means the item still exists (was not deleted)\n            print(f\"✗ {item_name} ({item_info['day']}, {item_info['time']}) - Still exists, should be deleted\", file=sys.stderr)\n            all_passed = False\n        else:\n            # If found = False, it means the item was deleted correctly\n            print(f\"✓ {item_name} ({item_info['day']}, {item_info['time']}) - Correctly deleted\")\n    \n    \n    # Check that items at or before 6 PM remain\n    print(\"\\nItems that should remain (at or before 6 PM on Day 1 and Day 2):\")\n    for item_name, item_info in expected_remaining.items():\n        if item_info[\"found\"]:\n            print(f\"✓ {item_name} ({item_info['time']}) - Correctly retained\")\n        else:\n            print(f\"✗ {item_name} ({item_info['time']}) - Missing, should not be deleted\", file=sys.stderr)\n            all_passed = False\n    \n    # Report any items after 6 PM that still exist\n    if osaka_items_after_6pm:\n        print(f\"\\n✗ Found {len(osaka_items_after_6pm)} OSAKA item(s) after 6 PM on Day 1/Day 2:\", file=sys.stderr)\n        for item in osaka_items_after_6pm:\n            print(f\"  - {item['title']} at {item['time']} ({item['day']})\", file=sys.stderr)\n    else:\n        print(f\"\\n✓ No OSAKA items found after 6 PM on Day 1/Day 2 (all correctly deleted)\")\n    \n    # Report count summary\n    print(f\"\\nCount Summary:\")\n    print(f\"- OSAKA items after 6 PM on Day 1/Day 2 found: {len(osaka_items_after_6pm)} (should be 0)\")\n    print(f\"- OSAKA items at/before 6 PM on Day 1/Day 2 found: {len(osaka_items_at_or_before_6pm)}\")\n    print(f\"- Expected deletions 
verified: {sum(1 for item in expected_deleted.values() if not item['found'])}/4\")\n    \n    return all_passed\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    \n    if verify(notion, main_id):\n        print(\"\\nVerification passed: All 4 required OSAKA events after 6 PM on Day 1 and Day 2 have been removed\")\n        sys.exit(0)\n    else:\n        print(\"\\nVerification failed: Some OSAKA events after 6 PM on Day 1/Day 2 still exist\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/restaurant_expenses_sync/description.md",
    "content": "Please find the restaurants that appear in Day 1 of the Travel Itinerary database, then create corresponding entries in the Expenses database, one restaurant per entry. Set the date uniformly to Jan 1, 2025, and the cost uniformly to $120. Display the restaurant name in the Expense field. Set Category to Dining. For Comment, use the Description from the corresponding restaurant page. Leave other properties empty."
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/restaurant_expenses_sync/meta.json",
    "content": "{\n  \"task_id\": \"restaurant_expenses_sync\",\n  \"task_name\": \"Restaurant Expenses Sync\",\n  \"category_id\": \"japan_travel_planner\",\n  \"category_name\": \"Japan Travel Planner\",\n  \"description\": \"Find restaurants from Day 1 Travel Itinerary and create corresponding entries in the Expenses database.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"conditional filtering\",\n    \"database manipulation\",\n    \"cross-reference linking\",\n    \"template population\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Japan-Travel-Planner-23181626b6d781c4b6bedb12786b5abe\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/japantravelplanner101\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/japan_travel_planner/restaurant_expenses_sync/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that restaurants from Day 1 of Travel Itinerary have corresponding expense entries.\n    \"\"\"\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Japan Travel Planner\")\n    if not page_id:\n        print(\"Error: Page 'Japan Travel Planner' not found.\", file=sys.stderr)\n        return False\n\n    # Find Travel Itinerary database\n    itinerary_db_id = notion_utils.find_database_in_block(\n        notion, page_id, \"Travel Itinerary\"\n    )\n    if not itinerary_db_id:\n        print(\"Error: Database 'Travel Itinerary' not found.\", file=sys.stderr)\n        return False\n\n    # Find Expenses database\n    expenses_db_id = notion_utils.find_database_in_block(notion, page_id, \"Expenses\")\n    if not expenses_db_id:\n        print(\"Error: Database 'Expenses' not found.\", file=sys.stderr)\n        return False\n\n    # Find Japan Places to Visit database\n    places_db_id = notion_utils.find_database_in_block(\n        notion, page_id, \"Travel Itinerary\"\n    )\n    if not places_db_id:\n        print(\"Error: Database 'Japan Places to Visit' not found.\", file=sys.stderr)\n        return False\n\n    # Query Day 1 restaurants from Travel Itinerary\n    try:\n        itinerary_results = notion.databases.query(\n            database_id=itinerary_db_id,\n            filter={\n                \"and\": [\n                    {\"property\": \"Day\", \"select\": {\"equals\": \"Day 1\"}},\n                    {\"property\": \"Type\", \"multi_select\": {\"contains\": \"Food\"}},\n                ]\n            },\n        
).get(\"results\", [])\n    except Exception as e:\n        print(f\"Error querying Travel Itinerary database: {e}\", file=sys.stderr)\n        return False\n\n    if not itinerary_results:\n        print(\n            \"Error: No restaurants found for Day 1 in Travel Itinerary.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Extract restaurant names\n    restaurant_names = []\n    for entry in itinerary_results:\n        props = entry.get(\"properties\", {})\n        name_prop = props.get(\"Name\", {})\n        name_text = \"\".join(t.get(\"plain_text\", \"\") for t in name_prop.get(\"title\", []))\n        if name_text:\n            restaurant_names.append(name_text.strip())\n\n    if not restaurant_names:\n        print(\"Error: No restaurant names found in Day 1 entries.\", file=sys.stderr)\n        return False\n\n    # Get descriptions from Japan Places to Visit database\n    try:\n        places_results = notion.databases.query(database_id=places_db_id).get(\n            \"results\", []\n        )\n    except Exception as e:\n        print(f\"Error querying Japan Places to Visit database: {e}\", file=sys.stderr)\n        return False\n\n    # Create a map of restaurant names to descriptions\n    restaurant_descriptions = {}\n    for place in places_results:\n        props = place.get(\"properties\", {})\n        name_prop = props.get(\"Name\", {})\n        name_text = \"\".join(t.get(\"plain_text\", \"\") for t in name_prop.get(\"title\", []))\n\n        desc_prop = props.get(\"Description\", {})\n        desc_text = \"\".join(\n            t.get(\"plain_text\", \"\") for t in desc_prop.get(\"rich_text\", [])\n        )\n\n        if name_text and desc_text:\n            restaurant_descriptions[name_text.strip()] = desc_text.strip()\n\n    # Query Expenses database\n    try:\n        expenses_results = notion.databases.query(database_id=expenses_db_id).get(\n            \"results\", []\n        )\n    except Exception as e:\n       
 print(f\"Error querying Expenses database: {e}\", file=sys.stderr)\n        return False\n\n    # Verify each restaurant has a corresponding expense entry\n    verified_restaurants = []\n    for restaurant_name in restaurant_names:\n        found_matching_expense = False\n        expected_description = restaurant_descriptions.get(restaurant_name, \"\")\n\n        for expense in expenses_results:\n            props = expense.get(\"properties\", {})\n\n            # Check Expense field (title)\n            expense_prop = props.get(\"Expense\", {})\n            expense_text = \"\".join(\n                t.get(\"plain_text\", \"\") for t in expense_prop.get(\"title\", [])\n            )\n            if expense_text.strip() != restaurant_name:\n                continue\n\n            # Check Date\n            date_prop = props.get(\"Date\", {})\n            date_start = date_prop.get(\"date\", {}).get(\"start\")\n            if date_start != \"2025-01-01\":\n                continue\n\n            # Check Transaction Amount\n            amount_prop = props.get(\"Transaction Amount\", {})\n            amount = amount_prop.get(\"number\")\n            if amount != 120:\n                continue\n\n            # Check Category contains Dining\n            category_prop = props.get(\"Category\", {})\n            categories = [c.get(\"name\") for c in category_prop.get(\"multi_select\", [])]\n            if \"Dining\" not in categories:\n                continue\n\n            # Check Comment matches description (if description exists)\n            if expected_description:\n                comment_prop = props.get(\"Comment\", {})\n                comment_text = \"\".join(\n                    t.get(\"plain_text\", \"\") for t in comment_prop.get(\"rich_text\", [])\n                )\n                if comment_text.strip().replace(\n                    \"\\u202f\", \" \"\n                ) != expected_description.replace(\"\\u202f\", \" \"):\n                    
continue\n\n            found_matching_expense = True\n            verified_restaurants.append(restaurant_name)\n            break\n\n        if not found_matching_expense:\n            print(\n                f\"Error: No matching expense entry found for restaurant '{restaurant_name}'.\",\n                file=sys.stderr,\n            )\n            return False\n\n    if len(verified_restaurants) == len(restaurant_names):\n        print(\n            f\"Success: Found matching expense entries for all {len(restaurant_names)} Day 1 restaurants.\"\n        )\n        return True\n    else:\n        print(\n            f\"Error: Only {len(verified_restaurants)} out of {len(restaurant_names)} restaurants have matching expense entries.\",\n            file=sys.stderr,\n        )\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/online_resume/layout_adjustment/description.md",
    "content": "Please go to my Online Resume page and adjust the Skills display with the following requirements:\n\n## Skills Section Adjustment\n1. Delete the Skills database from the right side of the page\n2. Add a new Skills section on the left side, under the Languages section\n3. Format skills as \"[icon] skill description (type)\", for example \"✨✨ Photoshop (Design Tool)\"\n   - Use ✨✨ icon for skills with level >= 50%\n   - Use ✨ icon for skills with level < 50%\n\n## Work History and Education Layout Adjustment\n1. Adjust the layout so that logo/image columns take up 50% width in each section\n   - Note: Column width ratio might not be returned by API when columns are equal (50/50)\n2. Replace all images/icons with black placeholder images using URL containing \"https://singlecolorimage.com/get/000000/1024x128\""
  },
  {
    "path": "tasks/notion/standard/online_resume/layout_adjustment/meta.json",
    "content": "{\n  \"task_id\": \"layout_adjustment\",\n  \"task_name\": \"Layout Adjustment\",\n  \"category_id\": \"online_resume\",\n  \"category_name\": \"Online Resume\",\n  \"description\": \"This task involves modifying the layout and content of an online resume page by restructuring the Skills section with icon indicators and adjusting the Work History and Education sections to use equal column widths with placeholder images.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-14\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content organization\",\n    \"visual formatting\",\n    \"conditional filtering\",\n    \"template population\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Online-Resume-23181626b6d781159faaeb5eadaf612e\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/online-resume\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/online_resume/layout_adjustment/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the Skills display has been adjusted correctly:\n    1. Skills database on the right side should be deleted\n    2. Skills section should be added on the left side under Languages\n    3. Skills should be formatted with correct icons based on skill level\n    4. Work History and Education sections should use black placeholder images\n    \"\"\"\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Online Resume\")\n    if not page_id:\n        print(\"Error: Page 'Online Resume' not found.\", file=sys.stderr)\n        return False\n\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    # Step 1: Verify Skills database is NOT in the right column anymore\n    # Find the main column list\n    for block in all_blocks:\n        if block.get(\"type\") == \"column_list\":\n            column_list_id = block[\"id\"]\n            columns = notion_utils.get_all_blocks_recursively(notion, column_list_id)\n            \n            # Check if this is the main two-column layout\n            if len(columns) == 2:\n                # Find the right column (usually the one with larger width ratio)\n                for column in columns:\n                    if column.get(\"type\") == \"column\":\n                        width_ratio = column.get(\"column\", {}).get(\"width_ratio\", 0)\n                        # Right column typically has width_ratio > 0.5\n                        if width_ratio > 0.5:\n                            right_column_id = column[\"id\"]\n                            right_column_blocks = 
notion_utils.get_all_blocks_recursively(\n                                notion, right_column_id\n                            )\n                            \n                            # Check if Skills database exists in right column\n                            for right_block in right_column_blocks:\n                                if (\n                                    right_block.get(\"type\") == \"child_database\"\n                                    and right_block.get(\"child_database\", {}).get(\"title\") == \"Skills\"\n                                ):\n                                    print(\n                                        \"Error: Skills database still exists in the right column.\",\n                                        file=sys.stderr,\n                                    )\n                                    return False\n\n    # Step 2: Find the left column and verify Skills section exists there\n    skills_section_found = False\n    skills_with_double_sparkles = []\n    skills_with_single_sparkle = []\n    \n    # First, find the main column_list (top-level)\n    main_column_list_id = None\n    for block in all_blocks:\n        if block.get(\"type\") == \"column_list\" and block.get(\"parent\", {}).get(\"type\") == \"page_id\":\n            main_column_list_id = block[\"id\"]\n            break\n    \n    if not main_column_list_id:\n        print(\"Error: Main column list not found.\", file=sys.stderr)\n        return False\n    \n    # Get the columns directly\n    columns = notion_utils.get_all_blocks_recursively(notion, main_column_list_id)\n    \n    # Find the left column (the one with width_ratio around 0.25)\n    left_column_id = None\n    for column in columns:\n        if column.get(\"type\") == \"column\":\n            width_ratio = column.get(\"column\", {}).get(\"width_ratio\", 0)\n            # Left column has width_ratio around 0.25\n            if 0.2 <= width_ratio <= 0.3:\n                left_column_id = 
column[\"id\"]\n                break\n    \n    if not left_column_id:\n        print(\"Error: Left column not found.\", file=sys.stderr)\n        return False\n    \n    # Get all blocks in the left column\n    left_column_blocks = notion_utils.get_all_blocks_recursively(notion, left_column_id)\n    \n    # Find Languages heading\n    languages_index = -1\n    for i, left_block in enumerate(left_column_blocks):\n        if (\n            left_block.get(\"type\") == \"heading_2\"\n            and \"Languages\" in notion_utils.get_block_plain_text(left_block)\n        ):\n            languages_index = i\n            break\n    \n    if languages_index == -1:\n        print(\"Error: Languages heading not found in left column.\", file=sys.stderr)\n        return False\n    \n    # Look for Skills heading after Languages\n    for i in range(languages_index + 1, len(left_column_blocks)):\n        left_block = left_column_blocks[i]\n        \n        if (\n            left_block.get(\"type\") == \"heading_2\"\n            and \"Skills\" in notion_utils.get_block_plain_text(left_block)\n        ):\n            skills_section_found = True\n            \n            # Check divider after Skills heading\n            if i + 1 < len(left_column_blocks):\n                next_block = left_column_blocks[i + 1]\n                if next_block.get(\"type\") != \"divider\":\n                    print(\n                        \"Error: Divider not found after Skills heading.\",\n                        file=sys.stderr,\n                    )\n                    return False\n            \n            # Collect skills after divider\n            for j in range(i + 2, len(left_column_blocks)):\n                skill_block = left_column_blocks[j]\n                if skill_block.get(\"type\") == \"paragraph\":\n                    skill_text = notion_utils.get_block_plain_text(skill_block)\n                    if skill_text and skill_text.strip():  # Check for non-empty text\n           
             # Check if text is bold\n                        rich_text = skill_block.get(\"paragraph\", {}).get(\"rich_text\", [])\n                        if rich_text and not rich_text[0].get(\"annotations\", {}).get(\"bold\"):\n                            print(\n                                f\"Error: Skill '{skill_text}' is not bold.\",\n                                file=sys.stderr,\n                            )\n                            return False\n                        \n                        # Check icon format\n                        if skill_text.startswith(\"✨✨\"):\n                            skills_with_double_sparkles.append(skill_text)\n                        elif skill_text.startswith(\"✨\"):\n                            skills_with_single_sparkle.append(skill_text)\n                        else:\n                            print(\n                                f\"Error: Skill '{skill_text}' doesn't start with sparkle icon.\",\n                                file=sys.stderr,\n                            )\n                            return False\n                        \n                        # Check format includes type in parentheses\n                        if \"(\" not in skill_text or \")\" not in skill_text:\n                            print(\n                                f\"Error: Skill '{skill_text}' doesn't include type in parentheses.\",\n                                file=sys.stderr,\n                            )\n                            return False\n                elif skill_block.get(\"type\") in [\"heading_1\", \"heading_2\", \"heading_3\"]:\n                    # Stop when we reach another section\n                    break\n            break\n\n    if not skills_section_found:\n        print(\n            \"Error: Skills section not found in the left column under Languages.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Step 3: Verify we have the expected skills\n    
expected_double_sparkle_skills = [\n        \"Photoshop\",\n        \"Figma\",\n        \"Notion\",\n        \"Framer\"\n    ]\n    \n    expected_single_sparkle_skills = [\n        \"Webflow\",\n        \"Rive\",\n        \"CSS + Basic JS\"\n    ]\n    \n    # Check if all expected skills are present\n    for skill_name in expected_double_sparkle_skills:\n        found = any(skill_name in skill for skill in skills_with_double_sparkles)\n        if not found:\n            print(\n                f\"Error: Expected skill '{skill_name}' with ✨✨ not found.\",\n                file=sys.stderr,\n            )\n            return False\n    \n    for skill_name in expected_single_sparkle_skills:\n        found = any(skill_name in skill for skill in skills_with_single_sparkle)\n        if not found:\n            print(\n                f\"Error: Expected skill '{skill_name}' with ✨ not found.\",\n                file=sys.stderr,\n            )\n            return False\n\n    # Step 4: Verify Work History and Education sections have black placeholder images\n    work_history_images_found = 0\n    education_images_found = 0\n    black_placeholder_url = \"https://singlecolorimage.com/get/000000/\"\n    \n    # Find Work History and Education sections in the right column\n    right_column_id = None\n    for column in columns:\n        if column.get(\"type\") == \"column\":\n            width_ratio = column.get(\"column\", {}).get(\"width_ratio\", 0.5)\n            # Right column has width_ratio around 0.75 or no width_ratio (which means equal split)\n            if width_ratio > 0.6 or width_ratio == 0.5:\n                right_column_id = column[\"id\"]\n                break\n    \n    if right_column_id:\n        right_column_blocks = notion_utils.get_all_blocks_recursively(notion, right_column_id)\n        \n        # Find Work History section\n        work_history_index = -1\n        education_index = -1\n        \n        for i, block in 
enumerate(right_column_blocks):\n            if block.get(\"type\") == \"heading_1\":\n                heading_text = notion_utils.get_block_plain_text(block)\n                if \"Work History\" in heading_text:\n                    work_history_index = i\n                elif \"Education\" in heading_text:\n                    education_index = i\n        \n        # Check Work History column lists for images\n        if work_history_index != -1:\n            for i in range(work_history_index + 1, min(education_index if education_index > work_history_index else len(right_column_blocks), len(right_column_blocks))):\n                block = right_column_blocks[i]\n                if block.get(\"type\") == \"column_list\":\n                    column_list_blocks = notion_utils.get_all_blocks_recursively(notion, block[\"id\"])\n                    for column in column_list_blocks:\n                        if column.get(\"type\") == \"column\":\n                            # Check width_ratio - must be 50% (0.5) or absent (which defaults to 50%)\n                            col_width = column.get(\"column\", {}).get(\"width_ratio\")\n                            # First column should be image column (either no ratio=50%, or exactly 0.5)\n                            if col_width is None or col_width == 0.5:\n                                column_contents = notion_utils.get_all_blocks_recursively(notion, column[\"id\"])\n                                for content_block in column_contents:\n                                    if content_block.get(\"type\") == \"embed\":\n                                        embed_url = content_block.get(\"embed\", {}).get(\"url\", \"\")\n                                        if black_placeholder_url in embed_url:\n                                            work_history_images_found += 1\n                                    elif content_block.get(\"type\") == \"image\":\n                                        # Also check for 
image blocks with external URL\n                                        image_url = content_block.get(\"image\", {}).get(\"external\", {}).get(\"url\", \"\")\n                                        if black_placeholder_url in image_url:\n                                            work_history_images_found += 1\n                                break  # Only check first column\n        \n        # Check Education column list for images\n        if education_index != -1:\n            for i in range(education_index + 1, len(right_column_blocks)):\n                block = right_column_blocks[i]\n                if block.get(\"type\") == \"heading_1\":\n                    break  # Stop at next section\n                if block.get(\"type\") == \"column_list\":\n                    column_list_blocks = notion_utils.get_all_blocks_recursively(notion, block[\"id\"])\n                    for column in column_list_blocks:\n                        if column.get(\"type\") == \"column\":\n                            # Check width_ratio - must be 50% (0.5) or absent (which defaults to 50%)\n                            col_width = column.get(\"column\", {}).get(\"width_ratio\")\n                            # First column should be image column (either no ratio=50%, or exactly 0.5)\n                            if col_width is None or col_width == 0.5:\n                                column_contents = notion_utils.get_all_blocks_recursively(notion, column[\"id\"])\n                                for content_block in column_contents:\n                                    if content_block.get(\"type\") == \"embed\":\n                                        embed_url = content_block.get(\"embed\", {}).get(\"url\", \"\")\n                                        if black_placeholder_url in embed_url:\n                                            education_images_found += 1\n                                    elif content_block.get(\"type\") == \"image\":\n                             
           image_url = content_block.get(\"image\", {}).get(\"external\", {}).get(\"url\", \"\")\n                                        if black_placeholder_url in image_url:\n                                            education_images_found += 1\n                                break  # Only check first column\n                    break  # Only check first column_list in Education\n    \n    # Verify images were found\n    if work_history_images_found < 2:\n        print(\n            f\"Warning: Expected at least 2 Work History images with black placeholder, found {work_history_images_found}.\",\n            file=sys.stderr,\n        )\n        return False\n    \n    if education_images_found < 1:\n        print(\n            f\"Warning: Expected at least 1 Education image with black placeholder, found {education_images_found}.\",\n            file=sys.stderr,\n        )\n        return False\n    \n    print(\"Success: Skills display adjusted correctly.\")\n    print(f\"- Found {len(skills_with_double_sparkles)} skills with ✨✨ (skill level >= 50%)\")\n    print(f\"- Found {len(skills_with_single_sparkle)} skills with ✨ (skill level < 50%)\")\n    print(\"- Skills database removed from right column\")\n    print(\"- Skills section added to left column under Languages\")\n    print(f\"- Found {work_history_images_found} Work History images with black placeholder\")\n    print(f\"- Found {education_images_found} Education images with black placeholder\")\n    return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/notion/standard/online_resume/projects_section_update/description.md",
    "content": "Find the page named \"Online Resume\" and reorganize the projects section to showcase only the most recent and relevant work.\n\n**Task Requirements:**\n1. Delete the project named \"Knitties eComm Website\" from the Projects database since it's from 2022 and no longer relevant\n\n2. Create a new project entry called \"Zapier Dashboard Redesign\" with:\n   - Description: \"Led the complete redesign of Zapier's main dashboard, focusing on improved usability and modern design patterns. Implemented new navigation system and responsive layouts.\"\n   - Date: Start \"2024-01-01\", End \"2024-06-30\"\n   - Tags: Add the existing \"UI Design\" tag, and create a new tag \"Enterprise\" with purple color, then add both tags to this project\n   - Phone: Same as the phone number under the Contact section\n   - Url: Same as the personal website under the Contact section\n\n3. After the Projects database block, add the following blocks in sequence:\n   - A divider block\n   - A heading_2 block with text \"Current Focus\"\n   - A paragraph block with content that dynamically references:\n     - The highest skill level from your Skills database (find the skill with the highest Skill Level percentage)\n     - Incorporate this into the text: \"The Zapier Dashboard Redesign represents my most impactful recent work, leveraging my expertise in [highest skill name] ([skill level]%) to deliver enterprise-grade solutions that prioritize both aesthetics and functionality.\""
  },
  {
    "path": "tasks/notion/standard/online_resume/projects_section_update/meta.json",
    "content": "{\n  \"task_id\": \"projects_section_update\",\n  \"task_name\": \"Projects Section Update\",\n  \"category_id\": \"online_resume\",\n  \"category_name\": \"Online Resume\",\n  \"description\": \"Reorganize the projects section by removing outdated projects and adding new relevant work with proper formatting.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"database manipulation\",\n    \"template population\",\n    \"data aggregation\",\n    \"visual formatting\",\n    \"cross-reference linking\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Online-Resume-23181626b6d781159faaeb5eadaf612e\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/online-resume\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/online_resume/projects_section_update/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the projects section has been reorganized correctly with cross-section references.\n    \"\"\"\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Online Resume\")\n    if not page_id:\n        print(\"Error: Page 'Online Resume' not found.\", file=sys.stderr)\n        return False\n\n    # Find the Projects database\n    projects_db_id = notion_utils.find_database_in_block(notion, page_id, \"Projects\")\n    if not projects_db_id:\n        print(\"Error: Database 'Projects' not found.\", file=sys.stderr)\n        return False\n\n    # Find the Skills database to get the highest skill level\n    skills_db_id = notion_utils.find_database_in_block(notion, page_id, \"Skills\")\n    if not skills_db_id:\n        print(\"Error: Database 'Skills' not found.\", file=sys.stderr)\n        return False\n\n    # Query Skills database to find the highest skill level\n    skills_results = notion.databases.query(database_id=skills_db_id).get(\"results\", [])\n    highest_skill_name = \"\"\n    highest_skill_level = 0\n\n    for skill_page in skills_results:\n        properties = skill_page.get(\"properties\", {})\n        skill_name_prop = properties.get(\"Skill\", {}).get(\"title\", [])\n        skill_level_prop = properties.get(\"Skill Level\", {}).get(\"number\")\n\n        if skill_name_prop and skill_level_prop is not None:\n            skill_name = skill_name_prop[0].get(\"text\", {}).get(\"content\", \"\")\n            if skill_level_prop > highest_skill_level:\n                highest_skill_level = skill_level_prop\n               
 highest_skill_name = skill_name\n\n    if not highest_skill_name:\n        print(\"Error: Could not find any skills with skill levels.\", file=sys.stderr)\n        return False\n\n    # Query Projects database\n    projects_results = notion.databases.query(database_id=projects_db_id).get(\n        \"results\", []\n    )\n\n    # Check that \"Knitties eComm Website\" is deleted\n    for page in projects_results:\n        properties = page.get(\"properties\", {})\n        name_prop = properties.get(\"Name\", {}).get(\"title\", [])\n        if (\n            name_prop\n            and name_prop[0].get(\"text\", {}).get(\"content\") == \"Knitties eComm Website\"\n        ):\n            print(\n                \"Failure: 'Knitties eComm Website' project was not deleted.\",\n                file=sys.stderr,\n            )\n            return False\n\n    # Check that \"Zapier Dashboard Redesign\" exists with correct properties\n    zapier_project_found = False\n    for page in projects_results:\n        properties = page.get(\"properties\", {})\n        name_prop = properties.get(\"Name\", {}).get(\"title\", [])\n        if (\n            name_prop\n            and name_prop[0].get(\"text\", {}).get(\"content\")\n            == \"Zapier Dashboard Redesign\"\n        ):\n            zapier_project_found = True\n\n            # Check description contains reference to UI Design Internship\n            desc_prop = properties.get(\"Description\", {}).get(\"rich_text\", [])\n            if not desc_prop:\n                print(\"Failure: Zapier project has no description.\", file=sys.stderr)\n                return False\n\n            description_text = desc_prop[0].get(\"text\", {}).get(\"content\", \"\")\n            base_desc = \"Led the complete redesign of Zapier's main dashboard, focusing on improved usability and modern design patterns. 
Implemented new navigation system and responsive layouts.\"\n            if base_desc not in description_text:\n                print(\n                    \"Failure: Zapier project description is missing base content.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Check date\n            date_prop = properties.get(\"Date\", {}).get(\"date\", {})\n            if (\n                not date_prop\n                or date_prop.get(\"start\") != \"2024-01-01\"\n                or date_prop.get(\"end\") != \"2024-06-30\"\n            ):\n                print(\n                    \"Failure: Zapier project date range is incorrect.\", file=sys.stderr\n                )\n                return False\n\n            # Check tags\n            tags_prop = properties.get(\"Tags\", {}).get(\"multi_select\", [])\n            tag_names = {tag.get(\"name\") for tag in tags_prop}\n            if \"UI Design\" not in tag_names or \"Enterprise\" not in tag_names:\n                print(\n                    \"Failure: Zapier project is missing required tags.\", file=sys.stderr\n                )\n                return False\n\n            # Check phone\n            phone_prop = properties.get(\"Phone\", {}).get(\"phone_number\", [])\n            if not phone_prop or phone_prop != \"+44 7871263013\":\n                print(\n                    \"Failure: Zapier project phone number is incorrect.\",\n                    file=sys.stderr,\n                )\n                return\n\n            # Check url\n            url_prop = properties.get(\"Url\", {}).get(\"url\", [])\n            if not url_prop or url_prop != \"www.zinenwine.com\":\n                print(\"Failure: Zapier project url is incorrect.\", file=sys.stderr)\n                return\n\n            # Check Enterprise tag color\n            enterprise_tag_purple = False\n            for tag in tags_prop:\n                if tag.get(\"name\") == \"Enterprise\" 
and tag.get(\"color\") == \"purple\":\n                    enterprise_tag_purple = True\n                    break\n            if not enterprise_tag_purple:\n                print(\n                    \"Failure: Enterprise tag does not have purple color.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            break\n\n    if not zapier_project_found:\n        print(\n            \"Failure: 'Zapier Dashboard Redesign' project not found.\", file=sys.stderr\n        )\n        return False\n\n    # Find the Projects database block and verify blocks after it\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    # Find the Projects database block\n    projects_db_index = -1\n    for i, block in enumerate(all_blocks):\n        if (\n            block.get(\"type\") == \"child_database\"\n            and block.get(\"child_database\", {}).get(\"title\") == \"Projects\"\n        ):\n            projects_db_index = i\n            break\n\n    if projects_db_index == -1:\n        print(\"Error: Could not find Projects database block.\", file=sys.stderr)\n        return False\n\n    # Check blocks after Projects database\n    if projects_db_index + 3 > len(all_blocks):\n        print(\"Failure: Not enough blocks after Projects database.\", file=sys.stderr)\n        return False\n\n    # Check divider block\n    divider_block = all_blocks[projects_db_index + 1]\n    if divider_block.get(\"type\") != \"divider\":\n        print(\n            \"Failure: Expected divider block after Projects database.\", file=sys.stderr\n        )\n        return False\n\n    # Check heading block\n    heading_block = all_blocks[projects_db_index + 2]\n    if heading_block.get(\"type\") != \"heading_2\":\n        print(\"Failure: Expected heading_2 block after divider.\", file=sys.stderr)\n        return False\n\n    heading_text = heading_block.get(\"heading_2\", {}).get(\"rich_text\", [])\n    if (\n        not 
heading_text\n        or heading_text[0].get(\"text\", {}).get(\"content\") != \"Current Focus\"\n    ):\n        print(\"Failure: Heading text is incorrect.\", file=sys.stderr)\n        return False\n\n    # Check paragraph block with dynamic skill reference\n    paragraph_block = all_blocks[projects_db_index + 3]\n    if paragraph_block.get(\"type\") != \"paragraph\":\n        print(\"Failure: Expected paragraph block after heading.\", file=sys.stderr)\n        return False\n\n    paragraph_text = paragraph_block.get(\"paragraph\", {}).get(\"rich_text\", [])\n    if not paragraph_text:\n        print(\"Failure: Paragraph block is empty.\", file=sys.stderr)\n        return False\n\n    paragraph_content = paragraph_text[0].get(\"text\", {}).get(\"content\", \"\")\n\n    # Check that paragraph contains the base text\n    base_text = \"The Zapier Dashboard Redesign represents my most impactful recent work, leveraging my expertise in\"\n    if base_text not in paragraph_content:\n        print(\"Failure: Paragraph does not contain base text.\", file=sys.stderr)\n        return False\n\n    # Check that paragraph references the highest skill\n    skill_level_percent = int(highest_skill_level * 100)\n    expected_skill_ref = f\"{highest_skill_name} ({skill_level_percent}%)\"\n    if expected_skill_ref not in paragraph_content:\n        print(\n            f\"Failure: Paragraph does not reference highest skill '{expected_skill_ref}'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Check that paragraph contains the ending text\n    ending_text = (\n        \"enterprise-grade solutions that prioritize both aesthetics and functionality\"\n    )\n    if ending_text not in paragraph_content:\n        print(\n            \"Failure: Paragraph does not contain proper ending text.\", file=sys.stderr\n        )\n        return False\n\n    print(\n        f\"Success: Projects section has been reorganized correctly with cross-section references (highest 
skill: {highest_skill_name} at {skill_level_percent}%).\"\n    )\n    return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/online_resume/skills_development_tracker/description.md",
    "content": "Create a comprehensive skills audit system by performing the following tasks:\n\n**Task Requirements:**\n1. Create a new database named \"Skills Development Tracker\" as a child database in the main resume page with the following properties:\n   - Name (title property)\n   - Current Skill (relation to Skills database)\n   - Current Proficiency (rollup from related skill's \"Skill Level\" property)\n   - Target Proficiency (number property with format \"percent\")\n   - Gap (formula: Target Proficiency - Current Proficiency)\n   - Learning Resources (rich text property)\n   - Progress Notes (rich text property)\n\n2. Populate the Skills Development Tracker database with entries for all skills that have a proficiency level below 70% (0.7):\n   - For each qualifying skill, create an entry with:\n     - Name: \"[Skill Name] Development Plan\"\n     - Link to the corresponding skill in Skills database\n     - Target Proficiency: Set to Current + 25% (capped at 95%)\n     - Learning Resources: \"Online courses and practice projects\"\n     - Progress Notes: \"Initial assessment completed\"\n\n3. Create a callout block immediately after the Skills section (after the Skills database) with:\n   - Background color: blue_background\n   - Icon: 🎯 (target emoji)\n   - Content: \"Focus Areas: [3 skills with lowest current proficiency]\""
  },
  {
    "path": "tasks/notion/standard/online_resume/skills_development_tracker/meta.json",
    "content": "{\n  \"task_id\": \"skills_development_tracker\",\n  \"task_name\": \"Skills Development Tracker\",\n  \"category_id\": \"online_resume\",\n  \"category_name\": \"Online Resume\",\n  \"description\": \"Create a comprehensive skills audit system with development tracking for skills below 70% proficiency.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"database manipulation\",\n    \"cross-reference linking\",\n    \"conditional filtering\",\n    \"data aggregation\",\n    \"template population\",\n    \"visual formatting\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Online-Resume-23181626b6d781159faaeb5eadaf612e\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/online-resume\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/online_resume/skills_development_tracker/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the Skills Development Tracker database and callout block were created correctly.\n    \"\"\"\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"New Online Resume\")\n    if not page_id:\n        print(\"Error: Page 'New Online Resume' not found.\", file=sys.stderr)\n        return False\n\n    # Step 1: Verify Skills Development Tracker database exists\n    tracker_db_id = notion_utils.find_database_in_block(\n        notion, page_id, \"Skills Development Tracker\"\n    )\n    if not tracker_db_id:\n        print(\n            \"Error: Database 'Skills Development Tracker' not found.\", file=sys.stderr\n        )\n        return False\n\n    # Step 2: Verify database schema\n    try:\n        db_info = notion.databases.retrieve(database_id=tracker_db_id)\n        properties = db_info.get(\"properties\", {})\n\n        # Check required properties\n        required_props = {\n            \"Name\": \"title\",\n            \"Current Skill\": \"relation\",\n            \"Current Proficiency\": \"rollup\",\n            \"Target Proficiency\": \"number\",\n            \"Gap\": \"formula\",\n            \"Learning Resources\": \"rich_text\",\n            \"Progress Notes\": \"rich_text\",\n        }\n\n        for prop_name, expected_type in required_props.items():\n            if prop_name not in properties:\n                print(\n                    f\"Error: Property '{prop_name}' not found in database.\",\n                    file=sys.stderr,\n                )\n                return False\n            if 
properties[prop_name][\"type\"] != expected_type:\n                print(\n                    f\"Error: Property '{prop_name}' has incorrect type. Expected '{expected_type}', got '{properties[prop_name]['type']}'.\",\n                    file=sys.stderr,\n                )\n                return False\n\n        # Verify Target Proficiency is percent format\n        if (\n            properties[\"Target Proficiency\"].get(\"number\", {}).get(\"format\")\n            != \"percent\"\n        ):\n            print(\n                \"Error: Target Proficiency should have 'percent' format.\",\n                file=sys.stderr,\n            )\n            return False\n\n    except Exception as e:\n        print(f\"Error retrieving database info: {e}\", file=sys.stderr)\n        return False\n\n    # Step 3: Get Skills database to check entries\n    skills_db_id = notion_utils.find_database_in_block(notion, page_id, \"Skills\")\n    if not skills_db_id:\n        print(\"Error: Skills database not found.\", file=sys.stderr)\n        return False\n\n    # Get all skills with proficiency < 70%\n    skills_below_70 = []\n    try:\n        skills_results = notion.databases.query(database_id=skills_db_id).get(\n            \"results\", []\n        )\n        for skill in skills_results:\n            skill_level = (\n                skill.get(\"properties\", {}).get(\"Skill Level\", {}).get(\"number\", 1.0)\n            )\n            if skill_level < 0.7:\n                skill_name = (\n                    skill.get(\"properties\", {}).get(\"Skill\", {}).get(\"title\", [])\n                )\n                if skill_name:\n                    skill_name_text = skill_name[0].get(\"text\", {}).get(\"content\", \"\")\n                    skills_below_70.append(\n                        {\n                            \"name\": skill_name_text,\n                            \"id\": skill[\"id\"],\n                            \"level\": skill_level,\n                        }\n   
                 )\n    except Exception as e:\n        print(f\"Error querying Skills database: {e}\", file=sys.stderr)\n        return False\n\n    if not skills_below_70:\n        print(\"Warning: No skills found with proficiency below 70%.\", file=sys.stderr)\n        # This might be OK if all skills are above 70%\n\n    # Step 4: Verify entries in Skills Development Tracker\n    try:\n        tracker_results = notion.databases.query(database_id=tracker_db_id).get(\n            \"results\", []\n        )\n\n        # Check that we have entries for skills below 70%\n        if len(skills_below_70) > 0 and len(tracker_results) == 0:\n            print(\n                \"Error: No entries found in Skills Development Tracker database.\",\n                file=sys.stderr,\n            )\n            return False\n\n        # Verify each entry\n        for entry in tracker_results:\n            props = entry.get(\"properties\", {})\n\n            # Check name format\n            name_prop = props.get(\"Name\", {}).get(\"title\", [])\n            if not name_prop:\n                print(\"Error: Entry missing Name property.\", file=sys.stderr)\n                return False\n            name_text = name_prop[0].get(\"text\", {}).get(\"content\", \"\")\n            if not name_text.endswith(\" Development Plan\"):\n                print(\n                    f\"Error: Entry name '{name_text}' doesn't follow expected format.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Check relation to Skills database\n            skill_relation = props.get(\"Current Skill\", {}).get(\"relation\", [])\n            if not skill_relation:\n                print(\n                    f\"Error: Entry '{name_text}' missing Current Skill relation.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Check Target Proficiency (should be set)\n            target_prof = 
props.get(\"Target Proficiency\", {}).get(\"number\")\n            if target_prof is None:\n                print(\n                    f\"Error: Entry '{name_text}' missing Target Proficiency.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Check Learning Resources\n            learning_resources = props.get(\"Learning Resources\", {}).get(\n                \"rich_text\", []\n            )\n            if not learning_resources:\n                print(\n                    f\"Error: Entry '{name_text}' missing Learning Resources.\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Check Progress Notes\n            progress_notes = props.get(\"Progress Notes\", {}).get(\"rich_text\", [])\n            if not progress_notes:\n                print(\n                    f\"Error: Entry '{name_text}' missing Progress Notes.\",\n                    file=sys.stderr,\n                )\n                return False\n\n    except Exception as e:\n        print(f\"Error querying Skills Development Tracker: {e}\", file=sys.stderr)\n        return False\n\n    # Step 5: Verify callout block exists after Skills section\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    # Find Skills database block\n    skills_db_block_index = None\n    for i, block in enumerate(all_blocks):\n        if (\n            block.get(\"type\") == \"child_database\"\n            and block.get(\"child_database\", {}).get(\"title\") == \"Skills\"\n        ):\n            skills_db_block_index = i\n            break\n\n    if skills_db_block_index is None:\n        print(\"Error: Could not find Skills database block.\", file=sys.stderr)\n        return False\n\n    # Look for callout block after Skills database\n    callout_found = False\n    block = all_blocks[skills_db_block_index + 1]\n    if block.get(\"type\") == \"callout\":\n        callout_data = 
block.get(\"callout\", {})\n\n        # Check background color\n        if callout_data.get(\"color\") != \"blue_background\":\n            print(\"Error: Could not find callout block with blue background.\")\n            return False\n\n        # Check icon\n        icon = callout_data.get(\"icon\", {})\n        if icon.get(\"type\") != \"emoji\" or icon.get(\"emoji\") != \"🎯\":\n            print(\"Error: Could not find callout block with 🎯 emoji.\")\n            return False\n\n        # Check content starts with \"Focus Areas:\"\n        rich_text = callout_data.get(\"rich_text\", [])\n        if rich_text:\n            content = rich_text[0].get(\"text\", {}).get(\"content\", \"\")\n            if (\n                content.startswith(\"Focus Areas:\")\n                and \"CSS + Basic JS\" in content\n                and \"Webflow\" in content\n                and \"Rive\" in content\n            ):\n                callout_found = True\n                print(f\"Success: Found callout block with content: {content}\")\n            else:\n                print(\"Error: Could not find callout block with required text content.\")\n                return False\n\n    if not callout_found:\n        print(\n            \"Error: Could not find callout block with Focus Areas after Skills section.\",\n            file=sys.stderr,\n        )\n        return False\n\n    print(\n        \"Success: Skills Development Tracker database and callout block verified successfully.\"\n    )\n    return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/online_resume/work_history_addition/description.md",
    "content": "Hi! I realized I forgot to include one work experience on my resume page titled \"Online Resume.\" Could you please help me add it to the \"Work History\" section?\n\nThe position is \"Research Assistant,\" and it took place from January to August 2023. The description should be: \"Assisted in conducting user experience research projects at my bachelor’s program, supporting data collection, analyzing user feedback, and preparing research reports. Developed strong skills in research methodologies and improved collaboration with interdisciplinary teams.\"\n\nFor the image or logo, please use the one from the \"Education\" section (my bachelor school) to keep everything consistent.\n\nAlso, please make sure that the formatting — including font style, size, and layout — matches the existing entries in the Work History section so it looks seamless.\n\nThank you!"
  },
  {
    "path": "tasks/notion/standard/online_resume/work_history_addition/meta.json",
    "content": "{\n  \"task_id\": \"work_history_addition\",\n  \"task_name\": \"Work History Addition\",\n  \"category_id\": \"online_resume\",\n  \"category_name\": \"Online Resume\",\n  \"description\": \"Add a Research Assistant position to the Work History section with consistent formatting and university logo.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"database manipulation\",\n    \"template population\",\n    \"cross-reference linking\",\n    \"visual formatting\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Online-Resume-23181626b6d781159faaeb5eadaf612e\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/online-resume\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/online_resume/work_history_addition/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the new work history entry for 'Research Assistant' has been added correctly.\n    \"\"\"\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Online Resume\")\n    if not page_id:\n        print(\"Error: Page 'Online Resume' not found.\", file=sys.stderr)\n        return False\n\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    def find_image_url_under_heading(blocks, heading_text, notion_client):\n        heading_index = -1\n        for i, block in enumerate(blocks):\n            block_type = block.get(\"type\")\n            if block_type == \"heading_1\":\n                if heading_text in notion_utils.get_block_plain_text(block):\n                    heading_index = i\n                    break\n\n        if heading_index == -1:\n            return None\n\n        for i in range(heading_index + 1, len(blocks)):\n            block = blocks[i]\n            if block.get(\"type\") in [\"heading_1\", \"heading_2\", \"heading_3\"]:\n                break\n            if block.get(\"type\") == \"image\" and block.get(\"image\", {}).get(\"file\"):\n                return block.get(\"image\", {}).get(\"file\", {}).get(\"url\")\n            if block.get(\"type\") == \"column_list\":\n                column_list_id = block[\"id\"]\n                columns = notion_utils.get_all_blocks_recursively(\n                    notion_client, column_list_id\n                )\n                for column in columns:\n                    if column.get(\"type\") == \"column\":\n                        column_id = 
column[\"id\"]\n                        column_blocks = notion_utils.get_all_blocks_recursively(\n                            notion_client, column_id\n                        )\n                        for inner_block in column_blocks:\n                            if inner_block.get(\"type\") == \"image\" and inner_block.get(\n                                \"image\", {}\n                            ).get(\"file\"):\n                                return (\n                                    inner_block.get(\"image\", {})\n                                    .get(\"file\", {})\n                                    .get(\"url\")\n                                )\n        return None\n\n    def get_block_annotations(block):\n        block_type = block.get(\"type\")\n        if not block_type:\n            return {}\n        block_content = block.get(block_type)\n        if not block_content:\n            return {}\n        rich_text_list = block_content.get(\"rich_text\", [])\n        if not rich_text_list:\n            return {}\n        return rich_text_list[0].get(\"annotations\", {})\n\n    education_image_url = find_image_url_under_heading(all_blocks, \"Education\", notion)\n    if not education_image_url:\n        print(\n            \"Error: Could not find the image in the 'Education' section.\",\n            file=sys.stderr,\n        )\n        return False\n\n    heading_text = \"Work History\"\n    heading_index = -1\n    for i, block in enumerate(all_blocks):\n        if block.get(\n            \"type\"\n        ) == \"heading_1\" and heading_text in notion_utils.get_block_plain_text(block):\n            heading_index = i\n            break\n\n    if heading_index == -1:\n        print(f\"Error: Could not find the '{heading_text}' heading.\", file=sys.stderr)\n        return False\n\n    for i in range(heading_index + 1, len(all_blocks)):\n        block = all_blocks[i]\n        if block.get(\"type\") in [\"heading_1\", \"heading_2\", \"heading_3\"]:\n  
          break\n\n        if block.get(\"type\") == \"column_list\":\n            column_list_id = block[\"id\"]\n            columns = notion_utils.get_all_blocks_recursively(notion, column_list_id)\n            if len(columns) < 2:\n                continue\n\n            for column in columns:\n                if column.get(\"type\") == \"column\":\n                    if column.get(\"column\", {}).get(\"width_ratio\") == 0.125:\n                        image_column = column\n                    elif column.get(\"column\", {}).get(\"width_ratio\") == 0.875:\n                        text_column = column\n\n            image_column_blocks = notion_utils.get_all_blocks_recursively(\n                notion, image_column[\"id\"]\n            )\n            text_column_blocks = notion_utils.get_all_blocks_recursively(\n                notion, text_column[\"id\"]\n            )\n\n            column_image_url = None\n            for inner_block in image_column_blocks:\n                if inner_block.get(\"type\") == \"image\" and inner_block.get(\n                    \"image\", {}\n                ).get(\"file\"):\n                    column_image_url = (\n                        inner_block.get(\"image\", {}).get(\"file\", {}).get(\"url\")\n                    )\n                    break\n\n            if (\n                not column_image_url\n                or column_image_url[:100] != education_image_url[:100]\n            ):\n                continue\n\n            for j, inner_block in enumerate(text_column_blocks):\n                if \"Research Assistant\" in notion_utils.get_block_plain_text(\n                    inner_block\n                ):\n                    title_annotations = get_block_annotations(inner_block)\n                    if j + 2 < len(text_column_blocks):\n                        date_block = text_column_blocks[j + 1]\n                        description_block = text_column_blocks[j + 2]\n\n                        date_text = \"January 
- August 2023\"\n                        description_text = \"Assisted in conducting user experience research projects at my bachelor’s program, supporting data collection, analyzing user feedback, and preparing research reports. Developed strong skills in research methodologies and improved collaboration with interdisciplinary teams.\"\n\n                        date_annotations = get_block_annotations(date_block)\n                        description_annotations = get_block_annotations(\n                            description_block\n                        )\n\n                        if (\n                            date_text in notion_utils.get_block_plain_text(date_block)\n                            and description_text\n                            in notion_utils.get_block_plain_text(description_block)\n                            and title_annotations.get(\"bold\")\n                            and date_annotations.get(\"italic\")\n                            and date_annotations.get(\"color\") == \"gray\"\n                            and description_annotations.get(\"color\") == \"default\"\n                            and description_annotations.get(\"italic\") != True\n                            and description_annotations.get(\"bold\") != True\n                        ):\n                            print(\"Success: Verified new work history entry.\")\n                            return True\n\n    print(\"Failure: Could not verify the new work history entry.\", file=sys.stderr)\n    return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/python_roadmap/expert_level_lessons/description.md",
    "content": "# Task: Expert Level Learning Path with Complex Prerequisites\n\n## Objective\nCreate an Expert Level chapter in the Python Roadmap with sophisticated prerequisite chains that require deep understanding of the existing course structure.\n\n## Requirements\n\n### 1. Create Expert Level Chapter\n- **Database**: Chapters database\n- **Properties**:\n  - Name: `Expert Level`\n  - Icon: 🟣 (purple circle emoji)\n  - Must appear after Advanced Level in the database\n\n### 2. Create Bridge Lesson\nCreate a lesson that bridges advanced and expert content:\n- **Title**: `Advanced Foundations Review`\n- **Status**: Done\n- **Chapter**: Link to Expert Level\n- **Parent item**: Link to the lesson that currently has status \"In Progress\" and contains \"Control\" in its title\n- **Sub-items**: Must link to exactly these three lessons:\n  - The lesson with title containing \"Decorators\"\n  - The lesson with title containing \"Calling API\"\n  - The lesson with title containing \"Regular Expressions\"\n\n### 3. Create Expert Level Lessons\nAdd exactly 4 expert lessons to the Steps database:\n\n**Lesson 1**: `Metaprogramming and AST Manipulation`\n- Status: To Do\n- Chapter: Expert Level\n- Parent item: Link to \"Advanced Foundations Review\"\n- Date: 2025-09-15\n\n**Lesson 2**: `Async Concurrency Patterns`\n- Status: To Do\n- Chapter: Expert Level\n- Parent item: Link to the lesson titled \"Calling API\"\n- Date: 2025-09-20\n\n**Lesson 3**: `Memory Management and GC Tuning`\n- Status: In Progress\n- Chapter: Expert Level\n- Parent item: Link to \"Advanced Foundations Review\"\n- Sub-item: Must have exactly 2 links:\n  - Link to any lesson from \"Data Structures\" that has status \"To Do\"\n  - Link to the lesson containing \"OOP\" in its title\n- Date: 2025-09-25\n\n**Lesson 4**: `Building Python C Extensions`\n- Status: To Do\n- Chapter: Expert Level\n- Parent item: Link to \"Metaprogramming and AST Manipulation\"\n- Date: 2025-10-01\n\n### 4. 
Update Existing Lessons\n- Change the status of \"Decorators\" from \"To Do\" to \"Done\"\n- Add \"Async Concurrency Patterns\" as a Sub-item to \"Error Handling\"\n- Update \"Control Flow\" status from \"In Progress\" to \"Done\"\n\n### 5. Create Learning Path Notes\nAdd content to the \"Advanced Foundations Review\" lesson page:\n- **Block 1**: Heading 2 with text `Prerequisites Checklist`\n- **Block 2**: Bulleted list with exactly 3 items:\n  - `✅ Advanced Python Features (Decorators, Context Managers)`\n  - `✅ API Integration and Async Basics`\n  - `✅ Pattern Matching and Text Processing`\n- **Block 3**: Paragraph with text: `This lesson serves as a checkpoint before entering expert-level content. Ensure you have mastered all prerequisites listed above.`"
  },
  {
    "path": "tasks/notion/standard/python_roadmap/expert_level_lessons/meta.json",
    "content": "{\n  \"task_id\": \"expert_level_lessons\",\n  \"task_name\": \"Expert Level Lessons\",\n  \"category_id\": \"python_roadmap\",\n  \"category_name\": \"Python Roadmap\",\n  \"description\": \"Create an Expert Level chapter with sophisticated prerequisite chains and four expert-level lessons.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-02\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"database manipulation\",\n    \"cross-reference linking\",\n    \"conditional filtering\",\n    \"status tracking\",\n    \"template population\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Python-Roadmap-25281626b6d78012bf2bce1fa8711f4d\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/python-roadmap\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/python_roadmap/expert_level_lessons/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the Expert Level chapter and its lessons have been created correctly with complex prerequisites.\n    \"\"\"\n    # Step 1: Find the main page and get database IDs\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not found_id or object_type != 'page':\n            print(\"Error: Main page not found.\", file=sys.stderr)\n            return False\n    else:\n        # Try to find the main page by searching\n        found_id = notion_utils.find_page(notion, \"Python Roadmap\")\n        if not found_id:\n            print(\"Error: Main page not found.\", file=sys.stderr)\n            return False\n    \n    print(f\"Found main page: {found_id}\")\n    \n    # Get all blocks from the page to find database references\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id)\n    print(f\"Found {len(all_blocks)} blocks\")\n    \n    # Find database IDs from the page\n    chapters_db_id = None\n    steps_db_id = None\n    \n    for block in all_blocks:\n        if block and block.get(\"type\") == \"child_database\":\n            db_title = block.get(\"child_database\", {}).get(\"title\", \"\")\n            if \"Chapters\" in db_title:\n                chapters_db_id = block[\"id\"]\n                print(f\"Found Chapters database: {chapters_db_id}\")\n            elif \"Steps\" in db_title:\n                steps_db_id = block[\"id\"]\n                print(f\"Found Steps database: {steps_db_id}\")\n    \n    if not chapters_db_id:\n        print(\"Error: Chapters database not found.\", file=sys.stderr)\n        return False\n        \n    if not steps_db_id:\n        print(\"Error: Steps database not found.\", file=sys.stderr)\n        return False\n    \n    print(\"Starting verification...\")\n  
  \n    # Step 2: Verify the Expert Level chapter exists\n    print(\"2. Checking for Expert Level chapter...\")\n    expert_chapter_id = None\n    \n    try:\n        chapters_response = notion.databases.query(\n            database_id=chapters_db_id,\n            filter={\n                \"property\": \"Name\",\n                \"title\": {\n                    \"equals\": \"Expert Level\"\n                }\n            }\n        )\n        \n        if not chapters_response.get(\"results\"):\n            print(f\"Error: Expert Level chapter not found in Chapters database.\", file=sys.stderr)\n            return False\n        \n        expert_chapter = chapters_response[\"results\"][0]\n        expert_chapter_id = expert_chapter[\"id\"]\n        \n        # Check chapter icon (purple circle)\n        chapter_icon = expert_chapter.get(\"icon\")\n        if not chapter_icon or chapter_icon.get(\"type\") != \"emoji\" or chapter_icon.get(\"emoji\") != \"🟣\":\n            print(f\"Error: Expert Level chapter does not have the correct purple circle emoji icon.\", file=sys.stderr)\n            return False\n        \n        print(f\"✓ Expert Level chapter found with correct icon: 🟣\")\n        \n    except Exception as e:\n        print(f\"Error querying Chapters database: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 3: Find Control Flow lesson (In Progress status)\n    print(\"3. 
Finding Control Flow lesson...\")\n    control_flow_id = None\n    \n    try:\n        control_flow_response = notion.databases.query(\n            database_id=steps_db_id,\n            filter={\n                \"and\": [\n                    {\n                        \"property\": \"Lessons\",\n                        \"title\": {\n                            \"contains\": \"Control\"\n                        }\n                    },\n                    {\n                        \"property\": \"Status\",\n                        \"status\": {\n                            \"equals\": \"Done\"  # Should be updated to Done\n                        }\n                    }\n                ]\n            }\n        )\n        \n        if control_flow_response.get(\"results\"):\n            control_flow_lesson = control_flow_response[\"results\"][0]\n            control_flow_id = control_flow_lesson[\"id\"]\n            print(f\"✓ Found Control Flow lesson with status 'Done'\")\n        else:\n            print(f\"Error: Control Flow lesson not found with status 'Done'.\", file=sys.stderr)\n            return False\n        \n    except Exception as e:\n        print(f\"Error finding Control Flow lesson: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 4: Find prerequisite lessons\n    print(\"4. 
Finding prerequisite lessons...\")\n    \n    decorators_id = None\n    calling_api_id = None\n    regex_id = None\n    \n    try:\n        # Find Decorators (should be Done)\n        decorators_response = notion.databases.query(\n            database_id=steps_db_id,\n            filter={\n                \"property\": \"Lessons\",\n                \"title\": {\n                    \"contains\": \"Decorators\"\n                }\n            }\n        )\n        \n        if decorators_response.get(\"results\"):\n            decorators_lesson = decorators_response[\"results\"][0]\n            decorators_id = decorators_lesson[\"id\"]\n            # Check status is Done\n            if decorators_lesson[\"properties\"][\"Status\"][\"status\"][\"name\"] != \"Done\":\n                print(f\"Error: Decorators lesson should have status 'Done'.\", file=sys.stderr)\n                return False\n            print(f\"✓ Found Decorators lesson with status 'Done'\")\n        else:\n            print(f\"Error: Decorators lesson not found.\", file=sys.stderr)\n            return False\n        \n        # Find Calling API\n        calling_api_response = notion.databases.query(\n            database_id=steps_db_id,\n            filter={\n                \"property\": \"Lessons\",\n                \"title\": {\n                    \"equals\": \"Calling API\"\n                }\n            }\n        )\n        \n        if calling_api_response.get(\"results\"):\n            calling_api_lesson = calling_api_response[\"results\"][0]\n            calling_api_id = calling_api_lesson[\"id\"]\n            print(f\"✓ Found Calling API lesson\")\n        else:\n            print(f\"Error: Calling API lesson not found.\", file=sys.stderr)\n            return False\n        \n        # Find Regular Expressions\n        regex_response = notion.databases.query(\n            database_id=steps_db_id,\n            filter={\n                \"property\": \"Lessons\",\n                
\"title\": {\n                    \"contains\": \"Regular Expressions\"\n                }\n            }\n        )\n        \n        if regex_response.get(\"results\"):\n            regex_lesson = regex_response[\"results\"][0]\n            regex_id = regex_lesson[\"id\"]\n            print(f\"✓ Found Regular Expressions lesson\")\n        else:\n            print(f\"Error: Regular Expressions lesson not found.\", file=sys.stderr)\n            return False\n        \n    except Exception as e:\n        print(f\"Error finding prerequisite lessons: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 5: Verify Advanced Foundations Review bridge lesson\n    print(\"5. Checking Advanced Foundations Review bridge lesson...\")\n    bridge_id = None\n    \n    try:\n        bridge_response = notion.databases.query(\n            database_id=steps_db_id,\n            filter={\n                \"property\": \"Lessons\",\n                \"title\": {\n                    \"equals\": \"Advanced Foundations Review\"\n                }\n            }\n        )\n        \n        if not bridge_response.get(\"results\"):\n            print(f\"Error: Advanced Foundations Review lesson not found.\", file=sys.stderr)\n            return False\n        \n        bridge_lesson = bridge_response[\"results\"][0]\n        bridge_id = bridge_lesson[\"id\"]\n        \n        # Check status is Done\n        if bridge_lesson[\"properties\"][\"Status\"][\"status\"][\"name\"] != \"Done\":\n            print(f\"Error: Advanced Foundations Review should have status 'Done'.\", file=sys.stderr)\n            return False\n        \n        # Check linked to Expert Level chapter\n        bridge_chapters = bridge_lesson[\"properties\"][\"Chapters\"][\"relation\"]\n        if not any(rel[\"id\"] == expert_chapter_id for rel in bridge_chapters):\n            print(f\"Error: Advanced Foundations Review not linked to Expert Level chapter.\", file=sys.stderr)\n            return False\n     
   \n        # Check Parent item is Control Flow\n        bridge_parent = bridge_lesson[\"properties\"][\"Parent item\"][\"relation\"]\n        if not bridge_parent or bridge_parent[0][\"id\"] != control_flow_id:\n            print(f\"Error: Advanced Foundations Review should have Control Flow as Parent item.\", file=sys.stderr)\n            return False\n        \n        # Check Sub-items (should have at least 3 specific lessons plus any that reference it as parent)\n        bridge_subitems = bridge_lesson[\"properties\"][\"Sub-item\"][\"relation\"]\n        required_subitems = {decorators_id, calling_api_id, regex_id}\n        actual_subitems = {item[\"id\"] for item in bridge_subitems}\n        \n        if not required_subitems.issubset(actual_subitems):\n            print(f\"Error: Advanced Foundations Review should have at least these 3 sub-items: Decorators, Calling API, Regular Expressions.\", file=sys.stderr)\n            return False\n        \n        # Due to bidirectional relations, lessons that have this as parent will also appear as sub-items\n        # We expect at least 5: 3 initial + 2 that reference it as parent (Metaprogramming and Memory Management)\n        if len(bridge_subitems) < 5:\n            print(f\"Error: Advanced Foundations Review should have at least 5 sub-items (3 initial + 2 from parent relations), found {len(bridge_subitems)}.\", file=sys.stderr)\n            return False\n        \n        print(f\"✓ Advanced Foundations Review has {len(bridge_subitems)} sub-items, including the 3 required ones\")\n        \n        print(f\"✓ Advanced Foundations Review found with correct properties\")\n        \n    except Exception as e:\n        print(f\"Error checking bridge lesson: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 6: Verify the 4 expert lessons\n    print(\"6. 
Checking the 4 expert lessons...\")\n    \n    # Note: Async Concurrency Patterns will have Error Handling as parent (due to sub-item relation)\n    # We'll need to find Error Handling's ID first\n    error_handling_response = notion.databases.query(\n        database_id=steps_db_id,\n        filter={\n            \"property\": \"Lessons\",\n            \"title\": {\n                \"equals\": \"Error Handling\"\n            }\n        }\n    )\n    \n    error_handling_id = None\n    if error_handling_response.get(\"results\"):\n        error_handling_id = error_handling_response[\"results\"][0][\"id\"]\n    else:\n        print(f\"Error: Error Handling lesson not found.\", file=sys.stderr)\n        return False\n    \n    expert_lessons = {\n        \"Metaprogramming and AST Manipulation\": {\n            \"status\": \"To Do\",\n            \"parent\": bridge_id,\n            \"date\": \"2025-09-15\"\n        },\n        \"Async Concurrency Patterns\": {\n            \"status\": \"To Do\",\n            \"parent\": error_handling_id,  # Parent is Error Handling due to sub-item relation\n            \"date\": \"2025-09-20\"\n        },\n        \"Memory Management and GC Tuning\": {\n            \"status\": \"In Progress\",\n            \"parent\": bridge_id,\n            \"date\": \"2025-09-25\"\n        },\n        \"Building Python C Extensions\": {\n            \"status\": \"To Do\",\n            \"date\": \"2025-10-01\"\n        }\n    }\n    \n    lesson_ids = {}\n    \n    try:\n        for lesson_name, expected in expert_lessons.items():\n            lesson_response = notion.databases.query(\n                database_id=steps_db_id,\n                filter={\n                    \"property\": \"Lessons\",\n                    \"title\": {\n                        \"equals\": lesson_name\n                    }\n                }\n            )\n            \n            if not lesson_response.get(\"results\"):\n                print(f\"Error: Lesson 
'{lesson_name}' not found.\", file=sys.stderr)\n                return False\n            \n            lesson = lesson_response[\"results\"][0]\n            lesson_ids[lesson_name] = lesson[\"id\"]\n            \n            # Check status\n            if lesson[\"properties\"][\"Status\"][\"status\"][\"name\"] != expected[\"status\"]:\n                print(f\"Error: Lesson '{lesson_name}' should have status '{expected['status']}'.\", file=sys.stderr)\n                return False\n            \n            # Check linked to Expert Level chapter\n            lesson_chapters = lesson[\"properties\"][\"Chapters\"][\"relation\"]\n            if not any(rel[\"id\"] == expert_chapter_id for rel in lesson_chapters):\n                print(f\"Error: Lesson '{lesson_name}' not linked to Expert Level chapter.\", file=sys.stderr)\n                return False\n            \n            # Check date\n            lesson_date = lesson[\"properties\"][\"Date\"][\"date\"]\n            if lesson_date and lesson_date.get(\"start\") != expected[\"date\"]:\n                print(f\"Error: Lesson '{lesson_name}' should have date '{expected['date']}'.\", file=sys.stderr)\n                return False\n            \n            # Check parent item for lessons that have specific parent requirements\n            if \"parent\" in expected:\n                lesson_parent = lesson[\"properties\"][\"Parent item\"][\"relation\"]\n                if not lesson_parent or lesson_parent[0][\"id\"] != expected[\"parent\"]:\n                    print(f\"Error: Lesson '{lesson_name}' should have correct parent item.\", file=sys.stderr)\n                    return False\n            \n            print(f\"✓ Lesson '{lesson_name}' found with correct properties\")\n        \n        # Special checks for Building Python C Extensions parent relationship\n        # (other parent checks are handled in the loop above)\n        building_lesson = notion.databases.query(\n            
database_id=steps_db_id,\n            filter={\n                \"property\": \"Lessons\",\n                \"title\": {\n                    \"equals\": \"Building Python C Extensions\"\n                }\n            }\n        )[\"results\"][0]\n        \n        building_parent = building_lesson[\"properties\"][\"Parent item\"][\"relation\"]\n        if not building_parent or building_parent[0][\"id\"] != lesson_ids[\"Metaprogramming and AST Manipulation\"]:\n            print(f\"Error: Building Python C Extensions should have Metaprogramming and AST Manipulation as parent.\", file=sys.stderr)\n            return False\n        \n        # Memory Management should have 2 sub-items\n        memory_lesson = notion.databases.query(\n            database_id=steps_db_id,\n            filter={\n                \"property\": \"Lessons\",\n                \"title\": {\n                    \"equals\": \"Memory Management and GC Tuning\"\n                }\n            }\n        )[\"results\"][0]\n        \n        memory_subitems = memory_lesson[\"properties\"][\"Sub-item\"][\"relation\"]\n        if len(memory_subitems) != 2:\n            print(f\"Error: Memory Management and GC Tuning should have exactly 2 sub-items.\", file=sys.stderr)\n            return False\n        \n    except Exception as e:\n        print(f\"Error checking expert lessons: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 7: Verify Error Handling has Async Concurrency Patterns as sub-item\n    print(\"7. 
Checking Error Handling sub-item...\")\n    \n    try:\n        error_handling_response = notion.databases.query(\n            database_id=steps_db_id,\n            filter={\n                \"property\": \"Lessons\",\n                \"title\": {\n                    \"equals\": \"Error Handling\"\n                }\n            }\n        )\n        \n        if error_handling_response.get(\"results\"):\n            error_handling_lesson = error_handling_response[\"results\"][0]\n            error_subitems = error_handling_lesson[\"properties\"][\"Sub-item\"][\"relation\"]\n            \n            if not any(item[\"id\"] == lesson_ids[\"Async Concurrency Patterns\"] for item in error_subitems):\n                print(f\"Error: Error Handling should have Async Concurrency Patterns as sub-item.\", file=sys.stderr)\n                return False\n            \n            print(f\"✓ Error Handling has Async Concurrency Patterns as sub-item\")\n        else:\n            print(f\"Error: Error Handling lesson not found.\", file=sys.stderr)\n            return False\n        \n    except Exception as e:\n        print(f\"Error checking Error Handling: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 8: Verify block content in Advanced Foundations Review\n    print(\"8. 
Checking Advanced Foundations Review page content...\")\n    \n    try:\n        blocks = notion_utils.get_all_blocks_recursively(notion, bridge_id)\n        \n        if len(blocks) < 3:\n            print(f\"Error: Advanced Foundations Review should have at least 3 blocks.\", file=sys.stderr)\n            return False\n        \n        # Check Block 1: Heading 2\n        block1 = blocks[0]\n        if block1.get(\"type\") != \"heading_2\":\n            print(f\"Error: First block should be heading_2.\", file=sys.stderr)\n            return False\n        \n        heading_text = block1.get(\"heading_2\", {}).get(\"rich_text\", [{}])[0].get(\"text\", {}).get(\"content\", \"\")\n        if heading_text != \"Prerequisites Checklist\":\n            print(f\"Error: Heading should be 'Prerequisites Checklist'.\", file=sys.stderr)\n            return False\n        \n        # Check Block 2: Bulleted list\n        block2 = blocks[1]\n        if block2.get(\"type\") != \"bulleted_list_item\":\n            print(f\"Error: Second block should be bulleted_list_item.\", file=sys.stderr)\n            return False\n        \n        # Check Block 3 and 4 are also bulleted list items\n        if len(blocks) >= 4:\n            block3 = blocks[2]\n            block4 = blocks[3]\n            if block3.get(\"type\") != \"bulleted_list_item\" or block4.get(\"type\") != \"bulleted_list_item\":\n                print(f\"Error: Blocks 2-4 should be bulleted list items.\", file=sys.stderr)\n                return False\n        \n        # Check last block is paragraph\n        last_block = blocks[-1]\n        if last_block.get(\"type\") != \"paragraph\":\n            print(f\"Error: Last block should be paragraph.\", file=sys.stderr)\n            return False\n        \n        paragraph_text = last_block.get(\"paragraph\", {}).get(\"rich_text\", [{}])[0].get(\"text\", {}).get(\"content\", \"\")\n        if \"checkpoint\" not in paragraph_text.lower():\n            print(f\"Error: 
Paragraph should contain text about checkpoint.\", file=sys.stderr)\n            return False\n        \n        print(f\"✓ Advanced Foundations Review page has correct content structure\")\n        \n    except Exception as e:\n        print(f\"Error checking page content: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 9: Final verification counts\n    print(\"9. Verifying final state counts...\")\n    \n    try:\n        # Count total lessons by status\n        all_lessons = notion.databases.query(database_id=steps_db_id, page_size=100)[\"results\"]\n        \n        done_lessons = [l for l in all_lessons if l[\"properties\"][\"Status\"][\"status\"][\"name\"] == \"Done\"]\n        done_count = len(done_lessons)\n        in_progress_count = sum(1 for l in all_lessons if l[\"properties\"][\"Status\"][\"status\"][\"name\"] == \"In Progress\")\n        \n        # Print out all Done lessons for debugging\n        if done_count != 14:\n            print(f\"Found {done_count} Done lessons (expected 14):\", file=sys.stderr)\n            for lesson in done_lessons:\n                lesson_name = lesson[\"properties\"][\"Lessons\"][\"title\"][0][\"text\"][\"content\"]\n                print(f\"  - {lesson_name}\", file=sys.stderr)\n            return False\n        \n        if in_progress_count != 1:\n            print(f\"Error: Should have 1 In Progress lesson, found {in_progress_count}.\", file=sys.stderr)\n            return False\n        \n        # Verify Expert Level has 5 lessons\n        expert_chapter_updated = notion.databases.query(\n            database_id=chapters_db_id,\n            filter={\n                \"property\": \"Name\",\n                \"title\": {\n                    \"equals\": \"Expert Level\"\n                }\n            }\n        )[\"results\"][0]\n        \n        expert_steps = expert_chapter_updated[\"properties\"][\"Steps\"][\"relation\"]\n        if len(expert_steps) != 5:\n            print(f\"Error: Expert 
Level should have exactly 5 lessons, found {len(expert_steps)}.\", file=sys.stderr)\n            return False\n        \n        print(f\"✓ Final state counts are correct\")\n        \n    except Exception as e:\n        print(f\"Error verifying final counts: {e}\", file=sys.stderr)\n        return False\n    \n    print(\"🎉 All verification checks passed!\")\n    return True\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/notion/standard/python_roadmap/learning_metrics_dashboard/description.md",
    "content": "# Task: Learning Metrics Dashboard\n\n## Objective\nCreate a comprehensive Learning Metrics Dashboard section in the Python Roadmap page that displays precise statistics and recommendations based on the Steps database content.\n\n## Requirements\n\n### 1. Section Placement\n- Add new content immediately after the Learning Materials section (i.e., before the paragraph that begins `Whether you're starting from scratch or`).\n\n### 2. Dashboard Header\n- **Type**: heading_3\n- **Text**: `📊 Learning Metrics Dashboard`\n\n### 3. Course Statistics Block\n- **Type**: callout\n- **Background Color**: Brown\n- **Icon**: None\n- **Title**: **Course Statistics** (bold, heading_3). Use the same color scheme as other callout headings.\n- **Content**: Bulleted list with the following items in exact order:\n  - `Total Lessons: [X]` (count all entries in the Steps database)\n  - `Completed: [X] ([Y]%)` (count Status=\"Done\"; [Y] is the percentage of Total Lessons, rounded to 1 decimal place)\n  - `In Progress: [X] ([Y]%)` (count Status=\"In Progress\"; [Y] is the percentage of Total Lessons, rounded to 1 decimal place)\n  - `Beginner Level: [X] lessons ([Y] completed)` (lessons whose Chapters relation links to Beginner Level; [Y] counts those with Status=\"Done\")\n  - `Intermediate Level: [X] lessons ([Y] completed)` (lessons whose Chapters relation links to Intermediate Level; [Y] counts those with Status=\"Done\")\n  - `Advanced Level: [X] lessons ([Y] completed)` (lessons whose Chapters relation links to Advanced Level; [Y] counts those with Status=\"Done\")\n\n### 4. Completed Topics Section\n- **Type**: toggle\n- **Text**: `🏆 Completed Topics (Click to expand)`\n- **Nested Content**: Numbered list containing exactly 5 items\n  - List lessons with Status=\"Done\""
  },
  {
    "path": "tasks/notion/standard/python_roadmap/learning_metrics_dashboard/meta.json",
    "content": "{\n  \"task_id\": \"learning_metrics_dashboard\",\n  \"task_name\": \"Learning Metrics Dashboard\",\n  \"category_id\": \"python_roadmap\",\n  \"category_name\": \"Python Roadmap\",\n  \"description\": \"Create a comprehensive Learning Metrics Dashboard section displaying precise statistics and recommendations based on the Steps database.\",\n  \"author\": \"Lingjun Chen\",\n  \"created_at\": \"2025-08-02\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data aggregation\",\n    \"conditional filtering\",\n    \"report generation\",\n    \"visual formatting\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Python-Roadmap-25281626b6d78012bf2bce1fa8711f4d\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/python-roadmap\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/python_roadmap/learning_metrics_dashboard/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\ndef get_page_title_from_result(page_result):\n    \"\"\"\n    Extract the title from a page result object from database query.\n    \"\"\"\n    properties = page_result.get('properties', {})\n    # Try common title property names\n    for prop_name in ['Name', 'Title', 'title', 'Lessons']:\n        if prop_name in properties:\n            prop = properties[prop_name]\n            if prop.get('type') == 'title':\n                title_array = prop.get('title', [])\n                if title_array and len(title_array) > 0:\n                    return title_array[0].get('plain_text', '')\n    return ''\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the Learning Metrics Dashboard has been implemented correctly according to description.md.\n    \"\"\"\n    # Step 1: Find the main page and get all blocks\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not found_id or object_type != 'page':\n            print(\"Error: Main page not found.\", file=sys.stderr)\n            return False\n    else:\n        # Try to find the main page by searching\n        found_id = notion_utils.find_page(notion, \"Python Roadmap\")\n        if not found_id:\n            print(\"Error: Main page not found.\", file=sys.stderr)\n            return False\n    \n    print(f\"Found main page: {found_id}\")\n    \n    # Get Steps database to calculate expected statistics\n    steps_db_id = notion_utils.find_database(notion, \"Steps\")\n    if not steps_db_id:\n        print(\"Error: Steps database not found.\", file=sys.stderr)\n        return False\n    \n    # Query Steps database to get all lessons\n    steps_data = notion.databases.query(database_id=steps_db_id)\n    total_lessons = len(steps_data['results'])\n    completed_count = 0\n    in_progress_count = 0\n    
completed_lessons = []\n    \n    # Get Chapters database for level information\n    chapters_db_id = notion_utils.find_database(notion, \"Chapters\")\n    if not chapters_db_id:\n        print(\"Error: Chapters database not found.\", file=sys.stderr)\n        return False\n    \n    # Query Chapters database to get level information\n    chapters_data = notion.databases.query(database_id=chapters_db_id)\n    level_ids = {\n        'Beginner Level': None,\n        'Intermediate Level': None,\n        'Advanced Level': None\n    }\n    \n    for chapter in chapters_data['results']:\n        chapter_name = get_page_title_from_result(chapter)\n        if chapter_name in level_ids:\n            level_ids[chapter_name] = chapter['id']\n    \n    # Initialize level counts\n    level_counts = {\n        'Beginner Level': {'total': 0, 'completed': 0},\n        'Intermediate Level': {'total': 0, 'completed': 0},\n        'Advanced Level': {'total': 0, 'completed': 0}\n    }\n    \n    # Count lessons by status and level\n    for lesson in steps_data['results']:\n        status = lesson['properties']['Status']['status']\n        if status and status['name'] == 'Done':\n            completed_count += 1\n            lesson_title = get_page_title_from_result(lesson)\n            if lesson_title:\n                completed_lessons.append(lesson_title)\n        elif status and status['name'] == 'In Progress':\n            in_progress_count += 1\n        \n        # Count by level\n        chapters_relation = lesson['properties']['Chapters']['relation']\n        for chapter_ref in chapters_relation:\n            chapter_id = chapter_ref['id']\n            for level_name, level_id in level_ids.items():\n                if chapter_id == level_id:\n                    level_counts[level_name]['total'] += 1\n                    if status and status['name'] == 'Done':\n                        level_counts[level_name]['completed'] += 1\n    \n    # Calculate percentages\n    
completed_percentage = round((completed_count / total_lessons * 100), 1) if total_lessons > 0 else 0\n    in_progress_percentage = round((in_progress_count / total_lessons * 100), 1) if total_lessons > 0 else 0\n    \n    print(f\"Expected statistics:\")\n    print(f\"  Total Lessons: {total_lessons}\")\n    print(f\"  Completed: {completed_count} ({completed_percentage}%)\")\n    print(f\"  In Progress: {in_progress_count} ({in_progress_percentage}%)\")\n    print(f\"  Beginner Level: {level_counts['Beginner Level']['total']} lessons ({level_counts['Beginner Level']['completed']} completed)\")\n    print(f\"  Intermediate Level: {level_counts['Intermediate Level']['total']} lessons ({level_counts['Intermediate Level']['completed']} completed)\")\n    print(f\"  Advanced Level: {level_counts['Advanced Level']['total']} lessons ({level_counts['Advanced Level']['completed']} completed)\")\n    print(f\"  Completed lessons (first 5): {completed_lessons[:5]}\")\n    \n    # Get all blocks from the page\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id)\n    print(f\"Found {len(all_blocks)} blocks\")\n    \n    # Step 2: Verify the required elements in order\n    learning_materials_idx = -1\n    dashboard_heading_idx = -1\n    callout_idx = -1\n    toggle_idx = -1\n    whether_paragraph_idx = -1  # Track the \"Whether you're starting from scratch\" paragraph\n    \n    # Track what we've verified\n    callout_has_brown_bg = False\n    callout_has_no_icon = False\n    callout_has_course_statistics_title = False\n    callout_title_has_correct_colors = False\n    statistics_items_found = []\n    completed_topics_found = []\n    \n    # Expected statistics content\n    expected_statistics = [\n        f\"Total Lessons: {total_lessons}\",\n        f\"Completed: {completed_count} ({completed_percentage}%)\",\n        f\"In Progress: {in_progress_count} ({in_progress_percentage}%)\",\n        f\"Beginner Level: {level_counts['Beginner Level']['total']} 
lessons ({level_counts['Beginner Level']['completed']} completed)\",\n        f\"Intermediate Level: {level_counts['Intermediate Level']['total']} lessons ({level_counts['Intermediate Level']['completed']} completed)\",\n        f\"Advanced Level: {level_counts['Advanced Level']['total']} lessons ({level_counts['Advanced Level']['completed']} completed)\"\n    ]\n    \n    # Check blocks in order\n    for i, block in enumerate(all_blocks):\n        if block is None:\n            continue\n            \n        block_type = block.get(\"type\")\n        \n        # 1. Check for Learning Materials heading (requirement 1)\n        if learning_materials_idx == -1 and block_type == \"heading_3\":\n            block_text = notion_utils.get_block_plain_text(block)\n            if \"🎓 Learning Materials\" in block_text or \"Learning Materials\" in block_text:\n                learning_materials_idx = i\n                print(f\"✓ Requirement 1: Found Learning Materials heading at position {i}\")\n        \n        # 2. Check for Learning Metrics Dashboard heading after Learning Materials (requirement 2)\n        elif learning_materials_idx != -1 and dashboard_heading_idx == -1 and block_type == \"heading_3\":\n            block_text = notion_utils.get_block_plain_text(block)\n            if \"📊 Learning Metrics Dashboard\" in block_text:\n                dashboard_heading_idx = i\n                print(f\"✓ Requirement 2: Found Learning Metrics Dashboard heading at position {i}\")\n        \n        # 3. 
Check for callout block after Dashboard heading (requirement 3)\n        elif dashboard_heading_idx != -1 and callout_idx == -1 and block_type == \"callout\":\n            callout_idx = i\n            print(f\"  Found callout block at position {i}\")\n            \n            # Check brown background (requirement 3.1)\n            if block.get(\"callout\", {}).get(\"color\") == \"brown_background\":\n                callout_has_brown_bg = True\n                print(f\"  ✓ Requirement 3.1: Callout has brown background\")\n            \n            # Check no icon (requirement 3.2)\n            icon = block.get(\"callout\", {}).get(\"icon\")\n            if icon is None:\n                callout_has_no_icon = True\n                print(f\"  ✓ Requirement 3.2: Callout has no icon\")\n            \n            # Get nested blocks for Course Statistics title and content\n            nested_blocks = notion_utils.get_all_blocks_recursively(notion, block.get(\"id\"))\n            \n            for nested in nested_blocks:\n                # Check for heading_3 only as per requirement\n                if nested and nested.get(\"type\") == \"heading_3\":\n                    # Check for \"Course Statistics\" title with correct formatting\n                    rich_text = nested.get(\"heading_3\", {}).get(\"rich_text\", [])\n                    course_found = False\n                    course_correct = False\n                    statistics_found = False\n                    statistics_correct = False\n                    \n                    for text_item in rich_text:\n                        text_content = text_item.get(\"text\", {}).get(\"content\", \"\")\n                        annotations = text_item.get(\"annotations\", {})\n                        color = annotations.get(\"color\", \"default\")\n                        is_bold = annotations.get(\"bold\", False)\n                        \n                        if \"Course\" in text_content:\n                       
     course_found = True\n                            # Check if Course is blue and bold\n                            if color == \"blue\" and is_bold:\n                                course_correct = True\n                                print(f\"  ✓ 'Course' has blue color and is bold\")\n                            else:\n                                print(f\"  ✗ 'Course' color: {color}, bold: {is_bold} (should be blue and bold)\")\n                            \n                        if \"Statistics\" in text_content:\n                            statistics_found = True\n                            # Check if Statistics is yellow and bold\n                            if color == \"yellow\" and is_bold:\n                                statistics_correct = True\n                                print(f\"  ✓ 'Statistics' has yellow color and is bold\")\n                            else:\n                                print(f\"  ✗ 'Statistics' color: {color}, bold: {is_bold} (should be yellow and bold)\")\n                    \n                    if course_found and statistics_found:\n                        callout_has_course_statistics_title = True\n                        if course_correct and statistics_correct:\n                            callout_title_has_correct_colors = True\n                            print(f\"  ✓ Requirement 3.3: Callout has 'Course Statistics' title with correct colors\")\n                        else:\n                            print(f\"  ✗ Requirement 3.3: Title found but colors/formatting incorrect\")\n                \n                # Check for statistics items in bulleted list\n                elif nested and nested.get(\"type\") == \"bulleted_list_item\":\n                    item_text = notion_utils.get_block_plain_text(nested)\n                    for expected_item in expected_statistics:\n                        if expected_item in item_text:\n                            if expected_item not in 
statistics_items_found:\n                                statistics_items_found.append(expected_item)\n                                print(f\"  ✓ Requirement 3.4: Found statistics item: {expected_item}\")\n        \n        # 4. Check for Completed Topics toggle after callout (requirement 4)\n        elif callout_idx != -1 and toggle_idx == -1 and block_type == \"toggle\":\n            block_text = notion_utils.get_block_plain_text(block)\n            if \"🏆 Completed Topics (Click to expand)\" in block_text:\n                toggle_idx = i\n                print(f\"✓ Requirement 4: Found Completed Topics toggle at position {i}\")\n                \n                # Get nested blocks for completed topics list\n                nested_blocks = notion_utils.get_all_blocks_recursively(notion, block.get(\"id\"))\n                for nested in nested_blocks:\n                    if nested and nested.get(\"type\") == \"numbered_list_item\":\n                        item_text = notion_utils.get_block_plain_text(nested)\n                        if item_text and item_text in completed_lessons:\n                            completed_topics_found.append(item_text)\n                            print(f\"  ✓ Requirement 4.1: Found completed topic: {item_text}\")\n        \n        # 5. 
Check for \"Whether you're starting from scratch\" paragraph (should be after dashboard content)\n        elif block_type == \"paragraph\" and whether_paragraph_idx == -1:\n            block_text = notion_utils.get_block_plain_text(block)\n            if \"Whether you're starting from scratch\" in block_text or \"Whether you're starting from scratch\" in block_text:\n                whether_paragraph_idx = i\n                print(f\"  Found 'Whether you're starting from scratch' paragraph at position {i}\")\n    \n    # Step 3: Verify all requirements were met\n    print(f\"\\nVerification Summary:\")\n    \n    all_passed = True\n    \n    # Requirement 1: Learning Materials section found\n    if learning_materials_idx == -1:\n        print(\"✗ Requirement 1: Learning Materials section NOT found\", file=sys.stderr)\n        all_passed = False\n    else:\n        print(\"✓ Requirement 1: Learning Materials section found\")\n    \n    # Requirement 2: Learning Metrics Dashboard heading after Learning Materials and before \"Whether...\" paragraph\n    if dashboard_heading_idx == -1:\n        print(\"✗ Requirement 2: Learning Metrics Dashboard heading NOT found\", file=sys.stderr)\n        all_passed = False\n    elif dashboard_heading_idx <= learning_materials_idx:\n        print(\"✗ Requirement 2: Learning Metrics Dashboard heading not AFTER Learning Materials\", file=sys.stderr)\n        all_passed = False\n    elif whether_paragraph_idx != -1 and dashboard_heading_idx >= whether_paragraph_idx:\n        print(\"✗ Requirement 2: Learning Metrics Dashboard heading not BEFORE 'Whether you're starting from scratch' paragraph\", file=sys.stderr)\n        all_passed = False\n    else:\n        print(\"✓ Requirement 2: Learning Metrics Dashboard heading found after Learning Materials\")\n        if whether_paragraph_idx != -1:\n            print(\"  ✓ Dashboard content is correctly placed before 'Whether you're starting from scratch' paragraph\")\n    \n    # Requirement 
3: Course Statistics callout block with all specifications\n    if callout_idx == -1:\n        print(\"✗ Requirement 3: Course Statistics callout block NOT found\", file=sys.stderr)\n        all_passed = False\n    else:\n        if not callout_has_brown_bg:\n            print(\"✗ Requirement 3.1: Callout does NOT have brown background\", file=sys.stderr)\n            all_passed = False\n        else:\n            print(\"✓ Requirement 3.1: Callout has brown background\")\n            \n        if not callout_has_no_icon:\n            print(\"✗ Requirement 3.2: Callout has an icon (should have none)\", file=sys.stderr)\n            all_passed = False\n        else:\n            print(\"✓ Requirement 3.2: Callout has no icon\")\n            \n        if not callout_has_course_statistics_title:\n            print(\"✗ Requirement 3.3: Callout does NOT have 'Course Statistics' title\", file=sys.stderr)\n            all_passed = False\n        else:\n            print(\"✓ Requirement 3.3: Callout has 'Course Statistics' title\")\n        \n        if not callout_title_has_correct_colors:\n            print(\"✗ Requirement 3.3.1: Title does NOT have correct colors (blue for Course, yellow for Statistics)\", file=sys.stderr)\n            all_passed = False\n        else:\n            print(\"✓ Requirement 3.3.1: Title has correct colors\")\n        \n        # Check all statistics items\n        missing_items = [item for item in expected_statistics if item not in statistics_items_found]\n        if missing_items:\n            print(f\"✗ Requirement 3.4: Missing statistics items: {missing_items}\", file=sys.stderr)\n            all_passed = False\n        else:\n            print(\"✓ Requirement 3.4: All 6 statistics items found\")\n    \n    # Requirement 4: Completed Topics toggle\n    if toggle_idx == -1:\n        print(\"✗ Requirement 4: Completed Topics toggle NOT found\", file=sys.stderr)\n        all_passed = False\n    elif toggle_idx <= callout_idx:\n        
print(\"✗ Requirement 4: Completed Topics toggle not AFTER callout\", file=sys.stderr)\n        all_passed = False\n    else:\n        print(\"✓ Requirement 4: Completed Topics toggle found after callout\")\n        \n        # Check that exactly 5 completed topics are listed\n        if len(completed_topics_found) != 5:\n            if len(completed_topics_found) < 5:\n                print(f\"✗ Requirement 4.1: Only {len(completed_topics_found)} completed topics found (need exactly 5)\", file=sys.stderr)\n            else:\n                print(f\"✗ Requirement 4.1: Found {len(completed_topics_found)} completed topics (need exactly 5, not more)\", file=sys.stderr)\n            all_passed = False\n        else:\n            print(f\"✓ Requirement 4.1: Found exactly 5 completed topics as required\")\n    \n    # Requirement 5: Proper integration (implicitly checked by order)\n    if all_passed:\n        print(\"✓ Requirement 5: All content properly integrated in correct order\")\n    \n    return all_passed\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    \n    if verify(notion, main_id):\n        print(\"Verification passed\")\n        sys.exit(0)\n    else:\n        print(\"Verification failed\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/notion/standard/self_assessment/faq_column_layout/description.md",
    "content": "Navigate to the \"Self Assessment\" page and reorganize the content under the FAQ toggle as follows:\n\n**Task Requirements:**\n1. Add a column list with two columns inside the FAQ toggle\n2. Move the first two existing Q&A pairs from the FAQ to the left column\n3. Move the third existing Q&A pair to the right column\n4. Add one additional Q&A pair in the right column to match the format, so both columns have exactly 2 Q&A pairs\n5. Ensure all Q&A pairs maintain consistent formatting (heading_3 for questions, paragraph for answers)"
  },
  {
    "path": "tasks/notion/standard/self_assessment/faq_column_layout/meta.json",
    "content": "{\n  \"task_id\": \"faq_column_layout\",\n  \"task_name\": \"FAQ Column Layout\",\n  \"category_id\": \"self_assessment\",\n  \"category_name\": \"Self Assessment\",\n  \"description\": \"Reorganize the FAQ section content into a two-column layout with balanced Q&A pairs.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content organization\",\n    \"visual formatting\",\n    \"template population\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Self-Assessment-24381626b6d780fe9f56c2ba14ea042d\",\n    \"stateOriginalUrl\": \"https://painted-tennis-ebc.notion.site/Self-Assessment-24381626b6d780fe9f56c2ba14ea042d\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/self_assessment/faq_column_layout/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the FAQ toggle has been properly reorganized with a column list.\n    \"\"\"\n    # Start from main_id if provided\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        # Try to find the Self Assessment page\n        page_id = notion_utils.find_page(notion, \"Self Assessment\")\n\n    if not page_id:\n        print(\"Error: Self Assessment page not found.\", file=sys.stderr)\n        return False\n\n    # Get all blocks recursively from the page\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n\n    # Find the FAQ toggle block\n    faq_toggle_block = None\n    faq_toggle_id = None\n    for block in all_blocks:\n        if block.get(\"type\") == \"toggle\":\n            block_text = notion_utils.get_block_plain_text(block)\n            if \"FAQ\" in block_text:\n                faq_toggle_block = block\n                faq_toggle_id = block.get(\"id\")\n                print(f\"Found FAQ toggle block: {block_text}\")\n                break\n\n    if not faq_toggle_block:\n        print(\"Error: FAQ toggle block not found.\", file=sys.stderr)\n        return False\n\n    # Find column_list inside the FAQ toggle\n    column_list_block = None\n    for block in all_blocks:\n        if (\n            block.get(\"type\") == \"column_list\"\n            and block.get(\"parent\", {}).get(\"block_id\") == faq_toggle_id\n        ):\n            column_list_block = block\n            break\n\n    if not column_list_block:\n        print(\"Error: No column_list found inside FAQ toggle.\", file=sys.stderr)\n        return False\n\n    # Check that there 
are no Q&A pairs directly under FAQ toggle (outside column_list)\n    direct_faq_children = []\n    for block in all_blocks:\n        if block.get(\"parent\", {}).get(\"block_id\") == faq_toggle_id and block.get(\n            \"id\"\n        ) != column_list_block.get(\"id\"):\n            direct_faq_children.append(block)\n\n    # Check if any of these are heading_3 or paragraph blocks (Q&A content)\n    for block in direct_faq_children:\n        if block.get(\"type\") in [\"heading_3\", \"paragraph\"]:\n            print(\n                f\"Error: Found Q&A content outside column_list: {notion_utils.get_block_plain_text(block)[:50]}...\",\n                file=sys.stderr,\n            )\n            return False\n\n    # Find the two columns\n    columns = []\n    column_list_id = column_list_block.get(\"id\")\n    for block in all_blocks:\n        if (\n            block.get(\"type\") == \"column\"\n            and block.get(\"parent\", {}).get(\"block_id\") == column_list_id\n        ):\n            columns.append(block)\n\n    if len(columns) != 2:\n        print(f\"Error: Expected 2 columns, found {len(columns)}.\", file=sys.stderr)\n        return False\n\n    # Check each column has exactly 2 Q&A pairs\n    for i, column in enumerate(columns):\n        column_id = column.get(\"id\")\n\n        # Find blocks inside this column\n        column_blocks = []\n        for block in all_blocks:\n            if block.get(\"parent\", {}).get(\"block_id\") == column_id:\n                column_blocks.append(block)\n\n        # Count Q&A pairs (should be heading_3 followed by paragraph)\n        qa_pairs = 0\n        j = 0\n        while j < len(column_blocks):\n            if (\n                column_blocks[j].get(\"type\") == \"heading_3\"\n                and j + 1 < len(column_blocks)\n                and column_blocks[j + 1].get(\"type\") == \"paragraph\"\n            ):\n                qa_pairs += 1\n                j += 2  # Skip both question and answer\n    
        else:\n                j += 1\n\n        if qa_pairs != 2:\n            print(\n                f\"Error: Column {i + 1} has {qa_pairs} Q&A pairs, expected 2.\",\n                file=sys.stderr,\n            )\n            return False\n\n        print(f\"Column {i + 1}: Found {qa_pairs} Q&A pairs ✓\")\n\n    print(\n        \"Success: FAQ toggle properly organized with 2 columns, each containing 2 Q&A pairs.\"\n    )\n    return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/self_assessment/hyperfocus_analysis_report/description.md",
    "content": "Go to my Self Assessment page, and then create a hyperfocus analysis report by analyzing sessions with high productivity but significant challenges.\n\n**Task Requirements:**\n1. Create a new page titled \"Hyperfocus Analysis Report\" as a child of the Self Assessment page. The new page should be located between 'Why Use the Term \"Hyperfocus\"?' callout and the following divider line.\n2. Query the \"Hyperfocus Self-Assessment Worksheet\" database to find all sessions where:\n   - Work Completion Rate is greater than 80% (0.8)\n   - At least one challenge is present in the Challenges field\n3. For each qualifying session, create a section with:\n   - A heading showing the date and activity type (format: YYYY-MM-DD Activity)\n   - A bullet list containing:\n     - Focus factors used (e.g., Focus factors: XXX, YYY)\n     - Energy level and mood (format: \"Energy: X/10, Mood: Y/10\")\n     - Challenges faced (e.g., Challenges: XXX, YYY)\n     - Strategies that helped overcome challenges (e.g., Strategies: XXX, YYY)\n     - Work completion rate (format: \"Completion: XX%\")\n4. At the top of the page, add a callout block (type: \"info\") with:\n   - Title: \"Top 2 Most Effective Strategies\"\n   - Content: List the 2 most frequently used strategies from all sessions, each on a new line with format \"• Strategy Name (used in X sessions)\"\n\n**Structure Requirements:**\n- The page must have the exact title \"Hyperfocus Analysis Report\"\n- Each session section must start with a level 2 heading\n- All session details must be in bullet point format\n- The summary callout must be at the top of the page before any session details"
  },
  {
    "path": "tasks/notion/standard/self_assessment/hyperfocus_analysis_report/meta.json",
    "content": "{\n  \"task_id\": \"hyperfocus_analysis_report\",\n  \"task_name\": \"Hyperfocus Analysis Report\",\n  \"category_id\": \"self_assessment\",\n  \"category_name\": \"Self Assessment\",\n  \"description\": \"Create a hyperfocus analysis report by analyzing high-productivity sessions with challenges.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"conditional filtering\",\n    \"data aggregation\",\n    \"report generation\",\n    \"visual formatting\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Self-Assessment-24381626b6d780fe9f56c2ba14ea042d\",\n    \"stateOriginalUrl\": \"https://painted-tennis-ebc.notion.site/Self-Assessment-24381626b6d780fe9f56c2ba14ea042d\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/self_assessment/hyperfocus_analysis_report/verify.py",
    "content": "import sys\nimport re\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\nfrom collections import Counter\n\n\ndef validate_comma_separated(text: str, expected_items: list) -> bool:\n    \"\"\"\n    Validates that a comma-separated list contains expected items (case-insensitive).\n    \"\"\"\n    if not text or not expected_items:\n        return False\n\n    # Extract items from text\n    items = [item.strip().lower() for item in text.split(\",\")]\n    expected_lower = [item.lower() for item in expected_items]\n\n    # Check if all expected items are present\n    for expected in expected_lower:\n        if not any(expected in item or item in expected for item in items):\n            return False\n    return True\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the Hyperfocus Analysis Report has been created correctly.\n    \"\"\"\n    # Find the Self Assessment page\n    self_assessment_page_id = main_id\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            self_assessment_page_id = found_id\n\n    if not self_assessment_page_id:\n        # Try to find by name\n        self_assessment_page_id = notion_utils.find_page(notion, \"Self Assessment\")\n\n    if not self_assessment_page_id:\n        print(\"Error: Self Assessment page not found.\", file=sys.stderr)\n        return False\n\n    # Find the Hyperfocus Analysis Report page\n    report_page_id = None\n    report_position = -1\n    callout_position = -1\n    divider_position = -1\n    children = notion.blocks.children.list(block_id=self_assessment_page_id).get(\n        \"results\", []\n    )\n    for i, child in enumerate(children):\n        # Track position of callout with \"Why Use the Term\"\n        if child.get(\"type\") == \"callout\":\n            callout_text = 
notion_utils.get_block_plain_text(child)\n            if \"Why Use the Term\" in callout_text and \"Hyperfocus\" in callout_text:\n                callout_position = i\n\n        # Track position of divider\n        elif child.get(\"type\") == \"divider\":\n            if callout_position != -1 and divider_position == -1:\n                divider_position = i\n\n        # Find the report page\n        elif child.get(\"type\") == \"child_page\":\n            page_data = notion.pages.retrieve(page_id=child[\"id\"])\n            title_prop = (\n                page_data.get(\"properties\", {}).get(\"title\", {}).get(\"title\", [])\n            )\n            if (\n                title_prop\n                and title_prop[0].get(\"plain_text\") == \"Hyperfocus Analysis Report\"\n            ):\n                report_page_id = child[\"id\"]\n                report_position = i\n\n    if not report_page_id:\n        print(\"Error: 'Hyperfocus Analysis Report' page not found.\", file=sys.stderr)\n        return False\n\n    # Verify position\n    if callout_position == -1:\n        print(\n            \"Error: Could not find 'Why Use the Term \\\"Hyperfocus\\\"?' callout.\",\n            file=sys.stderr,\n        )\n        return False\n\n    if divider_position == -1:\n        print(\"Error: Could not find divider after the callout.\", file=sys.stderr)\n        return False\n\n    if not (callout_position < report_position < divider_position):\n        print(\n            f\"Error: Report page is not positioned between callout and divider. 
Positions: callout={callout_position}, report={report_position}, divider={divider_position}\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Get all blocks from the report page\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, report_page_id)\n\n    # Find the database in the Self Assessment page\n    database_id = None\n    for block in notion_utils.get_all_blocks_recursively(\n        notion, self_assessment_page_id\n    ):\n        if block.get(\"type\") == \"child_database\":\n            db_data = notion.databases.retrieve(database_id=block[\"id\"])\n            db_title = \"\".join(\n                [t.get(\"plain_text\", \"\") for t in db_data.get(\"title\", [])]\n            )\n            if \"Hyperfocus Self-Assessment Worksheet\" in db_title:\n                database_id = block[\"id\"]\n                break\n\n    if not database_id:\n        print(\n            \"Error: Database 'Hyperfocus Self-Assessment Worksheet' not found.\",\n            file=sys.stderr,\n        )\n        return False\n\n    # Query database for sessions with >80% completion rate and challenges\n    query_results = notion.databases.query(\n        database_id=database_id,\n        filter={\n            \"and\": [\n                {\"property\": \"Work Completion Rate\", \"number\": {\"greater_than\": 0.8}},\n                {\"property\": \"Challenges\", \"multi_select\": {\"is_not_empty\": True}},\n            ]\n        },\n    ).get(\"results\", [])\n\n    if not query_results:\n        print(\n            \"Warning: No sessions found with >80% completion rate and challenges.\",\n            file=sys.stderr,\n        )\n        # Still check if the page structure is correct\n\n    # Verify page structure\n    has_callout = False\n    has_top_strategies = False\n    session_count = 0\n    found_sessions = {}  # Track sessions by date for validation\n\n    # Track strategies for validation - count from ALL sessions\n    all_sessions = 
notion.databases.query(database_id=database_id).get(\"results\", [])\n    all_strategies = []\n    for session in all_sessions:\n        strategies = (\n            session.get(\"properties\", {})\n            .get(\"Key Strategies Used\", {})\n            .get(\"multi_select\", [])\n        )\n        all_strategies.extend([s.get(\"name\") for s in strategies])\n\n    strategy_counts = Counter(all_strategies)\n    top_2_strategies = strategy_counts.most_common(2)\n\n    # Build expected sessions from query results with all data\n    expected_sessions = {}\n    for result in query_results:\n        date_prop = result.get(\"properties\", {}).get(\"Date\", {}).get(\"date\", {})\n        activity_prop = (\n            result.get(\"properties\", {}).get(\"Activity\", {}).get(\"select\", {})\n        )\n        if date_prop and date_prop.get(\"start\") and activity_prop:\n            date_str = date_prop[\"start\"]\n            activity_name = activity_prop.get(\"name\", \"\")\n\n            # Extract all session data for validation\n            focus_factors = [\n                f.get(\"name\", \"\")\n                for f in result.get(\"properties\", {})\n                .get(\"Focus Factors\", {})\n                .get(\"multi_select\", [])\n            ]\n            challenges = [\n                c.get(\"name\", \"\")\n                for c in result.get(\"properties\", {})\n                .get(\"Challenges\", {})\n                .get(\"multi_select\", [])\n            ]\n            strategies = [\n                s.get(\"name\", \"\")\n                for s in result.get(\"properties\", {})\n                .get(\"Key Strategies Used\", {})\n                .get(\"multi_select\", [])\n            ]\n            energy = result.get(\"properties\", {}).get(\"Energy Level\", {}).get(\"number\")\n            mood = result.get(\"properties\", {}).get(\"Mood\", {}).get(\"number\")\n            completion = (\n                result.get(\"properties\", {})\n         
       .get(\"Work Completion Rate\", {})\n                .get(\"number\")\n            )\n\n            expected_sessions[date_str] = {\n                \"activity\": activity_name,\n                \"focus_factors\": focus_factors,\n                \"challenges\": challenges,\n                \"strategies\": strategies,\n                \"energy\": energy,\n                \"mood\": mood,\n                \"completion\": completion,\n            }\n\n    current_session_date = None\n    current_session_data = None\n    session_bullet_points = {}  # Track bullet points for each session\n\n    for i, block in enumerate(all_blocks):\n        block_type = block.get(\"type\")\n\n        # Check for callout at the top\n        if block_type == \"callout\" and i < 5:  # Should be near the top\n            callout_text = notion_utils.get_block_plain_text(block)\n            if \"Top 2 Most Effective Strategies\" in callout_text:\n                has_callout = True\n                # Check if it contains strategy information\n                s1, n1 = top_2_strategies[0]\n                s2, n2 = top_2_strategies[1]\n                t1 = f\"{s1} (used in {n1} sessions)\"\n                t2 = f\"{s2} (used in {n2} sessions)\"\n\n                if t1 in callout_text and t2 in callout_text:\n                    has_top_strategies = True\n                    break\n\n        # Check for session headings with format YYYY-MM-DD Activity\n        if block_type == \"heading_2\":\n            heading_text = notion_utils.get_block_plain_text(block)\n            # Check if heading matches expected format\n            for date_str, session_data in expected_sessions.items():\n                activity = session_data[\"activity\"]\n                expected_heading = f\"{date_str} {activity}\"\n                if expected_heading in heading_text:\n                    found_sessions[date_str] = session_data\n                    session_count += 1\n                    
current_session_date = date_str\n                    current_session_data = session_data\n                    session_bullet_points[date_str] = []\n                    break\n\n        # Check for bullet points with session details\n        if block_type == \"bulleted_list_item\" and current_session_data:\n            bullet_text = notion_utils.get_block_plain_text(block)\n\n            # Track bullet points for current session\n            if current_session_date:\n                session_bullet_points[current_session_date].append(bullet_text)\n\n            # Validate specific bullet point content\n            if bullet_text.startswith(\"Focus factors\"):\n                content = bullet_text.split(\":\", 1)[1].strip()\n                expected_factors = current_session_data.get(\"focus_factors\", [])\n                if not validate_comma_separated(content, expected_factors):\n                    print(\n                        f\"Error: Focus factors mismatch for {current_session_date}. Expected: {expected_factors}, Found: {content}\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n            elif \"Energy\" in bullet_text and \"Mood\" in bullet_text:\n                # Extract energy and mood values\n                energy_match = re.search(r\"Energy:\\s*(\\d+)/10\", bullet_text)\n                mood_match = re.search(r\"Mood:\\s*(\\d+)/10\", bullet_text)\n\n                if energy_match and mood_match:\n                    found_energy = int(energy_match.group(1))\n                    found_mood = int(mood_match.group(1))\n                    expected_energy = current_session_data.get(\"energy\")\n                    expected_mood = current_session_data.get(\"mood\")\n\n                    if found_energy != expected_energy or found_mood != expected_mood:\n                        print(\n                            f\"Error: Energy/Mood mismatch for {current_session_date}. 
Expected: Energy: {expected_energy}/10, Mood: {expected_mood}/10\",\n                            file=sys.stderr,\n                        )\n                        return False\n                else:\n                    print(\n                        f\"Error: Invalid Energy/Mood format for {current_session_date}\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n            elif bullet_text.startswith(\"Challenges\"):\n                content = bullet_text.split(\":\", 1)[1].strip()\n                expected_challenges = current_session_data.get(\"challenges\", [])\n                if not validate_comma_separated(content, expected_challenges):\n                    print(\n                        f\"Error: Challenges mismatch for {current_session_date}. Expected: {expected_challenges}, Found: {content}\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n            elif bullet_text.startswith(\"Strategies\"):\n                content = bullet_text.split(\":\", 1)[1].strip()\n                expected_strategies = current_session_data.get(\"strategies\", [])\n                if len(expected_strategies) > 0 and not validate_comma_separated(\n                    content, expected_strategies\n                ):\n                    print(\n                        f\"Error: Strategies mismatch for {current_session_date}. 
Expected: {expected_strategies}, Found: {content}\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n            elif bullet_text.startswith(\"Completion\"):\n                # Extract completion percentage\n                completion_match = re.search(r\"Completion:\\s*(\\d+)%\", bullet_text)\n\n                if completion_match:\n                    found_completion = int(completion_match.group(1))\n                    expected_completion = int(\n                        current_session_data.get(\"completion\", 0) * 100\n                    )\n\n                    if found_completion != expected_completion:\n                        print(\n                            f\"Error: Completion rate mismatch for {current_session_date}. Expected: {expected_completion}%, Found: {found_completion}%\",\n                            file=sys.stderr,\n                        )\n                        return False\n                else:\n                    print(\n                        f\"Error: Invalid completion format for {current_session_date}\",\n                        file=sys.stderr,\n                    )\n                    return False\n\n    # Verify all sessions have complete bullet points\n    for date_str, bullets in session_bullet_points.items():\n        bullets_text = \" \".join(bullets)\n        required_items = [\n            \"Focus factors\",\n            \"Energy:\",\n            \"Mood:\",\n            \"Challenges\",\n            \"Strategies\",\n            \"Completion\",\n        ]\n        missing_items = []\n\n        for item in required_items:\n            if item not in bullets_text:\n                missing_items.append(item)\n\n        if missing_items:\n            print(\n                f\"Error: Missing bullet points for session {date_str}: {', '.join(missing_items)}\",\n                file=sys.stderr,\n            )\n            return False\n\n    # Verify all requirements\n   
 if not has_callout:\n        print(\n            \"Error: Missing callout block with 'Top 2 Most Effective Strategies'.\",\n            file=sys.stderr,\n        )\n        return False\n\n    if not has_top_strategies and len(top_2_strategies) > 0:\n        print(\"Error: Callout doesn't contain strategy information.\", file=sys.stderr)\n        return False\n\n    if query_results and session_count == 0:\n        print(\"Error: No session sections found with proper headings.\", file=sys.stderr)\n        return False\n\n    # Check if all expected sessions are present\n    missing_sessions = []\n    for date_str in expected_sessions.keys():\n        if date_str not in found_sessions:\n            missing_sessions.append(date_str)\n\n    if missing_sessions:\n        print(\n            f\"Error: Missing session sections for dates: {', '.join(missing_sessions)}\",\n            file=sys.stderr,\n        )\n        return False\n\n    if query_results and session_count < len(query_results):\n        print(\n            f\"Warning: Found {session_count} session sections but expected {len(query_results)}.\",\n            file=sys.stderr,\n        )\n\n    print(\n        \"Success: Hyperfocus Analysis Report created with proper structure and content.\"\n    )\n    return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/self_assessment/numbered_list_emojis/description.md",
    "content": "Please find all numbered list items in the Self Assessment page and use Notion tools to replace the numbers with the corresponding emoji numbers (e.g., 1️⃣, 2️⃣, 3️⃣).\n\nFor example, if the original numbered list is:\n\n1. First step\n2. Second step\n3. Third step\n\nIt should become:\n\n1️⃣ First step\n2️⃣ Second step\n3️⃣ Third step"
  },
  {
    "path": "tasks/notion/standard/self_assessment/numbered_list_emojis/meta.json",
    "content": "{\n  \"task_id\": \"numbered_list_emojis\",\n  \"task_name\": \"Numbered List Emojis\",\n  \"category_id\": \"self_assessment\",\n  \"category_name\": \"Self Assessment\",\n  \"description\": \"Replace numbered list items with corresponding emoji numbers for better visual formatting.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"visual formatting\",\n    \"automated migration\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Self-Assessment-24381626b6d780fe9f56c2ba14ea042d\",\n    \"stateOriginalUrl\": \"https://painted-tennis-ebc.notion.site/Self-Assessment-24381626b6d780fe9f56c2ba14ea042d\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/self_assessment/numbered_list_emojis/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that numbered lists have been replaced with emoji numbers.\n    \"\"\"\n    # Start from main_id if provided, otherwise search for the page\n    self_assessment_page_id = main_id\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            self_assessment_page_id = found_id\n\n    if not self_assessment_page_id:\n        # Try to find by name\n        self_assessment_page_id = notion_utils.find_page(notion, \"Self Assessment\")\n\n    if not self_assessment_page_id:\n        print(\"Error: Self Assessment page not found.\", file=sys.stderr)\n        return False\n\n    # Get all blocks recursively from the main page\n    all_blocks = notion_utils.get_all_blocks_recursively(\n        notion, self_assessment_page_id\n    )\n\n    # Find all numbered_list_item blocks\n    numbered_list_items = []\n    for block in all_blocks:\n        if block.get(\"type\") == \"numbered_list_item\":\n            numbered_list_items.append(block)\n\n    if len(numbered_list_items) > 0:\n        print(\n            f\"Error: found {len(numbered_list_items)} numbered list items that should be converted to emoji numbers\",\n            file=sys.stderr,\n        )\n        # return False\n\n    required_items = [\n        \"1️⃣ Record Each Hyperfocus Session:\",\n        \"2️⃣ Review and Reflect:\",\n        \"3️⃣ Adjust and Optimize:\",\n        '1️⃣ Harvard Business Review: \"The Making of a Corporate Athlete\"',\n        '2️⃣ \"Hyperfocus: How to Be More Productive in a World of Distraction\" by Chris Bailey',\n        '3️⃣ \"Attention Management: How to Create Success and Gain Productivity Every Day\" by Maura Thomas',\n        '4️⃣ \"Deep Work: Rules for Focused 
Success in a Distracted World\" by Cal Newport',\n        \"1️⃣ Record Each Hyperfocus Session:\",\n        \"2️⃣ Review and Reflect:\",\n        \"3️⃣ Adjust and Optimize:\",\n        \"1️⃣ What time of day do you feel most focused?\",\n        \"2️⃣ Which environment helps you concentrate the most?\",\n        \"3️⃣ What type of tasks do you find yourself getting lost in?\",\n    ]\n\n    # Make a copy to track which items we've found\n    remaining_items = required_items.copy()\n\n    # Iterate through all blocks to find matching text\n    for block in all_blocks:\n        block_text = notion_utils.get_block_plain_text(block).strip()\n\n        # Check if this block's text matches any of our required items\n        if block_text in remaining_items:\n            remaining_items.remove(block_text)\n            print(f\"Found: {block_text}\")\n\n    # Check if all required items were found\n    if len(remaining_items) == 0:\n        print(\"Success: All numbered lists have been converted to emoji numbers\")\n        return True\n    else:\n        print(f\"Error: Missing {len(remaining_items)} required items:\", file=sys.stderr)\n        for item in remaining_items:\n            print(f\"  - {item}\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/standard_operating_procedure/deployment_process_sop/description.md",
    "content": "Using Notion Tools. Complete the SOP template (a notion page titled 'Standard Operating Procedure') by filling in all sections with comprehensive, interconnected content for a \"Software Deployment Process\" SOP, ensuring all cross-references, terminologies, and procedural steps are properly linked and validated.\n\n**Task Requirements:**\n\n1. **Update the SOP header information** (in the left column):\n   - Change the heading_1 \"SOP Title\" text to \"Software Deployment Process\"\n   - Update the paragraph \"Created 2023-10-25\" to \"Created 2025-01-19\"\n   - Update the paragraph \"Responsible department:\" to \"Responsible department: DevOps Engineering Team\"\n   - Update the People team page's callout to: \"DevOps Engineering Team Wiki - Contains team contact information, escalation procedures, and deployment schedules. Access required for all deployment activities.\"\n\n2. **Fill the Purpose section** with exactly this content:\n   - Replace the placeholder paragraph (starts with \"↓ Summarize the procedure\") with: \"This SOP defines the standardized process for deploying software applications to production environments, ensuring zero-downtime deployments, proper rollback procedures, and compliance with security protocols. This procedure applies to all production deployments and must be followed by all engineering teams.\"\n\n3. **Complete the Context section** with:\n   - Replace the placeholder paragraph (starts with \"↓ Add any related and useful information\") with: \"Software deployments are critical operations that can impact system availability and user experience. This process has been developed based on industry best practices and our incident response learnings from Q3 2023. 
All deployments must go through automated testing pipelines and require approval from designated reviewers.\"\n   - Update all THREE child_pages under the \"Relevant Docs\" toggle:\n     - First child_page callout (Contacting IT): \"Change Management Policy (SOP-001) - Defines approval workflows and change review processes for all production modifications.\"\n     - Second child_page callout (Team lunches): \"Incident Response Procedures (SOP-003) - Emergency procedures for handling deployment failures and system outages.\"\n     - Third child_page callout (Sending swag): \"Security Compliance Guidelines (SOP-007) - Security requirements and validation steps for production deployments.\"\n\n4. **Define comprehensive Terminologies** by:\n   - Replace the placeholder paragraph (starts with \"↓ Add any unfamiliar or domain specific words\") with: \"Essential deployment terminology for team understanding:\"\n   - Replace the existing bulleted_list_item \"Term: The definition of the term\" with these four exact items:\n     - \"Blue-Green Deployment: A deployment strategy that maintains two identical production environments\"\n     - \"Rollback Window: The maximum time allowed to revert a deployment (30 minutes)\"  \n     - \"Smoke Test: Initial verification tests run immediately after deployment\"\n     - \"Production Gateway: The approval checkpoint before production release\"\n\n5. **Populate Tools section** with:\n   - Replace the placeholder paragraph (starts with \"↓ Add any relevant tools\") with: \"Critical tools required for deployment operations:\"\n   - Update the TWO existing child_pages:\n     - First child_page callout: \"Jenkins CI/CD Pipeline - Primary deployment automation tool with integrated testing and approval workflows. Required for all automated deployments.\"\n     - Second child_page callout: \"Kubernetes Dashboard - Container orchestration monitoring and management interface for deployment verification and rollback operations.\"\n\n6. 
**Complete Roles & responsibilities** with:\n   - Replace the placeholder paragraph (starts with \"↓ Define who will be executing\") with: \"The following roles are essential for successful deployment execution:\"\n   - Replace the existing empty bulleted_list_item with these four exact items:\n     - \"DevOps Engineer: Executes deployment, monitors system health, initiates rollbacks if needed\"\n     - \"Lead Developer: Reviews code changes, approves deployment package, validates functionality\"  \n     - \"QA Engineer: Verifies smoke tests, confirms user acceptance criteria\"\n     - \"Security Officer: Validates security compliance, approves security-sensitive deployments\"\n\n7. **Create detailed Procedure section** with:\n   - Replace the placeholder paragraph (starts with \"↓ Create a step by step procedure\") with: \"Follow these steps in sequence. Do not skip steps or perform them out of order.\"\n   - Replace the THREE existing numbered_list_items with:\n     - \"Pre-deployment: Verify all automated tests pass, obtain required approvals from Lead Developer and Security Officer, confirm rollback plan is documented and tested\"\n     - \"Deployment execution: Deploy to staging environment first, run comprehensive smoke tests, obtain final Production Gateway approval, deploy to production using blue-green strategy\"\n     - \"Post-deployment: Monitor system metrics for minimum 30 minutes, validate all functionality using automated tests, document deployment results in change log, notify all stakeholders via deployment notification system\""
  },
  {
    "path": "tasks/notion/standard/standard_operating_procedure/deployment_process_sop/meta.json",
    "content": "{\n  \"task_id\": \"deployment_process_sop\",\n  \"task_name\": \"Deployment Process SOP\",\n  \"category_id\": \"standard_operating_procedure\",\n  \"category_name\": \"Standard Operating Procedure\",\n  \"description\": \"Complete the SOP template with comprehensive content for a Software Deployment Process with interconnected sections.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-07-27\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"template population\",\n    \"cross-reference linking\",\n    \"content organization\",\n    \"visual formatting\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Standard-Operating-Procedure-24381626b6d780a8b678f9e62ae5b152\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/standard-operating-procedure\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/standard_operating_procedure/deployment_process_sop/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies comprehensive SOP template completion with exact content matching.\n    \"\"\"\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(\n            notion, main_id\n        )\n        if found_id and object_type == \"page\":\n            page_id = found_id\n\n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Standard Operating Procedure\")\n    if not page_id:\n        print(\"Error: Page 'Standard Operating Procedure' not found.\", file=sys.stderr)\n        return False\n\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n    verification_results = []\n\n    # Check 1: Verify SOP header information updates\n    sop_title_found = False\n    created_date_found = False\n    responsible_dept_found = False\n    header_callout_found = False\n\n    for block in all_blocks:\n        if block.get(\"type\") == \"heading_1\":\n            heading_text = notion_utils.get_block_plain_text(block)\n            if \"Software Deployment Process\" in heading_text:\n                sop_title_found = True\n                verification_results.append(\"✅ SOP Title updated correctly\")\n\n        elif block.get(\"type\") == \"paragraph\":\n            para_text = notion_utils.get_block_plain_text(block)\n            if \"Created 2025-01-19\" in para_text:\n                created_date_found = True\n                verification_results.append(\"✅ Created date updated correctly\")\n            elif \"Responsible department: DevOps Engineering Team\" in para_text:\n                responsible_dept_found = True\n                verification_results.append(\n                    \"✅ Responsible department updated correctly\"\n                )\n\n        elif block.get(\"type\") == \"child_page\":\n         
   # Check child pages recursively for callout content - specifically the People team page\n            try:\n                child_page_info = notion.pages.retrieve(page_id=block[\"id\"])\n                child_page_title = \"\"\n                if (\n                    \"properties\" in child_page_info\n                    and \"title\" in child_page_info[\"properties\"]\n                ):\n                    title_list = child_page_info[\"properties\"][\"title\"].get(\"title\", [])\n                    if title_list:\n                        child_page_title = title_list[0].get(\"plain_text\", \"\")\n            except:\n                child_page_title = \"\"\n\n            child_blocks = notion_utils.get_all_blocks_recursively(notion, block[\"id\"])\n            for child_block in child_blocks:\n                if child_block.get(\"type\") == \"callout\":\n                    callout_text = notion_utils.get_block_plain_text(child_block)\n                    # Look for the People team page with the DevOps Engineering Team Wiki callout\n                    if (\n                        \"DevOps Engineering Team Wiki\" in callout_text\n                        and \"deployment schedules\" in callout_text\n                        and \"deployment activities\" in callout_text\n                    ):\n                        header_callout_found = True\n                        verification_results.append(\n                            \"✅ Header People team page callout updated correctly\"\n                        )\n\n    # Check 2: Verify Purpose section content\n    purpose_found = False\n    expected_purpose = \"This SOP defines the standardized process for deploying software applications to production environments\"\n\n    for i, block in enumerate(all_blocks):\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = notion_utils.get_block_plain_text(block)\n            if \"Purpose\" in heading_text:\n                # Check next 
paragraph after Purpose heading\n                for j in range(i + 1, min(i + 5, len(all_blocks))):\n                    next_block = all_blocks[j]\n                    if next_block.get(\"type\") == \"paragraph\":\n                        para_text = notion_utils.get_block_plain_text(next_block)\n                        if (\n                            expected_purpose in para_text\n                            and \"engineering teams\" in para_text\n                        ):\n                            purpose_found = True\n                            verification_results.append(\n                                \"✅ Purpose section content updated correctly\"\n                            )\n                        break\n                break\n\n    # Check 3: Verify Context section and child_page callouts\n    context_found = False\n    child_pages_updated = 0\n    expected_context = \"Software deployments are critical operations that can impact system availability\"\n    expected_child_callouts = [\n        (\n            \"Change Management Policy (SOP-001)\",\n            \"Defines approval workflows and change review processes for all production modifications\",\n            \"Contacting IT\",\n        ),\n        (\n            \"Incident Response Procedures (SOP-003)\",\n            \"Emergency procedures for handling deployment failures and system outages\",\n            \"Team lunches\",\n        ),\n        (\n            \"Security Compliance Guidelines (SOP-007)\",\n            \"Security requirements and validation steps for production deployments\",\n            \"Sending swag\",\n        ),\n    ]\n\n    for i, block in enumerate(all_blocks):\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = notion_utils.get_block_plain_text(block)\n            if \"Context\" in heading_text:\n                # Check paragraph content\n                for j in range(i + 1, min(i + 10, len(all_blocks))):\n                    next_block 
= all_blocks[j]\n                    if next_block.get(\"type\") == \"paragraph\":\n                        para_text = notion_utils.get_block_plain_text(next_block)\n                        if expected_context in para_text and \"Q3 2023\" in para_text:\n                            context_found = True\n                    elif next_block.get(\"type\") == \"toggle\":\n                        # Check child pages under toggle\n                        toggle_blocks = notion_utils.get_all_blocks_recursively(\n                            notion, next_block[\"id\"]\n                        )\n                        for toggle_child in toggle_blocks:\n                            if toggle_child.get(\"type\") == \"child_page\":\n                                # Get the child page title to match with expected callouts\n                                try:\n                                    child_page_info = notion.pages.retrieve(\n                                        page_id=toggle_child[\"id\"]\n                                    )\n                                    child_page_title = \"\"\n                                    if (\n                                        \"properties\" in child_page_info\n                                        and \"title\" in child_page_info[\"properties\"]\n                                    ):\n                                        title_list = child_page_info[\"properties\"][\n                                            \"title\"\n                                        ].get(\"title\", [])\n                                        if title_list:\n                                            child_page_title = title_list[0].get(\n                                                \"plain_text\", \"\"\n                                            )\n                                except:\n                                    child_page_title = \"\"\n\n                                child_blocks = 
notion_utils.get_all_blocks_recursively(\n                                    notion, toggle_child[\"id\"]\n                                )\n                                for child_block in child_blocks:\n                                    if child_block.get(\"type\") == \"callout\":\n                                        callout_text = (\n                                            notion_utils.get_block_plain_text(\n                                                child_block\n                                            )\n                                        )\n                                        for (\n                                            expected_title,\n                                            expected_content,\n                                            expected_page_title,\n                                        ) in expected_child_callouts:\n                                            if (\n                                                expected_title in callout_text\n                                                and expected_content in callout_text\n                                                and expected_page_title\n                                                in child_page_title\n                                            ):\n                                                child_pages_updated += 1\n                                                verification_results.append(\n                                                    f\"✅ Context child_page '{expected_page_title}' updated correctly\"\n                                                )\n                                                break\n\n    if context_found:\n        verification_results.append(\"✅ Context section content updated correctly\")\n\n    if child_pages_updated == 3:\n        verification_results.append(\n            \"✅ All 3 Context child_page callouts updated correctly\"\n        )\n    else:\n        verification_results.append(\n           
 f\"❌ Only {child_pages_updated}/3 Context child_page callouts updated correctly (Contacting IT, Team lunches, Sending swag)\"\n        )\n\n    # Check 4: Verify Terminologies section with exact 4 bulleted items\n    terminologies_found = False\n    terminology_items = []\n    expected_terminologies = [\n        \"Blue-Green Deployment: A deployment strategy that maintains two identical production environments\",\n        \"Rollback Window: The maximum time allowed to revert a deployment (30 minutes)\",\n        \"Smoke Test: Initial verification tests run immediately after deployment\",\n        \"Production Gateway: The approval checkpoint before production release\",\n    ]\n\n    for i, block in enumerate(all_blocks):\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = notion_utils.get_block_plain_text(block)\n            if \"Terminologies\" in heading_text:\n                # Check for intro paragraph\n                for j in range(i + 1, min(i + 2, len(all_blocks))):\n                    if all_blocks[j].get(\"type\") == \"paragraph\":\n                        para_text = notion_utils.get_block_plain_text(all_blocks[j])\n                        if \"Essential deployment terminology\" in para_text:\n                            terminologies_found = True\n                            break\n\n                # Check bulleted list items\n                for j in range(i + 1, min(i + 10, len(all_blocks))):\n                    next_block = all_blocks[j]\n                    if next_block.get(\"type\") == \"bulleted_list_item\":\n                        item_text = notion_utils.get_block_plain_text(next_block)\n                        terminology_items.append(item_text)\n                    elif next_block.get(\"type\") in [\n                        \"heading_1\",\n                        \"heading_2\",\n                        \"heading_3\",\n                    ]:\n                        break\n                break\n\n    
terminology_matches = sum(\n        1\n        for expected in expected_terminologies\n        if any(expected in item for item in terminology_items)\n    )\n\n    if terminologies_found and len(terminology_items) == 4 and terminology_matches == 4:\n        verification_results.append(\n            \"✅ Terminologies section with exactly 4 correct items\"\n        )\n    else:\n        verification_results.append(\n            f\"❌ Terminologies: expected 4 items, found {len(terminology_items)}, {terminology_matches} correct\"\n        )\n\n    # Check 5: Verify Tools section with 2 child_page callouts\n    tools_found = False\n    tools_child_pages = 0\n    expected_tools = [\n        (\"Jenkins CI/CD Pipeline\", \"automated deployments\"),\n        (\"Kubernetes Dashboard\", \"rollback operations\"),\n    ]\n\n    for i, block in enumerate(all_blocks):\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = notion_utils.get_block_plain_text(block)\n            if \"Tools\" in heading_text:\n                # Check intro paragraph\n                for j in range(i + 1, min(i + 2, len(all_blocks))):\n                    if all_blocks[j].get(\"type\") == \"paragraph\":\n                        para_text = notion_utils.get_block_plain_text(all_blocks[j])\n                        if \"Critical tools required\" in para_text:\n                            tools_found = True\n                            break\n\n                # Check child pages\n                for j in range(i + 1, min(i + 10, len(all_blocks))):\n                    next_block = all_blocks[j]\n                    if next_block.get(\"type\") == \"child_page\":\n                        child_blocks = notion_utils.get_all_blocks_recursively(\n                            notion, next_block[\"id\"]\n                        )\n                        for child_block in child_blocks:\n                            if child_block.get(\"type\") == \"callout\":\n                               
 callout_text = notion_utils.get_block_plain_text(\n                                    child_block\n                                )\n                                for expected_title, expected_content in expected_tools:\n                                    if (\n                                        expected_title in callout_text\n                                        and expected_content in callout_text\n                                    ):\n                                        tools_child_pages += 1\n                                        break\n                    elif next_block.get(\"type\") in [\n                        \"heading_1\",\n                        \"heading_2\",\n                        \"heading_3\",\n                    ]:\n                        break\n                break\n\n    if tools_found and tools_child_pages == 2:\n        verification_results.append(\n            \"✅ Tools section with 2 correctly updated child_page callouts\"\n        )\n    else:\n        verification_results.append(\n            f\"❌ Tools section: expected 2 child_pages updated, found {tools_child_pages}\"\n        )\n\n    # Check 6: Verify Roles & responsibilities with exactly 4 bulleted items\n    roles_found = False\n    role_items = []\n    expected_roles = [\n        \"DevOps Engineer: Executes deployment, monitors system health, initiates rollbacks if needed\",\n        \"Lead Developer: Reviews code changes, approves deployment package, validates functionality\",\n        \"QA Engineer: Verifies smoke tests, confirms user acceptance criteria\",\n        \"Security Officer: Validates security compliance, approves security-sensitive deployments\",\n    ]\n\n    for i, block in enumerate(all_blocks):\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = notion_utils.get_block_plain_text(block)\n            if \"Roles\" in heading_text and \"responsibilities\" in heading_text:\n                # Check intro paragraph\n    
            for j in range(i + 1, min(i + 2, len(all_blocks))):\n                    if all_blocks[j].get(\"type\") == \"paragraph\":\n                        para_text = notion_utils.get_block_plain_text(all_blocks[j])\n                        if \"essential for successful deployment execution\" in para_text:\n                            roles_found = True\n                            break\n\n                # Check bulleted list items\n                for j in range(i + 1, min(i + 10, len(all_blocks))):\n                    next_block = all_blocks[j]\n                    if next_block.get(\"type\") == \"bulleted_list_item\":\n                        item_text = notion_utils.get_block_plain_text(next_block)\n                        role_items.append(item_text)\n                    elif next_block.get(\"type\") in [\n                        \"heading_1\",\n                        \"heading_2\",\n                        \"heading_3\",\n                    ]:\n                        break\n                break\n\n    role_matches = sum(\n        1 for expected in expected_roles if any(expected in item for item in role_items)\n    )\n\n    if roles_found and len(role_items) == 4 and role_matches == 4:\n        verification_results.append(\n            \"✅ Roles & responsibilities section with exactly 4 correct items\"\n        )\n    else:\n        verification_results.append(\n            f\"❌ Roles section: expected 4 items, found {len(role_items)}, {role_matches} correct\"\n        )\n\n    # Check 7: Verify Procedure section with exactly 3 numbered items\n    procedure_found = False\n    procedure_items = []\n    expected_procedures = [\n        (\"Pre-deployment\", \"Lead Developer and Security Officer\", \"rollback plan\"),\n        (\"Deployment execution\", \"staging environment first\", \"blue-green strategy\"),\n        (\n            \"Post-deployment\",\n            \"minimum 30 minutes\",\n            \"stakeholders via deployment notification\",\n     
   ),\n    ]\n\n    for i, block in enumerate(all_blocks):\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = notion_utils.get_block_plain_text(block)\n            if \"Procedure\" in heading_text:\n                # Check intro paragraph\n                for j in range(i + 1, min(i + 2, len(all_blocks))):\n                    if all_blocks[j].get(\"type\") == \"paragraph\":\n                        para_text = notion_utils.get_block_plain_text(all_blocks[j])\n                        if \"Follow these steps in sequence\" in para_text:\n                            procedure_found = True\n                            break\n\n                # Check numbered list items\n                for j in range(i + 1, min(i + 10, len(all_blocks))):\n                    next_block = all_blocks[j]\n                    if next_block.get(\"type\") == \"numbered_list_item\":\n                        item_text = notion_utils.get_block_plain_text(next_block)\n                        procedure_items.append(item_text)\n                    elif next_block.get(\"type\") in [\n                        \"heading_1\",\n                        \"heading_2\",\n                        \"heading_3\",\n                    ]:\n                        break\n                break\n\n    procedure_matches = 0\n    for item_text in procedure_items:\n        for expected_title, expected_content1, expected_content2 in expected_procedures:\n            if (\n                expected_title in item_text\n                and expected_content1 in item_text\n                and expected_content2 in item_text\n            ):\n                procedure_matches += 1\n                break\n\n    if procedure_found and len(procedure_items) == 3 and procedure_matches == 3:\n        verification_results.append(\"✅ Procedure section with exactly 3 correct items\")\n    else:\n        verification_results.append(\n            f\"❌ Procedure: expected 3 items, found {len(procedure_items)}, 
{procedure_matches} correct\"\n        )\n\n    # Calculate overall success\n    total_checks = 14  # Number of major verification points\n    successful_checks = sum(\n        1 for result in verification_results if result.startswith(\"✅\")\n    )\n\n    # Print all verification results\n    print(\"\\n=== SOP Template Verification Results ===\", file=sys.stderr)\n    for result in verification_results:\n        print(result, file=sys.stderr)\n\n    print(f\"\\n=== Summary: {successful_checks}/{total_checks} checks passed ===\")\n\n    # Must pass ALL checks to succeed\n    success = (\n        sop_title_found\n        and created_date_found\n        and responsible_dept_found\n        and header_callout_found\n        and purpose_found\n        and context_found\n        and child_pages_updated == 3\n        and terminologies_found\n        and len(terminology_items) == 4\n        and terminology_matches == 4\n        and tools_found\n        and tools_child_pages == 2\n        and roles_found\n        and len(role_items) == 4\n        and role_matches == 4\n        and procedure_found\n        and len(procedure_items) == 3\n        and procedure_matches == 3\n    )\n\n    if success:\n        print(\"\\n🎉 SUCCESS: All SOP template requirements completed correctly!\")\n        return True\n    else:\n        print(\n            f\"\\n❌ FAILURE: SOP template verification failed. {successful_checks}/{total_checks} requirements met.\",\n            file=sys.stderr,\n        )\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/notion/standard/standard_operating_procedure/section_organization/description.md",
    "content": "# Task: Reorganize Standard Operating Procedure Page Sections\n\n## Objective\nModify the structure of the Standard Operating Procedure page in Notion by reorganizing sections through swapping and creating a column layout.\n\n## Requirements\n\n### Step 1: Swap Sections\n- Navigate to the Standard Operating Procedure page\n- Swap the positions of the \"Terminologies\" and \"Roles & responsibilities\" sections\n- Preserve all content within each section exactly as is\n- Maintain the original formatting and structure of each section\n\n### Step 2: Create Column Layout\n- After swapping, arrange the \"Tools\" section and the section immediately below it (\"Terminologies\") into a 2-column layout\n- Position the \"Tools\" section in the left column\n- Position the \"Terminologies\" section in the right column\n- In the \"Tools\" column, add links to the Notion and Figma pages using appropriate reference blocks\n- Preserve the original child pages from the \"Tools\" section in a toggle block placed below the column layout, with the toggle titled \"original pages\""
  },
  {
    "path": "tasks/notion/standard/standard_operating_procedure/section_organization/meta.json",
    "content": "{\n  \"task_id\": \"section_organization\",\n  \"task_name\": \"Section Organization\",\n  \"category_id\": \"standard_operating_procedure\",\n  \"category_name\": \"Standard Operating Procedure\",\n  \"description\": \"Reorganize the Standard Operating Procedure page by swapping sections and creating a column layout.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-11\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content organization\",\n    \"cross-reference linking\",\n    \"visual formatting\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Standard-Operating-Procedure-24381626b6d780a8b678f9e62ae5b152\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/standard-operating-procedure\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/standard_operating_procedure/section_organization/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the Standard Operating Procedure page has been reorganized correctly.\n    \"\"\"\n    # Step 1: Find the Standard Operating Procedure page\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not found_id or object_type != 'page':\n            print(\"Error: Standard Operating Procedure page not found.\", file=sys.stderr)\n            return False\n    else:\n        # Try to find the page by searching\n        found_id = notion_utils.find_page(notion, \"Standard Operating Procedure\")\n        if not found_id:\n            print(\"Error: Standard Operating Procedure page not found.\", file=sys.stderr)\n            return False\n    \n    print(f\"Found Standard Operating Procedure page: {found_id}\")\n    \n    # Get all blocks from the page\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id)\n    print(f\"Found {len(all_blocks)} blocks\")\n    \n    print(\"Starting verification...\")\n    \n    # Step 2: Verify the structure and section order\n    print(\"2. 
Checking page structure and section order...\")\n    \n    # Expected structure after the initial content and dividers\n    # We'll look for main sections by their headings\n    roles_index = None\n    tools_column_index = None\n    toggle_index = None\n    procedure_index = None\n    \n    for i, block in enumerate(all_blocks):\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = \"\"\n            rich_text = block.get(\"heading_2\", {}).get(\"rich_text\", [])\n            if rich_text:\n                heading_text = rich_text[0].get(\"text\", {}).get(\"content\", \"\")\n            \n            if heading_text == \"Roles & responsibilities\":\n                roles_index = i\n                print(f\"✓ Found 'Roles & responsibilities' section at index {i}\")\n            elif heading_text == \"Procedure\":\n                procedure_index = i\n                print(f\"✓ Found 'Procedure' section at index {i}\")\n    \n    # Check for column_list (containing Tools and Terminologies)\n    for i, block in enumerate(all_blocks):\n        if block.get(\"type\") == \"column_list\":\n            # Check if this is the right column_list (should be after Roles & responsibilities)\n            if roles_index and i > roles_index:\n                tools_column_index = i\n                print(f\"✓ Found column_list at index {i}\")\n                break\n    \n    # Check for toggle block with \"original pages\"\n    for i, block in enumerate(all_blocks):\n        if block.get(\"type\") == \"toggle\":\n            toggle_text = \"\"\n            rich_text = block.get(\"toggle\", {}).get(\"rich_text\", [])\n            if rich_text:\n                toggle_text = rich_text[0].get(\"text\", {}).get(\"content\", \"\")\n            \n            if toggle_text.lower() == \"original pages\":\n                toggle_index = i\n                print(f\"✓ Found 'original pages' toggle at index {i}\")\n                break\n    \n    # Step 3: Verify 
section order\n    print(\"3. Verifying section order...\")\n    \n    if roles_index is None:\n        print(\"Error: 'Roles & responsibilities' section not found.\", file=sys.stderr)\n        return False\n    \n    if tools_column_index is None:\n        print(\"Error: Column layout not found.\", file=sys.stderr)\n        return False\n    \n    if toggle_index is None:\n        print(\"Error: 'original pages' toggle not found.\", file=sys.stderr)\n        return False\n    \n    if procedure_index is None:\n        print(\"Error: 'Procedure' section not found.\", file=sys.stderr)\n        return False\n    \n    # Verify order: Roles & responsibilities < column_list < toggle < Procedure\n    if not (roles_index < tools_column_index < toggle_index < procedure_index):\n        print(\"Error: Sections are not in the correct order.\", file=sys.stderr)\n        print(f\"  Expected order: Roles & responsibilities ({roles_index}) < column_list ({tools_column_index}) < toggle ({toggle_index}) < Procedure ({procedure_index})\", file=sys.stderr)\n        return False\n    \n    print(\"✓ Sections are in the correct order\")\n    \n    # Step 4: Verify column_list structure\n    print(\"4. 
Verifying column layout structure...\")\n    \n    column_list_block = all_blocks[tools_column_index]\n    column_list_id = column_list_block.get(\"id\")\n    \n    # Get direct children of column_list (should be columns only)\n    try:\n        column_response = notion.blocks.children.list(block_id=column_list_id)\n        column_children = column_response.get(\"results\", [])\n    except Exception as e:\n        print(f\"Error getting column children: {e}\", file=sys.stderr)\n        return False\n    \n    if len(column_children) < 2:\n        print(f\"Error: Column list should have at least 2 columns, found {len(column_children)}.\", file=sys.stderr)\n        return False\n    \n    # Verify left column (Tools)\n    left_column = column_children[0]\n    if left_column.get(\"type\") != \"column\":\n        print(\"Error: First child of column_list should be a column.\", file=sys.stderr)\n        return False\n    \n    left_column_id = left_column.get(\"id\")\n    left_column_blocks = notion_utils.get_all_blocks_recursively(notion, left_column_id)\n    \n    # Check for Tools heading and link_to_page blocks in left column\n    tools_heading_found = False\n    link_to_page_count = 0\n    for block in left_column_blocks:\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = block.get(\"heading_2\", {}).get(\"rich_text\", [{}])[0].get(\"text\", {}).get(\"content\", \"\")\n            if heading_text == \"Tools\":\n                tools_heading_found = True\n                print(\"✓ Found 'Tools' heading in left column\")\n        elif block.get(\"type\") == \"link_to_page\":\n            link_to_page_count += 1\n    \n    if not tools_heading_found:\n        print(\"Error: 'Tools' heading not found in left column.\", file=sys.stderr)\n        return False\n    \n    # Check for link_to_page blocks in Tools column\n    if link_to_page_count < 2:\n        print(f\"Error: Tools column should have at least 2 link_to_page blocks, found 
{link_to_page_count}.\", file=sys.stderr)\n        return False\n    \n    print(f\"✓ Found {link_to_page_count} link_to_page blocks in Tools column\")\n    \n    # Verify right column (Terminologies)\n    right_column = column_children[1]\n    if right_column.get(\"type\") != \"column\":\n        print(\"Error: Second child of column_list should be a column.\", file=sys.stderr)\n        return False\n    \n    right_column_id = right_column.get(\"id\")\n    right_column_blocks = notion_utils.get_all_blocks_recursively(notion, right_column_id)\n    \n    # Check for Terminologies heading in right column\n    terminologies_heading_found = False\n    for block in right_column_blocks:\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = block.get(\"heading_2\", {}).get(\"rich_text\", [{}])[0].get(\"text\", {}).get(\"content\", \"\")\n            if heading_text == \"Terminologies\":\n                terminologies_heading_found = True\n                print(\"✓ Found 'Terminologies' heading in right column\")\n                break\n    \n    if not terminologies_heading_found:\n        print(\"Error: 'Terminologies' heading not found in right column.\", file=sys.stderr)\n        return False\n    \n    # Step 5: Verify toggle block content\n    print(\"5. 
Verifying toggle block content...\")\n    \n    toggle_block = all_blocks[toggle_index]\n    toggle_id = toggle_block.get(\"id\")\n    \n    # Get direct children of toggle\n    try:\n        toggle_response = notion.blocks.children.list(block_id=toggle_id)\n        toggle_children = toggle_response.get(\"results\", [])\n    except Exception as e:\n        print(f\"Error getting toggle children: {e}\", file=sys.stderr)\n        return False\n    \n    # Check for child_page blocks (Notion and Figma)\n    notion_page_found = False\n    figma_page_found = False\n    \n    for block in toggle_children:\n        if block.get(\"type\") == \"child_page\":\n            title = block.get(\"child_page\", {}).get(\"title\", \"\")\n            if title == \"Notion\":\n                notion_page_found = True\n                print(\"✓ Found 'Notion' child page in toggle\")\n            elif title == \"Figma\":\n                figma_page_found = True\n                print(\"✓ Found 'Figma' child page in toggle\")\n    \n    if not notion_page_found:\n        print(\"Error: 'Notion' child page not found in toggle block.\", file=sys.stderr)\n        return False\n    \n    if not figma_page_found:\n        print(\"Error: 'Figma' child page not found in toggle block.\", file=sys.stderr)\n        return False\n    \n    # Step 6: Verify that original sections no longer exist at top level\n    print(\"6. 
Verifying original sections have been removed from top level...\")\n    \n    # Check that there's no standalone \"Terminologies\" heading before \"Roles & responsibilities\"\n    for i in range(0, roles_index if roles_index else len(all_blocks)):\n        block = all_blocks[i]\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = block.get(\"heading_2\", {}).get(\"rich_text\", [{}])[0].get(\"text\", {}).get(\"content\", \"\")\n            if heading_text == \"Terminologies\":\n                print(\"Error: 'Terminologies' section found before 'Roles & responsibilities'.\", file=sys.stderr)\n                return False\n    \n    # Check that there's no standalone \"Tools\" heading outside the column\n    tools_outside_column = False\n    for i, block in enumerate(all_blocks):\n        if i == tools_column_index:\n            continue  # Skip the column_list itself\n        if block.get(\"type\") == \"heading_2\":\n            heading_text = block.get(\"heading_2\", {}).get(\"rich_text\", [{}])[0].get(\"text\", {}).get(\"content\", \"\")\n            if heading_text == \"Tools\" and i != tools_column_index:\n                # Check if this is NOT inside the column\n                parent_id = block.get(\"parent\", {}).get(\"block_id\")\n                if parent_id != left_column_id:\n                    tools_outside_column = True\n                    break\n    \n    if tools_outside_column:\n        print(\"Error: Standalone 'Tools' section found outside column layout.\", file=sys.stderr)\n        return False\n    \n    print(\"✓ Original sections have been properly reorganized\")\n    \n    # Step 7: Final summary\n    print(\"\\n7. 
Final verification summary:\")\n    print(\"✓ 'Roles & responsibilities' and 'Terminologies' sections have been swapped\")\n    print(\"✓ 'Tools' and 'Terminologies' are in a 2-column layout\")\n    print(\"✓ Links to Notion and Figma pages are in the Tools column\")\n    print(\"✓ Original child pages are preserved in 'original pages' toggle\")\n    print(\"✓ Page structure is correct\")\n    \n    print(\"\\n✅ All verification checks passed!\")\n    return True\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/notion/standard/team_projects/priority_tasks_table/description.md",
    "content": "Hi! In my Team Projects page, please create a five-column table block that lists all tasks meeting either of the following conditions:\n\t1.\tThe progress is 50% or less, or\n\t2.\tThe task has priority P0 but is not yet completed (i.e., progress not at 100%).\n\nYou should query this information from the existing “Projects” database.\n\nIn the newly created table, each row should represent one task, and all information should be stored as plain text (not relations, formulas, or linked properties).\n\nIn the newly created table:\n\t•\tEach row should represent one task\n\t•\tAll fields should be stored as plain text (not relations, formulas, or linked properties)\n\t•\tThe table should be sorted by expected end date (End Date) in ascending order, so that the first entry is the one with the earliest end date\n\nThe table should include the following headers:\n\t•\tProject\n\t•\tEng Hours\n\t•\tProgress\n\t•\tStart Date\n\t•\tEnd Date\n\nPlease make sure all relevant tasks are included. Thank you!"
  },
  {
    "path": "tasks/notion/standard/team_projects/priority_tasks_table/meta.json",
    "content": "{\n  \"task_id\": \"priority_tasks_table\",\n  \"task_name\": \"Priority Tasks Table\",\n  \"category_id\": \"team_projects\",\n  \"category_name\": \"Team Projects\",\n  \"description\": \"Create a five-column table listing tasks with 50% or less progress or P0 priority tasks not completed.\",\n  \"author\": \"Zijian Wu\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"conditional filtering\",\n    \"database manipulation\",\n    \"data aggregation\",\n    \"visual formatting\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Team-Projects-24e81626b6d7809c982fdb7a25825898\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/gantt-chart\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/team_projects/priority_tasks_table/verify.py",
    "content": "import sys\nfrom datetime import datetime\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\nEXPECTED_HEADERS = [\"Project\", \"Eng Hours\", \"Progress\", \"Start Date\", \"End Date\"]\n\nEXPECTED_ROWS = [\n    {\n        \"Project\": \"Improve response times for support requests\",\n        \"Eng Hours\": 100,\n        \"Progress\": 0.33,  # 33%\n        \"Start Date\": \"2024-10-30\",\n        \"End Date\": \"2024-11-17\",\n    },\n    {\n        \"Project\": \"Add a new social media integration\",\n        \"Eng Hours\": 200,\n        \"Progress\": 0.40,  # 40%\n        \"Start Date\": \"2024-11-07\",\n        \"End Date\": \"2024-11-25\",\n    },\n    {\n        \"Project\": \"Integrate with a popular third-party service\",\n        \"Eng Hours\": 250,\n        \"Progress\": 0.20,  # 20%\n        \"Start Date\": \"2024-11-10\",\n        \"End Date\": \"2024-11-18\",\n    },\n    {\n        \"Project\": \"Create customer knowledge base\",\n        \"Eng Hours\": 150,\n        \"Progress\": 0.40,  # 40%\n        \"Start Date\": \"2024-11-19\",\n        \"End Date\": \"2024-11-25\",\n    },\n    {\n        \"Project\": \"Redesign the onboarding process\",\n        \"Eng Hours\": 300,\n        \"Progress\": 0.75,  # 75%\n        \"Start Date\": \"2024-11-20\",\n        \"End Date\": \"2024-12-04\",\n    },\n    {\n        \"Project\": \"Publish support knowledge base\",\n        \"Eng Hours\": None,  # N/A\n        \"Progress\": 0.0,  # 0%\n        \"Start Date\": \"2024-11-27\",\n        \"End Date\": \"2024-11-29\",\n    },\n]\n\n# Sort the expected rows by End Date so we can directly compare order\nEXPECTED_ROWS.sort(key=lambda r: r[\"End Date\"])\n\n\ndef _plain_text_from_cell(cell):\n    \"\"\"Concatenate plain_text from a single cell (list of rich_text).\"\"\"\n    return \"\".join(rt.get(\"plain_text\", \"\") for rt in cell).strip()\n\n\ndef _parse_progress(value: str):\n    \"\"\"Convert a progress string like '40%', 
'40.0 %', '0.4' to float in range 0-1.\"\"\"\n    value = value.strip()\n    if not value:\n        return None\n\n    has_percent = \"%\" in value\n    # Remove percent sign and any spaces\n    value = value.replace(\"%\", \"\").strip()\n    try:\n        num = float(value)\n        if has_percent or num > 1:\n            num /= 100.0\n        return num\n    except ValueError:\n        return None\n\n\ndef _parse_eng_hours(value: str):\n    value = value.strip().lower()\n    if value in {\"n/a\", \"na\", \"\", \"—\", \"-\"}:\n        return None\n    try:\n        return float(value)\n    except ValueError:\n        return None\n\n\ndef _parse_date(value: str):\n    value = value.strip()\n    try:\n        return datetime.strptime(value, \"%Y-%m-%d\").date()\n    except ValueError:\n        return None\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"Verify that the last table in the 'Team Projects' page matches EXPECTED_ROWS and headers.\"\"\"\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if found_id and object_type == 'page':\n            page_id = found_id\n    \n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Team Projects\")\n    if not page_id:\n        print(\"Error: Page 'Team Projects' not found.\", file=sys.stderr)\n        return False\n\n    # Fetch all blocks to locate table blocks\n    blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n    table_blocks = [b for b in blocks if b.get(\"type\") == \"table\"]\n    if not table_blocks:\n        print(\"Error: No table blocks found in 'Team Projects' page.\", file=sys.stderr)\n        return False\n\n    table_block = table_blocks[-1]  # Use the last table block\n    table_id = table_block[\"id\"]\n\n    # Retrieve table rows\n    rows = notion.blocks.children.list(block_id=table_id).get(\"results\", [])\n    if not rows:\n        print(\"Error: Table 
block has no rows.\", file=sys.stderr)\n        return False\n\n    # Validate headers\n    header_cells = rows[0].get(\"table_row\", {}).get(\"cells\", [])\n    headers = [_plain_text_from_cell(c) for c in header_cells]\n    if headers != EXPECTED_HEADERS:\n        print(f\"Error: Table headers mismatch. Found {headers}, expected {EXPECTED_HEADERS}.\", file=sys.stderr)\n        return False\n\n    # Parse data rows\n    data_rows = []\n    for r in rows[1:]:\n        cells = r.get(\"table_row\", {}).get(\"cells\", [])\n        if len(cells) < 5:\n            continue  # Skip malformed rows\n        project = _plain_text_from_cell(cells[0])\n        eng_hours_raw = _plain_text_from_cell(cells[1])\n        progress_raw = _plain_text_from_cell(cells[2])\n        start_raw = _plain_text_from_cell(cells[3])\n        end_raw = _plain_text_from_cell(cells[4])\n\n        row_dict = {\n            \"Project\": project,\n            \"Eng Hours\": _parse_eng_hours(eng_hours_raw),\n            \"Progress\": _parse_progress(progress_raw),\n            \"Start Date\": start_raw.strip(),\n            \"End Date\": end_raw.strip(),\n        }\n        data_rows.append(row_dict)\n\n    if len(data_rows) != len(EXPECTED_ROWS):\n        print(f\"Error: Expected {len(EXPECTED_ROWS)} data rows, found {len(data_rows)}.\", file=sys.stderr)\n        return False\n\n    # Verify sorting by End Date ascending\n    parsed_end_dates = [_parse_date(r[\"End Date\"]) for r in data_rows]\n    if any(d is None for d in parsed_end_dates):\n        print(\"Error: One or more End Date values could not be parsed.\", file=sys.stderr)\n        return False\n    if parsed_end_dates != sorted(parsed_end_dates):\n        print(\"Error: Table is not sorted by End Date ascending.\", file=sys.stderr)\n        return False\n\n    # Create mapping from project -> row for comparison\n    data_map = {r[\"Project\"]: r for r in data_rows}\n\n    for expected in EXPECTED_ROWS:\n        proj = 
expected[\"Project\"]\n        if proj not in data_map:\n            print(f\"Error: Project '{proj}' not found in table.\", file=sys.stderr)\n            return False\n        actual = data_map[proj]\n\n        # Compare Eng Hours\n        expected_hours = expected[\"Eng Hours\"]\n        actual_hours = actual[\"Eng Hours\"]\n        if expected_hours is None:\n            if actual_hours is not None:\n                print(f\"Error: Eng Hours for '{proj}' expected to be empty/N/A but found '{actual_hours}'.\", file=sys.stderr)\n                return False\n        else:\n            if actual_hours is None or abs(actual_hours - expected_hours) > 1e-2:\n                print(f\"Error: Eng Hours for '{proj}' mismatch. Expected {expected_hours}, found {actual_hours}.\", file=sys.stderr)\n                return False\n\n        # Compare Progress with tolerance\n        expected_progress = expected[\"Progress\"]\n        actual_progress = actual[\"Progress\"]\n        if actual_progress is None or abs(actual_progress - expected_progress) > 1e-2:\n            print(f\"Error: Progress for '{proj}' mismatch. Expected {expected_progress}, found {actual_progress}.\", file=sys.stderr)\n            return False\n\n        # Compare Start and End Dates (string equality)\n        if actual[\"Start Date\"] != expected[\"Start Date\"]:\n            print(f\"Error: Start Date for '{proj}' mismatch. Expected {expected['Start Date']}, found {actual['Start Date']}.\", file=sys.stderr)\n            return False\n        if actual[\"End Date\"] != expected[\"End Date\"]:\n            print(f\"Error: End Date for '{proj}' mismatch. 
Expected {expected['End Date']}, found {actual['End Date']}.\", file=sys.stderr)\n            return False\n\n    print(\"Success: Verified table block contents and order successfully.\")\n    return True\n\n\ndef main():\n    \"\"\"Execute verification and exit with status code.\"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main() "
  },
  {
    "path": "tasks/notion/standard/team_projects/swap_tasks/description.md",
    "content": "Go to the Team Projects page, find the person responsible for the most tasks and the person responsible for the fewest tasks, then swap their assigned tasks."
  },
  {
    "path": "tasks/notion/standard/team_projects/swap_tasks/meta.json",
    "content": "{\n  \"task_id\": \"swap_tasks\",\n  \"task_name\": \"Swap Tasks\",\n  \"category_id\": \"team_projects\",\n  \"category_name\": \"Team Projects\",\n  \"description\": \"Find the person responsible for the most and fewest tasks, then swap their assigned tasks.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data aggregation\",\n    \"automated migration\",\n    \"conditional filtering\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Team-Projects-24e81626b6d7809c982fdb7a25825898\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/gantt-chart\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/team_projects/swap_tasks/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the task assignees have been swapped correctly.\n    Checks:\n    1. \"Develop a plan for promotion\" and \"Evaluate different third-party services\" have swapped assignees\n    2. The person with most tasks and person with least tasks have swapped all their tasks\n    \"\"\"\n    # Step 1: Find the Team Projects page\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not found_id or object_type != 'page':\n            print(\"Error: Team Projects page not found.\", file=sys.stderr)\n            return False\n    else:\n        # Try to find the page by searching\n        found_id = notion_utils.find_page(notion, \"Team Projects\")\n        if not found_id:\n            print(\"Error: Team Projects page not found.\", file=sys.stderr)\n            return False\n    \n    # Get all blocks from the page to find database references\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id)\n    \n    # Find Tasks database ID from the page\n    tasks_db_id = None\n    \n    for block in all_blocks:\n        if block and block.get(\"type\") == \"child_database\":\n            db_title = block.get(\"child_database\", {}).get(\"title\", \"\")\n            if \"Tasks\" in db_title:\n                tasks_db_id = block[\"id\"]\n                break\n    \n    if not tasks_db_id:\n        print(\"Error: Tasks database not found.\", file=sys.stderr)\n        return False\n    \n    print(\"\\n📋 Starting verification...\")\n    \n    # Step 2: Query all tasks to analyze assignees\n    \n    try:\n        all_tasks_response = notion.databases.query(\n            database_id=tasks_db_id,\n            page_size=100\n        )\n        \n        if not all_tasks_response.get(\"results\"):\n            
print(\"Error: No tasks found in Tasks database.\", file=sys.stderr)\n            return False\n        \n        tasks = all_tasks_response[\"results\"]\n        \n    except Exception as e:\n        print(f\"Error querying Tasks database: {e}\", file=sys.stderr)\n        return False\n    \n    # Step 3: Check specific tasks have swapped assignees\n    \n    develop_plan_task = None\n    evaluate_services_task = None\n    \n    for task in tasks:\n        task_name = task[\"properties\"][\"Name\"][\"title\"][0][\"text\"][\"content\"]\n        if task_name == \"Develop a plan for promotion\":\n            develop_plan_task = task\n        elif task_name == \"Evaluate different third-party services\":\n            evaluate_services_task = task\n    \n    if not develop_plan_task or not evaluate_services_task:\n        print(\"Error: Could not find both required tasks.\", file=sys.stderr)\n        return False\n    \n    # Get assignees for these tasks\n    develop_plan_assignees = develop_plan_task[\"properties\"][\"Assigned\"][\"people\"]\n    evaluate_services_assignees = evaluate_services_task[\"properties\"][\"Assigned\"][\"people\"]\n    \n    if not develop_plan_assignees or not evaluate_services_assignees:\n        print(\"Error: Tasks don't have assignees.\", file=sys.stderr)\n        return False\n    \n    develop_plan_assignee_id = develop_plan_assignees[0][\"id\"]\n    evaluate_services_assignee_id = evaluate_services_assignees[0][\"id\"]\n    \n    # These should be different (swapped)\n    if develop_plan_assignee_id == evaluate_services_assignee_id:\n        print(\"Error: Tasks should have different assignees after swap.\", file=sys.stderr)\n        return False\n    \n    # Step 4: Count tasks per person\n    \n    task_counts = {}\n    unassigned_count = 0\n    \n    for task in tasks:\n        assignees = task[\"properties\"][\"Assigned\"][\"people\"]\n        if assignees:\n            assignee_id = assignees[0][\"id\"]\n            if 
assignee_id not in task_counts:\n                task_counts[assignee_id] = []\n            task_counts[assignee_id].append(task[\"properties\"][\"Name\"][\"title\"][0][\"text\"][\"content\"])\n        else:\n            unassigned_count += 1\n    \n    # Sort by task count\n    sorted_assignees = sorted(task_counts.items(), key=lambda x: len(x[1]))\n    \n    if len(sorted_assignees) < 2:\n        print(\"Error: Need at least 2 people with tasks to verify swap.\", file=sys.stderr)\n        return False\n    \n    # Get person with least and most tasks\n    person_with_least = sorted_assignees[0]\n    person_with_most = sorted_assignees[-1]\n    \n    least_id, least_tasks = person_with_least\n    most_id, most_tasks = person_with_most\n    \n    # Step 5: Verify the swap pattern\n    \n    # Original distribution (before swap):\n    # - 5ac96c02-49a4-4320-8de6-b663ba83126b had 3 tasks (least)\n    # - ac7a3bd0-c111-4464-8f45-8a857a1abc8a had 10 tasks (most)\n    \n    # After complete swap, we expect:\n    # - 5ac96c02-49a4-4320-8de6-b663ba83126b should have 10 tasks\n    # - ac7a3bd0-c111-4464-8f45-8a857a1abc8a should have 3 tasks\n    \n    original_least_id = \"5ac96c02-49a4-4320-8de6-b663ba83126b\"\n    original_most_id = \"ac7a3bd0-c111-4464-8f45-8a857a1abc8a\"\n    \n    # Check if the swap has been completed\n    swap_completed = False\n    for assignee_id, assignee_tasks in task_counts.items():\n        if assignee_id == original_least_id and len(assignee_tasks) == 10:\n            # Person who had 3 now has 10\n            for other_id, other_tasks in task_counts.items():\n                if other_id == original_most_id and len(other_tasks) == 3:\n                    # Person who had 10 now has 3\n                    swap_completed = True\n                    break\n    \n    # Step 6: Summary\n    print(f\"\\n📊 Task Distribution:\")\n    print(f\"  • Total tasks: {len(tasks)}\")\n    print(f\"  • Assigned tasks: {len(tasks) - unassigned_count}\")\n    
print(f\"  • Unassigned tasks: {unassigned_count}\")\n    print(f\"  • People with tasks: {len(task_counts)}\")\n    print(f\"\\n  Task counts by person:\")\n    for assignee_id, assignee_tasks in sorted_assignees:\n        print(f\"    - {assignee_id[:8]}...: {len(assignee_tasks)} tasks\")\n    \n    # Step 7: Final verification\n    print(\"\\n🔍 Verification Results:\")\n    \n    # Check that the swap has created a significant difference\n    if len(most_tasks) - len(least_tasks) < 5:\n        print(f\"Warning: Difference between most and least is only {len(most_tasks) - len(least_tasks)} tasks\", file=sys.stderr)\n    \n    # Verify specific expected outcomes\n    verification_passed = True\n    \n    # Check 1: Specific tasks have been swapped\n    specific_tasks_swapped = develop_plan_assignee_id != evaluate_services_assignee_id\n    if specific_tasks_swapped:\n        print(\"  ✓ Specific tasks have been swapped\")\n    else:\n        print(\"  ✗ Specific tasks were not swapped\", file=sys.stderr)\n        verification_passed = False\n    \n    # Check 2: Task distribution shows a complete swap\n    if swap_completed:\n        print(\"  ✓ Complete task swap verified (3↔10 tasks)\")\n    else:\n        # Show actual distribution for debugging\n        person1_tasks = len(task_counts.get(original_least_id, []))\n        person2_tasks = len(task_counts.get(original_most_id, []))\n        print(f\"  ✗ Swap incomplete! 
Expected 5ac96c02→10 tasks, ac7a3bd0→3 tasks\", file=sys.stderr)\n        print(f\"    Actual: 5ac96c02→{person1_tasks} tasks, ac7a3bd0→{person2_tasks} tasks\", file=sys.stderr)\n        verification_passed = False\n    \n    # Check 3: Total task count is preserved\n    total_assigned_tasks = sum(len(tasks) for _, tasks in task_counts.items())\n    expected_total = len(tasks) - unassigned_count\n    \n    if total_assigned_tasks == expected_total:\n        print(f\"  ✓ Total task count preserved ({total_assigned_tasks} assigned)\")\n    else:\n        print(f\"  ✗ Task count mismatch: {total_assigned_tasks} vs {expected_total} expected\", file=sys.stderr)\n        verification_passed = False\n    \n    if verification_passed:\n        print(\"\\n✅ All verification checks passed!\")\n        return True\n    else:\n        print(\"\\n❌ Verification failed\", file=sys.stderr)\n        return False\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/notion/standard/toronto_guide/change_color/description.md",
    "content": "Navigate to the Toronto Guide page in Notion and change all pink-colored elements (tags and callout colors) to different colors.\n\n## Requirements\n1. Find and access the Toronto Guide page in Notion\n2. Identify and change all pink elements including:\n   - Pink tags in databases\n   - Pink callout backgrounds\n3. Change all pink colors to any other color of your choice"
  },
  {
    "path": "tasks/notion/standard/toronto_guide/change_color/meta.json",
    "content": "{\n  \"task_id\": \"change_color\",\n  \"task_name\": \"Change Color\",\n  \"category_id\": \"toronto_guide\",\n  \"category_name\": \"Toronto Guide\",\n  \"description\": \"Navigate to the Toronto Guide page and change all pink-colored elements to different colors.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-14\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"visual formatting\",\n    \"conditional filtering\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Toronto-Guide-25281626b6d7802caa7cc394647e901c\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/conquering-toronto-a-destination-guide\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/toronto_guide/change_color/verify.py",
    "content": "import sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\ndef get_page_title(page_result):\n    \"\"\"Extract title from a page result\"\"\"\n    properties = page_result.get('properties', {})\n    for prop_name in ['Name', 'Title', 'title']:\n        if prop_name in properties:\n            prop = properties[prop_name]\n            if prop.get('type') == 'title':\n                title_array = prop.get('title', [])\n                if title_array and len(title_array) > 0:\n                    return title_array[0].get('plain_text', '')\n    return ''\n\ndef get_page_tags(page_result):\n    \"\"\"Extract tags from a page result\"\"\"\n    properties = page_result.get('properties', {})\n    tags_property = properties.get('Tags', {})\n    if tags_property.get('type') == 'multi_select':\n        tags = tags_property.get('multi_select', [])\n        return [tag.get('name') for tag in tags]\n    return []\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that all pink colors have been changed in the Toronto Guide page.\n    \n    Expected pink elements that should be changed:\n    1. Callout: \"Welcome to Toronto!\" with red_background (originally should be pink)\n    2. Activities database tags: \n       - \"Parks\" tag (High Park, Evergreen Brickworks)\n       - \"Neighbourhood\" tag (Ossington Strip, Chinatown, Little Italy, Kensington Market, Queen west, The beaches)\n    3. Food database tags:\n       - \"Middle Eastern\" (Byblos Downtown)\n       - \"Jamaican\" (Crumbs Patties)\n       - \"Indian\" (Leela Indian Food Bar)\n    4. 
Cafes database tag:\n       - \"Food\" (Cafe Landwer)\n    \n    These elements should exist with the same name/content but different colors.\n    Tag distributions should remain the same.\n    \"\"\"\n    # Step 1: Find the main Toronto Guide page\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if not found_id or object_type != 'page':\n            print(\"Error: Toronto Guide page not found.\", file=sys.stderr)\n            return False\n    else:\n        # Try to find the page by searching\n        found_id = notion_utils.find_page(notion, \"Toronto Guide\")\n        if not found_id:\n            print(\"Error: Toronto Guide page not found.\", file=sys.stderr)\n            return False\n    \n    print(f\"Found Toronto Guide page: {found_id}\")\n    \n    # Get all blocks from the page\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, found_id)\n    print(f\"Found {len(all_blocks)} blocks\")\n    \n    # Expected elements and their distributions\n    expected_pink_elements = {\n        \"callout\": {\n            \"text\": \"Welcome to Toronto!\",\n            \"found\": False,\n            \"has_pink\": False,\n            \"exists\": False\n        },\n        \"activities_tags\": {\n            \"Parks\": {\n                \"found\": False, \n                \"has_pink\": False,\n                \"expected_items\": [\"High Park\", \"Evergreen Brickworks\"],\n                \"actual_items\": []\n            },\n            \"Neighbourhood\": {\n                \"found\": False, \n                \"has_pink\": False,\n                \"expected_items\": [\"Ossington Strip\", \"Chinatown\", \"Little Italy\", \"Kensington Market\", \"Queen west\", \"The beaches\"],\n                \"actual_items\": []\n            }\n        },\n        \"food_tags\": {\n            \"Middle Eastern\": {\n                \"found\": False, \n                \"has_pink\": False,\n               
 \"expected_items\": [\"Byblos Downtown\"],\n                \"actual_items\": []\n            },\n            \"Jamaican\": {\n                \"found\": False, \n                \"has_pink\": False,\n                \"expected_items\": [\"Crumbs Patties\"],\n                \"actual_items\": []\n            },\n            \"Indian\": {\n                \"found\": False, \n                \"has_pink\": False,\n                \"expected_items\": [\"Leela Indian Food Bar\"],\n                \"actual_items\": []\n            }\n        },\n        \"cafes_tags\": {\n            \"Food\": {\n                \"found\": False, \n                \"has_pink\": False,\n                \"expected_items\": [\"Cafe Landwer\"],\n                \"actual_items\": []\n            }\n        }\n    }\n    \n    # Database IDs\n    activities_db_id = None\n    food_db_id = None\n    cafes_db_id = None\n    \n    # Step 2: Check all blocks for callouts and find databases\n    for block in all_blocks:\n        if block is None:\n            continue\n            \n        block_type = block.get(\"type\")\n        \n        # Check for the specific callout block\n        if block_type == \"callout\":\n            callout_text = notion_utils.get_block_plain_text(block)\n            if \"Welcome to Toronto!\" in callout_text:\n                expected_pink_elements[\"callout\"][\"exists\"] = True\n                expected_pink_elements[\"callout\"][\"found\"] = True\n                color = block.get(\"callout\", {}).get(\"color\", \"\")\n                if \"pink\" in color.lower():\n                    expected_pink_elements[\"callout\"][\"has_pink\"] = True\n                    print(f\"✗ Callout 'Welcome to Toronto!' still has pink color: {color}\")\n                else:\n                    print(f\"✓ Callout 'Welcome to Toronto!' 
has non-pink color: {color}\")\n        \n        # Find child databases\n        elif block_type == \"child_database\":\n            title = block.get(\"child_database\", {}).get(\"title\", \"\")\n            block_id = block.get(\"id\")\n            \n            if \"Activities\" in title:\n                activities_db_id = block_id\n                print(f\"Found Activities database: {block_id}\")\n            elif \"Food\" in title:\n                food_db_id = block_id\n                print(f\"Found Food database: {block_id}\")\n            elif \"Cafes\" in title or \"Café\" in title:\n                cafes_db_id = block_id\n                print(f\"Found Cafes database: {block_id}\")\n    \n    # Step 3: Check Activities database for specific tags and their distributions\n    if activities_db_id:\n        try:\n            # Get database properties\n            db_info = notion.databases.retrieve(database_id=activities_db_id)\n            tags_property = db_info.get(\"properties\", {}).get(\"Tags\", {})\n            if tags_property.get(\"type\") == \"multi_select\":\n                options = tags_property.get(\"multi_select\", {}).get(\"options\", [])\n                for option in options:\n                    tag_name = option.get(\"name\").strip()\n                    tag_color = option.get(\"color\")\n                    \n                    if tag_name in expected_pink_elements[\"activities_tags\"]:\n                        expected_pink_elements[\"activities_tags\"][tag_name][\"found\"] = True\n                        if tag_color == \"pink\":\n                            expected_pink_elements[\"activities_tags\"][tag_name][\"has_pink\"] = True\n                            print(f\"✗ Activities tag '{tag_name}' still has pink color\")\n                        else:\n                            print(f\"✓ Activities tag '{tag_name}' changed to {tag_color}\")\n            \n            # Query database to check tag distributions\n            
query_result = notion.databases.query(database_id=activities_db_id)\n            for page in query_result.get('results', []):\n                page_title = get_page_title(page).strip()\n                page_tags = get_page_tags(page)\n                \n                for tag_name in expected_pink_elements[\"activities_tags\"]:\n                    if tag_name in page_tags:\n                        expected_pink_elements[\"activities_tags\"][tag_name][\"actual_items\"].append(page_title)\n                        \n        except Exception as e:\n            print(f\"Error checking Activities database: {e}\", file=sys.stderr)\n            return False\n    else:\n        print(\"Error: Activities database not found\", file=sys.stderr)\n        return False\n    \n    # Step 4: Check Food database for specific tags and their distributions\n    if food_db_id:\n        try:\n            # Get database properties\n            db_info = notion.databases.retrieve(database_id=food_db_id)\n            tags_property = db_info.get(\"properties\", {}).get(\"Tags\", {})\n            if tags_property.get(\"type\") == \"multi_select\":\n                options = tags_property.get(\"multi_select\", {}).get(\"options\", [])\n                for option in options:\n                    tag_name = option.get(\"name\").strip()\n                    tag_color = option.get(\"color\")\n                    \n                    if tag_name in expected_pink_elements[\"food_tags\"]:\n                        expected_pink_elements[\"food_tags\"][tag_name][\"found\"] = True\n                        if tag_color == \"pink\":\n                            expected_pink_elements[\"food_tags\"][tag_name][\"has_pink\"] = True\n                            print(f\"✗ Food tag '{tag_name}' still has pink color\")\n                        else:\n                            print(f\"✓ Food tag '{tag_name}' changed to {tag_color}\")\n            \n            # Query database to check tag distributions\n   
         query_result = notion.databases.query(database_id=food_db_id)\n            for page in query_result.get('results', []):\n                page_title = get_page_title(page).strip()\n                page_tags = get_page_tags(page)\n                \n                for tag_name in expected_pink_elements[\"food_tags\"]:\n                    if tag_name in page_tags:\n                        expected_pink_elements[\"food_tags\"][tag_name][\"actual_items\"].append(page_title)\n                        \n        except Exception as e:\n            print(f\"Error checking Food database: {e}\", file=sys.stderr)\n            return False\n    else:\n        print(\"Error: Food database not found\", file=sys.stderr)\n        return False\n    \n    # Step 5: Check Cafes database for specific tags and their distributions\n    if cafes_db_id:\n        try:\n            # Get database properties\n            db_info = notion.databases.retrieve(database_id=cafes_db_id)\n            tags_property = db_info.get(\"properties\", {}).get(\"Tags\", {})\n            if tags_property.get(\"type\") == \"multi_select\":\n                options = tags_property.get(\"multi_select\", {}).get(\"options\", [])\n                for option in options:\n                    tag_name = option.get(\"name\").strip()\n                    tag_color = option.get(\"color\")\n                    \n                    if tag_name in expected_pink_elements[\"cafes_tags\"]:\n                        expected_pink_elements[\"cafes_tags\"][tag_name][\"found\"] = True\n                        if tag_color == \"pink\":\n                            expected_pink_elements[\"cafes_tags\"][tag_name][\"has_pink\"] = True\n                            print(f\"✗ Cafes tag '{tag_name}' still has pink color\")\n                        else:\n                            print(f\"✓ Cafes tag '{tag_name}' changed to {tag_color}\")\n            \n            # Query database to check tag distributions\n            
query_result = notion.databases.query(database_id=cafes_db_id)\n            for page in query_result.get('results', []):\n                page_title = get_page_title(page).strip()\n                page_tags = get_page_tags(page)\n                \n                for tag_name in expected_pink_elements[\"cafes_tags\"]:\n                    if tag_name in page_tags:\n                        expected_pink_elements[\"cafes_tags\"][tag_name][\"actual_items\"].append(page_title)\n                        \n        except Exception as e:\n            print(f\"Error checking Cafes database: {e}\", file=sys.stderr)\n            return False\n    else:\n        print(\"Error: Cafes database not found\", file=sys.stderr)\n        return False\n    \n    # Step 6: Verify all requirements\n    print(f\"\\nVerification Summary:\")\n    \n    all_passed = True\n    \n    # Check callout\n    if not expected_pink_elements[\"callout\"][\"exists\"]:\n        print(\"✗ 'Welcome to Toronto!' callout not found\", file=sys.stderr)\n        all_passed = False\n    elif expected_pink_elements[\"callout\"][\"has_pink\"]:\n        print(\"✗ Callout still has pink background\", file=sys.stderr)\n        all_passed = False\n    else:\n        print(\"✓ Callout color changed from pink\")\n    \n    # Check Activities tags\n    print(\"\\nActivities Database Tags:\")\n    for tag_name, tag_info in expected_pink_elements[\"activities_tags\"].items():\n        if not tag_info[\"found\"]:\n            print(f\"✗ Activities tag '{tag_name}' not found (may have been renamed)\", file=sys.stderr)\n            # Don't fail if tag was renamed, as that's acceptable\n        elif tag_info[\"has_pink\"]:\n            print(f\"✗ Activities tag '{tag_name}' still has pink color\", file=sys.stderr)\n            all_passed = False\n        else:\n            print(f\"✓ Activities tag '{tag_name}' color changed from pink\")\n            \n        # Check distribution\n        expected_set = 
set(tag_info[\"expected_items\"])\n        actual_set = set(tag_info[\"actual_items\"])\n        if tag_info[\"found\"] and expected_set != actual_set:\n            print(f\"  ✗ Tag distribution mismatch for '{tag_name}':\", file=sys.stderr)\n            print(f\"    Expected: {sorted(expected_set)}\", file=sys.stderr)\n            print(f\"    Actual: {sorted(actual_set)}\", file=sys.stderr)\n            # Note: We don't fail on distribution mismatch if tag was renamed\n            if not (expected_set - actual_set):  # If all expected items are present\n                print(f\"    (Additional items found, but all expected items are present)\")\n        elif tag_info[\"found\"]:\n            print(f\"  ✓ Tag distribution maintained for '{tag_name}'\")\n    \n    # Check Food tags\n    print(\"\\nFood Database Tags:\")\n    for tag_name, tag_info in expected_pink_elements[\"food_tags\"].items():\n        if not tag_info[\"found\"]:\n            print(f\"✗ Food tag '{tag_name}' not found (may have been renamed)\", file=sys.stderr)\n            # Don't fail if tag was renamed, as that's acceptable\n        elif tag_info[\"has_pink\"]:\n            print(f\"✗ Food tag '{tag_name}' still has pink color\", file=sys.stderr)\n            all_passed = False\n        else:\n            print(f\"✓ Food tag '{tag_name}' color changed from pink\")\n            \n        # Check distribution\n        expected_set = set(tag_info[\"expected_items\"])\n        actual_set = set(tag_info[\"actual_items\"])\n        if tag_info[\"found\"] and expected_set != actual_set:\n            print(f\"  ✗ Tag distribution mismatch for '{tag_name}':\", file=sys.stderr)\n            print(f\"    Expected: {sorted(expected_set)}\", file=sys.stderr)\n            print(f\"    Actual: {sorted(actual_set)}\", file=sys.stderr)\n        elif tag_info[\"found\"]:\n            print(f\"  ✓ Tag distribution maintained for '{tag_name}'\")\n    \n    # Check Cafes tags\n    print(\"\\nCafes Database 
Tags:\")\n    for tag_name, tag_info in expected_pink_elements[\"cafes_tags\"].items():\n        if not tag_info[\"found\"]:\n            print(f\"✗ Cafes tag '{tag_name}' not found (may have been renamed)\", file=sys.stderr)\n            # Don't fail if tag was renamed, as that's acceptable\n        elif tag_info[\"has_pink\"]:\n            print(f\"✗ Cafes tag '{tag_name}' still has pink color\", file=sys.stderr)\n            all_passed = False\n        else:\n            print(f\"✓ Cafes tag '{tag_name}' color changed from pink\")\n            \n        # Check distribution\n        expected_set = set(tag_info[\"expected_items\"])\n        actual_set = set(tag_info[\"actual_items\"])\n        if tag_info[\"found\"] and expected_set != actual_set:\n            print(f\"  ✗ Tag distribution mismatch for '{tag_name}':\", file=sys.stderr)\n            print(f\"    Expected: {sorted(expected_set)}\", file=sys.stderr)\n            print(f\"    Actual: {sorted(actual_set)}\", file=sys.stderr)\n        elif tag_info[\"found\"]:\n            print(f\"  ✓ Tag distribution maintained for '{tag_name}'\")\n    \n    # Additional check: ensure no other pink elements exist\n    print(\"\\nChecking for any other pink elements...\")\n    other_pink_found = False\n    \n    # Check all callouts for pink\n    for block in all_blocks:\n        if block and block.get(\"type\") == \"callout\":\n            color = block.get(\"callout\", {}).get(\"color\", \"\")\n            if \"pink\" in color.lower():\n                callout_text = notion_utils.get_block_plain_text(block)[:50]\n                if \"Welcome to Toronto!\" not in callout_text:\n                    print(f\"✗ Found unexpected pink callout: {callout_text}...\", file=sys.stderr)\n                    other_pink_found = True\n    \n    if other_pink_found:\n        all_passed = False\n    else:\n        print(\"✓ No unexpected pink elements found\")\n    \n    return all_passed\n\ndef main():\n    \"\"\"\n    Executes the 
verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    \n    if verify(notion, main_id):\n        print(\"\\nVerification passed: All expected pink colors have been changed\")\n        sys.exit(0)\n    else:\n        print(\"\\nVerification failed: Some pink colors still exist or elements are missing\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/notion/standard/toronto_guide/weekend_adventure_planner/description.md",
    "content": "Create a comprehensive weekend adventure planner that analyzes the Toronto Guide databases and generates a structured itinerary page. I need you to create a new page called 'Perfect Weekend Adventure' as a child of the main Toronto Guide page.\n\n**Task Requirements:**\n1. Create a new page titled 'Perfect Weekend Adventure' as a child page of the main Toronto Guide page\n2. Query the Activities database to identify all activities that have the \"Beaches\" tag\n3. Query the Food database to find all restaurants with \"Turkish\" or \"Hakka\" tags\n4. Query the Cafes database to retrieve all cafes entries\n5. Structure the page with the following specific format:\n   - Add a heading_1 block with text \"🎒 Perfect Weekend Adventure\"\n   - Add a heading_2 block with text \"🏖️ Beach Activities\"\n   - Under Beach Activities, create a bulleted list with all activities that have the \"Beaches\" tag, showing: Name - Google Maps Link (if available)\n   - Add a heading_2 block with text \"🍽️ Cultural Dining Experience\"\n   - Under Cultural Dining, create a numbered list of all restaurants with \"Turkish\" or \"Hakka\" tags, formatted as: Restaurant Name (Tag: [actual tag name])\n   - Add a heading_2 block with text \"☕ Coffee Break Spots\"\n   - Under Coffee Break Spots, create a toggle block titled \"Top Cafes to Visit\" containing all cafe entries as to-do items (unchecked), each showing just the cafe name\n   - Add a heading_2 block with text \"📊 Weekend Summary\"\n   - Under Weekend Summary, add a paragraph with the exact text: \"This weekend includes [X] beach activities, [Y] cultural dining options, and [Z] coffee spots to explore!\" where [X], [Y], and [Z] are the actual counts\n6. After the summary paragraph, add a divider block\n7. Finally, add a callout block with the 💡 emoji containing the text: \"Pro tip: Check the Seasons database for the best time to enjoy outdoor activities!\"\n8. 
Ensure all headings use the exact emoji and text format specified above\n9. The lists must be in the exact format specified (bulleted for beaches, numbered for restaurants, to-do for cafes)"
  },
  {
    "path": "tasks/notion/standard/toronto_guide/weekend_adventure_planner/meta.json",
    "content": "{\n  \"task_id\": \"weekend_adventure_planner\",\n  \"task_name\": \"Weekend Adventure Planner\",\n  \"category_id\": \"toronto_guide\",\n  \"category_name\": \"Toronto Guide\",\n  \"description\": \"Create a comprehensive weekend adventure planner that analyzes Toronto Guide databases and generates a structured itinerary page.\",\n  \"author\": \"Xiangyan Liu\",\n  \"created_at\": \"2025-08-14\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"conditional filtering\",\n    \"data aggregation\",\n    \"report generation\",\n    \"visual formatting\",\n    \"status tracking\"\n  ],\n  \"mcp\": [\n    \"notion\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"url\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://painted-tennis-ebc.notion.site/Toronto-Guide-25281626b6d7802caa7cc394647e901c\",\n    \"stateOriginalUrl\": \"https://www.notion.so/marketplace/templates/conquering-toronto-a-destination-guide\"\n  }\n}"
  },
  {
    "path": "tasks/notion/standard/toronto_guide/weekend_adventure_planner/verify.py",
    "content": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nimport sys\nfrom notion_client import Client\nfrom tasks.utils import notion_utils\n\n\ndef verify(notion: Client, main_id: str = None) -> bool:\n    \"\"\"\n    Verifies that the Perfect Weekend Adventure page has been created correctly.\n    \"\"\"\n    # Find the main Toronto Guide page\n    page_id = None\n    if main_id:\n        found_id, object_type = notion_utils.find_page_or_database_by_id(notion, main_id)\n        if found_id and object_type == \"page\":\n            page_id = found_id\n    \n    if not page_id:\n        page_id = notion_utils.find_page(notion, \"Toronto Guide\")\n    if not page_id:\n        print(\"Error: Main 'Toronto Guide' page not found.\", file=sys.stderr)\n        return False\n    \n    # Find the Perfect Weekend Adventure child page\n    adventure_page_id = None\n    try:\n        response = notion.search(\n            query=\"Perfect Weekend Adventure\",\n            filter={\"property\": \"object\", \"value\": \"page\"}\n        )\n        \n        for result in response.get(\"results\", []):\n            parent = result.get(\"parent\", {})\n            if parent.get(\"type\") == \"page_id\" and parent.get(\"page_id\") == page_id:\n                adventure_page_id = result[\"id\"]\n                break\n        \n        if not adventure_page_id:\n            for result in response.get(\"results\", []):\n                title_list = result.get(\"properties\", {}).get(\"title\", {}).get(\"title\", [])\n                for title_obj in title_list:\n                    if \"Perfect Weekend Adventure\" in title_obj.get(\"plain_text\", \"\"):\n                        adventure_page_id = result[\"id\"]\n                        break\n                if adventure_page_id:\n                    break\n    \n    except Exception as e:\n        print(f\"Error searching for Perfect Weekend Adventure page: {e}\", file=sys.stderr)\n        return False\n    \n    if not 
adventure_page_id:\n        print(\"Error: 'Perfect Weekend Adventure' page not found as child of main page.\", file=sys.stderr)\n        return False\n    \n    # Get all blocks from the adventure page\n    all_blocks = notion_utils.get_all_blocks_recursively(notion, adventure_page_id)\n    \n    # Get databases from the main Toronto Guide page\n    activities_db_id = None\n    food_db_id = None\n    cafes_db_id = None\n    \n    main_blocks = notion_utils.get_all_blocks_recursively(notion, page_id)\n    for block in main_blocks:\n        if block.get(\"type\") == \"child_database\":\n            title = block.get(\"child_database\", {}).get(\"title\", \"\")\n            if \"Activities\" in title:\n                activities_db_id = block.get(\"id\")\n            elif \"Food\" in title:\n                food_db_id = block.get(\"id\")\n            elif \"Cafes\" in title or \"Café\" in title:\n                cafes_db_id = block.get(\"id\")\n    \n    # Query databases to get expected data\n    beach_activities = []\n    cultural_restaurants = []\n    cafes_list = []\n    \n    if activities_db_id:\n        try:\n            db_response = notion.databases.query(database_id=activities_db_id)\n            for page in db_response.get(\"results\", []):\n                properties = page.get(\"properties\", {})\n                tags_prop = properties.get(\"Tags\", {})\n                if tags_prop.get(\"type\") == \"multi_select\":\n                    tags = [tag.get(\"name\") for tag in tags_prop.get(\"multi_select\", [])]\n                    if \"Beaches\" in tags:\n                        name_prop = properties.get(\"Name\", {})\n                        if name_prop.get(\"type\") == \"title\" and name_prop.get(\"title\"):\n                            name = name_prop[\"title\"][0][\"plain_text\"]\n                            url_prop = properties.get(\"Google Maps Link\", {})\n                            url = url_prop.get(\"url\", \"\") if url_prop.get(\"type\") 
== \"url\" else \"\"\n                            beach_activities.append({\"name\": name, \"url\": url})\n        except Exception as e:\n            print(f\"Error querying Activities database: {e}\", file=sys.stderr)\n            return False\n    \n    if food_db_id:\n        try:\n            db_response = notion.databases.query(database_id=food_db_id)\n            for page in db_response.get(\"results\", []):\n                properties = page.get(\"properties\", {})\n                tags_prop = properties.get(\"Tags\", {})\n                if tags_prop.get(\"type\") == \"multi_select\":\n                    tags = [tag.get(\"name\") for tag in tags_prop.get(\"multi_select\", [])]\n                    for tag in tags:\n                        if tag in [\"Turkish\", \"Hakka\"]:\n                            name_prop = properties.get(\"Name\", {})\n                            if name_prop.get(\"type\") == \"title\" and name_prop.get(\"title\"):\n                                name = name_prop[\"title\"][0][\"plain_text\"]\n                                cultural_restaurants.append({\"name\": name, \"tag\": tag})\n                                break\n        except Exception as e:\n            print(f\"Error querying Food database: {e}\", file=sys.stderr)\n            return False\n    \n    if cafes_db_id:\n        try:\n            db_response = notion.databases.query(database_id=cafes_db_id)\n            for page in db_response.get(\"results\", []):\n                properties = page.get(\"properties\", {})\n                name_prop = properties.get(\"Name\", {})\n                if name_prop.get(\"type\") == \"title\" and name_prop.get(\"title\"):\n                    name = name_prop[\"title\"][0][\"plain_text\"]\n                    cafes_list.append(name)\n        except Exception as e:\n            print(f\"Error querying Cafes database: {e}\", file=sys.stderr)\n            return False\n    \n    # Required headings and their types\n    
required_headings = [\n        (\"🎒 Perfect Weekend Adventure\", \"heading_1\"),\n        (\"🏖️ Beach Activities\", \"heading_2\"),\n        (\"🍽️ Cultural Dining Experience\", \"heading_2\"),\n        (\"☕ Coffee Break Spots\", \"heading_2\"),\n        (\"📊 Weekend Summary\", \"heading_2\")\n    ]\n    \n    # Track verification results\n    found_headings = set()\n    found_beach_list = False\n    found_restaurant_list = False\n    found_toggle_with_cafes = False\n    found_summary = False\n    found_divider = False\n    found_callout = False\n    \n    # Variables to track counts\n    beach_count = 0\n    restaurant_count = 0\n    cafe_count = 0\n    \n    current_section = None\n    is_in_toggle = False\n    \n    for block in all_blocks:\n        block_type = block.get(\"type\")\n        block_text = notion_utils.get_block_plain_text(block)\n        \n        # Check headings\n        for heading_text, expected_type in required_headings:\n            if heading_text in block_text and block_type == expected_type:\n                found_headings.add(heading_text)\n                current_section = heading_text\n        \n        # Check Beach Activities section\n        if current_section == \"🏖️ Beach Activities\" and block_type == \"bulleted_list_item\":\n            found_beach_list = True\n            beach_count += 1\n            # Verify format includes name and potentially URL\n            for activity in beach_activities:\n                if activity[\"name\"] in block_text:\n                    if activity[\"url\"] and activity[\"url\"] not in block_text:\n                        print(f\"Warning: Beach activity '{activity['name']}' missing URL\", file=sys.stderr)\n        \n        # Check Cultural Dining section\n        elif current_section == \"🍽️ Cultural Dining Experience\" and block_type == \"numbered_list_item\":\n            found_restaurant_list = True\n            restaurant_count += 1\n            # Check format: Restaurant Name (Tag: 
[tag])\n            for restaurant in cultural_restaurants:\n                if restaurant[\"name\"] in block_text and f\"Tag: {restaurant['tag']}\" in block_text:\n                    pass  # Format is correct\n        \n        # Check Coffee Break Spots section\n        elif current_section == \"☕ Coffee Break Spots\":\n            if block_type == \"toggle\" and \"Top Cafes to Visit\" in block_text:\n                is_in_toggle = True\n                found_toggle_with_cafes = True\n            elif is_in_toggle and block_type == \"to_do\":\n                cafe_count += 1\n                # Verify unchecked status\n                to_do_data = block.get(\"to_do\", {})\n                if to_do_data.get(\"checked\", False):\n                    print(f\"Error: Cafe to-do item should be unchecked: {block_text}\", file=sys.stderr)\n                    return False\n            elif block_type in [\"heading_1\", \"heading_2\", \"heading_3\"]:\n                is_in_toggle = False\n        \n        # Check Weekend Summary section\n        elif current_section == \"📊 Weekend Summary\" and block_type == \"paragraph\":\n            expected_text = f\"This weekend includes {len(beach_activities)} beach activities, {len(cultural_restaurants)} cultural dining options, and {len(cafes_list)} coffee spots to explore!\"\n            if expected_text in block_text:\n                found_summary = True\n        \n        # Check for divider after summary\n        if block_type == \"divider\":\n            found_divider = True\n        \n        # Check for callout with pro tip\n        if block_type == \"callout\":\n            callout_data = block.get(\"callout\", {})\n            icon = callout_data.get(\"icon\", {})\n            if icon.get(\"type\") == \"emoji\" and icon.get(\"emoji\") == \"💡\":\n                if \"Pro tip: Check the Seasons database for the best time to enjoy outdoor activities!\" in block_text:\n                    found_callout = True\n    \n    # 
Verify all required elements\n    all_passed = True\n    \n    # Check all headings are present\n    for heading_text, _ in required_headings:\n        if heading_text not in found_headings:\n            print(f\"Error: Missing required heading: {heading_text}\", file=sys.stderr)\n            all_passed = False\n    \n    # Check beach activities list\n    if not found_beach_list:\n        print(\"Error: Beach activities bulleted list not found\", file=sys.stderr)\n        all_passed = False\n    elif beach_count != len(beach_activities):\n        print(f\"Error: Expected {len(beach_activities)} beach activities, found {beach_count}\", file=sys.stderr)\n        all_passed = False\n    \n    # Check restaurant list\n    if not found_restaurant_list:\n        print(\"Error: Cultural dining numbered list not found\", file=sys.stderr)\n        all_passed = False\n    elif restaurant_count != len(cultural_restaurants):\n        print(f\"Error: Expected {len(cultural_restaurants)} cultural restaurants, found {restaurant_count}\", file=sys.stderr)\n        all_passed = False\n    \n    # Check cafes toggle\n    if not found_toggle_with_cafes:\n        print(\"Error: Toggle block 'Top Cafes to Visit' not found\", file=sys.stderr)\n        all_passed = False\n    elif cafe_count != len(cafes_list):\n        print(f\"Error: Expected {len(cafes_list)} cafes, found {cafe_count}\", file=sys.stderr)\n        all_passed = False\n    \n    # Check summary\n    if not found_summary:\n        print(\"Error: Weekend summary with correct counts not found\", file=sys.stderr)\n        all_passed = False\n    \n    # Check divider\n    if not found_divider:\n        print(\"Error: Divider block not found after summary\", file=sys.stderr)\n        all_passed = False\n    \n    # Check callout\n    if not found_callout:\n        print(\"Error: Callout with pro tip not found\", file=sys.stderr)\n        all_passed = False\n    \n    if all_passed:\n        print(f\"Success: Perfect Weekend 
Adventure page created with all required elements.\")\n        print(f\"- {len(beach_activities)} beach activities\")\n        print(f\"- {len(cultural_restaurants)} cultural dining options\")\n        print(f\"- {len(cafes_list)} coffee spots\")\n        return True\n    else:\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    notion = notion_utils.get_notion_client()\n    main_id = sys.argv[1] if len(sys.argv) > 1 else None\n    if verify(notion, main_id):\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright/easy/.gitkeep",
    "content": ""
  },
  {
    "path": "tasks/playwright/standard/eval_web/cloudflare_turnstile_challenge/description.md",
    "content": "# Cloudflare Turnstile Authentication Challenge Task\n\nUse Playwright MCP tools to complete Cloudflare Turnstile authentication challenge.\n\n## Requirements:\n\n1. Navigate to https://eval-web.mcpmark.ai/auth/turnstile\n2. Fill in the authentication form with provided test credentials:\n   - Username: \"testuser\"\n   - Password: \"password123\"\n3. Wait for the Cloudflare Turnstile challenge widget to load completely\n4. Interact with the Turnstile challenge widget to complete the authentication (if needed)\n5. Wait for successful challenge completion (widget shows success state with checkmark)\n6. Submit the form by clicking the \"Sign In\" button\n7. Wait for and capture any success message or confirmation that appears\n8. Output the success capture in step 7\n\n## Notes:\n\n- Use the provided test credentials: testuser / password123\n- Page shows success message inline, does not redirect to separate success page\n- Wait for all UI state changes before proceeding to next step\n- Verify both Turnstile completion and form submission success\n"
  },
  {
    "path": "tasks/playwright/standard/eval_web/cloudflare_turnstile_challenge/meta.json",
    "content": "{\n    \"task_id\": \"cloudflare_turnstile_challenge\",\n    \"task_name\": \"Cloudflare Turnstile Challenge\",\n    \"category_id\": \"eval_web\",\n    \"category_name\": \"Eval Web\",\n    \"description\": \"Navigate websites with Cloudflare Turnstile protection, handle security challenges, bypass bot detection mechanisms, and successfully access protected content using automated browser interactions.\",\n    \"author\": \"Allison Zhan\",\n    \"created_at\": \"2025-07-27\",\n    \"difficulty\": \"L3\",\n    \"tags\": [\n        \"user interaction\"\n    ],\n    \"mcp\": [\n        \"playwright\"\n    ],\n    \"meta_data\": {\n        \"stateType\": \"video\",\n        \"stateContent\": null,\n        \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/mcpmark-eval-website.mp4\",\n        \"stateOriginalUrl\": \"https://mcp-eval-website.vercel.app/auth/turnstile\"\n    }\n}\n"
  },
  {
    "path": "tasks/playwright/standard/eval_web/cloudflare_turnstile_challenge/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nSimplified verification script for Playwright Cloudflare Turnstile authentication task.\n\nThis script only verifies that the model successfully reported capturing the expected\nsuccess message by checking the last assistant message in messages.json.\n\"\"\"\n\nimport sys\nimport json\nimport os\n\n# Expected success message that agent should capture\nEXPECTED_SUCCESS_MESSAGE = \"Authentication successful! Security challenge verified.\"\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n    \n    try:\n        with open(messages_path, 'r') as f:\n            messages = json.load(f)\n        \n        # Find the last assistant message with status completed\n        for message in reversed(messages):\n            if (message.get('role') == 'assistant' and \n                message.get('status') == 'completed' and \n                message.get('type') == 'message'):\n                content = message.get('content', [])\n                # Extract text from content\n                if isinstance(content, list):\n                    for item in content:\n                        if isinstance(item, dict) and item.get('type') in ['text', 'output_text']:\n                            return item.get('text', '')\n                elif isinstance(content, str):\n                    return content\n        \n        print(\"Warning: No completed assistant message found\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef verify():\n    \"\"\"\n    Verifies that 
the model's last response contains the expected success message.\n    \"\"\"\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    \n    if not model_response:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n    \n    print(f\"\\nModel response (first 500 chars): {model_response[:500]}...\", file=sys.stderr)\n    \n    # Check if the expected success message is in the model's response\n    if EXPECTED_SUCCESS_MESSAGE in model_response:\n        print(f\"\\n✓ Success message found: '{EXPECTED_SUCCESS_MESSAGE}'\", file=sys.stderr)\n        return True\n    else:\n        print(f\"\\n✗ Success message NOT found: '{EXPECTED_SUCCESS_MESSAGE}'\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = verify()\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright/standard/eval_web/extraction_table/data.csv",
    "content": "Title, Rating, Likes, Views, Replies\nReact 18 New Features Deep Dive, \"4.8\", 856, 12543, 89\nVue 3 Composition API in Practice, \"4.5\", 743, 9876, 67\nAdvanced TypeScript Types Guide, \"4.9\", 924, 15432, 102\nNode.js Performance Optimization, \"4.2\", 567, 8765, 45\nFrontend Engineering Best Practices, \"4.7\", 812, 11234, 78\nMicroservices Architecture Patterns, \"4.3\", 634, 9543, 56\nDocker Containerization Deployment, \"4.6\", 789, 10876, 71\nKubernetes Cluster Management, \"4.4\", 698, 9234, 63\nGraphQL API Design Principles, \"4.8\", 876, 13456, 94\nWebpack 5 Configuration Guide, \"4.1\", 523, 7654, 38\nVite Build Tool Usage, \"4.5\", 745, 10123, 69\nESLint Code Standards, \"4.7\", 823, 11567, 82\nUnit Testing Best Practices, \"4.3\", 612, 8934, 51\nPerformance Monitoring & Optimization, \"4.9\", 945, 16234, 108\nSecurity Protection Strategies, \"4.2\", 578, 8456, 47\nDatabase Design Principles, \"4.6\", 767, 10567, 73\nCaching Strategies Implementation, \"4.4\", 689, 9123, 61\nMessage Queue Applications, \"4.8\", 834, 12876, 87\nDistributed Systems Design, \"4.0\", 456, 6789, 34\nCloud Native Development, \"4.5\", 723, 9876, 65\nDevOps Process Optimization, \"4.7\", 801, 11234, 79\nMachine Learning Introduction, \"4.1\", 534, 7543, 41\nArtificial Intelligence Applications, \"4.6\", 778, 10456, 74\nBlockchain Technology Fundamentals, \"4.3\", 645, 8765, 53\nMobile Development Techniques, \"4.9\", 912, 14567, 97\nCross-Platform Solutions, \"4.2\", 589, 8234, 48\nProgressive Web App Development, \"4.8\", 867, 12345, 91\nWeb3 Development Guide, \"4.4\", 712, 9567, 64\nNFT Smart Contracts, \"4.5\", 756, 10234, 70\nDeFi Protocol Design, \"4.7\", 834, 11876, 83\nGame Engine Development, \"4.3\", 623, 8567, 52\n3D Graphics Rendering, \"4.6\", 789, 10678, 75\nAudio Video Processing, \"4.1\", 545, 7234, 42\nIoT Applications, \"4.8\", 856, 12567, 88\nEdge Computing Practices, \"4.2\", 567, 8345, 46\n5G Network Technology, \"4.9\", 923, 15123, 
103\nQuantum Computing Principles, \"4.4\", 678, 9345, 62\nBioinformatics Analysis, \"4.5\", 734, 9876, 68\nData Science Methods, \"4.7\", 812, 11456, 80\nAlgorithms and Data Structures, \"4.3\", 634, 8678, 54\nSystem Design Interview, \"4.6\", 778, 10345, 76\nCode Refactoring Techniques, \"4.8\", 845, 12234, 89\nOpen Source Contributions, \"4.2\", 556, 7890, 43\nTechnical Team Management, \"4.5\", 723, 9567, 66\nProduct Thinking Development, \"4.9\", 901, 14234, 95\nUser Experience Design, \"4.1\", 512, 7123, 39\nInterface Interaction Optimization, \"4.7\", 789, 10890, 77\nAccessibility Design, \"4.4\", 667, 8901, 58\nSEO Optimization Strategies, \"4.6\", 756, 10123, 72\nSocial Media Operations, \"4.3\", 623, 8456, 55\nServerless Architecture, \"4.7\", 834, 11234, 81\nAPI Gateway Design, \"4.2\", 567, 8765, 49\nMicroservice Communication, \"4.8\", 892, 13567, 95\nEvent-Driven Architecture, \"4.5\", 723, 9876, 67\nCQRS Pattern Implementation, \"4.3\", 645, 8234, 54\nDomain-Driven Design, \"4.6\", 778, 10456, 73\nClean Architecture Principles, \"4.4\", 689, 9123, 62\nHexagonal Architecture, \"4.1\", 534, 7543, 42\nOnion Architecture, \"4.5\", 712, 9567, 65\nEvent Sourcing Patterns, \"4.7\", 823, 11876, 79\nSaga Pattern for Distributed Systems, \"4.3\", 612, 8934, 53\nCircuit Breaker Pattern, \"4.8\", 856, 12543, 87\nBulkhead Pattern, \"4.2\", 578, 8456, 47\nRetry Pattern Implementation, \"4.6\", 767, 10567, 74\nTimeout Pattern, \"4.4\", 698, 9234, 63\nRate Limiting Strategies, \"4.9\", 934, 15432, 103\nLoad Balancing Techniques, \"4.1\", 523, 7654, 39\nService Mesh Architecture, \"4.5\", 745, 10123, 69\nIstio Service Mesh, \"4.7\", 812, 11567, 82\nEnvoy Proxy Configuration, \"4.3\", 634, 9543, 56\nConsul Service Discovery, \"4.6\", 789, 10876, 71\nKubernetes Ingress, \"4.4\", 676, 9345, 58\nHelm Chart Development, \"4.8\", 845, 12234, 89\nTerraform Infrastructure, \"4.2\", 556, 7890, 44\nAnsible Automation, \"4.5\", 723, 9567, 66\nJenkins Pipeline, \"4.7\", 801, 
11234, 78\nGitLab CI/CD, \"4.3\", 623, 8567, 52\nGitHub Actions, \"4.6\", 789, 10678, 75\nAzure DevOps, \"4.1\", 512, 7123, 41\nAWS CodePipeline, \"4.8\", 867, 12345, 91\nDocker Compose, \"4.4\", 712, 9567, 64\nKubernetes Operators, \"4.5\", 756, 10234, 70\nCustom Resource Definitions, \"4.7\", 834, 11876, 83\nPod Security Policies, \"4.3\", 623, 8567, 52\nNetwork Policies, \"4.6\", 789, 10678, 75\nRBAC Configuration, \"4.1\", 545, 7234, 42\nSecret Management, \"4.8\", 856, 12567, 88\nConfigMap Usage, \"4.2\", 567, 8345, 46\nPersistent Volumes, \"4.9\", 923, 15123, 103\nStatefulSets, \"4.4\", 678, 9345, 62\nDaemonSets, \"4.5\", 734, 9876, 68\nJobs and CronJobs, \"4.7\", 812, 11456, 80\nHorizontal Pod Autoscaler, \"4.3\", 634, 8678, 54\nVertical Pod Autoscaler, \"4.6\", 778, 10345, 76\nCluster Autoscaler, \"4.8\", 845, 12234, 89\nResource Quotas, \"4.2\", 556, 7890, 43\nLimit Ranges, \"4.5\", 723, 9567, 66\n"
  },
  {
    "path": "tasks/playwright/standard/eval_web/extraction_table/description.md",
    "content": "# Web Data Extraction Task\n\nUse Playwright MCP tools to extract all data from the specified website and present it in CSV format.\n\n## Requirements:\n\n1. Navigate to https://eval-web.mcpmark.ai/extraction\n2. Wait for the page to fully load\n3. Extract all data content from the page, including:\n   - Title\n   - Rating\n   - Likes\n   - Views\n   - Replies\n4. Organize the extracted data into CSV format\n5. Ensure data completeness and accuracy\n6. Output ONLY the complete CSV formatted data (no additional text or explanations)\n\n## CSV Data Example:\n\n```csv\nTitle, Rating, Likes, Views, Replies\nSEO Optimization, \"4.6\", 756, 10123, 72\nVue 3 Composition API, \"4.5\", 743, 9876, 67\nAdvanced TypeScript Types Guide, \"4.9\", 924, 15432, 102\nNode.js Performance Optimization, \"4.2\", 567, 8765, 45\nFrontend Engineering Best Practices, \"4.7\", 812, 11234, 78\n```\n\n## Notes:\n\n- Ensure extraction of all visible data rows\n- Maintain data format consistency\n- All numeric data (Rating, Likes, Views, Replies) should NOT have quotes, only text data containing commas should be wrapped in quotes\n- Wait for the page to fully load before starting data extraction\n- Verify the quantity and format of extracted data are correct\n- **IMPORTANT: Final output must contain ONLY CSV data - no explanatory text, descriptions, or other content**\n"
  },
  {
    "path": "tasks/playwright/standard/eval_web/extraction_table/meta.json",
    "content": "{\n  \"task_id\": \"extraction_table\",\n  \"task_name\": \"Extraction Table\",\n  \"category_id\": \"eval_web\",\n  \"category_name\": \"Eval Web\",\n  \"description\": \"Extract structured data from complex web tables, parse multi-level headers, handle dynamic content loading, transform data formats, and export comprehensive datasets.\",\n  \"author\": \"Arvin Xu\",\n  \"created_at\": \"2025-08-18\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/mcpmark-eval-website.mp4\",\n    \"stateOriginalUrl\": \"https://eval-web.mcpmark.ai/extraction\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright/standard/eval_web/extraction_table/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for checking Playwright web data extraction tasks.\n\nThis script verifies whether the model successfully extracted CSV format data from web pages\nby checking the last assistant message in messages.json.\n\"\"\"\n\nimport sys\nimport json\nimport os\nimport re\nimport csv\nfrom io import StringIO\n\n# Expected CSV header (must match exactly, including spaces)\nEXPECTED_HEADER_LINE = \"Title, Rating, Likes, Views, Replies\"\nEXPECTED_HEADERS = [\"Title\", \"Rating\", \"Likes\", \"Views\", \"Replies\"]\n# Exact number of data rows (must match data.csv exactly)\nEXPECTED_DATA_ROWS = 97\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"| MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"| Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, 'r') as f:\n            messages = json.load(f)\n\n        # Find the last assistant message with status completed\n        for message in reversed(messages):\n            if (message.get('role') == 'assistant' and\n                message.get('status') == 'completed' and\n                message.get('type') == 'message'):\n                content = message.get('content', [])\n                # Extract text from content\n                if isinstance(content, list):\n                    for item in content:\n                        if isinstance(item, dict) and item.get('type') in ['text', 'output_text']:\n                            return item.get('text', '')\n                elif isinstance(content, str):\n                    return content\n\n        print(\"| Warning: No completed assistant message found\", file=sys.stderr)\n        return None\n    except Exception 
as e:\n        print(f\"| Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef extract_csv_from_response(response):\n    \"\"\"\n    Extract CSV data from model response.\n    \"\"\"\n    # Look for CSV code blocks\n    csv_pattern = r'```(?:csv)?\\s*\\n(.*?)\\n```'\n    matches = re.findall(csv_pattern, response, re.DOTALL | re.IGNORECASE)\n\n    if matches:\n        return matches[-1].strip()  # Return the last CSV block\n\n    # If no code block found, try to find CSV data starting with header\n    lines = response.split('\\n')\n    csv_start = -1\n\n    # Stricter header matching: look for lines containing \"Title\" and \"Rating\"\n    for i, line in enumerate(lines):\n        if \"Title\" in line and \"Rating\" in line and \"Likes\" in line:\n            csv_start = i\n            break\n\n    if csv_start >= 0:\n        # Extract from header until empty line or non-CSV format line\n        csv_lines = []\n        for line in lines[csv_start:]:\n            line = line.strip()\n            if not line or not (',' in line):\n                if csv_lines:  # If we already have data, stop at empty line\n                    break\n                continue\n            csv_lines.append(line)\n            if len(csv_lines) > 100:  # Prevent extracting too many rows\n                break\n\n        return '\\n'.join(csv_lines)\n\n    return None\n\n\ndef validate_csv_data(csv_text):\n    \"\"\"\n    Validate CSV data format and content, must match data.csv exactly.\n    \"\"\"\n    if not csv_text:\n        return False, \"CSV data not found\"\n\n    try:\n        lines = csv_text.strip().split('\\n')\n\n        # Check total number of rows (1 header row + data rows)\n        expected_total_rows = EXPECTED_DATA_ROWS + 1\n        if len(lines) != expected_total_rows:\n            return False, f\"| CSV total row count mismatch, expected: {expected_total_rows} rows, actual: {len(lines)} rows\"\n\n        # Check header row format 
(must match exactly)\n        header_line = lines[0].strip()\n        if header_line != EXPECTED_HEADER_LINE:\n            return False, f\"| Header format mismatch, expected: '{EXPECTED_HEADER_LINE}', actual: '{header_line}'\"\n\n        # Parse CSV to validate structure\n        csv_reader = csv.reader(StringIO(csv_text))\n        rows = list(csv_reader)\n\n        # Check column count for each row\n        expected_columns = len(EXPECTED_HEADERS)\n        for i, row in enumerate(rows):\n            if len(row) != expected_columns:\n                return False, f\"| Row {i+1} column count incorrect, expected: {expected_columns} columns, actual: {len(row)} columns\"\n\n        # Validate data row format\n        valid_rows = 0\n        for i, row in enumerate(rows[1:], 2):  # Skip header, start from row 2\n            # Check if each column has data\n            if not all(cell.strip() for cell in row):\n                return False, f\"| Row {i} contains empty data\"\n\n            # Check numeric column format (Rating, Likes, Views, Replies should not have quotes)\n            for col_idx, col_name in [(1, \"Rating\"), (2, \"Likes\"), (3, \"Views\"), (4, \"Replies\")]:\n                value = row[col_idx].strip()\n\n                # Check for quotes (should not have any)\n                if value.startswith('\"') and value.endswith('\"'):\n                    return False, f\"| Row {i} {col_name} should not have quotes, actual: {value}\"\n\n                # Check numeric format\n                if col_name == \"Rating\":\n                    try:\n                        float(value)\n                    except ValueError:\n                        return False, f\"| Row {i} {col_name} should be a number, actual: {value}\"\n                else:\n                    if not value.isdigit():\n                        return False, f\"| Row {i} {col_name} should be pure digits, actual: {value}\"\n\n            valid_rows += 1\n\n        # Validate number of data 
rows\n        if valid_rows != EXPECTED_DATA_ROWS:\n            return False, f\"| Valid data row count mismatch, expected: {EXPECTED_DATA_ROWS} rows, actual: {valid_rows} rows\"\n\n        return True, f\"| CSV validation successful: format matches data.csv exactly, {valid_rows} valid data rows\"\n\n    except Exception as e:\n        return False, f\"| CSV format parsing error: {str(e)}\"\n\n\ndef verify():\n    \"\"\"\n    Verify if the model's response contains correct CSV data extraction results.\n    \"\"\"\n    # Get model response\n    model_response = get_model_response()\n\n    if not model_response:\n        print(\"| Model response not found\", file=sys.stderr)\n        return False\n\n    print(f\"|\\n| Model response (first 500 characters): {model_response[:500]}...\", file=sys.stderr)\n\n    # Extract CSV data from response\n    csv_data = extract_csv_from_response(model_response)\n\n    if not csv_data:\n        print(\"|\\n| ✗ CSV data not found in response\", file=sys.stderr)\n        return False\n\n    print(f\"|\\n| Found CSV data (first 300 characters):\\n| {csv_data[:300]}...\", file=sys.stderr)\n\n    # Validate CSV data\n    is_valid, message = validate_csv_data(csv_data)\n\n    if is_valid:\n        print(f\"|\\n| ✓ {message}\", file=sys.stderr)\n        return True\n    else:\n        print(f\"|\\n| ✗ CSV validation failed: {message}\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = verify()\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright/standard/web_search/birth_of_arvinxu/description.md",
    "content": "# Web Search Task\n\nUse Playwright MCP tools to search for information about the X profile https://x.com/arvin17x and find out when this person was born.\n\n## Requirements:\n\nExtract the answer in specific format:\n   - just year,like 1990, 2001\n"
  },
  {
    "path": "tasks/playwright/standard/web_search/birth_of_arvinxu/meta.json",
    "content": "{\n  \"task_id\": \"birth_of_arvinxu\",\n  \"task_name\": \"Birth Of Arvinxu\",\n  \"category_id\": \"web_search\",\n  \"category_name\": \"Web Search\",\n  \"description\": \"Search for biographical information about X profile arvin17x across multiple web sources, extract birth year data, verify information accuracy, and compile findings.\",\n  \"author\": \"Arvin Xu\",\n  \"created_at\": \"2025-08-18\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"search aggregation\",\n    \"data extraction\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": null,\n    \"stateContent\": null,\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/playwright/standard/web_search/birth_of_arvinxu/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Playwright web search task.\n\nSimple verification that checks if the AI agent found the correct answer.\nThe expected ground truth answer is configured at the top of the file.\n\"\"\"\n\nimport sys\nimport json\nimport os\nfrom pathlib import Path\nfrom typing import Dict, Any\n\n# =============================================================================\n# CONFIGURATION\n# =============================================================================\n\n# Expected ground truth answer (exact match)\nEXPECTED_GROUND_TRUTH = \"1995\"\n\n# =============================================================================\n# MCP RESULT PARSING\n# =============================================================================\n\n\ndef get_working_directory() -> Path:\n    \"\"\"Get the working directory where messages.json should be.\"\"\"\n    # Priority 1: Use MCP_MESSAGES path if available (most reliable)\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    if messages_path and Path(messages_path).exists():\n        return Path(messages_path).parent.resolve()\n\n    # Priority 2: Use PLAYWRIGHT_WORK_DIR environment variable\n    work_dir = os.getenv(\"PLAYWRIGHT_WORK_DIR\")\n    if work_dir:\n        work_path = Path(work_dir).resolve()\n        if (work_path / \"messages.json\").exists():\n            return work_path\n\n    # Priority 3: Check current directory (fallback)\n    current_dir = Path.cwd()\n    if (current_dir / \"messages.json\").exists():\n        return current_dir\n\n    # Priority 4: Default fallback\n    return Path(\".\").resolve()\n\n\ndef parse_ai_results(work_dir: Path) -> Dict[str, Any]:\n    \"\"\"Parse the AI agent's results from messages.json\"\"\"\n    messages_file = work_dir / \"messages.json\"\n    if not messages_file.exists():\n        return {\"success\": False, \"error\": \"No messages.json found\"}\n\n    try:\n        with open(messages_file, \"r\", 
encoding=\"utf-8\") as f:\n            messages = json.load(f)\n    except (json.JSONDecodeError, IOError) as e:\n        return {\"success\": False, \"error\": f\"Failed to read messages.json: {e}\"}\n\n    # Look for expected answer in the AI's responses\n    found_answer = False\n    ai_responses = []\n\n    for message in messages:\n        if message.get(\"role\") == \"assistant\":\n            content = str(message.get(\"content\", \"\"))\n\n            # Handle both string and list content formats\n            if isinstance(message.get(\"content\"), list):\n                content = \" \".join(\n                    item.get(\"text\", \"\") if isinstance(item, dict) else str(item)\n                    for item in message.get(\"content\", [])\n                )\n\n            ai_responses.append(content)\n\n            # Exact match (character-for-character, case-sensitive, no trimming)\n            if content == EXPECTED_GROUND_TRUTH:\n                found_answer = True\n\n    return {\n        \"success\": True,\n        \"found_answer\": found_answer,\n        \"ai_responses\": ai_responses,\n        \"total_responses\": len(ai_responses),\n    }\n\n\n# =============================================================================\n# MAIN VERIFICATION\n# =============================================================================\n\n\ndef verify_task() -> bool:\n    \"\"\"Verify the AI agent found the correct answer\"\"\"\n\n    # Parse AI agent results\n    work_dir = get_working_directory()\n    print(f\"| Working directory: {work_dir}\")\n\n    ai_results = parse_ai_results(work_dir)\n\n    if not ai_results[\"success\"]:\n        print(f\"| ❌ Could not parse AI results: {ai_results.get('error')}\")\n        return False\n\n    if ai_results[\"found_answer\"]:\n        print(f\"| AI agent correctly identified: {EXPECTED_GROUND_TRUTH}\")\n        return True\n    else:\n        print(f\"| AI agent did not find the correct answer: 
{EXPECTED_GROUND_TRUTH}\")\n        return False\n\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    try:\n        success = verify_task()\n        sys.exit(0 if success else 1)\n    except Exception as e:\n        print(f\"\\n💥 Verification error: {e}\")\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright/standard/web_search/r1_arxiv/content.txt",
    "content": "In this work, we share our journey in enhancing model reasoning abilities through reinforcement learning. DeepSeek-R1-Zero represents a pure RL approach without relying on cold-start data, achieving strong performance across various tasks. DeepSeek-R1 is more powerful, leveraging cold-start data alongside iterative RL fine-tuning. Ultimately, DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217 on a range of tasks.\n\nWe further explore distillation the reasoning capability to small dense models. We use DeepSeek-R1 as the teacher model to generate 800K training samples, and fine-tune several small dense models. The results are promising: DeepSeek-R1-Distill-Qwen-1.5B outperforms GPT-4o and Claude-3.5-Sonnet on math benchmarks with 28.9% on AIME and 83.9% on MATH. Other dense models also achieve impressive results, significantly outperforming other instruction-tuned models based on the same underlying checkpoints.\n\nIn the future, we plan to invest in research across the following directions for DeepSeek-R1.\n\n- **General Capability**: Currently, the capabilities of DeepSeek-R1 fall short of DeepSeek-V3 in tasks such as function calling, multi-turn, complex role-playing, and JSON output. Moving forward, we plan to explore how long CoT can be leveraged to enhance tasks in these fields.\n- **Language Mixing**: DeepSeek-R1 is currently optimized for Chinese and English, which may result in language mixing issues when handling queries in other languages. For instance, DeepSeek-R1 might use English for reasoning and responses, even if the query is in a language other than English or Chinese. We aim to address this limitation in future updates.\n- **Prompting Engineering**: When evaluating DeepSeek-R1, we observe that it is sensitive to prompts. Few-shot prompting consistently degrades its performance. 
Therefore, we recommend users directly describe the problem and specify the output format using a zero-shot setting for optimal results.\n- **Software Engineering Tasks**: Due to the long evaluation times, which impact the efficiency of the RL process, large-scale RL has not been applied extensively in software engineering tasks. As a result, DeepSeek-R1 has not demonstrated a huge improvement over DeepSeek-V3 on software engineering benchmarks. Future versions will address this by implementing rejection sampling on software engineering data or incorporating asynchronous evaluations during the RL process to improve efficiency.\n"
  },
  {
    "path": "tasks/playwright/standard/web_search/r1_arxiv/description.md",
    "content": "# Web Search Task\n\nUse Playwright MCP tools to search for the DeepSeek R1 research paper and extract all the paragraphs of the Conclusion section.\n\n## Requirements:\n\n1. Search for the DeepSeek R1 research paper\n2. Navigate to the paper and find the Conclusion section\n3. Extract **ALL the paragraphs** of the Conclusion section\n4. **Provide the content in Markdown format - no explanations, no additional text**\n\n## Important Notes:\n\n- **Output ALL the paragraphs of text**\n- **Do NOT include any explanations, summaries, or additional content**\n- **The response should contain ONLY the Conclusion section content formatted in Markdown**\n\n## Expected Output:\nAll the paragraphs of the Conclusion section from the DeepSeek R1 paper, formatted in Markdown with proper paragraph structure and formatting.\n"
  },
  {
    "path": "tasks/playwright/standard/web_search/r1_arxiv/meta.json",
    "content": "{\n  \"task_id\": \"r1_arxiv\",\n  \"task_name\": \"R1 Arxiv\",\n  \"category_id\": \"web_search\",\n  \"category_name\": \"Web Search\",\n  \"description\": \"Search arXiv for R1 model research papers, extract technical specifications, analyze methodology sections, compile research findings, and generate comprehensive literature review.\",\n  \"author\": \"Arvin Xu\",\n  \"created_at\": \"2025-08-18\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"search aggregation\",\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": null,\n    \"stateContent\": null,\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": null\n  }\n}\n"
  },
  {
    "path": "tasks/playwright/standard/web_search/r1_arxiv/verify.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nVerification script for Playwright web search task.\n\nSimple verification that checks if the AI agent found the correct Introduction content.\nThe expected ground truth answer is configured at the top of the file.\n\"\"\"\n\nimport sys\nimport json\nimport os\nfrom pathlib import Path\nfrom typing import Dict, Any\n\n# =============================================================================\n# CONFIGURATION\n# =============================================================================\n\n# Expected ground truth content from content.txt\nEXPECTED_CONTENT_FILE = \"content.txt\"\n\n# =============================================================================\n# MCP RESULT PARSING\n# =============================================================================\n\n\ndef get_working_directory() -> Path:\n    \"\"\"Get the working directory where messages.json should be.\"\"\"\n    # Priority 1: Use MCP_MESSAGES path if available (most reliable)\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    if messages_path and Path(messages_path).exists():\n        return Path(messages_path).parent.resolve()\n\n    # Priority 2: Use PLAYWRIGHT_WORK_DIR environment variable\n    work_dir = os.getenv(\"PLAYWRIGHT_WORK_DIR\")\n    if work_dir:\n        work_path = Path(work_dir).resolve()\n        if (work_path / \"messages.json\").exists():\n            return work_path\n\n    # Priority 3: Check current directory (fallback)\n    current_dir = Path.cwd()\n    if (current_dir / \"messages.json\").exists():\n        return current_dir\n\n    # Priority 4: Default fallback\n    return Path(\".\").resolve()\n\n\ndef load_expected_content() -> str:\n    \"\"\"Load the expected content from content.txt\"\"\"\n    # content.txt is in the same directory as verify.py\n    current_file = Path(__file__).resolve()\n    content_file = current_file.parent / EXPECTED_CONTENT_FILE\n\n    if not content_file.exists():\n        print(f\"| 
{EXPECTED_CONTENT_FILE} not found at: {content_file}\")\n        return \"\"\n\n    print(f\"| Found {EXPECTED_CONTENT_FILE} at: {content_file}\")\n\n    try:\n        with open(content_file, \"r\", encoding=\"utf-8\") as f:\n            return f.read().strip()\n    except (IOError, UnicodeDecodeError) as e:\n        print(f\"| Warning: Could not read {content_file}: {e}\")\n        return \"\"\n\n\ndef parse_ai_results(work_dir: Path) -> Dict[str, Any]:\n    \"\"\"Parse the AI agent's results from messages.json\"\"\"\n    messages_file = work_dir / \"messages.json\"\n    if not messages_file.exists():\n        return {\"success\": False, \"error\": \"No messages.json found\"}\n\n    try:\n        with open(messages_file, \"r\", encoding=\"utf-8\") as f:\n            messages = json.load(f)\n    except (json.JSONDecodeError, IOError) as e:\n        return {\"success\": False, \"error\": f\"Failed to read messages.json: {e}\"}\n\n    # Look for extracted content in the AI's responses\n    found_content = False\n    ai_responses = []\n    extracted_content = \"\"\n\n    for message in messages:\n        if message.get(\"role\") == \"assistant\":\n            content = str(message.get(\"content\", \"\"))\n\n            # Handle both string and list content formats\n            if isinstance(message.get(\"content\"), list):\n                content = \" \".join(\n                    item.get(\"text\", \"\") if isinstance(item, dict) else str(item)\n                    for item in message.get(\"content\", [])\n                )\n\n            ai_responses.append(content)\n\n            # Store the last response as extracted content\n            extracted_content = content\n\n    return {\n        \"success\": True,\n        \"found_content\": True,  # Assuming content was found if we have responses\n        \"ai_responses\": ai_responses,\n        \"extracted_content\": extracted_content,\n        \"total_responses\": len(ai_responses),\n    }\n\n\ndef 
compare_content(extracted: str, expected: str) -> Dict[str, Any]:\n    \"\"\"Compare extracted content with expected content\"\"\"\n    if not expected:\n        return {\"success\": False, \"error\": \"No expected content to compare against\"}\n\n    if not extracted:\n        return {\"success\": False, \"error\": \"No extracted content found\"}\n\n    # Normalize content for comparison (remove extra whitespace, normalize line breaks)\n    extracted_normalized = \" \".join(extracted.split())\n    expected_normalized = \" \".join(expected.split())\n\n    # Direct text comparison - content must be exactly the same\n    is_exact_match = extracted_normalized == expected_normalized\n\n    return {\n        \"success\": True,\n        \"is_exact_match\": is_exact_match,\n        \"extracted_length\": len(extracted_normalized),\n        \"expected_length\": len(expected_normalized),\n        \"extracted_preview\": extracted_normalized[:100] + \"...\" if len(extracted_normalized) > 100 else extracted_normalized,\n        \"expected_preview\": expected_normalized[:100] + \"...\" if len(expected_normalized) > 100 else expected_normalized\n    }\n\n\n# =============================================================================\n# MAIN VERIFICATION\n# =============================================================================\n\n\ndef verify_task(work_dir: Path) -> bool:\n    \"\"\"Verify the AI agent found the correct Introduction content\"\"\"\n    print(\"| Verifying Playwright Web Search Task - DeepSeek R1 Introduction\")\n    print(\"| \" + \"=\" * 70)\n\n    # Load expected content\n    print(\"| Loading expected content...\")\n    expected_content = load_expected_content()\n\n    if not expected_content:\n        print(\"| Error: Could not load expected content\")\n        return False\n\n    print(f\"| Expected content loaded ({len(expected_content)} characters)\")\n\n    # Parse MCP messages\n    messages = parse_ai_results(work_dir)\n\n    if not 
messages[\"success\"]:\n        print(f\"| Error: Could not parse AI results: {messages.get('error')}\")\n        return False\n\n    # Extract AI agent response\n    extracted_content = messages.get(\"extracted_content\", \"\")\n\n    if not extracted_content:\n        print(\"| Error: No AI agent response found\")\n        return False\n\n    print(f\"| Extracted content: {len(extracted_content)} characters\")\n\n    # Compare content\n    print(\"| Comparing extracted content with expected content...\")\n    comparison = compare_content(extracted_content, expected_content)\n\n    if not comparison[\"success\"]:\n        print(f\"| Comparison failed: {comparison.get('error')}\")\n        return False\n\n    print(f\"| Content comparison results:\")\n    print(f\"|   - Extracted length: {comparison['extracted_length']} characters\")\n    print(f\"|   - Expected length: {comparison['expected_length']} characters\")\n    print(f\"|   - Extracted preview: {comparison['extracted_preview']}\")\n    print(f\"|   - Expected preview: {comparison['expected_preview']}\")\n\n    if comparison['is_exact_match']:\n        print(\"| Task completed successfully! Content matches exactly.\")\n        return True\n    else:\n        print(\"| Task verification failed. Content does not match exactly.\")\n        return False\n\n\ndef main():\n    \"\"\"Main verification function\"\"\"\n    print(\"| Starting verification...\")\n\n    # Get working directory\n    work_dir = get_working_directory()\n    print(f\"| Working directory: {work_dir}\")\n\n    # Run verification\n    success = verify_task(work_dir)\n\n    if success:\n        sys.exit(0)\n    else:\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/.gitkeep",
    "content": ""
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/ai_data_analyst/description.md",
    "content": "Deliver a bite-sized AI community snapshot by capturing two essential metrics and posting them back to the forum.\n\n**Task Requirements:**\n\n1. Register and log in with username `AIDataAnalyst2025` and password `SecurePass123!` so every action is tied to that account.\n2. Visit the `deeplearning` forum (page 1 only) and count how many posts are visible; note the exact total.\n3. Go to the `MachineLearning` forum, find the post titled `[P] I made a command-line tool that explains your errors using ChatGPT (link in comments)`, and record the vote count shown in the listing.\n4. Still in `MachineLearning`, create a post titled `MachineLearning_Extraction` whose body is exactly the two-line Markdown list below (keep the keys and pipe separators untouched, replacing each `FILL_VALUE` with your numbers):\n\n```\n- Deeplearning_Post_Count|FILL_VALUE\n- ChatGPT_Tool_Vote_Count|FILL_VALUE\n```\n\nNo searches or multi-page navigation are required—just capture these two data points and report them accurately.\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/ai_data_analyst/label.txt",
    "content": "- Deeplearning_Post_Count|25\n- ChatGPT_Tool_Vote_Count|2655\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/ai_data_analyst/meta.json",
    "content": "{\n  \"task_id\": \"ai_data_analyst_easy\",\n  \"task_name\": \"AI Data Analyst (Easy)\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Capture a lightweight AI/ML engagement snapshot by signing in, grabbing three key metrics, and posting them back to the MachineLearning forum.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"user interaction\",\n    \"data extraction\",\n    \"comparative analysis\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/ai_data_analyst/verify.py",
    "content": "import asyncio\nimport os\nimport re\nimport sys\nfrom pathlib import Path\nfrom playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError\n\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\nUSERNAME = \"AIDataAnalyst2025\"\nPASSWORD = \"SecurePass123!\"\nPOST_TITLE = \"MachineLearning_Extraction\"\nREQUIRED_FIELDS = [\n    \"Deeplearning_Post_Count\",\n    \"ChatGPT_Tool_Vote_Count\",\n]\nLABEL_PATH = Path(__file__).parent / \"label.txt\"\n\n\ndef parse_key_value_format(text: str) -> dict:\n    data = {}\n    if not text:\n        return data\n    for line in text.splitlines():\n        line = line.strip()\n        if not line or \"|\" not in line:\n            continue\n        line = re.sub(r\"^[-•*]\\s*\", \"\", line)\n        key, value = line.split(\"|\", 1)\n        data[key.strip()] = value.strip()\n    return data\n\n\ndef load_expected_values() -> dict:\n    if not LABEL_PATH.exists():\n        return {}\n    return parse_key_value_format(LABEL_PATH.read_text(encoding=\"utf-8\"))\n\n\nasync def ensure_logged_in(page) -> bool:\n    print(\"Step 1: Ensuring we are logged in...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n    user_button = page.locator(f'button:has-text(\"{USERNAME}\")')\n    if await user_button.count():\n        print(\"✓ Already logged in\", file=sys.stderr)\n        return True\n\n    login_link = page.locator('a:has-text(\"Log in\")')\n    if not await login_link.count():\n        print(\"FAILED: Login link not found\", file=sys.stderr)\n        return False\n\n    await login_link.click()\n    await page.wait_for_load_state(\"networkidle\")\n    await page.fill('input[name=\"_username\"]', USERNAME)\n    await page.fill('input[name=\"_password\"]', PASSWORD)\n    await page.click('button:has-text(\"Log in\")')\n    await page.wait_for_load_state(\"networkidle\")\n\n    if await 
page.locator(f'button:has-text(\"{USERNAME}\")').count():\n        print(f\"✓ Logged in as {USERNAME}\", file=sys.stderr)\n        return True\n\n    print(\"FAILED: Could not log in with provided credentials\", file=sys.stderr)\n    return False\n\n\nasync def fetch_submission_content(page):\n    print(\"Step 2: Retrieving MachineLearning submission...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/f/MachineLearning\", wait_until=\"networkidle\")\n    post_link = page.locator(f'a:has-text(\"{POST_TITLE}\")')\n    if not await post_link.count():\n        print(\n            f\"FAILED: Submission '{POST_TITLE}' not found in MachineLearning forum\",\n            file=sys.stderr,\n        )\n        return None\n\n    await post_link.first.click()\n    await page.wait_for_load_state(\"networkidle\")\n\n    selectors = [\n        \".submission__body\",\n        \"article\",\n        \".post-body\",\n        \".RichText\",\n        '[class*=\"RichText\"]',\n    ]\n\n    for selector in selectors:\n        locator = page.locator(selector)\n        if await locator.count():\n            content = await locator.first.inner_text()\n            if content:\n                print(f\"✓ Found submission body via selector {selector}\", file=sys.stderr)\n                return content\n\n    print(\"FAILED: Unable to locate submission body content\", file=sys.stderr)\n    return None\n\n\ndef validate_submission(extracted: dict, expected: dict) -> bool:\n    missing = [key for key in REQUIRED_FIELDS if key not in extracted]\n    if missing:\n        print(\n            f\"FAILED: Submission body missing required keys: {', '.join(missing)}\",\n            file=sys.stderr,\n        )\n        return False\n\n    errors = []\n    for key in REQUIRED_FIELDS:\n        actual = extracted.get(key, \"\")\n        expect = expected.get(key, \"\")\n        try:\n            actual_val = int(actual)\n            expect_val = int(expect)\n            if actual_val != expect_val:\n    
            errors.append(f\"{key}: expected {expect_val}, found {actual_val}\")\n        except ValueError:\n            errors.append(f\"{key}: value '{actual}' is not numeric\")\n\n    if errors:\n        print(\"FAILED: Submission values do not match expected data:\", file=sys.stderr)\n        for err in errors:\n            print(f\"  - {err}\", file=sys.stderr)\n        return False\n\n    print(\"✓ Submission content matches expected metrics\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    expected = load_expected_values()\n    if not expected:\n        print(\"FAILED: label.txt with expected values is missing\", file=sys.stderr)\n        return False\n\n\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            if not await ensure_logged_in(page):\n                return False\n\n            content = await fetch_submission_content(page)\n            if not content:\n                return False\n\n            extracted = parse_key_value_format(content)\n            if not validate_submission(extracted, expected):\n                return False\n\n            print(\"SUCCESS: Easy AI data analyst task verified\", file=sys.stderr)\n            return True\n\n        except PlaywrightTimeoutError as exc:\n            print(f\"FAILED: Timeout occurred - {exc}\", file=sys.stderr)\n            return False\n        except Exception as exc:\n            print(f\"FAILED: Unexpected error - {exc}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/llm_research_summary/description.md",
    "content": "Produce a five-line GPT activity bulletin so readers can quickly see how active the MachineLearning forum is.\n\n**Task Requirements:**\n\n1. Register and log in with username `llm_analyst_2024` and password `LLMExpert2024!`.\n2. On the first page of the `MachineLearning` forum, count how many post titles contain `GPT`, `ChatGPT`, or `LLM`; record the exact total.\n3. From that subset, identify the single post with the highest upvote count and note its full title and vote total.\n4. Create a new post titled `LLM Research Summary: GPT Discussions Analysis [2024]` in the same forum. The body must be exactly the Markdown list below with your numbers in place of each `FILL_VALUE` (keep the keys, order, and pipe separator intact):\n\n```\n- Total_LLM_Posts|FILL_VALUE\n- Top1_Title|FILL_VALUE\n- Top1_Upvotes|FILL_VALUE\n```\n\nThat’s the entire easy version—just report the headline LLM count and the hottest related thread.\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/llm_research_summary/label.txt",
    "content": "- Total_LLM_Posts|9\n- Top1_Title|[P] I made a command-line tool that explains your errors using ChatGPT (link in comments)\n- Top1_Upvotes|2655\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/llm_research_summary/meta.json",
    "content": "{\n  \"task_id\": \"llm_research_summary_easy\",\n  \"task_name\": \"LLM Research Summary (Easy)\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Collect the headline GPT metrics from MachineLearning and publish a short five-line recap.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data extraction\",\n    \"search aggregation\",\n    \"content submission\",\n    \"user interaction\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/llm_research_summary/verify.py",
    "content": "import asyncio\nimport os\nimport re\nimport sys\nfrom pathlib import Path\nfrom playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError\n\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\nUSERNAME = \"llm_analyst_2024\"\nPASSWORD = \"LLMExpert2024!\"\nFORUM_SLUG = \"MachineLearning\"\nPOST_TITLE = \"LLM Research Summary: GPT Discussions Analysis [2024]\"\nREQUIRED_FIELDS = [\n    \"Total_LLM_Posts\",\n    \"Top1_Title\",\n    \"Top1_Upvotes\",\n]\nNUMERIC_FIELDS = {\"Total_LLM_Posts\", \"Top1_Upvotes\"}\nLABEL_PATH = Path(__file__).parent / \"label.txt\"\n\n\ndef parse_key_value_format(text: str) -> dict:\n    data = {}\n    if not text:\n        return data\n    for line in text.splitlines():\n        line = line.strip()\n        if not line or \"|\" not in line:\n            continue\n        line = re.sub(r\"^[-•*]\\s*\", \"\", line)\n        key, value = line.split(\"|\", 1)\n        data[key.strip()] = value.strip()\n    return data\n\n\ndef normalize_text(value: str) -> str:\n    if value is None:\n        return \"\"\n    replacements = {\n        \"\\u2019\": \"'\",\n        \"\\u2018\": \"'\",\n        \"\\u201c\": '\"',\n        \"\\u201d\": '\"',\n    }\n    for src, dst in replacements.items():\n        value = value.replace(src, dst)\n    return \" \".join(value.split()).strip()\n\n\ndef load_expected_values() -> dict:\n    if not LABEL_PATH.exists():\n        return {}\n    return parse_key_value_format(LABEL_PATH.read_text(encoding=\"utf-8\"))\n\n\nasync def ensure_logged_in(page) -> bool:\n    print(\"Step 1: Signing in as llm_analyst_2024...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n    user_button = page.locator(f'button:has-text(\"{USERNAME}\")')\n    if await user_button.count():\n        print(\"✓ Already logged in\", file=sys.stderr)\n        return True\n\n    login_link = page.locator('a:has-text(\"Log in\")')\n    
if not await login_link.count():\n        print(\"FAILED: Login link not found\", file=sys.stderr)\n        return False\n\n    await login_link.click()\n    await page.wait_for_load_state(\"networkidle\")\n    await page.fill('input[name=\"_username\"]', USERNAME)\n    await page.fill('input[name=\"_password\"]', PASSWORD)\n    await page.click('button:has-text(\"Log in\")')\n    await page.wait_for_load_state(\"networkidle\")\n\n    if await page.locator(f'button:has-text(\"{USERNAME}\")').count():\n        print(f\"✓ Logged in as {USERNAME}\", file=sys.stderr)\n        return True\n\n    print(\"FAILED: Could not log in with provided credentials\", file=sys.stderr)\n    return False\n\n\nasync def fetch_summary_body(page):\n    print(\"Step 2: Opening MachineLearning summary post...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/f/{FORUM_SLUG}\", wait_until=\"networkidle\")\n    post_link = page.locator(f'a:has-text(\"{POST_TITLE}\")')\n    if not await post_link.count():\n        print(f\"FAILED: Submission '{POST_TITLE}' not found\", file=sys.stderr)\n        return None\n\n    await post_link.first.click()\n    await page.wait_for_load_state(\"networkidle\")\n\n    selectors = [\n        \".submission__body\",\n        \"article\",\n        \".post-body\",\n        \".RichText\",\n        '[class*=\"RichText\"]',\n        'div:has-text(\"Total_LLM_Posts\")',\n    ]\n\n    for selector in selectors:\n        locator = page.locator(selector)\n        if await locator.count():\n            content = await locator.first.inner_text()\n            if content:\n                print(f\"✓ Found summary content via selector {selector}\", file=sys.stderr)\n                return content\n\n    print(\"FAILED: Unable to locate submission body\", file=sys.stderr)\n    return None\n\n\ndef validate_fields(extracted: dict, expected: dict) -> bool:\n    missing = [key for key in REQUIRED_FIELDS if key not in extracted]\n    if missing:\n        print(f\"FAILED: 
Missing required keys: {', '.join(missing)}\", file=sys.stderr)\n        return False\n\n    errors = []\n    for key in REQUIRED_FIELDS:\n        actual = extracted.get(key, \"\")\n        expect = expected.get(key, \"\")\n        if key in NUMERIC_FIELDS:\n            try:\n                actual_val = int(actual)\n                expect_val = int(expect)\n                if actual_val != expect_val:\n                    errors.append(f\"{key}: expected {expect_val}, found {actual_val}\")\n            except ValueError:\n                errors.append(f\"{key}: '{actual}' is not numeric\")\n        else:\n            if normalize_text(actual) != normalize_text(expect):\n                errors.append(f\"{key}: expected '{expect}', found '{actual}'\")\n\n    if errors:\n        print(\"FAILED: Summary values do not match expected data:\", file=sys.stderr)\n        for err in errors:\n            print(f\"  - {err}\", file=sys.stderr)\n        return False\n\n    print(\"✓ Summary values match expected snapshot\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    expected = load_expected_values()\n    if not expected:\n        print(\"FAILED: label.txt is missing\", file=sys.stderr)\n        return False\n\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            if not await ensure_logged_in(page):\n                return False\n\n            content = await fetch_summary_body(page)\n            if not content:\n                return False\n\n            extracted = parse_key_value_format(content)\n            if not validate_fields(extracted, expected):\n                return False\n\n            print(\"SUCCESS: LLM research easy task verified\", file=sys.stderr)\n            return True\n\n        except PlaywrightTimeoutError as exc:\n            print(f\"FAILED: Timeout occurred - 
{exc}\", file=sys.stderr)\n            return False\n        except Exception as exc:\n            print(f\"FAILED: Unexpected error - {exc}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/description.md",
    "content": "Provide a lightweight status report on what’s trending in the movies forum so stakeholders can scan it at a glance.\n\n**Task Requirements:**\n\n1. Register and log in with username `movie_reviewer_2024` and password `movie_reviewer_2024`.\n2. On the first page of the `movies` forum, count how many post titles contain any four-digit year (e.g., 1984, 2024) and record the total.\n3. Still on that page, find the post with the highest upvote count and record its full title as well as the vote and comment counts shown.\n4. Publish a post in the same forum titled `Wonderful Movies Analysis: Community Favorites [2024]`. The body must match the four-line Markdown list below—keep the keys, order, and pipe separators exactly as written while replacing each `FILL_VALUE` with your data:\n\n```\n- Total_Year_Posts|FILL_VALUE\n- Top_Title|FILL_VALUE\n- Top_Upvotes|FILL_VALUE\n- Top_Comments|FILL_VALUE\n```\n\nNo multi-page browsing or special threads are required; this easy task captures just the top signals from the first page.\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/label.txt",
    "content": "- Total_Year_Posts|1\n- Top_Title|Who will win the Oscar for ACTRESS IN A SUPPORTING ROLE?\n- Top_Upvotes|9933\n- Top_Comments|23\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/meta.json",
    "content": "{\n  \"task_id\": \"movie_reviewer_analysis_easy\",\n  \"task_name\": \"Movie Reviewer Analysis (Easy)\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Grab the first-page movie signals plus the Rittenhouse poster stats and share them in a concise recap post.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"user interaction\",\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/movie_reviewer_analysis/verify.py",
    "content": "import asyncio\nimport os\nimport re\nimport sys\nfrom pathlib import Path\nfrom playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError\n\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\nUSERNAME = \"movie_reviewer_2024\"\nPASSWORD = \"movie_reviewer_2024\"\nFORUM_SLUG = \"movies\"\nPOST_TITLE = \"Wonderful Movies Analysis: Community Favorites [2024]\"\nREQUIRED_FIELDS = [\n    \"Total_Year_Posts\",\n    \"Top_Title\",\n    \"Top_Upvotes\",\n    \"Top_Comments\",\n]\nNUMERIC_FIELDS = {\n    \"Total_Year_Posts\",\n    \"Top_Upvotes\",\n    \"Top_Comments\",\n}\nLABEL_PATH = Path(__file__).parent / \"label.txt\"\n\n\ndef parse_key_value_format(text: str) -> dict:\n    data = {}\n    if not text:\n        return data\n    for line in text.splitlines():\n        line = line.strip()\n        if not line or \"|\" not in line:\n            continue\n        line = re.sub(r\"^[-•*]\\s*\", \"\", line)\n        key, value = line.split(\"|\", 1)\n        data[key.strip()] = value.strip()\n    return data\n\n\ndef normalize_text(value: str) -> str:\n    if value is None:\n        return \"\"\n    replacements = {\n        \"\\u2019\": \"'\",\n        \"\\u2018\": \"'\",\n        \"\\u201c\": '\"',\n        \"\\u201d\": '\"',\n    }\n    for src, dst in replacements.items():\n        value = value.replace(src, dst)\n    return \" \".join(value.split()).strip()\n\n\ndef load_expected_values() -> dict:\n    if not LABEL_PATH.exists():\n        return {}\n    return parse_key_value_format(LABEL_PATH.read_text(encoding=\"utf-8\"))\n\n\nasync def ensure_logged_in(page) -> bool:\n    print(\"Step 1: Authenticating movie_reviewer_2024...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n    user_button = page.locator(f'button:has-text(\"{USERNAME}\")')\n    if await user_button.count():\n        print(\"✓ Already logged in\", file=sys.stderr)\n        return 
True\n\n    login_link = page.locator('a:has-text(\"Log in\")')\n    if not await login_link.count():\n        print(\"FAILED: Login link not found\", file=sys.stderr)\n        return False\n\n    await login_link.click()\n    await page.wait_for_load_state(\"networkidle\")\n    await page.fill('input[name=\"_username\"]', USERNAME)\n    await page.fill('input[name=\"_password\"]', PASSWORD)\n    await page.click('button:has-text(\"Log in\")')\n    await page.wait_for_load_state(\"networkidle\")\n\n    if await page.locator(f'button:has-text(\"{USERNAME}\")').count():\n        print(f\"✓ Logged in as {USERNAME}\", file=sys.stderr)\n        return True\n\n    print(\"FAILED: Could not log in with provided credentials\", file=sys.stderr)\n    return False\n\n\nasync def fetch_summary_body(page):\n    print(\"Step 2: Locating the movies summary post...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/f/{FORUM_SLUG}\", wait_until=\"networkidle\")\n    post_link = page.locator(f'a:has-text(\"{POST_TITLE}\")')\n    if not await post_link.count():\n        print(f\"FAILED: Submission '{POST_TITLE}' not found\", file=sys.stderr)\n        return None\n\n    await post_link.first.click()\n    await page.wait_for_load_state(\"networkidle\")\n\n    selectors = [\n        \".submission__body\",\n        \"article\",\n        \".post-body\",\n        \".RichText\",\n        '[class*=\"RichText\"]',\n        'div:has-text(\"Total_Year_Posts\")',\n    ]\n\n    for selector in selectors:\n        locator = page.locator(selector)\n        if await locator.count():\n            content = await locator.first.inner_text()\n            if content:\n                print(f\"✓ Retrieved summary content via selector {selector}\", file=sys.stderr)\n                return content\n\n    print(\"FAILED: Unable to locate submission body\", file=sys.stderr)\n    return None\n\n\ndef validate_summary(extracted: dict, expected: dict) -> bool:\n    missing = [key for key in REQUIRED_FIELDS 
if key not in extracted]\n    if missing:\n        print(f\"FAILED: Missing required keys: {', '.join(missing)}\", file=sys.stderr)\n        return False\n\n    errors = []\n    for key in REQUIRED_FIELDS:\n        actual = extracted.get(key, \"\")\n        expect = expected.get(key, \"\")\n        if key in NUMERIC_FIELDS:\n            try:\n                actual_val = int(actual)\n                expect_val = int(expect)\n                if actual_val != expect_val:\n                    errors.append(f\"{key}: expected {expect_val}, found {actual_val}\")\n            except ValueError:\n                errors.append(f\"{key}: '{actual}' is not numeric\")\n        else:\n            if normalize_text(actual) != normalize_text(expect):\n                errors.append(f\"{key}: expected '{expect}', found '{actual}'\")\n\n    if errors:\n        print(\"FAILED: Summary values differ from expected snapshot:\", file=sys.stderr)\n        for err in errors:\n            print(f\"  - {err}\", file=sys.stderr)\n        return False\n\n    print(\"✓ Summary values match expected data\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    expected = load_expected_values()\n    if not expected:\n        print(\"FAILED: label.txt is missing\", file=sys.stderr)\n        return False\n\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            if not await ensure_logged_in(page):\n                return False\n\n            content = await fetch_summary_body(page)\n            if not content:\n                return False\n\n            extracted = parse_key_value_format(content)\n            if not validate_summary(extracted, expected):\n                return False\n\n            print(\"SUCCESS: Movie analysis easy task verified\", file=sys.stderr)\n            return True\n\n        except 
PlaywrightTimeoutError as exc:\n            print(f\"FAILED: Timeout occurred - {exc}\", file=sys.stderr)\n            return False\n        except Exception as exc:\n            print(f\"FAILED: Unexpected error - {exc}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/description.md",
    "content": "Share a five-line NBA engagement brief so sports moderators can instantly see which thread is dominating.\n\n**Task Requirements:**\n\n1. Register and log in with username `NBA_DataAnalyst_2024` and password `Research#2024!`.\n2. In the `sports` forum (or via the site search for “NBA”), locate the NBA-related post with the highest upvote count you can see. Record its exact title, vote total, comment count, and author username.\n3. Count how many distinct NBA-titled posts you reviewed for this summary (at minimum, include the post from step 2) and store that total as `Total_NBA_Posts`.\n4. Publish a post titled `Statistical Analysis: NBA Content Engagement on This Forum` in the same forum. Its body must consist solely of the Markdown list below; keep the keys/order/pipes untouched while replacing each `FILL_VALUE` with your numbers:\n\n```\n- Total_NBA_Posts|FILL_VALUE\n- Top_Title|FILL_VALUE\n- Top_Votes|FILL_VALUE\n- Top_Comments|FILL_VALUE\n- Top_Author|FILL_VALUE\n```\n\nThis easy edition just reports the leading NBA thread plus the count of posts you reviewed—no deeper profile checks are necessary.\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/label.txt",
    "content": "- Total_NBA_Posts|20\n- Top_Title|Hamby claims [WNBA Champ] Aces 'unprofessional' after trade\n- Top_Votes|614\n- Top_Comments|170\n- Top_Author|Responsible-Lunch815\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/meta.json",
    "content": "{\n  \"task_id\": \"nba_statistics_analysis_easy\",\n  \"task_name\": \"NBA Statistics Analysis (Easy)\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Summarize just the three strongest NBA threads and share their vote/comment stats in a short post.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"user interaction\",\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/nba_statistics_analysis/verify.py",
    "content": "import asyncio\nimport os\nimport re\nimport sys\nfrom pathlib import Path\nfrom playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError\n\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\nUSERNAME = \"NBA_DataAnalyst_2024\"\nPASSWORD = \"Research#2024!\"\nFORUM_SLUG = \"sports\"\nPOST_TITLE = \"Statistical Analysis: NBA Content Engagement on This Forum\"\nREQUIRED_FIELDS = [\n    \"Total_NBA_Posts\",\n    \"Top_Title\",\n    \"Top_Votes\",\n    \"Top_Comments\",\n    \"Top_Author\",\n]\nNUMERIC_FIELDS = {\n    \"Total_NBA_Posts\",\n    \"Top_Votes\",\n    \"Top_Comments\",\n}\nLABEL_PATH = Path(__file__).parent / \"label.txt\"\n\n\ndef parse_key_value_format(text: str) -> dict:\n    data = {}\n    if not text:\n        return data\n    for line in text.splitlines():\n        line = line.strip()\n        if not line or \"|\" not in line:\n            continue\n        line = re.sub(r\"^[-•*]\\s*\", \"\", line)\n        key, value = line.split(\"|\", 1)\n        data[key.strip()] = value.strip()\n    return data\n\n\ndef normalize_text(value: str) -> str:\n    if value is None:\n        return \"\"\n    replacements = {\n        \"\\u2019\": \"'\",\n        \"\\u2018\": \"'\",\n        \"\\u201c\": '\"',\n        \"\\u201d\": '\"',\n    }\n    for src, dst in replacements.items():\n        value = value.replace(src, dst)\n    return \" \".join(value.split()).strip()\n\n\ndef load_expected_values() -> dict:\n    if not LABEL_PATH.exists():\n        return {}\n    return parse_key_value_format(LABEL_PATH.read_text(encoding=\"utf-8\"))\n\n\nasync def ensure_logged_in(page) -> bool:\n    print(\"Step 1: Logging into the sports account...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n    user_button = page.locator(f'button:has-text(\"{USERNAME}\")')\n    if await user_button.count():\n        print(\"✓ Already logged in\", file=sys.stderr)\n        
return True\n\n    login_link = page.locator('a:has-text(\"Log in\")')\n    if not await login_link.count():\n        print(\"FAILED: Login link not found\", file=sys.stderr)\n        return False\n\n    await login_link.click()\n    await page.wait_for_load_state(\"networkidle\")\n    await page.fill('input[name=\"_username\"]', USERNAME)\n    await page.fill('input[name=\"_password\"]', PASSWORD)\n    await page.click('button:has-text(\"Log in\")')\n    await page.wait_for_load_state(\"networkidle\")\n\n    if await page.locator(f'button:has-text(\"{USERNAME}\")').count():\n        print(f\"✓ Logged in as {USERNAME}\", file=sys.stderr)\n        return True\n\n    print(\"FAILED: Could not log in with provided credentials\", file=sys.stderr)\n    return False\n\n\nasync def fetch_summary_body(page):\n    print(\"Step 2: Opening the NBA engagement summary post...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/f/{FORUM_SLUG}\", wait_until=\"networkidle\")\n    post_link = page.locator(f'a:has-text(\"{POST_TITLE}\")')\n    if not await post_link.count():\n        print(f\"FAILED: Submission '{POST_TITLE}' not found\", file=sys.stderr)\n        return None\n\n    await post_link.first.click()\n    await page.wait_for_load_state(\"networkidle\")\n\n    selectors = [\n        \".submission__body\",\n        \"article\",\n        \".post-body\",\n        \".RichText\",\n        '[class*=\"RichText\"]',\n        'div:has-text(\"Total_NBA_Posts\")',\n    ]\n\n    for selector in selectors:\n        locator = page.locator(selector)\n        if await locator.count():\n            content = await locator.first.inner_text()\n            if content:\n                print(f\"✓ Retrieved summary body via selector {selector}\", file=sys.stderr)\n                return content\n\n    print(\"FAILED: Unable to locate submission body\", file=sys.stderr)\n    return None\n\n\ndef validate_summary(extracted: dict, expected: dict) -> bool:\n    missing = [key for key in 
REQUIRED_FIELDS if key not in extracted]\n    if missing:\n        print(f\"FAILED: Missing required keys: {', '.join(missing)}\", file=sys.stderr)\n        return False\n\n    errors = []\n    for key in REQUIRED_FIELDS:\n        actual = extracted.get(key, \"\")\n        expect = expected.get(key, \"\")\n        if key in NUMERIC_FIELDS:\n            try:\n                actual_val = int(actual)\n                expect_val = int(expect)\n                if actual_val != expect_val:\n                    errors.append(f\"{key}: expected {expect_val}, found {actual_val}\")\n            except ValueError:\n                errors.append(f\"{key}: '{actual}' is not numeric\")\n        else:\n            if normalize_text(actual) != normalize_text(expect):\n                errors.append(f\"{key}: expected '{expect}', found '{actual}'\")\n\n    if errors:\n        print(\"FAILED: Summary data does not match expected values:\", file=sys.stderr)\n        for err in errors:\n            print(f\"  - {err}\", file=sys.stderr)\n        return False\n\n    print(\"✓ Summary fields align with expected snapshot\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    expected = load_expected_values()\n    if not expected:\n        print(\"FAILED: label.txt is missing\", file=sys.stderr)\n        return False\n\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            if not await ensure_logged_in(page):\n                return False\n\n            content = await fetch_summary_body(page)\n            if not content:\n                return False\n\n            extracted = parse_key_value_format(content)\n            if not validate_summary(extracted, expected):\n                return False\n\n            print(\"SUCCESS: NBA statistics easy task verified\", file=sys.stderr)\n            return True\n\n   
     except PlaywrightTimeoutError as exc:\n            print(f\"FAILED: Timeout occurred - {exc}\", file=sys.stderr)\n            return False\n        except Exception as exc:\n            print(f\"FAILED: Unexpected error - {exc}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/routine_tracker_forum/description.md",
    "content": "Reshare the most helpful LifeProTips calendar advice so it appears as a fresh routine post for other readers.\n\n**Task Requirements:**\n\n1. Register and log in with username `RoutineTracker2025` and password `DailyRoutine123!`.\n2. Find the LifeProTips thread titled `LPT: Use your calendar as your to-do list. Assigning dedicated time to tasks increases the likelyhood of you acting upon it.` and identify the comment with the highest upvote count. Copy its full text verbatim.\n3. In the `LifeProTips` forum, publish a post titled `My 5-Step Morning Routine That Increased My Productivity by 200%` whose body is exactly the comment text you copied (no additional words or formatting).\n4. Confirm that your new post appears in the LifeProTips listing so community members can immediately see it.\n\nOnly these three actions—copy, repost, verify—are required for the easy version; no extra upvotes or settings changes are necessary.\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/routine_tracker_forum/meta.json",
    "content": "{\n  \"task_id\": \"routine_tracker_forum_easy\",\n  \"task_name\": \"Routine Tracker Forum (Easy)\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Repost the highest-rated LifeProTips calendar advice under a new routine-tracking thread.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"user interaction\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/reddit/routine_tracker_forum/verify.py",
    "content": "import asyncio\nimport os\nimport sys\nfrom playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError\n\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\nUSERNAME = \"RoutineTracker2025\"\nPASSWORD = \"DailyRoutine123!\"\nFORUM_SLUG = \"LifeProTips\"\nPOST_TITLE = \"My 5-Step Morning Routine That Increased My Productivity by 200%\"\nEXPECTED_BODY = (\n    \"As a college student, having a visible reminder of the assignments I have and when they are due is super helpful for me. \"\n    \"It also just feels good to erase them from the board once they are completed.\"\n)\n\n\nasync def ensure_logged_in(page) -> bool:\n    print(\"Step 1: Logging in before verification...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n    user_button = page.locator(f'button:has-text(\"{USERNAME}\")')\n    if await user_button.count():\n        print(\"✓ Already logged in\", file=sys.stderr)\n        return True\n\n    login_link = page.locator('a:has-text(\"Log in\")')\n    if not await login_link.count():\n        print(\"FAILED: Login link not found\", file=sys.stderr)\n        return False\n\n    await login_link.click()\n    await page.wait_for_load_state(\"networkidle\")\n    await page.fill('input[name=\"_username\"]', USERNAME)\n    await page.fill('input[name=\"_password\"]', PASSWORD)\n    await page.click('button:has-text(\"Log in\")')\n    await page.wait_for_load_state(\"networkidle\")\n\n    if await page.locator(f'button:has-text(\"{USERNAME}\")').count():\n        print(f\"✓ Logged in as {USERNAME}\", file=sys.stderr)\n        return True\n\n    print(\"FAILED: Could not log in with provided credentials\", file=sys.stderr)\n    return False\n\n\nasync def verify_post_body(page) -> bool:\n    print(\"Step 2: Validating reposted comment content...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/f/{FORUM_SLUG}\", wait_until=\"networkidle\")\n    
post_link = page.locator(f'a:has-text(\"{POST_TITLE}\")')\n    if not await post_link.count():\n        print(f\"FAILED: Post '{POST_TITLE}' not found in LifeProTips\", file=sys.stderr)\n        return False\n\n    await post_link.first.click()\n    await page.wait_for_load_state(\"networkidle\")\n\n    article = page.locator(\"article\")\n    if not await article.count():\n        print(\"FAILED: Unable to read post body\", file=sys.stderr)\n        return False\n\n    body_text = await article.first.inner_text()\n    if EXPECTED_BODY not in body_text:\n        print(\"FAILED: Post body does not match the copied comment text\", file=sys.stderr)\n        return False\n\n    print(\"✓ Post body matches the expected LifeProTips comment\", file=sys.stderr)\n    return True\n\n\nasync def verify_listing_presence(page) -> bool:\n    print(\"Step 3: Confirming the post appears in the forum listing...\", file=sys.stderr)\n    await page.goto(f\"{BASE_URL}/f/{FORUM_SLUG}\", wait_until=\"networkidle\")\n    post_link = page.locator(f'a:has-text(\"{POST_TITLE}\")')\n    if await post_link.count():\n        print(\"✓ Post is visible in the LifeProTips feed\", file=sys.stderr)\n        return True\n\n    print(\"FAILED: Post missing from forum listing\", file=sys.stderr)\n    return False\n\n\nasync def verify() -> bool:\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            if not await ensure_logged_in(page):\n                return False\n            if not await verify_post_body(page):\n                return False\n            if not await verify_listing_presence(page):\n                return False\n            print(\"SUCCESS: Routine tracker easy task verified\", file=sys.stderr)\n            return True\n        except PlaywrightTimeoutError as exc:\n            print(f\"FAILED: Timeout occurred - {exc}\", 
file=sys.stderr)\n            return False\n        except Exception as exc:\n            print(f\"FAILED: Unexpected error - {exc}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/description.md",
    "content": "Stick to the first three analytical steps from the original workflow so the easy version only inventories bestseller and promo data.\n\n**Task Requirements**\n\n1. If need to login, login with username 'admin' and password 'admin1234'.\n2. **Dashboard stop**: read the first three rows in **Bestsellers** (name, price, quantity) exactly as shown, note the Revenue KPI amount, and look at the **Top Search Terms** widget—if any of those three product names appears there, record it as `term:uses`, otherwise output `No:0`.\n3. **Catalog → Products stop**: search each of the same three bestseller names one at a time and copy their SKU, Qty (inventory column), and Status (Enabled/Disabled) from the grid.\n4. **Marketing → Promotions → Cart Price Rules stop**: set Status = Active, count how many rules are shown, and locate the rule that applies a percentage discount so you can report `rule name:percentage`.\n\nOutput everything using the reduced template below:\n\n```\n<answer>\nBestseller1|name:price:quantity:sku:inventory:status\nBestseller2|name:price:quantity:sku:inventory:status\nBestseller3|name:price:quantity:sku:inventory:status\nTotalRevenue|amount\nBestsellerInSearch|term:count\nPercentageDiscountRule|name:percentage\nActiveRulesCount|count\n</answer>\n```\n\n```\n<answer>\nBestseller1|name:price:quantity:sku:inventory:status\nBestseller2|name:price:quantity:sku:inventory:status\nBestseller3|name:price:quantity:sku:inventory:status\nTotalRevenue|amount\nBestsellerInSearch|term:count\nPercentageDiscountRule|name:percentage\nActiveRulesCount|count\nTotalOrders|count\nMostRecentOrderID|id\nTopCustomer|name:email:group\nSameGroupCustomers|count\n</answer>\n```\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/label.txt",
    "content": "Bestseller1|Sprite Stasis Ball 65 cm:$27.00:6:24-WG082-blue:100:Enabled\nBestseller2|Quest Lumaflex™ Band:$19.00:6:24-UG01:100:Enabled\nBestseller3|Sprite Yoga Strap 6 foot:$14.00:6:24-WG085:100:Enabled\nTotalRevenue|$0.00\nBestsellerInSearch|No:0\nPercentageDiscountRule|20% OFF Ever $200-plus purchase!*:20%\nActiveRulesCount|4\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/meta.json",
    "content": "{\n  \"task_id\": \"fitness_promotion_strategy_easy\",\n  \"task_name\": \"Fitness Promotion Strategy (Easy)\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Capture the three dashboard bestsellers, confirm their catalog details, and snapshot the related promo and customer metrics needed for a quick campaign brief.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"inventory management\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/fitness_promotion_strategy/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n    \n    try:\n        with open(messages_path, 'r') as f:\n            messages = json.load(f)\n        \n        # Find the last assistant message\n        for message in reversed(messages):\n            if message.get('role') == 'assistant' and message.get('status') == 'completed':\n                content = message.get('content', [])\n                for item in content:\n                    if item.get('type') == 'output_text':\n                        return item.get('text', '')\n        \n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n    \n    # Look for <answer>...</answer> pattern\n    match = re.search(r'<answer>(.*?)</answer>', text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n    \n    answer_content = match.group(1).strip()\n    \n    # Parse each line\n    result = {}\n    lines = answer_content.split('\\n')\n    \n    # Skip the check for exact number of lines - just parse what we have\n    # if len(lines) != 13:\n    #     print(f\"Error: Expected 13 lines in answer, got {len(lines)}\", file=sys.stderr)\n    #     return None\n    
\n    for line in lines:\n        if '|' in line:\n            key, value = line.split('|', 1)\n            result[key.strip()] = value.strip()\n    \n    return result\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, 'r') as f:\n            lines = f.read().strip().split('\\n')\n        \n        expected = {}\n        for line in lines:\n            if '|' in line:\n                key, value = line.split('|', 1)\n                expected[key.strip()] = value.strip()\n        \n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n    \n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, '')\n        \n        # Special handling for different types of values\n        if key in ['Bestseller1', 'Bestseller2', 'Bestseller3']:\n            # Check if all parts match (name:price:quantity:sku:inventory:status)\n            if ':' in expected_value and ':' in model_value:\n                expected_parts = expected_value.split(':')\n                model_parts = model_value.split(':')\n                if len(expected_parts) == 6 and len(model_parts) == 6:\n                    # Compare each part\n                    for i, (exp, mod) in enumerate(zip(expected_parts, model_parts)):\n                        if i == 1:  # Price field\n                            exp_clean = exp.replace('$', '').replace(',', '')\n                            mod_clean = 
mod.replace('$', '').replace(',', '')\n                            if exp_clean != mod_clean:\n                                mismatches.append(f\"{key} price: expected '{exp}', got '{mod}'\")\n                        elif i == 4:  # Inventory field (may have decimal places)\n                            exp_float = float(exp.replace(',', ''))\n                            mod_float = float(mod.replace(',', ''))\n                            if abs(exp_float - mod_float) > 0.0001:\n                                mismatches.append(f\"{key} inventory: expected '{exp}', got '{mod}'\")\n                        else:\n                            if exp.lower() != mod.lower():\n                                mismatches.append(f\"{key} part {i}: expected '{exp}', got '{mod}'\")\n                else:\n                    mismatches.append(f\"{key}: format mismatch - expected '{expected_value}', got '{model_value}'\")\n            else:\n                if expected_value != model_value:\n                    mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'LowestInventoryProduct':\n            # Check product name and inventory\n            if ':' in expected_value and ':' in model_value:\n                expected_name, expected_inv = expected_value.rsplit(':', 1)\n                model_name, model_inv = model_value.rsplit(':', 1)\n                if expected_name.lower() != model_name.lower():\n                    mismatches.append(f\"{key} name: expected '{expected_name}', got '{model_name}'\")\n                exp_float = float(expected_inv.replace(',', ''))\n                mod_float = float(model_inv.replace(',', ''))\n                if abs(exp_float - mod_float) > 0.0001:\n                    mismatches.append(f\"{key} inventory: expected '{expected_inv}', got '{model_inv}'\")\n            else:\n                if expected_value != model_value:\n                    mismatches.append(f\"{key}: expected 
'{expected_value}', got '{model_value}'\")\n        \n        elif key in ['TotalRevenue', 'MinimumPurchaseRule']:\n            # For price/amount fields, normalize format\n            expected_clean = expected_value.replace('$', '').replace(',', '')\n            model_clean = model_value.replace('$', '').replace(',', '')\n            if expected_clean != model_clean:\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'BestsellerInSearch':\n            # Check search term and count\n            if expected_value.lower() != model_value.lower():\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'PercentageDiscountRule':\n            # Check rule name and percentage\n            if ':' in expected_value and ':' in model_value:\n                expected_name, expected_pct = expected_value.rsplit(':', 1)\n                model_name, model_pct = model_value.rsplit(':', 1)\n                if expected_name != model_name:\n                    mismatches.append(f\"{key} name: expected '{expected_name}', got '{model_name}'\")\n                # Normalize percentage (20% vs 20 vs 0.20)\n                exp_pct_clean = expected_pct.replace('%', '').strip()\n                mod_pct_clean = model_pct.replace('%', '').strip()\n                if exp_pct_clean != mod_pct_clean:\n                    mismatches.append(f\"{key} percentage: expected '{expected_pct}', got '{model_pct}'\")\n            else:\n                if expected_value != model_value:\n                    mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'TopCustomer':\n            # Check name:email:group\n            if ':' in expected_value and ':' in model_value:\n                expected_parts = expected_value.split(':')\n                model_parts = model_value.split(':')\n                if 
len(expected_parts) == 3 and len(model_parts) == 3:\n                    exp_name, exp_email, exp_group = expected_parts\n                    mod_name, mod_email, mod_group = model_parts\n                    if exp_name != mod_name:\n                        mismatches.append(f\"{key} name: expected '{exp_name}', got '{mod_name}'\")\n                    if exp_email.lower() != mod_email.lower():\n                        mismatches.append(f\"{key} email: expected '{exp_email}', got '{mod_email}'\")\n                    if exp_group.lower() != mod_group.lower():\n                        mismatches.append(f\"{key} group: expected '{exp_group}', got '{mod_group}'\")\n                else:\n                    mismatches.append(f\"{key}: format mismatch - expected '{expected_value}', got '{model_value}'\")\n            else:\n                if expected_value != model_value:\n                    mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'MostRecentOrderDate':\n            # Date format may vary, do flexible comparison\n            if expected_value.lower() == 'none' and model_value.lower() == 'none':\n                continue\n            elif expected_value != model_value:\n                # Could add more flexible date parsing here if needed\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        else:\n            # Exact match for other fields (counts, etc.)\n            if str(model_value) != str(expected_value):\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n    \n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n    \n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the 
expected answer\", file=sys.stderr)\n    return True\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the bestseller analysis and promotion task has been completed correctly.\n    First checks the model's answer against the expected label,\n    then optionally verifies the actual state in the Magento Admin.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n    \n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n    \n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n        \n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n            \n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\"Warning: Could not parse answer format from model response\", file=sys.stderr)\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/description.md",
    "content": "Keep only the first three investigative steps so the easy task focuses on dashboard + tax + order-status insights.\n\n**Task Requirements**\n\n1. If you need to log in, log in with username 'admin' and password 'admin1234'. On the **Dashboard**, record the Lifetime Sales amount, identify the cheapest product in the **Bestsellers** table (note its name, price, and quantity), and check whether that same product appears anywhere in **Last Orders** (output the customer name if yes, otherwise `No`).\n2. Go to **Stores → Taxes → Tax Zones and Rates**. Capture the exact rates for New York and California, specify which state is higher, and count how many distinct U.S. states have entries in the grid.\n3. Still in **Stores**, open **Settings → Order Status**, filter “Visible On Storefront = Yes”, and confirm whether a status with code `processing` exists and whether it’s flagged as a default status.\n\nReport just these metrics in the reduced answer format:\n\n```\n<answer>\nLifetime_Sales_Amount|amount\nCheap_Bestseller_Name|name\nSecond_Bestseller_Price|price\nSecond_Bestseller_Quantity|quantity\nProduct_In_Last_Orders|yes_or_no_or_customer\nNY_Tax_Rate|rate\nCA_Tax_Rate|rate\nHigher_Tax_State|state\nTotal_States_With_Tax|count\nProcessing_Visible_Storefront|Yes_or_No\nProcessing_Default_Status|Yes_or_No\n</answer>\n```\n\nThe longer template below includes fields that are not covered by the steps above; it is shown for reference only, so submit the reduced format:\n\n```\n<answer>\nLifetime_Sales_Amount|amount\nCheap_Bestseller_Name|name\nSecond_Bestseller_Price|price\nSecond_Bestseller_Quantity|quantity\nProduct_In_Last_Orders|yes_or_no\nNY_Tax_Rate|rate\nCA_Tax_Rate|rate\nHigher_Tax_State|state\nTotal_States_With_Tax|count\nProcessing_Visible_Storefront|Yes_or_No\nProcessing_Default_Status|Yes_or_No\nNumber_Of_Websites|count\nMain_Store_Code|code\nDefault_Source_Pickup_Status|status\nDefault_Source_State|state_or_none\nDashboard_Revenue|amount\nTax_Shipping_Zero|yes_or_no\n</answer>\n```\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/label.txt",
    "content": "Lifetime_Sales_Amount|$0.00\nCheap_Bestseller_Name|Sprite Yoga Strap 6 foot\nSecond_Bestseller_Price|$14.00\nSecond_Bestseller_Quantity|6\nProduct_In_Last_Orders|No\nNY_Tax_Rate|8.3750\nCA_Tax_Rate|8.2500\nHigher_Tax_State|NY\nTotal_States_With_Tax|2\nProcessing_Visible_Storefront|Yes\nProcessing_Default_Status|Yes\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/meta.json",
    "content": "{\n  \"task_id\": \"ny_expansion_analysis_easy\",\n  \"task_name\": \"NY Expansion Analysis (Easy)\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Capture just the dashboard, tax, and order-status facts required to judge if New York can launch without heavy configuration work.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/ny_expansion_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"ERROR: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n    \n    # Check if file exists\n    if not Path(messages_path).exists():\n        print(f\"ERROR: Messages file not found at path: {messages_path}\", file=sys.stderr)\n        return None\n    \n    try:\n        with open(messages_path, 'r') as f:\n            content = f.read()\n            \n        # Check if file is empty\n        if not content or content.strip() == '\"\"':\n            print(\"ERROR: Messages file is empty or contains only empty string\", file=sys.stderr)\n            return None\n            \n        messages = json.loads(content)\n        \n        # Check if messages is a list\n        if not isinstance(messages, list):\n            print(f\"ERROR: Messages file should contain a list, got {type(messages).__name__}\", file=sys.stderr)\n            return None\n        \n        # Find the last assistant message\n        for message in reversed(messages):\n            if message.get('role') == 'assistant' and message.get('status') == 'completed':\n                content = message.get('content', [])\n                if not content:\n                    print(\"WARNING: Assistant message has empty content\", file=sys.stderr)\n                    continue\n                    \n                for item in content:\n                    if item.get('type') == 'output_text':\n                        text = item.get('text', '')\n                        if not text:\n                            print(\"WARNING: Output text is empty\", 
file=sys.stderr)\n                            continue\n                        return text\n        \n        print(\"ERROR: No assistant response with output_text found in messages\", file=sys.stderr)\n        return None\n    except json.JSONDecodeError as e:\n        print(f\"ERROR: Invalid JSON in messages file: {str(e)}\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"ERROR: Unexpected error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        print(\"ERROR: No text provided to parse\", file=sys.stderr)\n        return None\n    \n    # Look for <answer>...</answer> pattern\n    match = re.search(r'<answer>(.*?)</answer>', text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        print(\"ERROR: No <answer> tags found in the response\", file=sys.stderr)\n        print(f\"  Response preview: {text[:200]}...\", file=sys.stderr)\n        return None\n    \n    answer_content = match.group(1).strip()\n    \n    if not answer_content:\n        print(\"ERROR: Empty content between <answer> tags\", file=sys.stderr)\n        return None\n    \n    # Parse each line\n    result = {}\n    lines = answer_content.split('\\n')\n    \n    # Expected keys that should be present\n    expected_keys = [\n        'Lifetime_Sales_Amount', 'Cheap_Bestseller_Name', 'Second_Bestseller_Price',\n        'Second_Bestseller_Quantity', 'Product_In_Last_Orders', 'NY_Tax_Rate',\n        'CA_Tax_Rate', 'Higher_Tax_State', 'Total_States_With_Tax',\n        'Processing_Visible_Storefront', 'Processing_Default_Status'\n    ]\n    \n    parsed_keys = []\n    for line in lines:\n        line = line.strip()\n        if not line:\n            continue\n            \n        if '|' not in line:\n            print(f\"ERROR: Line missing 
pipe separator '|': {line}\", file=sys.stderr)\n            continue\n            \n        parts = line.split('|', 1)\n        if len(parts) != 2:\n            print(f\"ERROR: Invalid line format: {line}\", file=sys.stderr)\n            continue\n            \n        key, value = parts\n        key = key.strip()\n        value = value.strip()\n        \n        if not key:\n            print(f\"ERROR: Empty key in line: {line}\", file=sys.stderr)\n            continue\n            \n        result[key] = value\n        parsed_keys.append(key)\n    \n    # Check for missing expected keys\n    missing_keys = set(expected_keys) - set(parsed_keys)\n    if missing_keys:\n        print(f\"ERROR: Missing expected keys: {', '.join(sorted(missing_keys))}\", file=sys.stderr)\n        \n    # Check for unexpected keys\n    unexpected_keys = set(parsed_keys) - set(expected_keys)\n    if unexpected_keys:\n        print(f\"WARNING: Unexpected keys found: {', '.join(sorted(unexpected_keys))}\", file=sys.stderr)\n    \n    if not result:\n        print(\"ERROR: No valid key-value pairs parsed from answer\", file=sys.stderr)\n        return None\n    \n    return result\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, 'r') as f:\n            lines = f.read().strip().split('\\n')\n        \n        expected = {}\n        for line in lines:\n            if '|' in line:\n                key, value = line.split('|', 1)\n                expected[key.strip()] = value.strip()\n        \n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    
\"\"\"\n    if not model_answer or not expected_answer:\n        return False\n    \n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, '')\n        \n        # Special handling for different types of values\n        if key in ['Lifetime_Sales_Amount', 'Second_Bestseller_Price', 'Dashboard_Revenue']:\n            # For price/amount fields, normalize format\n            expected_clean = expected_value.replace('$', '').replace(',', '')\n            model_clean = model_value.replace('$', '').replace(',', '')\n            if expected_clean != model_clean:\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key in ['NY_Tax_Rate', 'CA_Tax_Rate']:\n            # Tax rates - allow different decimal formats\n            expected_clean = expected_value.replace('%', '').strip()\n            model_clean = model_value.replace('%', '').strip()\n            # Convert to float for comparison\n            try:\n                if float(expected_clean) != float(model_clean):\n                    mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n            except ValueError:\n                if expected_clean != model_clean:\n                    mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key in ['Product_In_Last_Orders', 'Processing_Visible_Storefront', 'Processing_Default_Status']:\n            # Yes/No fields - case insensitive\n            if model_value.lower() != expected_value.lower():\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'Empty_Rows_Yes_Effect':\n            # Allow flexible descriptions for this field\n            # Just check if model provided some reasonable description\n            if not model_value or len(model_value) < 
5:\n                mismatches.append(f\"{key}: expected meaningful description, got '{model_value}'\")\n        \n        elif key == 'Order_Status_Options':\n            # Check if main options are mentioned\n            expected_options = set(opt.strip() for opt in expected_value.split(','))\n            model_options = set(opt.strip() for opt in model_value.split(','))\n            if expected_options != model_options:\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'Chart_Disabled_Message':\n            # Allow some flexibility in message text\n            # Check for key words\n            if 'disabled' not in model_value.lower() and 'enable' not in model_value.lower():\n                mismatches.append(f\"{key}: expected message about chart being disabled, got '{model_value}'\")\n        \n        elif key == 'Default_Source_State':\n            # Handle 'None' or empty state\n            expected_normalized = expected_value.lower() if expected_value.lower() != 'none' else ''\n            model_normalized = model_value.lower() if model_value.lower() != 'none' else ''\n            if expected_normalized != model_normalized:\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n    \n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n    \n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the NY 
expansion analysis task has been completed correctly.\n    First checks the model's answer against the expected label,\n    then optionally verifies the actual state in the Magento Admin.\n    \"\"\"\n    print(\"\\n=== Starting Verification ===\", file=sys.stderr)\n    \n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n    \n    # Load expected answer\n    print(\"Loading expected answer from label.txt...\", file=sys.stderr)\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"FATAL ERROR: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n    \n    print(f\"Expected answer loaded with {len(expected_answer)} keys\", file=sys.stderr)\n    \n    # Get model's response from MCP_MESSAGES\n    print(\"\\nReading model response from MCP_MESSAGES...\", file=sys.stderr)\n    model_response = get_model_response()\n    \n    if not model_response:\n        print(\"FATAL ERROR: No valid model response found\", file=sys.stderr)\n        return False\n    \n    print(f\"Model response found (length: {len(model_response)} chars)\", file=sys.stderr)\n    print(\"\\nParsing answer format from model response...\", file=sys.stderr)\n    \n    model_answer = parse_answer_format(model_response)\n    \n    if not model_answer:\n        print(\"FATAL ERROR: Could not parse answer format from model response\", file=sys.stderr)\n        return False\n    \n    print(f\"\\n=== Model Answer Parsed Successfully ===\", file=sys.stderr)\n    print(f\"Parsed {len(model_answer)} key-value pairs\", file=sys.stderr)\n    \n    for key, value in model_answer.items():\n        print(f\"  {key}: {value}\", file=sys.stderr)\n    \n    # Compare answers\n    print(\"\\n=== Comparing Model Answer with Expected Answer ===\", file=sys.stderr)\n    answer_match = compare_answers(model_answer, expected_answer)\n    \n    if not answer_match:\n        print(\"\\nFATAL ERROR: Model 
answer does not match expected answer\", file=sys.stderr)\n        print(\"Verification FAILED\", file=sys.stderr)\n        return False\n    \n    print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n    print(\"Verification PASSED\", file=sys.stderr)\n    return True\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/description.md",
    "content": "Only keep the first few catalog and dashboard checks plus the high-level orders snapshot.\n\n**Task Requirements**\n\n1. If you need to log in, log in with username 'admin' and password 'admin1234'.\n2. **Catalog → Products**: search for product names containing `Yoga` and capture the records-found count; reset filters and look up SKU `WH11` to copy its exact price; reset again and set Quantity (From/To) = `0.0000` to count all zero-quantity products.\n3. **Dashboard**: in the Bestsellers table sort by price ascending—record the lowest-priced row as `name:quantity`, then locate `Quest Lumaflex™ Band` and note its quantity, and read the Revenue KPI amount.\n4. **Sales → Orders**: filter Status = Pending to count those orders, then search for Grace Nguyen, switch Status = Complete, sort Grand Total descending, and record the Order # of the most expensive completed order.\n\nReturn just these metrics:\n\n```\n<answer>\nYogaProducts|count\nWH11Price|price\nZeroQuantityProducts|count\nLowestProduct|name:quantity\nQuestLumaflexQuantity|quantity\nDashboardRevenue|amount\nPendingOrders|count\nGraceNguyenOrderID|orderid\n</answer>\n```\n\nThe longer template below adds `SarahMillerEmail` and `TotalCustomers`, which are not covered by the steps above; it is shown for reference only, so submit the eight-line format above:\n\n```\n<answer>\nYogaProducts|count\nWH11Price|price\nZeroQuantityProducts|count\nLowestProduct|name:quantity\nQuestLumaflexQuantity|quantity\nDashboardRevenue|amount\nSarahMillerEmail|email\nTotalCustomers|count\nPendingOrders|count\nGraceNguyenOrderID|orderid\n</answer>\n```\n\n**Example Output (uses the longer template; omit the `SarahMillerEmail` and `TotalCustomers` lines when submitting):**\n```\n<answer>\nYogaProducts|XX\nWH11Price|$XX.XX\nZeroQuantityProducts|XX\nLowestProduct|Product Name Here:XX\nQuestLumaflexQuantity|XX\nDashboardRevenue|$XX.XX\nSarahMillerEmail|email@example.com\nTotalCustomers|XX\nPendingOrders|X\nGraceNguyenOrderID|00000XXXX\n</answer>\n```\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/label.txt",
    "content": "YogaProducts|171\nWH11Price|$54.00\nZeroQuantityProducts|150\nLowestProduct|Sprite Stasis Ball 55 cm foot:5\nQuestLumaflexQuantity|6\nDashboardRevenue|$0.00\nPendingOrders|10\nGraceNguyenOrderID|000000189\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/meta.json",
    "content": "{\n  \"task_id\": \"products_sales_analysis_easy\",\n  \"task_name\": \"Products Sales Analysis (Easy)\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Make a single guided pass through Catalog, Dashboard, and Orders to collect the exact fields needed for a quick sales recap.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/products_sales_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        print(\"Error: No text provided to parse\", file=sys.stderr)\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        print(\"Error: No <answer>...</answer> tags found in response\", file=sys.stderr)\n        return None\n\n    answer_content = match.group(1).strip()\n    if not answer_content:\n        print(\"Error: Empty answer content\", file=sys.stderr)\n        
return None\n\n    # Parse each line\n    result = {}\n    lines = [line.strip() for line in answer_content.split(\"\\n\") if line.strip()]\n\n    if len(lines) != 8:\n        print(f\"Error: Expected 8 lines in answer, got {len(lines)}\", file=sys.stderr)\n        print(f\"Lines found: {lines}\", file=sys.stderr)\n        return None\n\n    # Expected keys for validation\n    expected_keys = [\n        \"YogaProducts\", \"WH11Price\", \"ZeroQuantityProducts\", \"LowestProduct\",\n        \"QuestLumaflexQuantity\", \"DashboardRevenue\", \"PendingOrders\",\n        \"GraceNguyenOrderID\"\n    ]\n\n    for line in lines:\n        if \"|\" not in line:\n            print(f\"Error: Line missing '|' separator: {line}\", file=sys.stderr)\n            return None\n        \n        parts = line.split(\"|\", 1)\n        if len(parts) != 2:\n            print(f\"Error: Invalid line format: {line}\", file=sys.stderr)\n            return None\n            \n        key, value = parts[0].strip(), parts[1].strip()\n        \n        if not key or not value:\n            print(f\"Error: Empty key or value in line: {line}\", file=sys.stderr)\n            return None\n            \n        result[key] = value\n\n    # Validate all expected keys are present\n    missing_keys = set(expected_keys) - set(result.keys())\n    if missing_keys:\n        print(f\"Error: Missing required keys: {missing_keys}\", file=sys.stderr)\n        return None\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        
print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key == \"LowestProduct\":\n            # Check if product name and quantity match (format: \"Product Name:quantity\")\n            if \":\" in expected_value and \":\" in model_value:\n                expected_name, expected_qty = expected_value.rsplit(\":\", 1)\n                model_name, model_qty = model_value.rsplit(\":\", 1)\n                if expected_name != model_name or expected_qty != model_qty:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n            else:\n                if expected_value != model_value:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key in [\"WH11Price\", \"DashboardRevenue\"]:\n            # For price/amount fields, normalize format\n            expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n            model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n            if expected_clean != model_clean:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"SarahMillerEmail\":\n            # Email should match exactly\n            if model_value.lower() != expected_value.lower():\n   
             mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the products and sales analysis task has been completed correctly.\n    First checks the model's answer against the expected label,\n    then optionally verifies the actual state in the Magento Admin.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n\n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n\n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel 
answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\n                \"Warning: Could not parse answer format from model response\",\n                file=sys.stderr,\n            )\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/description.md",
    "content": "Retain just the first three analytic arenas—products, orders, and the dashboard—so the easy task stays read-only and short.\n\n**Task Requirements**\n\n1. If need to login, login with username 'admin' and password 'admin1234', then open **Catalog → Products**. Search for names containing `Sprite` to get their count, reset and set Quantity (From/To) = `100.0000` to count those rows, and finally reset to look up SKU `WS12` so you can copy its exact name and price.\n2. Switch to **Sales → Orders**. Filter Status = Pending to count those orders, then search for Grace Nguyen with Status = Complete, sort Grand Total ascending, and capture the cheapest completed order ID. Clear filters, sort Grand Total descending, and record the top row’s customer and amount.\n3. Finish in **Dashboard**. Sort **Bestsellers** by Quantity descending to capture the first row’s name and quantity, locate `Overnight Duffle` in that table to note its price, and check the **Top Search Terms** widget to see what position `hollister` occupies.\n\nAnswer with the reduced template:\n\n```\n<answer>\nSpriteProducts|count\nQuantity100Products|count\nWS12Info|name:price\nPendingOrders|count\nGraceOrderID|orderid\nHighestOrderInfo|customer:amount\nCheapProduct|name:quantity\nOvernightDufflePrice|price\nHollisterPosition|position\n</answer>\n```\n\n```\n<answer>\nSpriteProducts|count\nQuantity100Products|count\nWS12Info|name:price\nPendingOrders|count\nGraceOrderID|orderid\nHighestOrderInfo|customer:amount\nCheapProduct|name:quantity\nOvernightDufflePrice|price\nHollisterPosition|position\nCostelloCustomers|count\nSarahMillerInfo|group:date\nPaidInvoices|count\nInvoice002BillTo|name\n</answer>\n```\n\n**Example Output:**\n```\n<answer>\nSpriteProducts|XX\nQuantity100Products|XX\nWS12Info|Product Name Here:$XX.XX\nPendingOrders|X\nGraceOrderID|00000XXXX\nHighestOrderInfo|Customer Name:$XXX.XX\nCheapProduct|Product 
Name:XX\nOvernightDufflePrice|$XX.XX\nHollisterPosition|Xth\nCostelloCustomers|X\nSarahMillerInfo|Group Name:MMM DD, YYYY\nPaidInvoices|X\nInvoice002BillTo|Customer Name\n</answer>\n```\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/label.txt",
    "content": "SpriteProducts|16\nQuantity100Products|1886\nWS12Info|Radiant Tee:$22.00\nPendingOrders|10\nGraceOrderID|000000114\nHighestOrderInfo|Samantha Jones:$292.40\nCheapProduct|Sprite Yoga Strap 6 foot:6\nOvernightDufflePrice|$45.00\nHollisterPosition|1st\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/meta.json",
    "content": "{\n  \"task_id\": \"sales_inventory_analysis_easy\",\n  \"task_name\": \"Sales Inventory Analysis (Easy)\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Follow one guided tour through Products, Orders, Dashboard, Customers, and Invoices to capture a compact set of sales-plus-inventory facts.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"inventory management\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/sales_inventory_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message with type='message', status='completed'\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n                and message.get(\"type\") == \"message\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    # Check for both 'text' and 'output_text' types\n                    if item.get(\"type\") in [\"text\", \"output_text\"]:\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        print(\"ERROR: No text provided to parse\", file=sys.stderr)\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        print(\"ERROR: No <answer>...</answer> tags found in the response\", 
file=sys.stderr)\n        print(\"Response text preview (first 200 chars):\", text[:200], file=sys.stderr)\n        return None\n\n    answer_content = match.group(1).strip()\n    print(f\"Found answer content with {len(answer_content)} characters\", file=sys.stderr)\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n    \n    # Expected keys for this task\n    expected_keys = [\n        \"SpriteProducts\", \"Quantity100Products\", \"WS12Info\", \"PendingOrders\",\n        \"GraceOrderID\", \"HighestOrderInfo\", \"CheapProduct\", \"OvernightDufflePrice\",\n        \"HollisterPosition\"\n    ]\n\n    if len(lines) != 9:\n        print(f\"ERROR: Expected 9 lines in answer, got {len(lines)}\", file=sys.stderr)\n        print(f\"Lines found: {lines}\", file=sys.stderr)\n        return None\n\n    for i, line in enumerate(lines, 1):\n        if \"|\" not in line:\n            print(f\"ERROR: Line {i} does not contain pipe separator '|': '{line}'\", file=sys.stderr)\n            return None\n        \n        parts = line.split(\"|\", 1)\n        if len(parts) != 2:\n            print(f\"ERROR: Line {i} could not be split into key|value: '{line}'\", file=sys.stderr)\n            return None\n            \n        key, value = parts\n        result[key.strip()] = value.strip()\n    \n    # Check if all expected keys are present\n    missing_keys = set(expected_keys) - set(result.keys())\n    if missing_keys:\n        print(f\"ERROR: Missing expected keys: {missing_keys}\", file=sys.stderr)\n        print(f\"Keys found: {list(result.keys())}\", file=sys.stderr)\n        return None\n    \n    # Check for unexpected keys\n    extra_keys = set(result.keys()) - set(expected_keys)\n    if extra_keys:\n        print(f\"WARNING: Unexpected keys found: {extra_keys}\", file=sys.stderr)\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the 
expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key == \"WS12Info\":\n            # Check if product name and price match (format: name:price)\n            if \":\" in expected_value and \":\" in model_value:\n                expected_name, expected_price = expected_value.rsplit(\":\", 1)\n                model_name, model_price = model_value.rsplit(\":\", 1)\n                # Normalize price format\n                expected_price_clean = expected_price.replace(\"$\", \"\").replace(\",\", \"\")\n                model_price_clean = model_price.replace(\"$\", \"\").replace(\",\", \"\")\n                if (\n                    expected_name != model_name\n                    or expected_price_clean != model_price_clean\n                ):\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n            else:\n                if expected_value != model_value:\n                    mismatches.append(\n                        f\"{key}: expected 
'{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"GraceOrderID\":\n            # Order ID should start with \"000\" and match exactly\n            if not model_value.startswith(\"000\"):\n                mismatches.append(\n                    f\"{key}: expected to start with '000', got '{model_value}'\"\n                )\n            elif model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"HighestOrderInfo\":\n            # Check format customer:amount\n            if \":\" in expected_value and \":\" in model_value:\n                expected_customer, expected_amount = expected_value.rsplit(\":\", 1)\n                model_customer, model_amount = model_value.rsplit(\":\", 1)\n                # Normalize amount format\n                expected_amount_clean = expected_amount.replace(\"$\", \"\").replace(\n                    \",\", \"\"\n                )\n                model_amount_clean = model_amount.replace(\"$\", \"\").replace(\",\", \"\")\n                if (\n                    expected_customer != model_customer\n                    or expected_amount_clean != model_amount_clean\n                ):\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n            else:\n                if expected_value != model_value:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"Position2Product\":\n            # Check if product name and quantity match\n            if \":\" in expected_value and \":\" in model_value:\n                expected_name, expected_qty = expected_value.rsplit(\":\", 1)\n                model_name, model_qty = model_value.rsplit(\":\", 1)\n    
            if expected_name != model_name or expected_qty != model_qty:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n            else:\n                if expected_value != model_value:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"OvernightDufflePrice\":\n            # Normalize price format\n            expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n            model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n            if expected_clean != model_clean:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"HollisterPosition\":\n            # Position format (1st, 2nd, 3rd, etc.)\n            if model_value.lower() != expected_value.lower():\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"SarahMillerInfo\":\n            # Format: group:date\n            if \":\" in expected_value and \":\" in model_value:\n                expected_group, expected_date = expected_value.split(\":\", 1)\n                model_group, model_date = model_value.split(\":\", 1)\n                # Allow some flexibility in date format\n                if expected_group != model_group:\n                    mismatches.append(\n                        f\"{key}: expected group '{expected_group}', got '{model_group}'\"\n                    )\n                # For date, check if key parts match\n                if not (expected_date in model_date or model_date in expected_date):\n                    mismatches.append(\n                        f\"{key}: expected date '{expected_date}', got 
'{model_date}'\"\n                    )\n            else:\n                if expected_value != model_value:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"Invoice002BillTo\":\n            # Name should match exactly\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        else:\n            # Exact match for count fields and other numeric values\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the sales and inventory analysis task has been completed correctly.\n    First checks the model's answer against the expected label,\n    then optionally verifies the actual state in the Magento Admin.\n    \"\"\"\n    print(\"\\n\" + \"=\"*60, file=sys.stderr)\n    print(\"Starting verification of Task 5\", file=sys.stderr)\n    print(\"=\"*60, file=sys.stderr)\n    \n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    print(\"\\n--- Loading Expected Answer ---\", file=sys.stderr)\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"FATAL ERROR: Could not load expected answer from label.txt\", file=sys.stderr)\n        return 
False\n    print(f\"Successfully loaded {len(expected_answer)} expected values\", file=sys.stderr)\n\n    # Get model's response from MCP_MESSAGES\n    print(\"\\n--- Loading Model Response ---\", file=sys.stderr)\n    model_response = get_model_response()\n    if not model_response:\n        print(\"FATAL ERROR: No model response found in MCP_MESSAGES\", file=sys.stderr)\n        return False\n    \n    print(f\"Found model response ({len(model_response)} characters)\", file=sys.stderr)\n    \n    print(\"\\n--- Parsing Answer Format ---\", file=sys.stderr)\n    model_answer = parse_answer_format(model_response)\n    \n    if not model_answer:\n        print(\"\\nFATAL ERROR: Could not parse answer format from model response\", file=sys.stderr)\n        print(\"Verification FAILED\", file=sys.stderr)\n        return False\n    \n    print(\"\\n=== Model Answer Successfully Parsed ===\", file=sys.stderr)\n    for key, value in model_answer.items():\n        print(f\"  {key}: {value}\", file=sys.stderr)\n\n    # Compare answers\n    print(\"\\n--- Comparing Answers ---\", file=sys.stderr)\n    answer_match = compare_answers(model_answer, expected_answer)\n    \n    if not answer_match:\n        print(\"\\n\" + \"=\"*60, file=sys.stderr)\n        print(\"VERIFICATION FAILED: Model answer does not match expected answer\", file=sys.stderr)\n        print(\"=\"*60, file=sys.stderr)\n        return False\n    \n    print(\"\\n\" + \"=\"*60, file=sys.stderr)\n    print(\"✓ VERIFICATION PASSED: Model answer matches expected answer\", file=sys.stderr)\n    print(\"=\"*60, file=sys.stderr)\n    return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/description.md",
    "content": "Limit the search intelligence pass to the first three steps from the original task so it’s just two Search Terms views plus one dashboard glance.\n\n**Task Requirements**\n\n1. If need to login, login with username 'admin' and password 'admin1234'.\n2. **Marketing → SEO & Search → Search Terms**: filter for queries containing `tank` to count them, reset and filter Results = 0 to count zero-result terms, then filter Uses ≥ 11 to capture the highest-use row and list every term whose Results are between 20 and 30 (join as `term:results`, or use `None:0` if none). Remove filters when done.\n3. **Reports → Search Terms**: set Hits ≥ 16 and record the filtered count, then add ID range 10–15 and capture the row with the most Results, and finally switch Store View to “Default Store View” to count those entries.\n4. **Dashboard**: in **Top Search Terms** list the entries whose Results = 1 (format `term:uses` joined with `|` or `None:0`), in **Last Search Terms** pick the row with the highest combination of Results and Uses, and in **Bestsellers** copy the product + quantity shown at position #3.\n\nReturn only these data points:\n\n```\n<answer>\nTankSearchCount|count\nZeroResultsCount|count\nHighestUseTerm|term:uses\nResults20to30Term|term1:results1|term2:results2|...\nHits15PlusCount|count\nID10to15MaxResults|term:results\nDefaultStoreViewCount|count\nOneResultTerm|term1:uses1|term2:uses2|...\nHighestResultLastSearch|term:results\nPosition3Bestseller|product:quantity\n</answer>\n```\n\n```\n<answer>\nTankSearchCount|count\nZeroResultsCount|count\nHighestUseTerm|term:uses\nResults20to30Term|term1:results1|term2:result2|term3:result3|...\nHits15PlusCount|count\nID10to15MaxResults|term:results\nDefaultStoreViewCount|count\nOneResultTerm|term1:uses1|term2:uses2|term3:uses3|...\nHighestResultLastSearch|term:results\nPosition3Bestseller|product:quantity\nTopUseTerm|term:uses\nFirstNonZeroResult|term:results\nTotalUniqueTerms|count\n</answer>\n```\n\n**Example 
Output:**\n```\n<answer>\nTankSearchCount|X\nZeroResultsCount|X\nHighestUseTerm|search_term:XX\nResults20to30Term|search_term1:XX1|search_term2:XX2|search_term3:XX3|...\nHits15PlusCount|X\nID10to15MaxResults|Product Name:XX\nDefaultStoreViewCount|X\nOneResultTerm|search_term1:XX1|search_term2:XX2|search_term3:XX3|...\nHighestResultLastSearch|search_term:XX\nPosition3Bestseller|Product Name:X\nTopUseTerm|search_term:XX\nFirstNonZeroResult|search_term:X\nTotalUniqueTerms|X\n</answer>\n```\n\n**Success Criteria:**\n- Successfully logged into Magento Admin\n- Applied complex search filters in Search Terms section\n- Used range filters for results and hits\n- Sorted columns to find specific records\n- Navigated between different report views\n- Extracted data from filtered and sorted results\n- Counted records accurately after applying filters\n- Output answer in exact format with 13 data lines\n- Answer wrapped in <answer> tags\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/label.txt",
    "content": "TankSearchCount|2\nZeroResultsCount|1\nHighestUseTerm|hollister:19\nResults20to30Term|Antonia Racer Tank:23|tanks:23\nHits15PlusCount|1\nID10to15MaxResults|Antonia Racer Tank:23\nDefaultStoreViewCount|7\nOneResultTerm|hollister:19|WP10:1\nHighestResultLastSearch|Antonia Racer Tank:23\nPosition3Bestseller|Sprite Stasis Ball 65 cm:6\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/meta.json",
    "content": "{\n  \"task_id\": \"search_filtering_operations_easy\",\n  \"task_name\": \"Search Filtering Operations (Easy)\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Follow a clearly guided path through Search Terms, the Search Terms report, and the dashboard widgets to capture the metrics needed for a focused search-behavior brief.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}\n"
  },
  {
    "path": "tasks/playwright_webarena/easy/shopping_admin/search_filtering_operations/verify.py",
    "content": "import re\nimport json\nimport os\nimport sys\n\n\ndef verify(messages):\n    \"\"\"\n    Verify that the agent has successfully performed complex search and filtering operations\n    in the Magento Admin panel and extracted all required information correctly.\n\n    Args:\n        messages: List of message dictionaries containing the conversation\n\n    Returns:\n        Dictionary with 'valid' boolean and 'reason' string\n    \"\"\"\n\n    # Find the last assistant message with status \"completed\" and type \"message\"\n    answer_content = None\n    for message in reversed(messages):\n        if (\n            message.get(\"role\") == \"assistant\"\n            and message.get(\"status\") == \"completed\"\n            and message.get(\"type\") == \"message\"\n            and message.get(\"content\")\n        ):\n            # Extract text from content structure\n            content = message[\"content\"]\n            if isinstance(content, list):\n                for item in content:\n                    if isinstance(item, dict) and item.get(\"type\") == \"output_text\":\n                        text = item.get(\"text\", \"\")\n                        # Look for answer tags with case-insensitive search\n                        answer_match = re.search(\n                            r\"<answer>(.*?)</answer>\", text, re.DOTALL | re.IGNORECASE\n                        )\n                        if answer_match:\n                            answer_content = answer_match.group(1).strip()\n                            break\n            elif isinstance(content, str):\n                # Look for answer tags in string content\n                answer_match = re.search(r\"<answer>(.*?)</answer>\", content, re.DOTALL | re.IGNORECASE)\n                if answer_match:\n                    answer_content = answer_match.group(1).strip()\n                    break\n\n            if answer_content:\n                break\n\n    if not answer_content:\n        
return {\"valid\": False, \"reason\": \"No answer found in <answer> tags\"}\n\n    # Expected format - each line should have a key|value pair\n    expected_keys = [\n        \"TankSearchCount\",\n        \"ZeroResultsCount\",\n        \"HighestUseTerm\",\n        \"Results20to30Term\",\n        \"Hits15PlusCount\",\n        \"ID10to15MaxResults\",\n        \"DefaultStoreViewCount\",\n        \"OneResultTerm\",\n        \"HighestResultLastSearch\",\n        \"Position3Bestseller\",\n    ]\n\n    # Parse the answer\n    lines = answer_content.strip().split(\"\\n\")\n\n    # Check if we have exactly 10 lines\n    if len(lines) != 10:\n        return {\"valid\": False, \"reason\": f\"Expected 10 data lines, found {len(lines)}\"}\n\n    # Parse each line and validate format\n    extracted_data = {}\n    for line in lines:\n        if \"|\" not in line:\n            return {\n                \"valid\": False,\n                \"reason\": f\"Invalid format in line: {line}. Expected 'key|value' format\",\n            }\n\n        parts = line.split(\"|\", 1)\n        if len(parts) != 2:\n            return {\"valid\": False, \"reason\": f\"Invalid format in line: {line}\"}\n\n        key, value = parts\n        extracted_data[key] = value\n\n    # Check all required keys are present\n    missing_keys = set(expected_keys) - set(extracted_data.keys())\n    if missing_keys:\n        return {\n            \"valid\": False,\n            \"reason\": f\"Missing required keys: {', '.join(missing_keys)}\",\n        }\n\n    # Validate specific data formats and expected values based on the current data\n\n    # 1. 
TankSearchCount should be a number (2 terms containing 'tank')\n    if not extracted_data[\"TankSearchCount\"].isdigit():\n        return {\n            \"valid\": False,\n            \"reason\": f\"TankSearchCount should be a number, got: {extracted_data['TankSearchCount']}\",\n        }\n\n    # Expected: \"Antonia Racer Tank\" and \"tanks\" contain 'tank'\n    if extracted_data[\"TankSearchCount\"] != \"2\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"TankSearchCount should be '2', got: {extracted_data['TankSearchCount']}\",\n        }\n\n    # 2. ZeroResultsCount should be a number (nike has 0 results)\n    if not extracted_data[\"ZeroResultsCount\"].isdigit():\n        return {\n            \"valid\": False,\n            \"reason\": f\"ZeroResultsCount should be a number, got: {extracted_data['ZeroResultsCount']}\",\n        }\n\n    if extracted_data[\"ZeroResultsCount\"] != \"1\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"ZeroResultsCount should be '1', got: {extracted_data['ZeroResultsCount']}\",\n        }\n\n    # 3. HighestUseTerm should be in format \"term:uses\"\n    if \":\" not in extracted_data[\"HighestUseTerm\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"HighestUseTerm should be in format 'term:uses', got: {extracted_data['HighestUseTerm']}\",\n        }\n\n    # hollister has 19 uses (highest among terms with > 10 uses)\n    if extracted_data[\"HighestUseTerm\"] != \"hollister:19\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"HighestUseTerm should be 'hollister:19', got: {extracted_data['HighestUseTerm']}\",\n        }\n\n    # 4. 
Results20to30Term should be in format \"term:results\"\n    if \":\" not in extracted_data[\"Results20to30Term\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"Results20to30Term should be in format 'term:results', got: {extracted_data['Results20to30Term']}\",\n        }\n\n    # Both \"tanks\" and \"Antonia Racer Tank\" have 23 results (between 20-30)\n    valid_results20to30 = [\"tanks:23\", \"Antonia Racer Tank:23\"]\n    # Check if answer contains one of the valid values or both separated by |\n    if not any(\n        val in extracted_data[\"Results20to30Term\"] for val in valid_results20to30\n    ):\n        return {\n            \"valid\": False,\n            \"reason\": f\"Results20to30Term should contain 'tanks:23' or 'Antonia Racer Tank:23', got: {extracted_data['Results20to30Term']}\",\n        }\n\n    # 5. Hits15PlusCount should be a number (only hollister has 19 hits > 15)\n    if not extracted_data[\"Hits15PlusCount\"].isdigit():\n        return {\n            \"valid\": False,\n            \"reason\": f\"Hits15PlusCount should be a number, got: {extracted_data['Hits15PlusCount']}\",\n        }\n\n    if extracted_data[\"Hits15PlusCount\"] != \"1\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"Hits15PlusCount should be '1', got: {extracted_data['Hits15PlusCount']}\",\n        }\n\n    # 6. 
ID10to15MaxResults should be in format \"term:results\"\n    if \":\" not in extracted_data[\"ID10to15MaxResults\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"ID10to15MaxResults should be in format 'term:results', got: {extracted_data['ID10to15MaxResults']}\",\n        }\n\n    # ID 11 is hollister (1 result), ID 13 is Antonia Racer Tank (23 results)\n    if extracted_data[\"ID10to15MaxResults\"] != \"Antonia Racer Tank:23\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"ID10to15MaxResults should be 'Antonia Racer Tank:23', got: {extracted_data['ID10to15MaxResults']}\",\n        }\n\n    # 7. DefaultStoreViewCount should be a number (all 7 terms are from Default Store View)\n    if not extracted_data[\"DefaultStoreViewCount\"].isdigit():\n        return {\n            \"valid\": False,\n            \"reason\": f\"DefaultStoreViewCount should be a number, got: {extracted_data['DefaultStoreViewCount']}\",\n        }\n\n    if extracted_data[\"DefaultStoreViewCount\"] != \"7\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"DefaultStoreViewCount should be '7', got: {extracted_data['DefaultStoreViewCount']}\",\n        }\n\n    # 8. OneResultTerm should be in format \"term:uses\"\n    if \":\" not in extracted_data[\"OneResultTerm\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"OneResultTerm should be in format 'term:uses', got: {extracted_data['OneResultTerm']}\",\n        }\n\n    # Both hollister and WP10 have exactly 1 result\n    valid_one_result = [\"hollister:19\", \"WP10:1\"]\n    if not any(val in extracted_data[\"OneResultTerm\"] for val in valid_one_result):\n        return {\n            \"valid\": False,\n            \"reason\": f\"OneResultTerm should contain 'hollister:19' or 'WP10:1', got: {extracted_data['OneResultTerm']}\",\n        }\n\n    # 9. 
HighestResultLastSearch should be in format \"term:results\"\n    if \":\" not in extracted_data[\"HighestResultLastSearch\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"HighestResultLastSearch should be in format 'term:results', got: {extracted_data['HighestResultLastSearch']}\",\n        }\n\n    # In Last Search Terms: tanks and Antonia Racer Tank both have 23 results (highest)\n    valid_highest_last = [\"tanks:23\", \"Antonia Racer Tank:23\"]\n    if not any(\n        val in extracted_data[\"HighestResultLastSearch\"] for val in valid_highest_last\n    ):\n        return {\n            \"valid\": False,\n            \"reason\": f\"HighestResultLastSearch should contain 'tanks:23' or 'Antonia Racer Tank:23', got: {extracted_data['HighestResultLastSearch']}\",\n        }\n\n    # 10. Position3Bestseller should be in format \"product:quantity\"\n    if \":\" not in extracted_data[\"Position3Bestseller\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"Position3Bestseller should be in format 'product:quantity', got: {extracted_data['Position3Bestseller']}\",\n        }\n\n    # Position 3 in Bestsellers is \"Sprite Stasis Ball 65 cm\" with quantity 6\n    if extracted_data[\"Position3Bestseller\"] != \"Sprite Stasis Ball 65 cm:6\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"Position3Bestseller should be 'Sprite Stasis Ball 65 cm:6', got: {extracted_data['Position3Bestseller']}\",\n        }\n\n    # All validations passed\n    return {\n        \"valid\": True,\n        \"reason\": \"All complex search and filtering operations completed successfully\",\n    }\n\n\nif __name__ == \"__main__\":\n    # Load messages from environment variable\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    if not messages_path:\n        print(\n            json.dumps(\n                {\"valid\": False, \"reason\": \"MCP_MESSAGES environment variable not set\"}\n            )\n      
  )\n        exit(1)\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n    except Exception as e:\n        print(\n            json.dumps({\"valid\": False, \"reason\": f\"Failed to load messages: {str(e)}\"})\n        )\n        exit(1)\n\n    # Run verification\n    result = verify(messages)\n    print(json.dumps(result))\n    # Exit with appropriate code based on verification result\n    sys.exit(0 if result[\"valid\"] else 1)\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/ai_data_analyst/description.md",
    "content": "I'm conducting an AI research analysis and need specific data from this forum platform. Please help me gather information about AI discussions and community engagement.\n\n**Task Requirements:**\n\n1. Create a new account with the exact username 'AIDataAnalyst2025' and password 'SecurePass123!'\n\n2. After logging in, navigate to the deeplearning forum and count the exact number of posts visible on the first page\n\n3. Go to the MachineLearning forum and find the post titled '[P] I made a command-line tool that explains your errors using ChatGPT (link in comments)' - record its exact vote count\n\n4. Use the search bar to search for 'transformer' and record the exact title of the third search result\n\n5. In the MachineLearning forum, click on the 'Sort by: Hot' button and change it to 'New'. Navigate to page 2 of the results and find the post with the highest upvote count on that page:\n   - Record the exact post title\n   - Record the exact vote count\n   - Click through to the post and find the last comment\n   - Record the username of the last commenter\n   - Record the exact text of the last comment\n\n6. After collecting all the data, go to the MachineLearning forum and submit a new post with:\n   - Title: \"MachineLearning_Extraction\"\n   - Body text must be EXACTLY these lines without anything else (keep the keys as-is, only replace the values after the pipe, follow the markdown format):\n\n```\n- Deeplearning_Post_Count|FILL_VALUE\n- ChatGPT_Tool_Vote_Count|FILL_VALUE\n- Transformer_Third_Result|FILL_VALUE\n- Page2_Top_Post_Title|FILL_VALUE\n- Page2_Top_Post_Votes|FILL_VALUE\n- Page2_Last_Comment_Username|FILL_VALUE\n- Page2_Last_Comment_Text|FILL_VALUE\n```"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/ai_data_analyst/label.txt",
    "content": "- Deeplearning_Post_Count|25\n- ChatGPT_Tool_Vote_Count|2655\n- Transformer_Third_Result|[R] The Table Feature Transformation Library Release\n- Page2_Top_Post_Title|[D]GPT-4 might be able to tell you if it hallucinated\n- Page2_Top_Post_Votes|634\n- Page2_Last_Comment_Username|Nous_AI\n- Page2_Last_Comment_Text|Fascinating."
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/ai_data_analyst/meta.json",
    "content": "{\n  \"task_id\": \"ai_data_analyst\",\n  \"task_name\": \"AI Data Analyst\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Create account on forum platform, collect AI/ML discussion metrics including post counts, vote data, and analyze community engagement patterns through systematic data extraction.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"user interaction\",\n    \"data extraction\",\n    \"comparative analysis\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/ai_data_analyst/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nfrom pathlib import Path\nfrom playwright.async_api import (\n    async_playwright,\n    TimeoutError as PlaywrightTimeoutError,\n)\n\n# 从环境变量读取 base_url，默认回退到本地\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\n\n\ndef parse_key_value_format(text):\n    \"\"\"\n    Parse the Key|Value format from the submission body using regex.\n    Works with markdown format using pipe separators, with or without list markers.\n    \"\"\"\n    data = {}\n\n    # Define patterns for each field using pipe separator\n    # Optional list markers (-, •, *) at the beginning\n    patterns = {\n        \"Deeplearning_Post_Count\": r\"(?:[-•*]\\s*)?Deeplearning_Post_Count\\s*\\|\\s*(\\d+)\",\n        \"ChatGPT_Tool_Vote_Count\": r\"(?:[-•*]\\s*)?ChatGPT_Tool_Vote_Count\\s*\\|\\s*(\\d+)\",\n        \"Transformer_Third_Result\": r\"(?:[-•*]\\s*)?Transformer_Third_Result\\s*\\|\\s*(.+?)(?=\\n|$)\",\n        \"Page2_Top_Post_Title\": r\"(?:[-•*]\\s*)?Page2_Top_Post_Title\\s*\\|\\s*(.+?)(?=\\n|$)\",\n        \"Page2_Top_Post_Votes\": r\"(?:[-•*]\\s*)?Page2_Top_Post_Votes\\s*\\|\\s*(\\d+)\",\n        \"Page2_Last_Comment_Username\": r\"(?:[-•*]\\s*)?Page2_Last_Comment_Username\\s*\\|\\s*(.+?)(?=\\n|$)\",\n        \"Page2_Last_Comment_Text\": r\"(?:[-•*]\\s*)?Page2_Last_Comment_Text\\s*\\|\\s*(.+?)(?=\\n|$)\",\n    }\n\n    # Extract each field using regex\n    for key, pattern in patterns.items():\n        match = re.search(pattern, text, re.MULTILINE)\n        if match:\n            # For text fields, clean up newlines and extra spaces\n            value = match.group(1).strip()\n            if key not in [\n                \"Deeplearning_Post_Count\",\n                \"ChatGPT_Tool_Vote_Count\",\n                \"Page2_Top_Post_Votes\",\n            ]:\n                # Replace newlines with spaces and normalize whitespace\n                value = \" \".join(value.split())\n            
data[key] = value\n\n    return data\n\n\ndef normalize_text(text):\n    \"\"\"\n    Normalize text for comparison by handling different quote styles and whitespace.\n    \"\"\"\n    if not isinstance(text, str):\n        return str(text)\n\n    # Replace various quote styles with standard quotes\n    text = text.replace(\"\"\", \"'\").replace(\"\"\", \"'\")\n    text = text.replace('\"', '\"').replace('\"', '\"')\n\n    # Normalize whitespace\n    text = \" \".join(text.split())\n\n    return text.strip()\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the ML extraction task has been completed correctly by checking the forum post.\n    \"\"\"\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            # Navigate to the main page\n            print(\"Navigating to forum...\", file=sys.stderr)\n            await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n\n            # Step 1: Check if account was created correctly by trying to login\n            print(\"Step 1: Verifying account creation...\", file=sys.stderr)\n            user_button = page.locator('button:has-text(\"AIDataAnalyst2025\")')\n            if not await user_button.count():\n                # Try to login\n                print(\"Attempting to login with AIDataAnalyst2025...\", file=sys.stderr)\n\n                # Click login link\n                await page.click('a:has-text(\"Log in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                # Fill login form\n                await page.fill('input[name=\"_username\"]', \"AIDataAnalyst2025\")\n                await page.fill('input[name=\"_password\"]', \"SecurePass123!\")\n\n                # Submit login form\n                await page.click('button:has-text(\"Log in\")')\n                await 
page.wait_for_load_state(\"networkidle\")\n\n                # Check if login successful\n                user_button = page.locator('button:has-text(\"AIDataAnalyst2025\")')\n                if not await user_button.count():\n                    print(\"FAILED: Account AIDataAnalyst2025 with password SecurePass123! cannot be logged in\", file=sys.stderr)\n                    print(\"This means the account was not created correctly\", file=sys.stderr)\n                    return False\n\n                print(\"PASSED: Successfully logged in as AIDataAnalyst2025\", file=sys.stderr)\n            else:\n                print(\"PASSED: Already logged in as AIDataAnalyst2025\", file=sys.stderr)\n\n            # Step 2: Check if submission was created correctly in MachineLearning forum\n            print(\"\\nStep 2: Verifying submission creation...\", file=sys.stderr)\n            print(\"Navigating to MachineLearning forum...\", file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/f/MachineLearning\", wait_until=\"networkidle\"\n            )\n\n            # Look for the post with title \"MachineLearning_Extraction\"\n            print(\n                \"Looking for submission with title 'MachineLearning_Extraction'...\",\n                file=sys.stderr,\n            )\n            post_link = page.locator('a:has-text(\"MachineLearning_Extraction\")')\n\n            if not await post_link.count():\n                print(\n                    \"FAILED: Could not find submission with title 'MachineLearning_Extraction' in MachineLearning forum\",\n                    file=sys.stderr,\n                )\n                return False\n            \n            print(\"PASSED: Found submission 'MachineLearning_Extraction' in MachineLearning forum\", file=sys.stderr)\n\n            # Step 3: Check submission content matches expected values\n            print(\"\\nStep 3: Verifying submission content...\", file=sys.stderr)\n            \n           
 # Click on the submission to view its content\n            await post_link.first.click()\n            await page.wait_for_load_state(\"networkidle\")\n\n            # Extract the submission body content\n            # Try multiple possible selectors for the post body\n            post_content = None\n            selectors = [\n                \".submission__body\",\n                \".post-body\",\n                \".RichText\",\n                '[class*=\"RichText\"]',\n                'div:has(> p:has-text(\"Deeplearning_Post_Count\"))',\n                'div:has-text(\"Deeplearning_Post_Count\"):has-text(\"Page2_Last_Comment_Text\")',\n            ]\n\n            for selector in selectors:\n                content_element = page.locator(selector)\n                if await content_element.count():\n                    post_content = await content_element.first.inner_text()\n                    if \"Deeplearning_Post_Count\" in post_content:\n                        print(\n                            f\"Found submission content using selector: {selector}\",\n                            file=sys.stderr,\n                        )\n                        break\n\n            if not post_content or \"Deeplearning_Post_Count\" not in post_content:\n                print(\n                    \"FAILED: Could not find submission body with required format\",\n                    file=sys.stderr,\n                )\n                print(\n                    \"Expected body to contain 'Deeplearning_Post_Count' in pipe-separated format\",\n                    file=sys.stderr,\n                )\n                return False\n\n            print(\"Found submission body content\", file=sys.stderr)\n            print(f\"Raw content preview: {post_content[:200]}...\", file=sys.stderr)\n\n            # Parse the Key: Value format\n            extracted_data = parse_key_value_format(post_content)\n            print(f\"Extracted data: {extracted_data}\", file=sys.stderr)\n\n 
           # Load expected values from label.txt\n            label_path = Path(__file__).parent / \"label.txt\"\n            if label_path.exists():\n                with open(label_path, \"r\") as f:\n                    expected_text = f.read().strip()\n                expected_data = parse_key_value_format(expected_text)\n                print(\"Loaded expected values from label.txt\", file=sys.stderr)\n\n            # Verify all required keys are present\n            required_keys = [\n                \"Deeplearning_Post_Count\",\n                \"ChatGPT_Tool_Vote_Count\",\n                \"Transformer_Third_Result\",\n                \"Page2_Top_Post_Title\",\n                \"Page2_Top_Post_Votes\",\n                \"Page2_Last_Comment_Username\",\n                \"Page2_Last_Comment_Text\",\n            ]\n\n            missing_keys = []\n            for key in required_keys:\n                if key not in extracted_data:\n                    missing_keys.append(key)\n\n            if missing_keys:\n                print(\n                    \"FAILED: Missing required keys in submission: {', '.join(missing_keys)}\",\n                    file=sys.stderr,\n                )\n                print(\n                    \"Expected all 7 fields to be present in pipe-separated format\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Validate data format and content\n            errors = []\n\n            # Check numeric fields\n            try:\n                post_count = int(extracted_data[\"Deeplearning_Post_Count\"])\n                if (\n                    \"expected_data\" in locals()\n                    and \"Deeplearning_Post_Count\" in expected_data\n                ):\n                    expected_count = int(expected_data[\"Deeplearning_Post_Count\"])\n                    if post_count != expected_count:\n                        errors.append(\n                            
f\"Deeplearning_Post_Count mismatch: got {post_count}, expected {expected_count}\"\n                        )\n            except ValueError:\n                errors.append(\n                    f\"Deeplearning_Post_Count must be a number, got: {extracted_data['Deeplearning_Post_Count']}\"\n                )\n\n            # If we have expected data, compare against it\n            if \"expected_data\" in locals():\n                # Compare each field\n                for key in required_keys:\n                    if key in expected_data and key in extracted_data:\n                        expected_val = normalize_text(expected_data[key])\n                        actual_val = normalize_text(extracted_data[key])\n\n                        # For numeric fields, compare as integers\n                        if key in [\n                            \"Deeplearning_Post_Count\",\n                            \"ChatGPT_Tool_Vote_Count\",\n                            \"Page2_Top_Post_Votes\",\n                        ]:\n                            try:\n                                expected_int = int(expected_val)\n                                actual_int = int(actual_val)\n                                if expected_int != actual_int:\n                                    errors.append(\n                                        f\"{key} mismatch: got {actual_int}, expected {expected_int}\"\n                                    )\n                            except ValueError:\n                                errors.append(\n                                    f\"{key} should be numeric: got '{actual_val}'\"\n                                )\n                        else:\n                            # For text fields, compare normalized text\n                            if expected_val != actual_val:\n                                errors.append(\n                                    f\"{key} mismatch: got '{actual_val}', expected '{expected_val}'\"\n                    
            )\n\n            else:\n                # If no expected data, just do basic validation\n                for key in required_keys:\n                    if key not in extracted_data:\n                        errors.append(f\"Missing required key: {key}\")\n                    elif (\n                        not extracted_data[key] or extracted_data[key] == \"[FILL_VALUE]\"\n                    ):\n                        errors.append(f\"{key} was not filled in\")\n\n            if errors:\n                print(\n                    \"FAILED: Content validation failed with the following issues:\",\n                    file=sys.stderr,\n                )\n                for error in errors:\n                    print(f\"  - {error}\", file=sys.stderr)\n                print(\"\\nExpected values from label.txt:\", file=sys.stderr)\n                if \"expected_data\" in locals():\n                    for key in required_keys:\n                        if key in expected_data:\n                            print(f\"  {key}: {expected_data[key]}\", file=sys.stderr)\n                return False\n\n            # All checks passed\n            print(\"\\n=== VERIFICATION SUCCESSFUL ===\")\n            print(\"✓ Step 1: Account AIDataAnalyst2025 can login with password SecurePass123!\")\n            print(\"✓ Step 2: Submission 'MachineLearning_Extraction' found in MachineLearning forum\")\n            print(\"✓ Step 3: All submission content matches expected values:\")\n            print(f\"  - Deeplearning_Post_Count: {extracted_data['Deeplearning_Post_Count']}\")\n            print(f\"  - ChatGPT_Tool_Vote_Count: {extracted_data['ChatGPT_Tool_Vote_Count']}\")\n            print(f\"  - Transformer_Third_Result: {extracted_data['Transformer_Third_Result']}\")\n            print(f\"  - Page2_Top_Post_Title: {extracted_data['Page2_Top_Post_Title']}\")\n            print(f\"  - Page2_Top_Post_Votes: {extracted_data['Page2_Top_Post_Votes']}\")\n            
print(f\"  - Page2_Last_Comment_Username: {extracted_data['Page2_Last_Comment_Username']}\")\n            print(f\"  - Page2_Last_Comment_Text: {extracted_data['Page2_Last_Comment_Text']}\")\n            print(\"✓ All data in correct pipe-separated markdown format\")\n            return True\n\n        except PlaywrightTimeoutError as e:\n            print(f\"Error: Timeout occurred - {str(e)}\", file=sys.stderr)\n            return False\n        except Exception as e:\n            print(f\"Error: Unexpected error - {str(e)}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/budget_europe_travel/description.md",
    "content": "Create a comprehensive budget travel resource for European travel on the Postmill forum platform by establishing a dedicated travel community with integrated wiki guides and engaging content.\n\n**Task Requirements:**\n\n1. Register a new account with username \"EuroTravelPlanner\" and password \"SecureTravel2024!\"\n\n2. After successful registration and login, search for \"Europe travel budget\" using the search bar to check for existing content\n\n3. Create a new forum dedicated to budget European travel:\n   - Forum name: \"BudgetEuropeTravel\"\n   - Title: \"Budget Travel Europe\"\n   - Description: \"Community for sharing money-saving tips for European travel\"\n   - Sidebar content: \"Share your best European travel deals and budget tips here!\"\n\n4. Create a comprehensive wiki page for your forum:\n   - Path: \"europe-travel-budget-guide\"\n   - Title: \"Complete Budget Travel Guide for Europe 2024\"\n   - Content must contain: \"Eurail passes and budget airlines\"\n\n5. Navigate to your newly created forum and create your first post:\n   - Title: \"My 14-day Europe trip for under 1000 - Complete itinerary\"\n   - Body must contain: \"budget guide wiki\"\n\n6. Search for \"travel insurance Europe\" using the search function:\n   - Upvote the first post in the search results\n\n7. Navigate to user settings and configure:\n   - Set timezone to \"Europe/Amsterdam\" (to match your travel planning focus)\n   - Ensure \"Notify on reply\" is checked (enabled) for community engagement"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/budget_europe_travel/meta.json",
    "content": "{\n  \"task_id\": \"budget_europe_travel\",\n  \"task_name\": \"Budget Europe Travel\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Create comprehensive budget travel resource for European destinations by establishing dedicated travel community with integrated wiki guides and engaging travel content.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content submission\",\n    \"user interaction\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/budget_europe_travel/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nfrom pathlib import Path\nfrom playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError\n\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\n\ndef normalize_text(text):\n    \"\"\"\n    Normalize text for comparison by handling different quote styles and whitespace.\n    \"\"\"\n    if not isinstance(text, str):\n        return str(text)\n    \n    # Replace various quote styles with standard quotes\n    text = text.replace('\\'', \"'\").replace('\\'', \"'\")\n    text = text.replace('\"', '\"').replace('\"', '\"')\n    text = text.replace('&amp;', '&')\n    \n    # Normalize whitespace\n    text = ' '.join(text.split())\n    \n    return text.strip()\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the budget Europe travel resource task has been completed correctly.\n    \"\"\"\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n        \n        verification_passed = True\n        \n        try:\n\n            # 1. 
Check if account can login with correct credentials\n            print(\"=\"*60)\n            print(\"Step 1: Verifying account login with credentials...\", file=sys.stderr)\n            print(\"=\"*60)\n            await page.goto(f\"{BASE_URL}/\", wait_until='networkidle')\n            \n            # First logout if already logged in\n            user_button = page.locator('button:has-text(\"EuroTravelPlanner\")')\n            if await user_button.count():\n                print(\"Already logged in, logging out first...\", file=sys.stderr)\n                await user_button.click()\n                logout_link = page.locator('a:has-text(\"Log out\")')\n                if await logout_link.count():\n                    await logout_link.click()\n                    await page.wait_for_load_state('networkidle')\n            \n            # Now try to login with the specified credentials\n            print(\"Attempting to login with username 'EuroTravelPlanner' and password 'SecureTravel2024!'...\", file=sys.stderr)\n            \n            # Navigate to login page\n            login_link = page.locator('a:has-text(\"Log in\")')\n            if await login_link.count():\n                await login_link.click()\n                await page.wait_for_load_state('networkidle')\n            else:\n                print(\"❌ ERROR: Cannot find login link\", file=sys.stderr)\n                verification_passed = False\n                \n            if verification_passed:\n                # Fill login form with exact credentials\n                await page.fill('input[name=\"_username\"]', 'EuroTravelPlanner')\n                await page.fill('input[name=\"_password\"]', 'SecureTravel2024!')\n                \n                # Submit login\n                login_button = page.locator('button[type=\"submit\"]:has-text(\"Log in\")')\n                if not await login_button.count():\n                    login_button = page.locator('button:has-text(\"Log in\")')\n        
        \n                await login_button.click()\n                await page.wait_for_load_state('networkidle')\n                \n                # Verify login success\n                user_button = page.locator('button:has-text(\"EuroTravelPlanner\")')\n                if not await user_button.count():\n                    print(\"❌ ERROR: Login failed with username 'EuroTravelPlanner' and password 'SecureTravel2024!'\", file=sys.stderr)\n                    verification_passed = False\n                else:\n                    print(\"✓ Account login successful with correct credentials\", file=sys.stderr)\n            \n\n            # 2. Check if forum exists and has correct properties\n            print(\"\\n\" + \"=\"*60)\n            print(\"Step 2: Checking forum existence and properties...\", file=sys.stderr)\n            print(\"=\"*60)\n            \n            # Check if forum exists at /f/BudgetEuropeTravel\n            await page.goto(f\"{BASE_URL}/f/BudgetEuropeTravel\", wait_until='networkidle')\n            \n            # Check if we get 404 or the forum exists\n            page_content = await page.content()\n            page_title = await page.title()\n            \n\n            if \"404\" in page_title or \"not found\" in page_title.lower() or \"Page not found\" in page_content:\n                print(\"❌ ERROR: Forum /f/BudgetEuropeTravel does not exist (404)\", file=sys.stderr)\n                verification_passed = False\n            else:\n                print(\"✓ Forum /f/BudgetEuropeTravel exists\", file=sys.stderr)\n                \n                # Navigate to edit page to check properties\n                await page.goto(f\"{BASE_URL}/f/BudgetEuropeTravel/edit\", wait_until='networkidle')\n                \n                # Check if we can access edit page\n                edit_page_content = await page.content()\n                edit_page_title = await page.title()\n                \n                if \"404\" in 
edit_page_title or \"not found\" in edit_page_title.lower() or \"Page not found\" in edit_page_content:\n                    print(\"❌ ERROR: Cannot access forum edit page at /f/BudgetEuropeTravel/edit\", file=sys.stderr)\n                    verification_passed = False\n                else:\n                    print(\"✓ Forum edit page accessible\", file=sys.stderr)\n                    \n                    # Check forum title\n                    title_input = page.locator('input[name*=\"title\"], input#forum_title')\n                    if await title_input.count():\n                        title_value = await title_input.input_value()\n                        if title_value != \"Budget Travel Europe\":\n                            print(f\"❌ ERROR: Forum title is '{title_value}', expected 'Budget Travel Europe'\", file=sys.stderr)\n                            verification_passed = False\n                        else:\n                            print(\"✓ Forum title correct: 'Budget Travel Europe'\", file=sys.stderr)\n                    else:\n                        print(\"❌ ERROR: Cannot find forum title field\", file=sys.stderr)\n                        verification_passed = False\n                    \n                    # Check forum description\n                    desc_input = page.locator('textarea[name*=\"description\"], input[name*=\"description\"]')\n                    if await desc_input.count():\n                        desc_value = await desc_input.input_value()\n                        expected_desc = \"Community for sharing money-saving tips for European travel\"\n                        if desc_value != expected_desc:\n                            print(f\"❌ ERROR: Forum description is '{desc_value}', expected '{expected_desc}'\", file=sys.stderr)\n                            verification_passed = False\n                        else:\n                            print(\"✓ Forum description correct\", file=sys.stderr)\n                    
else:\n                        print(\"❌ ERROR: Cannot find forum description field\", file=sys.stderr)\n                        verification_passed = False\n                    \n                    # Check sidebar content\n                    sidebar_input = page.locator('textarea[name*=\"sidebar\"]')\n                    if await sidebar_input.count():\n                        sidebar_value = await sidebar_input.input_value()\n                        expected_sidebar = \"Share your best European travel deals and budget tips here!\"\n                        if sidebar_value != expected_sidebar:\n                            print(f\"❌ ERROR: Forum sidebar is '{sidebar_value}', expected '{expected_sidebar}'\", file=sys.stderr)\n                            verification_passed = False\n                        else:\n                            print(\"✓ Forum sidebar correct\", file=sys.stderr)\n                    else:\n                        print(\"❌ ERROR: Cannot find forum sidebar field\", file=sys.stderr)\n                        verification_passed = False\n            \n\n            # 3. 
Check wiki page existence and content\n            print(\"\\n\" + \"=\"*60)\n            print(\"Step 3: Checking wiki page existence and content...\", file=sys.stderr)\n            print(\"=\"*60)\n            \n            # Try the wiki URL with /wiki/ path\n            await page.goto(f\"{BASE_URL}/wiki/europe-travel-budget-guide\", wait_until='networkidle')\n            \n            wiki_page_content = await page.content()\n            wiki_page_title = await page.title()\n            \n            if \"404\" in wiki_page_title or \"not found\" in wiki_page_title.lower() or \"Page not found\" in wiki_page_content:\n                print(\"❌ ERROR: Wiki page does not exist at /wiki/europe-travel-budget-guide\", file=sys.stderr)\n                verification_passed = False\n            else:\n                print(\"✓ Wiki page exists at /wiki/europe-travel-budget-guide\", file=sys.stderr)\n                \n                # Check wiki title\n                wiki_title_found = False\n                expected_wiki_title = \"Complete Budget Travel Guide for Europe 2024\"\n                \n                # Try multiple selectors for wiki title\n                wiki_title_selectors = [\n                    f'h1:has-text(\"{expected_wiki_title}\")',\n                    f'h1:text-is(\"{expected_wiki_title}\")',\n                    'h1'\n                ]\n                \n                for selector in wiki_title_selectors:\n                    wiki_title_elem = page.locator(selector)\n                    if await wiki_title_elem.count():\n                        title_text = await wiki_title_elem.first.text_content()\n                        if expected_wiki_title in title_text:\n                            wiki_title_found = True\n                            break\n                \n                if not wiki_title_found:\n                    print(f\"❌ ERROR: Wiki title '{expected_wiki_title}' not found\", file=sys.stderr)\n                    
verification_passed = False\n                else:\n                    print(f\"✓ Wiki title correct: '{expected_wiki_title}'\", file=sys.stderr)\n                \n                # Check for required content in wiki\n                required_wiki_content = \"Eurail passes and budget airlines\"\n                if required_wiki_content not in wiki_page_content:\n                    print(f\"❌ ERROR: Wiki content must contain '{required_wiki_content}'\", file=sys.stderr)\n                    verification_passed = False\n                else:\n                    print(f\"✓ Wiki content contains required text: '{required_wiki_content}'\", file=sys.stderr)\n            \n            # 4. Check for post in the forum\n            print(\"\\n\" + \"=\"*60)\n            print(\"Step 4: Checking for post in forum...\", file=sys.stderr)\n            print(\"=\"*60)\n            \n            await page.goto(f\"{BASE_URL}/f/BudgetEuropeTravel\", wait_until='networkidle')\n            \n            expected_post_title = \"My 14-day Europe trip for under 1000 - Complete itinerary\"\n            post_link = page.locator(f'a:has-text(\"{expected_post_title}\")')\n            \n            if not await post_link.count():\n                print(f\"❌ ERROR: Post with title '{expected_post_title}' not found in forum\", file=sys.stderr)\n                verification_passed = False\n            else:\n                print(f\"✓ Post found with title: '{expected_post_title}'\", file=sys.stderr)\n                \n                # Click on the post to check its content\n                await post_link.first.click()\n                await page.wait_for_load_state('networkidle')\n                \n                # Check if post contains required text\n                post_page_content = await page.content()\n                required_post_content = \"budget guide wiki\"\n                \n                if required_post_content not in post_page_content:\n                    print(f\"❌ 
ERROR: Post body must contain '{required_post_content}'\", file=sys.stderr)\n                    verification_passed = False\n                else:\n                    print(f\"✓ Post content contains required text: '{required_post_content}'\", file=sys.stderr)\n            \n            # 5. Check upvote on search result\n            print(\"\\n\" + \"=\"*60)\n            print(\"Step 5: Checking upvote on search result...\", file=sys.stderr)\n            print(\"=\"*60)\n            \n            # Navigate to search results for \"travel insurance Europe\"\n            await page.goto(f\"{BASE_URL}/search?q=travel+insurance+Europe\", wait_until='networkidle')\n            \n\n            # Check if we're on search results page\n            if \"/search\" not in page.url:\n                print(\"❌ ERROR: Not on search results page\", file=sys.stderr)\n                verification_passed = False\n            else:\n                print(\"✓ On search results page for 'travel insurance Europe'\", file=sys.stderr)\n                \n                # Check for upvoted posts\n                upvote_found = False\n                \n                # Method 1: Check for \"Retract upvote\" button (indicates user has upvoted)\n                retract_buttons = page.locator('button:has-text(\"Retract upvote\")')\n                if await retract_buttons.count() > 0:\n                    print(\"✓ Found upvoted post (Retract upvote button present)\", file=sys.stderr)\n                    upvote_found = True\n                \n                # Method 2: Check for posts with upvote count >= 1\n                if not upvote_found:\n                    # Look for vote counts\n                    vote_elements = page.locator('div.vote, span.vote-count, [class*=\"vote\"]')\n                    \n                    for i in range(await vote_elements.count()):\n                        vote_elem = vote_elements.nth(i)\n                        vote_text = await 
vote_elem.text_content()\n                        try:\n                            # Extract number from vote text\n                            import re\n                            numbers = re.findall(r'\\d+', vote_text)\n                            if numbers:\n                                vote_count = int(numbers[0])\n                                if vote_count >= 1:\n                                    print(f\"✓ Found post with {vote_count} upvote(s)\", file=sys.stderr)\n                                    upvote_found = True\n                                    break\n                        except:\n                            continue\n                \n                if not upvote_found:\n                    print(\"❌ ERROR: No upvoted posts found in search results\", file=sys.stderr)\n                    verification_passed = False\n            \n            # 6. Check user settings\n            print(\"\\n\" + \"=\"*60)\n            print(\"Step 6: Checking user settings...\", file=sys.stderr)\n            print(\"=\"*60)\n            \n\n            await page.goto(f\"{BASE_URL}/user/EuroTravelPlanner/preferences\", wait_until='networkidle')\n            \n            # Check timezone setting\n            timezone_correct = False\n            timezone_select = page.locator('select[name*=\"timezone\"], select#timezone')\n            \n            if await timezone_select.count():\n                selected_value = await timezone_select.input_value()\n                \n                if selected_value == \"Europe/Amsterdam\":\n                    print(\"✓ Timezone correctly set to 'Europe/Amsterdam'\", file=sys.stderr)\n                    timezone_correct = True\n                else:\n                    # Check selected option text\n                    selected_option = timezone_select.locator('option[selected]')\n                    if await selected_option.count():\n                        option_text = await 
selected_option.text_content()\n                        if \"Amsterdam\" in option_text:\n                            print(\"✓ Timezone correctly set to Europe/Amsterdam\", file=sys.stderr)\n                            timezone_correct = True\n                        else:\n                            print(f\"❌ ERROR: Timezone is set to '{option_text}', expected 'Europe/Amsterdam'\", file=sys.stderr)\n                            verification_passed = False\n                    else:\n                        print(f\"❌ ERROR: Timezone is '{selected_value}', expected 'Europe/Amsterdam'\", file=sys.stderr)\n                        verification_passed = False\n            else:\n                print(\"❌ ERROR: Cannot find timezone selector\", file=sys.stderr)\n                verification_passed = False\n            \n            # Check \"Notify on reply\" setting\n            notify_correct = False\n            \n            # Try multiple selectors for the checkbox\n            notify_selectors = [\n                'input[type=\"checkbox\"]:near(:text(\"Notify on reply\"))',\n                'label:has-text(\"Notify on reply\") input[type=\"checkbox\"]',\n                'input[type=\"checkbox\"][name*=\"notify\"]',\n                'input[type=\"checkbox\"][id*=\"notify\"]'\n            ]\n            \n            for selector in notify_selectors:\n                notify_checkbox = page.locator(selector)\n                if await notify_checkbox.count():\n                    is_checked = await notify_checkbox.first.is_checked()\n                    if is_checked:\n                        print(\"✓ 'Notify on reply' is enabled (checked)\", file=sys.stderr)\n                        notify_correct = True\n                    else:\n                        print(\"❌ ERROR: 'Notify on reply' is not enabled (unchecked)\", file=sys.stderr)\n                        verification_passed = False\n                    break\n            \n            if not notify_correct 
and verification_passed:\n                print(\"❌ ERROR: Cannot verify 'Notify on reply' setting\", file=sys.stderr)\n                verification_passed = False\n            \n            # Final summary\n            print(\"\\n\" + \"=\"*60)\n            if verification_passed:\n                print(\"✅ SUCCESS: All verification checks passed!\", file=sys.stderr)\n            else:\n                print(\"❌ FAILED: One or more verification checks failed!\", file=sys.stderr)\n            print(\"=\"*60)\n            \n            return verification_passed\n            \n        except PlaywrightTimeoutError as e:\n            print(f\"❌ ERROR: Timeout occurred - {str(e)}\", file=sys.stderr)\n            return False\n        except Exception as e:\n            print(f\"❌ ERROR: Unexpected error - {str(e)}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/buyitforlife_research/description.md",
    "content": "Research durable kitchen appliances on the BuyItForLife forum by extracting information about the most popular posts and their community engagement.\n\n**Task Requirements:**\n\n1. Register a new account with username 'buyitforlife_researcher' and password 'BIFL2024Research!'\n\n2. After successful registration, navigate to the BuyItForLife forum\n\n3. Find the top 3 posts with the highest upvote counts and extract:\n   - Exact post title\n   - Exact upvote count\n   - Exact number of comments\n\n4. For the post with the highest upvotes from step 3, click through to read the comments and find the comment with the most upvotes. Record:\n   - The exact comment text\n   - The username of the commenter\n\n5. From the BuyItForLife forum, identify the usernames of the authors who posted the top 3 posts with the highest number of upvotes\n\n6. After collecting all the data, submit a new post in the BuyItForLife forum with:\n   - Title: \"Research Report for BuyItForLife\"\n   - Body text must be EXACTLY these lines without anything (keep the keys as-is, only replace the values after the pipe, follow the markdown format):\n\n```\n- Post1_Title|FILL_VALUE\n- Post1_Upvotes|FILL_VALUE\n- Post1_Comments|FILL_VALUE\n- Post2_Title|FILL_VALUE\n- Post2_Upvotes|FILL_VALUE\n- Post2_Comments|FILL_VALUE\n- Post3_Title|FILL_VALUE\n- Post3_Upvotes|FILL_VALUE\n- Post3_Comments|FILL_VALUE\n- TopComment_Text|FILL_VALUE\n- TopComment_Username|FILL_VALUE\n- Post1_Author|FILL_VALUE\n- Post2_Author|FILL_VALUE\n- Post3_Author|FILL_VALUE\n```"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/buyitforlife_research/label.txt",
    "content": "- Post1_Title|Hand me down name tag trail on this child's jacket\n- Post1_Upvotes|14487\n- Post1_Comments|163\n- Post2_Title|My Grandmother's oven, circa 1966. Many holiday meals were cooked with love here, right up until the day she passed. Aside from one shattered glass door that was replaced, everything still works!\n- Post2_Upvotes|6413\n- Post2_Comments|205\n- Post3_Title|1956 Frigidaire Range\n- Post3_Upvotes|5797\n- Post3_Comments|190\n- TopComment_Text|Rei does this too\n- TopComment_Username|ATeaformeplease\n- Post1_Author|Mofomania\n- Post2_Author|_Mr_Roboto_\n- Post3_Author|dezualy"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/buyitforlife_research/meta.json",
    "content": "{\n  \"task_id\": \"buyitforlife_research\",\n  \"task_name\": \"Buy It For Life Research\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Research durable lifetime-quality products by creating forum account, analyzing community recommendations, extracting product data, and compiling comprehensive durability report with voting metrics.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"user interaction\",\n    \"data extraction\",\n    \"search aggregation\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/buyitforlife_research/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nfrom pathlib import Path\nfrom playwright.async_api import (\n    async_playwright,\n    TimeoutError as PlaywrightTimeoutError,\n)\n\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\n\n\ndef parse_markdown_list_format(text):\n    \"\"\"\n    Parse the markdown list format (- Key|Value) from the submission body.\n    Also handles plain Key|Value format without the dash.\n    \"\"\"\n    data = {}\n    \n    # Pattern to match lines like: - Key|Value or just Key|Value\n    pattern = r'^(?:-\\s*)?([^|]+)\\|(.+)$'\n    \n    lines = text.strip().split('\\n')\n    for line in lines:\n        line = line.strip()\n        if '|' in line:\n            match = re.match(pattern, line)\n            if match:\n                key = match.group(1).strip()\n                value = match.group(2).strip()\n                data[key] = value\n    \n    return data\n\n\ndef normalize_text(text):\n    \"\"\"\n    Normalize text for comparison by handling different quote styles and whitespace.\n    \"\"\"\n    if not isinstance(text, str):\n        return str(text)\n\n    # Replace various quote styles with standard quotes\n    text = text.replace(\"\"\", \"'\").replace(\"\"\", \"'\")\n    text = text.replace('\"', '\"').replace('\"', '\"')\n\n    # Normalize whitespace\n    text = \" \".join(text.split())\n\n    return text.strip()\n\n\nasync def check_account_login(page):\n    \"\"\"\n    Check if the account can be successfully logged in.\n    Returns True if login successful, False otherwise.\n    \"\"\"\n    try:\n        # Navigate to main page\n        await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n        \n        # Check if already logged in\n        user_button = page.locator('button[class*=\"UserButton\"]')\n        if await user_button.count():\n            username = await user_button.inner_text()\n            if username.lower() == 
\"buyitforlife_researcher\":\n                print(f\"Already logged in as {username}\", file=sys.stderr)\n                return True\n            else:\n                # Log out first\n                await user_button.click()\n                await page.click('a:has-text(\"Log out\")')\n                await page.wait_for_load_state(\"networkidle\")\n        \n        # Try to login\n        await page.click('a:has-text(\"Log in\")')\n        await page.wait_for_load_state(\"networkidle\")\n        \n        await page.fill('input[name=\"_username\"]', \"buyitforlife_researcher\")\n        await page.fill('input[name=\"_password\"]', \"BIFL2024Research!\")\n        \n        await page.click('button:has-text(\"Log in\")')\n        await page.wait_for_load_state(\"networkidle\")\n        \n        # Check if login successful\n        user_button = page.locator('button:has-text(\"buyitforlife_researcher\")')\n        if await user_button.count():\n            print(\"Successfully logged in as buyitforlife_researcher\", file=sys.stderr)\n            return True\n        else:\n            print(\"Login failed for buyitforlife_researcher\", file=sys.stderr)\n            return False\n            \n    except Exception as e:\n        print(f\"Error during login check: {str(e)}\", file=sys.stderr)\n        return False\n\n\nasync def check_submission_exists(page):\n    \"\"\"\n    Check if the submission exists in the BuyItForLife forum with correct content.\n    Returns (exists, content_data) tuple.\n    \"\"\"\n    try:\n        # Navigate to BuyItForLife forum\n        await page.goto(f\"{BASE_URL}/f/BuyItForLife\", wait_until=\"networkidle\")\n        \n        # Look for the post with correct title\n        print(\"Looking for post 'Research Report for BuyItForLife'...\", file=sys.stderr)\n        post_locator = page.locator('a:has-text(\"Research Report for BuyItForLife\")')\n        \n        if not await post_locator.count():\n            print(\"Error: 
Could not find post with title 'Research Report for BuyItForLife'\", file=sys.stderr)\n            return False, None\n        \n        # Click on the post to view its content\n        await post_locator.first.click()\n        await page.wait_for_load_state(\"networkidle\")\n        \n        # Get the post content\n        post_content = None\n        selectors = [\n            '.PostFullItem-body',\n            '.Post-body',\n            '.PostItem-body',\n            '.item-RichText',\n            '[class*=\"RichText\"]',\n            'div:has-text(\"Post1_Title\")',\n        ]\n        \n        for selector in selectors:\n            post_content_element = page.locator(selector)\n            if await post_content_element.count():\n                # Get the text content, handling multiple elements if needed\n                if await post_content_element.count() > 1:\n                    for i in range(await post_content_element.count()):\n                        text = await post_content_element.nth(i).inner_text()\n                        if \"Post1_Title\" in text:\n                            post_content = text\n                            print(f\"Found post content using selector: {selector} (element {i})\", file=sys.stderr)\n                            break\n                else:\n                    post_content = await post_content_element.first.inner_text()\n                    print(f\"Found post content using selector: {selector}\", file=sys.stderr)\n                \n                if post_content and \"Post1_Title\" in post_content:\n                    break\n        \n        if not post_content:\n            print(\"Error: Could not find post content element\", file=sys.stderr)\n            return False, None\n        \n        print(\"Post content found:\", file=sys.stderr)\n        print(post_content[:200] + \"...\" if len(post_content) > 200 else post_content, file=sys.stderr)\n        \n        # Parse the markdown list format\n        
extracted_data = parse_markdown_list_format(post_content)\n        print(f\"Extracted data: {extracted_data}\", file=sys.stderr)\n        \n        return True, extracted_data\n        \n    except Exception as e:\n        print(f\"Error checking submission: {str(e)}\", file=sys.stderr)\n        return False, None\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the BuyItForLife research task has been completed correctly.\n    Checks:\n    1. Account creation (can login with credentials)\n    2. Submission exists with correct title\n    3. Submission content matches expected format and values\n    \"\"\"\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n        \n        try:\n            # Step 1: Check account creation\n            print(\"=== Step 1: Checking account creation ===\", file=sys.stderr)\n            account_ok = await check_account_login(page)\n            if not account_ok:\n                print(\"Error: Account 'buyitforlife_researcher' cannot be logged in\", file=sys.stderr)\n                return False\n            \n            # Step 2: Check submission exists and get content\n            print(\"\\n=== Step 2: Checking submission ===\", file=sys.stderr)\n            submission_exists, extracted_data = await check_submission_exists(page)\n            \n            if not submission_exists:\n                print(\"Error: Submission not found in BuyItForLife forum\", file=sys.stderr)\n                return False\n            \n            if not extracted_data:\n                print(\"Error: Could not extract data from submission\", file=sys.stderr)\n                return False\n            \n            # Step 3: Load expected data from label.txt\n            print(\"\\n=== Step 3: Validating submission content ===\", file=sys.stderr)\n            label_path = Path(__file__).parent / 
\"label.txt\"\n            if not label_path.exists():\n                print(\"Error: label.txt not found\", file=sys.stderr)\n                return False\n            \n            with open(label_path, \"r\") as f:\n                expected_text = f.read().strip()\n            expected_data = parse_markdown_list_format(expected_text)\n            print(f\"Expected data from label.txt: {expected_data}\", file=sys.stderr)\n            \n            # Verify all required keys are present\n            required_keys = [\n                \"Post1_Title\",\n                \"Post1_Upvotes\",\n                \"Post1_Comments\",\n                \"Post2_Title\",\n                \"Post2_Upvotes\",\n                \"Post2_Comments\",\n                \"Post3_Title\",\n                \"Post3_Upvotes\",\n                \"Post3_Comments\",\n                \"TopComment_Text\",\n                \"TopComment_Username\",\n                \"Post1_Author\",\n                \"Post2_Author\",\n                \"Post3_Author\",\n            ]\n            \n            missing_keys = []\n            for key in required_keys:\n                if key not in extracted_data:\n                    missing_keys.append(key)\n            \n            if missing_keys:\n                print(f\"Error: Missing required keys: {', '.join(missing_keys)}\", file=sys.stderr)\n                return False\n            \n            # Compare each field with expected values\n            errors = []\n            for key in required_keys:\n                if key in expected_data and key in extracted_data:\n                    expected_val = normalize_text(expected_data[key])\n                    actual_val = normalize_text(extracted_data[key])\n                    \n                    # For numeric fields, compare as integers\n                    if \"Upvotes\" in key or \"Comments\" in key:\n                        try:\n                            expected_int = int(expected_val)\n              
              actual_int = int(actual_val)\n                            if expected_int != actual_int:\n                                errors.append(f\"{key} mismatch: got {actual_int}, expected {expected_int}\")\n                        except ValueError:\n                            errors.append(f\"{key} should be numeric: got '{actual_val}'\")\n                    else:\n                        # For text fields, special handling for usernames with underscores\n                        if \"Author\" in key or key == \"TopComment_Username\":\n                            expected_core = expected_val.strip('_')\n                            actual_core = actual_val.strip('_')\n                            if expected_core != actual_core:\n                                errors.append(f\"{key} mismatch: got '{actual_val}', expected '{expected_val}'\")\n                        else:\n                            if expected_val != actual_val:\n                                errors.append(f\"{key} mismatch: got '{actual_val}', expected '{expected_val}'\")\n            \n            # Verify upvotes are in descending order\n            try:\n                post1_votes = int(extracted_data[\"Post1_Upvotes\"])\n                post2_votes = int(extracted_data[\"Post2_Upvotes\"])\n                post3_votes = int(extracted_data[\"Post3_Upvotes\"])\n                \n                if not (post1_votes >= post2_votes >= post3_votes):\n                    errors.append(f\"Posts should be ordered by upvotes: {post1_votes} >= {post2_votes} >= {post3_votes}\")\n            except (ValueError, KeyError):\n                pass  # Already reported above\n            \n            if errors:\n                print(\"Error: Validation failed with the following issues:\", file=sys.stderr)\n                for error in errors:\n                    print(f\"  - {error}\", file=sys.stderr)\n                return False\n            \n            # All checks passed\n            
print(\"\\n=== SUCCESS ===\", file=sys.stderr)\n            print(\"✓ Account 'buyitforlife_researcher' created and can login\", file=sys.stderr)\n            print(\"✓ Submission 'Research Report for BuyItForLife' found in correct forum\", file=sys.stderr)\n            print(\"✓ All 14 required fields present and correct\", file=sys.stderr)\n            print(\"✓ Data matches expected values from label.txt\", file=sys.stderr)\n            print(\"✓ Posts ordered by upvotes (descending)\", file=sys.stderr)\n            return True\n            \n        except PlaywrightTimeoutError as e:\n            print(f\"Error: Timeout occurred - {str(e)}\", file=sys.stderr)\n            return False\n        except Exception as e:\n            print(f\"Error: Unexpected error - {str(e)}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/llm_research_summary/description.md",
    "content": "I need you to perform a comprehensive analysis of Large Language Model discussions on this forum platform and create a summary post. This analysis will help me understand the community's engagement with LLM topics.\n\n**Task Requirements:**\n\n1. Register a new account with username 'llm_analyst_2024' and password 'LLMExpert2024!'\n\n2. After successful registration, navigate to the MachineLearning forum and analyze posts about Large Language Models:\n   - Count all posts on the first page that contain 'GPT', 'ChatGPT', or 'LLM' in their titles\n\n3. Identify the top 3 posts with the highest upvote counts from your LLM-related posts list and record their complete details\n\n4. Navigate to the deeplearning forum and go to page 2:\n   - Find the post with the most comments on page 2\n   - Record its exact title and comment count\n\n5. Create a new submission in the MachineLearning forum with:\n   - Title: \"LLM Research Summary: GPT Discussions Analysis [2024]\"\n   - Body text must be EXACTLY these lines without anything (keep the keys as-is, only replace the values after the pipe, follow the markdown format):\n\n```\n- Total_LLM_Posts|FILL_VALUE\n- Top1_Title|FILL_VALUE\n- Top1_Upvotes|FILL_VALUE\n- Top1_Date|FILL_VALUE\n- Top2_Title|FILL_VALUE\n- Top2_Upvotes|FILL_VALUE\n- Top2_Date|FILL_VALUE\n- Top3_Title|FILL_VALUE\n- Top3_Upvotes|FILL_VALUE\n- Top3_Date|FILL_VALUE\n- Deeplearning_MostDiscussed|FILL_VALUE\n- Deeplearning_Comments|FILL_VALUE\n```"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/llm_research_summary/label.txt",
    "content": "- Total_LLM_Posts|9\n- Top1_Title|[P] I made a command-line tool that explains your errors using ChatGPT (link in comments)\n- Top1_Upvotes|2655\n- Top1_Date|3 years ago\n- Top2_Title|[P] I built Adrenaline, a debugger that fixes errors and explains them with GPT-3\n- Top2_Upvotes|1542\n- Top2_Date|3 years ago\n- Top3_Title|[N] OpenAI may have benchmarked GPT-4's coding ability on it's own training data\n- Top3_Upvotes|925\n- Top3_Date|2 years ago\n- Deeplearning_MostDiscussed|Do companies actually care about their model's training/inference speed?\n- Deeplearning_Comments|39"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/llm_research_summary/meta.json",
    "content": "{\n  \"task_id\": \"llm_research_summary\",\n  \"task_name\": \"LLM Research Summary\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Aggregate and analyze LLM research discussions across multiple forums, collect trending topics, compile technical insights, and create comprehensive summary post with community engagement.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"search aggregation\",\n    \"content submission\",\n    \"user interaction\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/llm_research_summary/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nfrom pathlib import Path\nfrom playwright.async_api import (\n    async_playwright,\n    TimeoutError as PlaywrightTimeoutError,\n)\n\n# 从环境变量读取 base_url，默认回退到本地\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\n\n\ndef parse_key_value_format(text):\n    \"\"\"\n    Parse the Key|Value format from the submission body.\n    Handles both pipe (|) and colon (:) separators for compatibility.\n    \"\"\"\n    data = {}\n    \n    # Try to parse with pipe separator first (expected format)\n    lines = text.strip().split('\\n')\n    for line in lines:\n        line = line.strip()\n        if not line:\n            continue\n        \n        # Remove markdown list prefix if present\n        if line.startswith('- '):\n            line = line[2:]\n        elif line.startswith('* '):\n            line = line[2:]\n        \n        # Try pipe separator first\n        if '|' in line:\n            parts = line.split('|', 1)\n            if len(parts) == 2:\n                key = parts[0].strip()\n                value = parts[1].strip()\n                data[key] = value\n        # Fallback to colon separator for label.txt compatibility\n        elif ':' in line:\n            parts = line.split(':', 1)\n            if len(parts) == 2:\n                key = parts[0].strip()\n                value = parts[1].strip()\n                data[key] = value\n    \n    return data\n\n\ndef normalize_text(text):\n    \"\"\"\n    Normalize text for comparison by handling different quote styles and whitespace.\n    \"\"\"\n    if not isinstance(text, str):\n        return str(text)\n\n    # Replace various quote styles with standard quotes\n    text = text.replace(\"\"\", \"'\").replace(\"\"\", \"'\")\n    text = text.replace('\"', '\"').replace('\"', '\"')\n\n    # Normalize whitespace\n    text = \" \".join(text.split())\n\n    return text.strip()\n\n\nasync def verify() -> bool:\n    
\"\"\"\n    Verifies that the LLM analysis task has been completed correctly.\n    \"\"\"\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            # Navigate to the main page\n            print(\"Navigating to forum...\", file=sys.stderr)\n            await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n\n            # Check if logged in as llm_analyst_2024\n            user_button = page.locator('button:has-text(\"llm_analyst_2024\")')\n            if not await user_button.count():\n                # Try to login\n                print(\"Not logged in, attempting to login...\", file=sys.stderr)\n\n                await page.click('a:has-text(\"Log in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                await page.fill('input[name=\"_username\"]', \"llm_analyst_2024\")\n                await page.fill('input[name=\"_password\"]', \"LLMExpert2024!\")\n\n                await page.click('button:has-text(\"Log in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                user_button = page.locator('button:has-text(\"llm_analyst_2024\")')\n                if not await user_button.count():\n                    print(\"Error: Login failed for llm_analyst_2024\", file=sys.stderr)\n                    return False\n\n                print(\"Successfully logged in as llm_analyst_2024\", file=sys.stderr)\n            else:\n                print(\"Already logged in as llm_analyst_2024\", file=sys.stderr)\n\n            # Navigate to MachineLearning forum\n            print(\"Navigating to MachineLearning forum...\", file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/f/MachineLearning\", wait_until=\"networkidle\"\n            )\n\n            # Look for the submission with our specific title\n            print(\n      
          \"Looking for submission 'LLM Research Summary: GPT Discussions Analysis [2024]'...\",\n                file=sys.stderr,\n            )\n            post_link = page.locator(\n                'a:has-text(\"LLM Research Summary: GPT Discussions Analysis [2024]\")'\n            )\n\n            if not await post_link.count():\n                print(\n                    \"Error: Could not find submission with required title\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Click on the submission to view its content\n            await post_link.first.click()\n            await page.wait_for_load_state(\"networkidle\")\n\n            # Extract the submission body content\n            # Try multiple possible selectors for the post body\n            post_content = None\n            selectors = [\n                \".submission__body\",\n                \".post-body\",\n                \".RichText\",\n                '[class*=\"RichText\"]',\n                'div:has(> p:has-text(\"Total_LLM_Posts\"))',\n                'div:has-text(\"Total_LLM_Posts\"):has-text(\"Deeplearning_Comments\")',\n            ]\n\n            for selector in selectors:\n                content_element = page.locator(selector)\n                if await content_element.count():\n                    post_content = await content_element.first.inner_text()\n                    if \"Total_LLM_Posts\" in post_content:\n                        print(\n                            f\"Found submission content using selector: {selector}\",\n                            file=sys.stderr,\n                        )\n                        break\n\n            if not post_content or \"Total_LLM_Posts\" not in post_content:\n                print(\n                    \"Error: Could not find submission body with required format\",\n                    file=sys.stderr,\n                )\n                return False\n\n            
print(\"Submission content found, parsing data...\", file=sys.stderr)\n            print(f\"Raw content: {post_content[:200]}...\", file=sys.stderr)\n\n            # Parse the Key: Value format\n            extracted_data = parse_key_value_format(post_content)\n            print(f\"Extracted data: {extracted_data}\", file=sys.stderr)\n\n            # Load expected values from label.txt\n            label_path = Path(__file__).parent / \"label.txt\"\n            if label_path.exists():\n                with open(label_path, \"r\") as f:\n                    expected_text = f.read().strip()\n                expected_data = parse_key_value_format(expected_text)\n                print(\"Loaded expected values from label.txt\", file=sys.stderr)\n\n            # Verify all required keys are present\n            required_keys = [\n                \"Total_LLM_Posts\",\n                \"Top1_Title\",\n                \"Top1_Upvotes\",\n                \"Top1_Date\",\n                \"Top2_Title\",\n                \"Top2_Upvotes\",\n                \"Top2_Date\",\n                \"Top3_Title\",\n                \"Top3_Upvotes\",\n                \"Top3_Date\",\n                \"Deeplearning_MostDiscussed\",\n                \"Deeplearning_Comments\",\n            ]\n\n            missing_keys = []\n            for key in required_keys:\n                if key not in extracted_data:\n                    missing_keys.append(key)\n\n            if missing_keys:\n                print(\n                    f\"Error: Missing required keys: {', '.join(missing_keys)}\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Validate data format and content\n            errors = []\n\n            # Check Total_LLM_Posts is a number and matches expected\n            try:\n                total_posts = int(extracted_data[\"Total_LLM_Posts\"])\n                if \"expected_data\" in locals() and \"Total_LLM_Posts\" in 
expected_data:\n                    expected_total = int(expected_data[\"Total_LLM_Posts\"])\n                    if total_posts != expected_total:\n                        errors.append(\n                            f\"Total_LLM_Posts mismatch: got {total_posts}, expected {expected_total}\"\n                        )\n                elif total_posts < 5:  # Based on exploration, should be at least 5\n                    errors.append(f\"Total_LLM_Posts seems too low: {total_posts}\")\n            except ValueError:\n                errors.append(\n                    f\"Total_LLM_Posts must be a number, got: {extracted_data['Total_LLM_Posts']}\"\n                )\n\n            # If we have expected data, compare against it\n            if \"expected_data\" in locals():\n                # Compare each field\n                for key in required_keys:\n                    if key in expected_data and key in extracted_data:\n                        expected_val = normalize_text(expected_data[key])\n                        actual_val = normalize_text(extracted_data[key])\n\n                        # For numeric fields, compare as integers\n                        if (\n                            \"Upvotes\" in key\n                            or \"Comments\" in key\n                            or key == \"Total_LLM_Posts\"\n                        ):\n                            try:\n                                expected_int = int(expected_val)\n                                actual_int = int(actual_val)\n                                if expected_int != actual_int:\n                                    errors.append(\n                                        f\"{key} mismatch: got {actual_int}, expected {expected_int}\"\n                                    )\n                            except ValueError:\n                                errors.append(\n                                    f\"{key} should be numeric: got '{actual_val}'\"\n                        
        )\n                        else:\n                            # For text fields, compare normalized text\n                            if expected_val != actual_val:\n                                errors.append(\n                                    f\"{key} mismatch: got '{actual_val}', expected '{expected_val}'\"\n                                )\n\n            else:\n                # If no expected data, just do basic validation\n                for key in required_keys:\n                    if key not in extracted_data:\n                        errors.append(f\"Missing required key: {key}\")\n                    elif (\n                        not extracted_data[key] or extracted_data[key] == \"[FILL_VALUE]\"\n                    ):\n                        errors.append(f\"{key} was not filled in\")\n\n            # Verify upvotes are in descending order for top 3\n            try:\n                top1_votes = int(extracted_data[\"Top1_Upvotes\"])\n                top2_votes = int(extracted_data[\"Top2_Upvotes\"])\n                top3_votes = int(extracted_data[\"Top3_Upvotes\"])\n\n                if not (top1_votes >= top2_votes >= top3_votes):\n                    errors.append(\n                        f\"Top posts should be ordered by upvotes: {top1_votes} >= {top2_votes} >= {top3_votes}\"\n                    )\n            except (ValueError, KeyError):\n                pass  # Already reported above\n\n            if errors:\n                print(\n                    \"Error: Validation failed with the following issues:\",\n                    file=sys.stderr,\n                )\n                for error in errors:\n                    print(f\"  - {error}\", file=sys.stderr)\n                return False\n\n            # All checks passed\n            print(\"Success: LLM analysis task completed successfully.\")\n            print(\"- Account llm_analyst_2024 verified\")\n            print(\n                \"- Submission 'LLM Research 
Summary: GPT Discussions Analysis [2024]' found\"\n            )\n            print(\n                f\"- Total LLM-related posts analyzed: {extracted_data['Total_LLM_Posts']}\"\n            )\n            print(\"- Top 3 posts by upvotes identified and documented\")\n            print(\n                f\"- Deeplearning forum page 2 most discussed post: {extracted_data['Deeplearning_MostDiscussed']}\"\n            )\n            print(\"- All data in correct Key: Value format with 12 lines\")\n            return True\n\n        except PlaywrightTimeoutError as e:\n            print(f\"Error: Timeout occurred - {str(e)}\", file=sys.stderr)\n            return False\n        except Exception as e:\n            print(f\"Error: Unexpected error - {str(e)}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/description.md",
    "content": "I need you to analyze wonderful movie discussions on Postmill and create a summary post. This analysis will help me understand the community's engagement with movie topics.\n\n**Task Requirements:**\n\n1. Register a new account with username 'movie_reviewer_2024' and password 'movie_reviewer_2024'\n\n2. After successful registration, navigate to the movies forum and analyze the posts on the first page:\n   - Count all posts that have 4-digit years in their titles\n\n3. Identify the top 3 posts with the highest upvote counts from ALL posts on the first page and record their complete details\n\n4. Find this specific post and record its data:\n   - The 'Rittenhouse Square' poster post (record exact upvotes and comments)\n\n5. Navigate through the first 5 pages of the movies forum and count the total number of image/poster submissions across all 5 pages\n\n6. Create a new submission in the movies forum with:\n   - Title: \"Wonderful Movies Analysis: Community Favorites [2024]\"\n   - Body text must be EXACTLY these lines with nothing else added (keep the keys as-is, only replace the values after the pipe, follow the markdown format):\n\n```\n- Total_Year_Posts|FILL_VALUE\n- Top1_Title|FILL_VALUE\n- Top1_Upvotes|FILL_VALUE\n- Top1_Comments|FILL_VALUE\n- Top2_Title|FILL_VALUE\n- Top2_Upvotes|FILL_VALUE\n- Top2_Comments|FILL_VALUE\n- Top3_Title|FILL_VALUE\n- Top3_Upvotes|FILL_VALUE\n- Top3_Comments|FILL_VALUE\n- Rittenhouse_Upvotes|FILL_VALUE\n- Rittenhouse_Comments|FILL_VALUE\n- Total_Image_Posts_5Pages|FILL_VALUE\n```"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/label.txt",
    "content": "- Total_Year_Posts|1\n- Top1_Title|Who will win the Oscar for ACTRESS IN A SUPPORTING ROLE?\n- Top1_Upvotes|9933\n- Top1_Comments|23\n- Top2_Title|Who will win the Oscar for FILM EDITING?\n- Top2_Upvotes|7720\n- Top2_Comments|20\n- Top3_Title|Cindy Williams Dies: 'Laverne & Shirley' Star Who Appeared In 'American Graffiti' & 'The Conversation' Was 75\n- Top3_Upvotes|5268\n- Top3_Comments|190\n- Rittenhouse_Upvotes|2761\n- Rittenhouse_Comments|182\n- Total_Image_Posts_5Pages|6"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/meta.json",
    "content": "{\n  \"task_id\": \"movie_reviewer_analysis\",\n  \"task_name\": \"Movie Reviewer Analysis\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Analyze movie review patterns by creating reviewer profile, collecting ratings data, tracking review trends, and generating analytical report on community movie preferences and discussions.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"user interaction\",\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/movie_reviewer_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nfrom pathlib import Path\nfrom playwright.async_api import (\n    async_playwright,\n    TimeoutError as PlaywrightTimeoutError,\n)\n\n# 从环境变量读取 base_url，默认回退到原地址\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\nprint(f\"Using base URL: {BASE_URL}\")\n\ndef parse_key_value_format(text):\n    \"\"\"\n    Parse the Key|Value format from the submission body using regex.\n    Works regardless of line breaks.\n    \"\"\"\n    data = {}\n\n    # Define patterns for each field with the pipe separator\n    patterns = {\n        \"Total_Year_Posts\": r\"Total_Year_Posts\\s*\\|\\s*(\\d+)\",\n        \"Top1_Title\": r\"Top1_Title\\s*\\|\\s*(.+?)(?=\\nTop1_Upvotes|$)\",\n        \"Top1_Upvotes\": r\"Top1_Upvotes\\s*\\|\\s*(\\d+)\",\n        \"Top1_Comments\": r\"Top1_Comments\\s*\\|\\s*(\\d+)\",\n        \"Top2_Title\": r\"Top2_Title\\s*\\|\\s*(.+?)(?=\\nTop2_Upvotes|$)\",\n        \"Top2_Upvotes\": r\"Top2_Upvotes\\s*\\|\\s*(\\d+)\",\n        \"Top2_Comments\": r\"Top2_Comments\\s*\\|\\s*(\\d+)\",\n        \"Top3_Title\": r\"Top3_Title\\s*\\|\\s*(.+?)(?=\\nTop3_Upvotes|$)\",\n        \"Top3_Upvotes\": r\"Top3_Upvotes\\s*\\|\\s*(\\d+)\",\n        \"Top3_Comments\": r\"Top3_Comments\\s*\\|\\s*(\\d+)\",\n        \"Rittenhouse_Upvotes\": r\"Rittenhouse_Upvotes\\s*\\|\\s*(\\d+)\",\n        \"Rittenhouse_Comments\": r\"Rittenhouse_Comments\\s*\\|\\s*(\\d+)\",\n        \"Total_Image_Posts_5Pages\": r\"Total_Image_Posts_5Pages\\s*\\|\\s*(\\d+)\",\n    }\n\n    # Extract each field using regex\n    for key, pattern in patterns.items():\n        match = re.search(pattern, text, re.DOTALL | re.MULTILINE)\n        if match:\n            # For title fields, clean up newlines and extra spaces\n            value = match.group(1).strip()\n            if \"Title\" in key:\n                # Replace newlines with spaces and normalize whitespace\n                value = \" 
\".join(value.split())\n            data[key] = value\n\n    return data\n\n\ndef normalize_text(text):\n    \"\"\"\n    Normalize text for comparison by handling different quote styles and whitespace.\n    \"\"\"\n    if not isinstance(text, str):\n        return str(text)\n\n    # Replace various quote styles with standard quotes\n    text = text.replace(\"\"\", \"'\").replace(\"\"\", \"'\")\n    text = text.replace('\"', '\"').replace('\"', '\"')\n    text = text.replace(\"&amp;\", \"&\")\n\n    # Normalize whitespace\n    text = \" \".join(text.split())\n\n    return text.strip()\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the wonderful movies analysis task has been completed correctly.\n    \"\"\"\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            # Navigate to the main page\n            print(\"Navigating to forum...\", file=sys.stderr)\n            await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n\n            # Check if logged in as movie_reviewer_2024\n            user_button = page.locator('button:has-text(\"movie_reviewer_2024\")')\n            if not await user_button.count():\n                # Try to login\n                print(\"Not logged in, attempting to login...\", file=sys.stderr)\n\n                await page.click('a:has-text(\"Log in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                await page.fill('input[name=\"_username\"]', \"movie_reviewer_2024\")\n                await page.fill('input[name=\"_password\"]', \"movie_reviewer_2024\")\n\n                await page.click('button:has-text(\"Log in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                user_button = page.locator('button:has-text(\"movie_reviewer_2024\")')\n                if not await user_button.count():\n   
                 print(\n                        \"Error: Login failed for movie_reviewer_2024\", file=sys.stderr\n                    )\n                    return False\n\n                print(\"Successfully logged in as movie_reviewer_2024\", file=sys.stderr)\n            else:\n                print(\"Already logged in as movie_reviewer_2024\", file=sys.stderr)\n\n            # Navigate to movies forum\n            print(\"Navigating to movies forum...\", file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/f/movies\", wait_until=\"networkidle\"\n            )\n\n            # Look for the submission with our specific title\n            print(\n                \"Looking for submission 'Wonderful Movies Analysis: Community Favorites [2024]'...\",\n                file=sys.stderr,\n            )\n            post_link = page.locator(\n                'a:has-text(\"Wonderful Movies Analysis: Community Favorites [2024]\")'\n            )\n\n            if not await post_link.count():\n                print(\n                    \"Error: Could not find submission with required title\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Click on the submission to view its content\n            await post_link.first.click()\n            await page.wait_for_load_state(\"networkidle\")\n\n            # Extract the submission body content\n            # Try multiple possible selectors for the post body\n            post_content = None\n            selectors = [\n                \".submission__body\",\n                \".post-body\",\n                \".RichText\",\n                '[class*=\"RichText\"]',\n                'div:has(> p:has-text(\"Total_Year_Posts\"))',\n                'div:has-text(\"Total_Year_Posts\"):has-text(\"Total_Image_Posts_5Pages\")',\n            ]\n\n            for selector in selectors:\n                content_element = page.locator(selector)\n                if 
await content_element.count():\n                    post_content = await content_element.first.inner_text()\n                    if \"Total_Year_Posts\" in post_content:\n                        print(\n                            f\"Found submission content using selector: {selector}\",\n                            file=sys.stderr,\n                        )\n                        break\n\n            if not post_content or \"Total_Year_Posts\" not in post_content:\n                print(\n                    \"Error: Could not find submission body with required format\",\n                    file=sys.stderr,\n                )\n                return False\n\n            print(\"Submission content found, parsing data...\", file=sys.stderr)\n            print(f\"Raw content: {post_content[:200]}...\", file=sys.stderr)\n\n            # Parse the Key: Value format\n            extracted_data = parse_key_value_format(post_content)\n            print(f\"Extracted data: {extracted_data}\", file=sys.stderr)\n\n            # Load expected values from label.txt\n            label_path = Path(__file__).parent / \"label.txt\"\n            if label_path.exists():\n                with open(label_path, \"r\") as f:\n                    expected_text = f.read().strip()\n                expected_data = parse_key_value_format(expected_text)\n                print(\"Loaded expected values from label.txt\", file=sys.stderr)\n\n            # Verify all required keys are present\n            required_keys = [\n                \"Total_Year_Posts\",\n                \"Top1_Title\",\n                \"Top1_Upvotes\",\n                \"Top1_Comments\",\n                \"Top2_Title\",\n                \"Top2_Upvotes\",\n                \"Top2_Comments\",\n                \"Top3_Title\",\n                \"Top3_Upvotes\",\n                \"Top3_Comments\",\n                \"Rittenhouse_Upvotes\",\n                \"Rittenhouse_Comments\",\n                
\"Total_Image_Posts_5Pages\",\n            ]\n\n            missing_keys = []\n            for key in required_keys:\n                if key not in extracted_data:\n                    missing_keys.append(key)\n\n            if missing_keys:\n                print(\n                    f\"Error: Missing required keys: {', '.join(missing_keys)}\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Validate data format and content\n            errors = []\n\n            # Check Total_Year_Posts is a number and matches expected\n            try:\n                total_posts = int(extracted_data[\"Total_Year_Posts\"])\n                if \"expected_data\" in locals() and \"Total_Year_Posts\" in expected_data:\n                    expected_total = int(expected_data[\"Total_Year_Posts\"])\n                    if total_posts != expected_total:\n                        errors.append(\n                            f\"Total_Year_Posts mismatch: got {total_posts}, expected {expected_total}\"\n                        )\n            except ValueError:\n                errors.append(\n                    f\"Total_Year_Posts must be a number, got: {extracted_data['Total_Year_Posts']}\"\n                )\n\n            # If we have expected data, compare against it\n            if \"expected_data\" in locals():\n                # Compare each field\n                for key in required_keys:\n                    if key in expected_data and key in extracted_data:\n                        expected_val = normalize_text(expected_data[key])\n                        actual_val = normalize_text(extracted_data[key])\n\n                        # For numeric fields, compare as integers\n                        if (\n                            \"Upvotes\" in key\n                            or \"Comments\" in key\n                            or key == \"Total_Year_Posts\"\n                            or key == 
\"Total_Image_Posts_5Pages\"\n                        ):\n                            try:\n                                expected_int = int(expected_val)\n                                actual_int = int(actual_val)\n                                if expected_int != actual_int:\n                                    errors.append(\n                                        f\"{key} mismatch: got {actual_int}, expected {expected_int}\"\n                                    )\n                            except ValueError:\n                                errors.append(\n                                    f\"{key} should be numeric: got '{actual_val}'\"\n                                )\n                        else:\n                            # For text fields, compare normalized text\n                            if expected_val != actual_val:\n                                errors.append(\n                                    f\"{key} mismatch: got '{actual_val}', expected '{expected_val}'\"\n                                )\n\n            else:\n                # If no expected data, just do basic validation\n                for key in required_keys:\n                    if key not in extracted_data:\n                        errors.append(f\"Missing required key: {key}\")\n                    elif (\n                        not extracted_data[key] or extracted_data[key] == \"[FILL_VALUE]\"\n                    ):\n                        errors.append(f\"{key} was not filled in\")\n\n            if errors:\n                print(\n                    \"Error: Validation failed with the following issues:\",\n                    file=sys.stderr,\n                )\n                for error in errors:\n                    print(f\"  - {error}\", file=sys.stderr)\n                return False\n\n            # All checks passed\n            print(\"Success: Wonderful movies analysis task completed successfully.\")\n            print(\"- Account movie_reviewer_2024 
verified\")\n            print(\n                \"- Submission 'Wonderful Movies Analysis: Community Favorites [2024]' found\"\n            )\n            print(f\"- Total posts with years: {extracted_data['Total_Year_Posts']}\")\n            print(\"- Top 3 posts by upvotes identified and documented\")\n            print(\n                f\"- Rittenhouse Square data: {extracted_data['Rittenhouse_Upvotes']} upvotes, {extracted_data['Rittenhouse_Comments']} comments\"\n            )\n            print(\n                f\"- Total image posts across 5 pages: {extracted_data['Total_Image_Posts_5Pages']}\"\n            )\n            print(\"- All data in correct Key|Value format\")\n            return True\n\n        except PlaywrightTimeoutError as e:\n            print(f\"Error: Timeout occurred - {str(e)}\", file=sys.stderr)\n            return False\n        except Exception as e:\n            print(f\"Error: Unexpected error - {str(e)}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/description.md",
    "content": "I'm conducting research on NBA player discussions in online sports communities. Please help me create a comprehensive analysis.\n\n**Task Requirements:**\n\n1. Register a new account with username 'NBA_DataAnalyst_2024' and password 'Research#2024!'\n\n2. Navigate to the sports forum and search for posts containing 'NBA' in their titles:\n   - Collect data from the 5 NBA-related posts with the most comments\n   - For each post, record: the exact post title, vote count, comment count, and the username of the person who submitted it\n\n3. Visit the user profile of 'BCLetsRide69':\n   - Count this user's total submissions\n\n4. Create a new submission in the sports forum with:\n   - Title: \"Statistical Analysis: NBA Content Engagement on This Forum\"\n   - Body text must be EXACTLY these lines with nothing else added (keep the keys as-is, only replace the values after the pipe, follow the markdown format):\n\n```\n- Total_NBA_Posts|FILL_VALUE\n- Top1_Title|FILL_VALUE\n- Top1_Votes|FILL_VALUE\n- Top1_Comments|FILL_VALUE\n- Top1_Author|FILL_VALUE\n- Top2_Title|FILL_VALUE\n- Top2_Votes|FILL_VALUE\n- Top2_Comments|FILL_VALUE\n- Top2_Author|FILL_VALUE\n- Top3_Title|FILL_VALUE\n- Top3_Votes|FILL_VALUE\n- Top3_Comments|FILL_VALUE\n- Top3_Author|FILL_VALUE\n- Top4_Title|FILL_VALUE\n- Top4_Votes|FILL_VALUE\n- Top4_Comments|FILL_VALUE\n- Top4_Author|FILL_VALUE\n- Top5_Title|FILL_VALUE\n- Top5_Votes|FILL_VALUE\n- Top5_Comments|FILL_VALUE\n- Top5_Author|FILL_VALUE\n- BCLetsRide69_Total_Posts|FILL_VALUE\n```\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/label.txt",
    "content": "- Total_NBA_Posts|20\n- Top1_Title|Hamby claims [WNBA Champ] Aces 'unprofessional' after trade\n- Top1_Votes|614\n- Top1_Comments|170\n- Top1_Author|Responsible-Lunch815\n- Top2_Title|Heat place literally every player on injury report after receiving NBA fine ahead of Mexico City game\n- Top2_Votes|1266\n- Top2_Comments|145\n- Top2_Author|XXmynameisNeganXX\n- Top3_Title|[ESPN] Announced attendance at the Alamodome tonight|68,323, a new single-game NBA record, in the Spurs' first game there since Game 4 of the 2002 Western Conference Semifinals.\n- Top3_Votes|1511\n- Top3_Comments|101\n- Top3_Author|dragon8811\n- Top4_Title|Phoenix Mercury confirm Brittney Griner’s return to WNBA\n- Top4_Votes|0\n- Top4_Comments|42\n- Top4_Author|rejs7\n- Top5_Title|Perspective | Kyrie Irving lit a flame. The NBA, top to bottom, watched the fire spread.\n- Top5_Votes|74\n- Top5_Comments|32\n- Top5_Author|tomyland\n- BCLetsRide69_Total_Posts|48"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/meta.json",
    "content": "{\n  \"task_id\": \"nba_statistics_analysis\",\n  \"task_name\": \"NBA Statistics Analysis\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Create sports analytics account, collect NBA player statistics from forum discussions, analyze basketball performance metrics, and compile comprehensive statistical report with community insights.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"user interaction\",\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/nba_statistics_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nfrom pathlib import Path\nfrom playwright.async_api import (\n    async_playwright,\n    TimeoutError as PlaywrightTimeoutError,\n)\n\n# 从环境变量读取 base_url，默认回退到本地\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\n\n\ndef parse_key_value_format(text):\n    \"\"\"\n    Parse the Key|Value format from the submission body.\n    This handles both the expected format from label.txt and the submission format.\n    \"\"\"\n    data = {}\n    \n    # Split by lines and parse each line\n    lines = text.strip().split('\\n')\n    for line in lines:\n        line = line.strip()\n        if not line or line.startswith('#'):\n            continue\n            \n        # Remove bullet point if present\n        if line.startswith('- '):\n            line = line[2:]\n        elif line.startswith('• '):\n            line = line[2:]\n            \n        # Parse pipe-separated format\n        if '|' in line:\n            parts = line.split('|', 1)\n            if len(parts) == 2:\n                key = parts[0].strip()\n                value = parts[1].strip()\n                if value and value != 'FILL_VALUE':\n                    data[key] = value\n    \n    return data\n\n\ndef normalize_text(text):\n    \"\"\"\n    Normalize text for comparison by handling different quote styles and whitespace.\n    \"\"\"\n    if not isinstance(text, str):\n        return str(text)\n\n    # Replace various quote styles with standard quotes\n    text = text.replace(\"\"\", \"'\").replace(\"\"\", \"'\")\n    text = text.replace('\"', '\"').replace('\"', '\"')\n    # Also normalize apostrophes - use unicode escapes to be safe\n    text = text.replace(\"\\u2019\", \"'\")  # RIGHT SINGLE QUOTATION MARK (')\n    text = text.replace(\"\\u2018\", \"'\")  # LEFT SINGLE QUOTATION MARK (')\n\n    # Normalize whitespace\n    text = \" \".join(text.split())\n\n    return text.strip()\n\n\nasync def 
verify() -> bool:\n    \"\"\"\n    Verifies that the NBA analysis task has been completed correctly.\n    \"\"\"\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            # Navigate to the main page\n            print(\"Navigating to forum...\", file=sys.stderr)\n            await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n\n            # Check if logged in as NBA_DataAnalyst_2024\n            user_button = page.locator('button:has-text(\"NBA_DataAnalyst_2024\")')\n            if not await user_button.count():\n                # Try to login\n                print(\"Not logged in, attempting to login...\", file=sys.stderr)\n\n                await page.click('a:has-text(\"Log in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                await page.fill('input[name=\"_username\"]', \"NBA_DataAnalyst_2024\")\n                await page.fill('input[name=\"_password\"]', \"Research#2024!\")\n\n                await page.click('button:has-text(\"Log in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                user_button = page.locator('button:has-text(\"NBA_DataAnalyst_2024\")')\n                if not await user_button.count():\n                    print(\n                        \"Error: Login failed for NBA_DataAnalyst_2024\", file=sys.stderr\n                    )\n                    return False\n\n                print(\"Successfully logged in as NBA_DataAnalyst_2024\", file=sys.stderr)\n            else:\n                print(\"Already logged in as NBA_DataAnalyst_2024\", file=sys.stderr)\n\n            # Navigate to sports forum to check submission\n            print(\"Navigating to sports forum to check submission...\", file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/f/sports\", 
wait_until=\"networkidle\"\n            )\n\n            # Look for the submission with our specific title\n            print(\n                \"Looking for submission 'Statistical Analysis: NBA Content Engagement on This Forum'...\",\n                file=sys.stderr,\n            )\n            post_link = page.locator(\n                'a:has-text(\"Statistical Analysis: NBA Content Engagement on This Forum\")'\n            )\n\n            if not await post_link.count():\n                print(\n                    \"Error: Could not find submission with required title\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Click on the submission to view its content\n            await post_link.first.click()\n            await page.wait_for_load_state(\"networkidle\")\n\n            # Extract the submission body content\n            # Try multiple possible selectors for the post body\n            post_content = None\n            selectors = [\n                \".submission__body\",\n                \".post-body\",\n                \".RichText\",\n                '[class*=\"RichText\"]',\n                'div:has(> p:has-text(\"Total_NBA_Posts\"))',\n                'div:has-text(\"Total_NBA_Posts\"):has-text(\"Most_Popular_NBA_Author\")',\n            ]\n\n            for selector in selectors:\n                content_element = page.locator(selector)\n                if await content_element.count():\n                    post_content = await content_element.first.inner_text()\n                    if \"Total_NBA_Posts\" in post_content:\n                        print(\n                            f\"Found submission content using selector: {selector}\",\n                            file=sys.stderr,\n                        )\n                        break\n\n            if not post_content or \"Total_NBA_Posts\" not in post_content:\n                print(\n                    \"Error: Could not find 
submission body with required format\",\n                    file=sys.stderr,\n                )\n                return False\n\n            print(\"Submission content found, parsing data...\", file=sys.stderr)\n            print(f\"Raw content: {post_content[:200]}...\", file=sys.stderr)\n\n            # Parse the Key: Value format\n            extracted_data = parse_key_value_format(post_content)\n            print(f\"Extracted data: {extracted_data}\", file=sys.stderr)\n\n            # Load expected values from label.txt\n            label_path = Path(__file__).parent / \"label.txt\"\n            if label_path.exists():\n                with open(label_path, \"r\") as f:\n                    expected_text = f.read().strip()\n                expected_data = parse_key_value_format(expected_text)\n                print(\"Loaded expected values from label.txt\", file=sys.stderr)\n\n            # Verify all required keys are present\n            required_keys = [\n                \"Total_NBA_Posts\",\n                \"Top1_Title\",\n                \"Top1_Votes\",\n                \"Top1_Comments\",\n                \"Top1_Author\",\n                \"Top2_Title\",\n                \"Top2_Votes\",\n                \"Top2_Comments\",\n                \"Top2_Author\",\n                \"Top3_Title\",\n                \"Top3_Votes\",\n                \"Top3_Comments\",\n                \"Top3_Author\",\n                \"Top4_Title\",\n                \"Top4_Votes\",\n                \"Top4_Comments\",\n                \"Top4_Author\",\n                \"Top5_Title\",\n                \"Top5_Votes\",\n                \"Top5_Comments\",\n                \"Top5_Author\",\n                \"BCLetsRide69_Total_Posts\",\n            ]\n\n            missing_keys = []\n            for key in required_keys:\n                if key not in extracted_data:\n                    missing_keys.append(key)\n\n            if missing_keys:\n                print(\n                    
f\"Error: Missing required keys: {', '.join(missing_keys)}\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # Validate data format and content\n            errors = []\n\n            # Check Total_NBA_Posts is a number and matches expected\n            try:\n                total_posts = int(extracted_data[\"Total_NBA_Posts\"])\n                if \"expected_data\" in locals() and \"Total_NBA_Posts\" in expected_data:\n                    expected_total = int(expected_data[\"Total_NBA_Posts\"])\n                    if total_posts != expected_total:\n                        errors.append(\n                            f\"Total_NBA_Posts mismatch: got {total_posts}, expected {expected_total}\"\n                        )\n                elif (\n                    total_posts < 5\n                ):  # Should be at least 5 since we're collecting top 5\n                    errors.append(f\"Total_NBA_Posts seems too low: {total_posts}\")\n            except ValueError:\n                errors.append(\n                    f\"Total_NBA_Posts must be a number, got: {extracted_data['Total_NBA_Posts']}\"\n                )\n\n            # If we have expected data, compare against it\n            if \"expected_data\" in locals():\n                # Compare each field\n                for key in required_keys:\n                    if key in expected_data and key in extracted_data:\n                        expected_val = normalize_text(expected_data[key])\n                        actual_val = normalize_text(extracted_data[key])\n\n                        # For numeric fields, compare as integers\n                        if (\n                            \"Votes\" in key\n                            or \"Comments\" in key\n                            or key == \"Total_NBA_Posts\"\n                            or key == \"BCLetsRide69_Total_Posts\"\n                        ):\n                            try:\n               
                 expected_int = int(expected_val)\n                                actual_int = int(actual_val)\n                                if expected_int != actual_int:\n                                    errors.append(\n                                        f\"{key} mismatch: got {actual_int}, expected {expected_int}\"\n                                    )\n                            except ValueError:\n                                errors.append(\n                                    f\"{key} should be numeric: got '{actual_val}'\"\n                                )\n                        else:\n                            # For text fields, compare normalized text\n                            if expected_val != actual_val:\n                                errors.append(\n                                    f\"{key} mismatch: got '{actual_val}', expected '{expected_val}'\"\n                                )\n\n            else:\n                # If no expected data, just do basic validation\n                for key in required_keys:\n                    if key not in extracted_data:\n                        errors.append(f\"Missing required key: {key}\")\n                    elif (\n                        not extracted_data[key] or extracted_data[key] == \"[FILL_VALUE]\"\n                    ):\n                        errors.append(f\"{key} was not filled in\")\n\n            if errors:\n                print(\n                    \"Error: Validation failed with the following issues:\",\n                    file=sys.stderr,\n                )\n                for error in errors:\n                    print(f\"  - {error}\", file=sys.stderr)\n                return False\n\n            # All checks passed\n            print(\"Success: NBA analysis task completed successfully.\")\n            print(\"- Account NBA_DataAnalyst_2024 verified\")\n            print(\n                \"- Submission 'Statistical Analysis: NBA Content Engagement on This 
Forum' found\"\n            )\n            print(\n                f\"- Total NBA-related posts analyzed: {extracted_data['Total_NBA_Posts']}\"\n            )\n            print(\"- Top 5 posts identified and documented\")\n            print(\n                f\"- BCLetsRide69's total posts: {extracted_data['BCLetsRide69_Total_Posts']}\"\n            )\n            print(\"- All data in correct Key|Value format\")\n            return True\n\n        except PlaywrightTimeoutError as e:\n            print(f\"Error: Timeout occurred - {str(e)}\", file=sys.stderr)\n            return False\n        except Exception as e:\n            print(f\"Error: Unexpected error - {str(e)}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/routine_tracker_forum/description.md",
    "content": "Create a daily routine tracking system on the Postmill forum platform by setting up a new account and engaging with productivity-related content in the LifeProTips community.\n\n**Task Requirements:**\n\n1. Create a new account with the exact username \"RoutineTracker2025\" and password \"DailyRoutine123!\"\n\n2. Locate the following specific post:\n   - Title: \"LPT: Use your calendar as your to-do list. Assigning dedicated time to tasks increases the likelyhood of you acting upon it.\"\n   - Record the content of the most upvoted comment of this post\n\n3. Create a new submission in the LifeProTips forum with these exact specifications:\n   - Title: \"My 5-Step Morning Routine That Increased My Productivity by 200%\"\n   - Body content must be the content recorded in the above step\n\n4. After successfully posting, verify that your post appears in the LifeProTips forum listing\n\n5. Engage with the community by upvoting exactly these two posts:\n   - The calendar to-do list post you found in step 2\n   - The post titled \"LPT: clean your stovetop after using the oven. The heat loosens grime for easy removal\""
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/routine_tracker_forum/meta.json",
    "content": "{\n  \"task_id\": \"routine_tracker_forum\",\n  \"task_name\": \"Routine Tracker Forum\",\n  \"category_id\": \"reddit\",\n  \"category_name\": \"Reddit\",\n  \"description\": \"Establish productivity community by creating account, setting up routine tracking forum, implementing daily habit systems, and engaging members with structured productivity challenges and resources.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"user interaction\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/postmill.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/reddit/routine_tracker_forum/verify.py",
    "content": "import asyncio\nimport sys\nimport os\nfrom pathlib import Path\nfrom datetime import datetime\nfrom playwright.async_api import (\n    async_playwright,\n    TimeoutError as PlaywrightTimeoutError,\n)\n\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:9999\").rstrip(\"/\")\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the daily routine tracking setup has been completed correctly on the forum.\n    \"\"\"\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            # Step 1: Check if account can be logged in\n            print(\"Step 1: Verifying account login...\", file=sys.stderr)\n            await page.goto(f\"{BASE_URL}/\", wait_until=\"networkidle\")\n\n            # Check if already logged in\n            user_button = page.locator('button:has-text(\"RoutineTracker2025\")')\n            if not await user_button.count():\n                # Try to login\n                print(\"Not logged in, attempting to login...\", file=sys.stderr)\n\n                # Click login link\n                await page.click('a:has-text(\"Log in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                # Fill login form\n                await page.fill('input[name=\"_username\"]', \"RoutineTracker2025\")\n                await page.fill('input[name=\"_password\"]', \"DailyRoutine123!\")\n\n                # Submit login form\n                await page.click('button:has-text(\"Log in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                # Check if login successful\n                user_button = page.locator('button:has-text(\"RoutineTracker2025\")')\n                if not await user_button.count():\n                    print(\"Error: Account login failed for RoutineTracker2025\", file=sys.stderr)\n                 
   return False\n\n                print(\"✓ Account login successful\", file=sys.stderr)\n            else:\n                print(\"✓ Already logged in as RoutineTracker2025\", file=sys.stderr)\n\n            # Step 2: Check if the post exists in LifeProTips forum with correct content\n            print(\"Step 2: Verifying post in LifeProTips forum...\", file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/f/LifeProTips\", wait_until=\"networkidle\"\n            )\n\n            # Check for the created post\n            expected_title = \"My 5-Step Morning Routine That Increased My Productivity by 200%\"\n            post_link = page.locator(f'a:has-text(\"{expected_title}\")')\n            \n            if not await post_link.count():\n                print(f\"Error: Post with title '{expected_title}' not found in LifeProTips forum\", file=sys.stderr)\n                return False\n\n            # Click on the post to verify content\n            await post_link.click()\n            await page.wait_for_load_state(\"networkidle\")\n\n            # Verify post content - this should be the content from the most upvoted comment of the calendar post\n            expected_content = \"As a college student, having a visible reminder of the assignments I have and when they are due is super helpful for me. 
It also just feels good to erase them from the board once they are completed.\"\n\n            # Check if the content exists in the page\n            content_found = False\n            article_content = await page.locator(\"article\").text_content()\n            if article_content and expected_content in article_content:\n                content_found = True\n\n            if not content_found:\n                print(f\"Error: Post content does not match expected content\", file=sys.stderr)\n                print(f\"Expected: {expected_content}\", file=sys.stderr)\n                return False\n\n            print(\"✓ Post found in LifeProTips with correct title and content\", file=sys.stderr)\n\n            # Step 3: Check upvotes via search\n            print(\"Step 3: Verifying upvotes on posts...\", file=sys.stderr)\n            \n            # Check first post upvote\n            search_url1 = f\"{BASE_URL}/search?q=LPT%3A+Use+your+calendar+as+your+to-do+list.+Assigning+dedicated+time+to+tasks+increases+the+likelyhood+of+you+acting+upon+it.\"\n            await page.goto(search_url1, wait_until=\"networkidle\")\n            \n            # Find the post and check its upvote count\n            posts = await page.locator(\"article\").all()\n            calendar_upvoted = False\n            \n            for post in posts:\n                title_elem = post.locator(\"h1 a\")\n                if await title_elem.count():\n                    title = await title_elem.text_content()\n                    if \"Use your calendar as your to-do list\" in title:\n                        # Check upvote count\n                        vote_count_elem = post.locator(\"span.vote__net-score\")\n                        if await vote_count_elem.count():\n                            vote_count = await vote_count_elem.text_content()\n                            if vote_count and vote_count.strip() == \"1\":\n                                calendar_upvoted = True\n                  
              print(\"✓ Calendar post upvoted (count: 1)\", file=sys.stderr)\n                                break\n            \n            if not calendar_upvoted:\n                print(\"Error: Calendar post not upvoted or upvote count is not 1\", file=sys.stderr)\n                return False\n\n            # Check second post upvote\n            search_url2 = f\"{BASE_URL}/search?q=LPT%3A+clean+your+stovetop+after+using+the+oven.+The+heat+loosens+grime+for+easy+removal\"\n            await page.goto(search_url2, wait_until=\"networkidle\")\n            \n            posts = await page.locator(\"article\").all()\n            stovetop_upvoted = False\n            \n            for post in posts:\n                title_elem = post.locator(\"h1 a\")\n                if await title_elem.count():\n                    title = await title_elem.text_content()\n                    if \"clean your stovetop after using the oven\" in title:\n                        # Check upvote count\n                        vote_count_elem = post.locator(\"span.vote__net-score\")\n                        if await vote_count_elem.count():\n                            vote_count = await vote_count_elem.text_content()\n                            if vote_count and vote_count.strip() == \"1\":\n                                stovetop_upvoted = True\n                                print(\"✓ Stovetop post upvoted (count: 1)\", file=sys.stderr)\n                                break\n            \n            if not stovetop_upvoted:\n                print(\"Error: Stovetop post not upvoted or upvote count is not 1\", file=sys.stderr)\n                return False\n\n            print(\"Success: All verification steps passed!\")\n            return True\n\n        except PlaywrightTimeoutError as e:\n            print(f\"Error: Timeout occurred - {str(e)}\", file=sys.stderr)\n            return False\n        except Exception as e:\n            print(f\"Error: Unexpected error - 
{str(e)}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/advanced_product_analysis/description.md",
    "content": "\n\n**Task Requirements:**\n\n1. Search for products with 'Ginger' in the Product Name field and price range $50.00 to $100.00\n\n2. Add Q Mixers Premium Ginger Ale product to the comparison list\n\n3. Find Intel NUC Kit product in Electronics category and add it to the comparison list\n\n4. From the comparison page:\n   - Record SKU numbers for both products\n   - Add all products to cart\n\n5. Record the total cart value\n\n6. On the Ginger Ale product detail page, record:\n   - Number of customer reviews\n   - Name of the most recent reviewer (on top of the first page)\n\n7. Output your findings in this format:\n\n```\n<answer>\nGingerAleSKU|sku\nIntelNUCSKU|sku\nCartTotal|amount\nReviewCount|count\nLatestReviewer|name\n</answer>\n```\n\n**Example Output:**\n```\n<answer>\nGingerAleSKU|XXXXXXXXX\nIntelNUCSKU|XXXXXXXXX\nCartTotal|$XXX.XX\nReviewCount|XX\nLatestReviewer|name\n</answer>\n```\n\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/advanced_product_analysis/label.txt",
    "content": "GingerAleSKU|B071KC37VD\nIntelNUCSKU|B01DJ9XID4\nCartTotal|$668.49\nReviewCount|12\nLatestReviewer|jwm"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/advanced_product_analysis/meta.json",
    "content": "{\n  \"task_id\": \"advanced_product_analysis\",\n  \"task_name\": \"Advanced Product Analysis\",\n  \"category_id\": \"shopping\",\n  \"category_name\": \"Shopping\",\n  \"description\": \"Perform comprehensive product analysis including feature comparisons, price tracking, review aggregation, customer sentiment analysis, and generate detailed recommendation reports for informed purchasing decisions.\",\n  \"author\": \"Yaoqi Ye\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/one-stop-market.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/advanced_product_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>xxx</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n\n    answer_content = match.group(1).strip()\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n\n    if len(lines) != 5:\n        print(f\"Error: Expected 5 lines in answer, got {len(lines)}\", file=sys.stderr)\n        return None\n\n    for line in lines:\n        if \"|\" 
in line:\n            key, value = line.split(\"|\", 1)\n            result[key.strip()] = value.strip()\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key == \"GingerAleSKU\":\n            # Check exact SKU match\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"IntelNUCSKU\":\n            # Check exact SKU match\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"CartTotal\":\n            # For price fields, only support $XX.XX format\n            # Check if model value has correct format\n            if not model_value.startswith(\"$\"):\n                
mismatches.append(\n                    f\"{key}: incorrect format - expected '$XX.XX' format, got '{model_value}'\"\n                )\n            else:\n                # Normalize and compare values\n                expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n                model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n                if expected_clean != model_clean:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"ReviewCount\":\n            # Check review count matches\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"LatestReviewer\":\n            # Check reviewer name (allow partial match for names)\n            if expected_value.lower() not in model_value.lower() and model_value.lower() not in expected_value.lower():\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the advanced product analysis task has been completed correctly.\n    First checks the model's 
answer against the expected label.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n\n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n\n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\n                \"Warning: Could not parse answer format from model response\",\n                file=sys.stderr,\n            )\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/description.md",
    "content": "\n\n**Task Requirements:**\n\n1. In Video Games category, count products with customer rating 70% or higher in the first 2 pages\n\n2. Sort products by price (ascending) and identify the cheapest product that has customer reviews\n\n3. Find product with SKU 'B07D6LSCXZ' (N64 Controller), add to cart with quantity 3\n\n4. Add products with SKU 'B071DR5V1K' and 'B082LZ4451' to comparison list, then count total products on comparison page\n\n5. In cart, update N64 Controller quantity to 5 and record the subtotal for this item\n\n6. Proceed to checkout and fill shipping form:\n   - Email: test.buyer@example.com\n   - First Name: Alice\n   - Last Name: Johnson\n   - Street Address: 456 Oak Avenue\n   - Country: United States\n   - State/Province: California\n   - City: San Francisco\n   - Zip Code: 94102\n   - Phone: 415-555-0123\n   Then count available shipping methods\n\n7. Output your findings in this format:\n\n```\n<answer>\nProducts70Plus|count\nCheapestReviewedSKU|sku\nCheapestReviewedPrice|price\nComparisonCount|count\nN64Subtotal|amount\nCheckoutEmail|test.buyer@example.com\nShippingState|California\nShippingMethods|count\n</answer>\n```\n\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/label.txt",
    "content": "Products70Plus|7\nCheapestReviewedSKU|B014HDAUAA\nCheapestReviewedPrice|$0.99\nComparisonCount|2\nN64Subtotal|$84.95\nCheckoutEmail|test.buyer@example.com\nShippingState|California\nShippingMethods|1"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/meta.json",
    "content": "{\n  \"task_id\": \"gaming_accessories_analysis\",\n  \"task_name\": \"Gaming Accessories Analysis\",\n  \"category_id\": \"shopping\",\n  \"category_name\": \"Shopping\",\n  \"description\": \"Research gaming peripherals by analyzing technical specifications, comparing performance metrics, evaluating user reviews, tracking price trends, and creating detailed gaming accessory recommendations.\",\n  \"author\": \"Yaoqi Ye\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"search aggregation\",\n    \"comparative analysis\",\n    \"data extraction\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/one-stop-market.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/gaming_accessories_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n                and message.get(\"type\") == \"message\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n\n    answer_content = match.group(1).strip()\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n\n    if len(lines) != 8:\n        print(f\"Error: Expected 8 lines in answer, got {len(lines)}\", file=sys.stderr)\n       
 return None\n\n    for line in lines:\n        if \"|\" in line:\n            key, value = line.split(\"|\", 1)\n            result[key.strip()] = value.strip()\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key in [\"CheapestReviewedPrice\", \"N64Subtotal\"]:\n            # For price fields, only support $XX.XX format\n            # Check if model value has correct format\n            if not model_value.startswith(\"$\"):\n                mismatches.append(\n                    f\"{key}: incorrect format - expected '$XX.XX' format, got '{model_value}'\"\n                )\n            else:\n                # Normalize and compare values\n                expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n                model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n                if expected_clean != model_clean:\n                    
mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"CheckoutEmail\":\n            # Email should match exactly (case-insensitive)\n            if model_value.lower() != expected_value.lower():\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"Products70Plus\":\n            # For count fields, allow some flexibility (products might change)\n            # But still check if it's a reasonable number\n            try:\n                model_count = int(model_value)\n                expected_count = int(expected_value)\n                # Allow up to 2 products difference (in case of dynamic content)\n                if abs(model_count - expected_count) > 2:\n                    mismatches.append(\n                        f\"{key}: expected around '{expected_value}', got '{model_value}'\"\n                    )\n            except ValueError:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the gaming accessories analysis task has been completed correctly.\n    Checks the model's answer against the 
expected label.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n\n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n\n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\n                \"Warning: Could not parse answer format from model response\",\n                file=sys.stderr,\n            )\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/health_routine_optimization/description.md",
    "content": "\n\n## Task Requirements\n\n1. Search for products with `vitamin` in Description and price range `$0.00` to `$99.99`. Record total search results count.\n\n2. In \"Health & Household\" category with price filter `$0.00 - $99.99`:\n   - Add \"LOOPACELL AG13 LR44 L1154 357 76A A76 Button Cell Battery 10 Pack\" to comparison\n   - Add \"Energizer MAX C Batteries, Premium Alkaline C Cell Batteries (8 Battery Count)\" to comparison\n   - Record each battery's price\n   - Verify comparison list has 2 items\n\n3. Search `Elmwood Inn Fine Teas`, find \"Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch\":\n   - Record SKU, rating percentage, and review count\n   - Add to cart with quantity 2\n\n4. Search `energy`, sort by Relevance (descending):\n   - Find \"V8 +Energy, Healthy Energy Drink, Steady Energy from Black and Green Tea, Pomegranate Blueberry, 8 Ounce Can ,Pack of 24\"\n   - Record its position (1st, 2nd, 3rd, etc.)\n   - Add to cart with quantity 1\n\n5. In cart:\n   - Record unique products count, total quantity, and subtotal\n   - Then update Elmwood tea quantity to 3\n   - Record new subtotal\n\n## Output Format\n\n```\n<answer>\nAdvancedSearchResults|XXXX\nBattery1Name|LOOPACELL AG13 LR44\nBattery1Price|$X.XX\nBattery2Name|Energizer MAX C\nBattery2Price|$XX.XX\nComparisonCount|X\nTeaSKU|XXXXXXXXXX\nTeaRating|XXX%\nTeaReviews|X\nV8Position|Xth\nCartUniqueProducts|X\nCartTotalQuantity|X\nInitialSubtotal|$XX.XX\nFinalSubtotal|$XX.XX\n</answer>\n```\n\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/health_routine_optimization/label.txt",
    "content": "<answer>\nAdvancedSearchResults|2906\nBattery1Name|LOOPACELL AG13 LR44\nBattery1Price|$3.72\nBattery2Name|Energizer MAX C\nBattery2Price|$14.87\nComparisonCount|2\nTeaSKU|B0040WHKIY\nTeaRating|95%\nTeaReviews|4\nV8Position|3rd\nCartUniqueProducts|2\nCartTotalQuantity|3\nInitialSubtotal|$53.19\nFinalSubtotal|$72.55\n</answer>"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/health_routine_optimization/meta.json",
    "content": "{\n  \"task_id\": \"health_routine_optimization\",\n  \"task_name\": \"Health Routine Optimization\",\n  \"category_id\": \"shopping\",\n  \"category_name\": \"Shopping\",\n  \"description\": \"Optimize health and wellness product selections by analyzing nutritional supplements, fitness equipment, creating personalized routines, and tracking health metrics for lifestyle improvements.\",\n  \"author\": \"Yaoqi Ye\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/one-stop-market.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/health_routine_optimization/verify.py",
    "content": "\nimport asyncio\nimport sys\nimport os\nimport json\nimport re\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n                and message.get(\"type\") == \"message\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n\n    answer_content = match.group(1).strip()\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n\n    if len(lines) != 14:\n        print(f\"Error: Expected 14 lines in answer, got {len(lines)}\", file=sys.stderr)\n     
   return None\n\n    for line in lines:\n        if \"|\" in line:\n            key, value = line.split(\"|\", 1)\n            result[key.strip()] = value.strip()\n\n    return result\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            content = f.read().strip()\n\n        # Parse the answer from the label file\n        # The label file contains <answer>...</answer> tags\n        match = re.search(r\"<answer>(.*?)</answer>\", content, re.IGNORECASE | re.DOTALL)\n        if match:\n            answer_content = match.group(1).strip()\n            lines = answer_content.split(\"\\n\")\n        else:\n            # Fallback: treat the whole file as answer content\n            lines = content.split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key in [\"Battery1Price\", \"Battery2Price\", \"InitialSubtotal\", \"FinalSubtotal\"]:\n            # For price fields, only support $XX.XX format\n            # Check if model value has correct format\n            if not model_value.startswith(\"$\"):\n  
              mismatches.append(\n                    f\"{key}: incorrect format - expected '$XX.XX' format, got '{model_value}'\"\n                )\n            else:\n                # Normalize and compare values\n                expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n                model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n                if expected_clean != model_clean:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the health routine optimization task has been completed correctly.\n    Checks the model's answer against the expected label.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n\n        
if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n\n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\n                \"Warning: Could not parse answer format from model response\",\n                file=sys.stderr,\n            )\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/holiday_baking_competition/description.md",
    "content": "\n\n**Task Requirements:**\n\n1. Search 'gingerbread', sort by price (high to low):\n   - Add most expensive product to comparison list\n   - Record SKU of second most expensive product\n\n2. Search 'cookie' with price range $20.00-$40.00:\n   - Find product with highest rating % and at least 5 reviews in the first 2 pages (if tied, choose lowest price)\n   - Record SKU and rating %\n   - Select \"Cookies: Oatmeal Chocolate Chunk\" flavor if required\n   - Add to cart with quantity 2\n\n3. Search 'chocolate', sort by price (low to high):\n   - Find cheapest product with at least 1 review\n   - Record price and review count\n   - Select \"Peanut Butter Flavor\" if required\n   - Add to cart with quantity 3\n\n4. In cart:\n   - Update cookie quantity from 2 to 5\n   - Record cart subtotal and total items count\n\n5. Search 'gingerbread', go to page 2:\n   - Find third product on page 2\n   - Record SKU, price, and manufacturer\n\n**Output Format:**\n\n```\n<answer>\nSecondGingerbreadSKU|sku\nHighestRatedCookieSKURating|sku:rating%\nCheapestChocolatePriceReviews|$price:reviews\nCartSubtotalAfterUpdate|$amount\nTotalCartItems|count\nPage2ThirdProductSKUPrice|sku:$price\nProductManufacturer|manufacturer\n</answer>\n```\n\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/holiday_baking_competition/label.txt",
    "content": "SecondGingerbreadSKU|B0075AO9RI\nHighestRatedCookieSKURating|B0951CPYV7:86%\nCheapestChocolatePriceReviews|$1.04:12\nCartSubtotalAfterUpdate|$128.07\nTotalCartItems|8\nPage2ThirdProductSKUPrice|B09RPXCB47:$21.99\nProductManufacturer|That Melanin Tho"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/holiday_baking_competition/meta.json",
    "content": "{\n  \"task_id\": \"holiday_baking_competition\",\n  \"task_name\": \"Holiday Baking Competition\",\n  \"category_id\": \"shopping\",\n  \"category_name\": \"Shopping\",\n  \"description\": \"Research baking supplies for competition preparation including ingredient quality analysis, equipment comparisons, recipe optimization, and creating comprehensive shopping list with budget recommendations.\",\n  \"author\": \"Yaoqi Ye\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"search aggregation\",\n    \"comparative analysis\",\n    \"inventory management\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/one-stop-market.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/holiday_baking_competition/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n                and message.get(\"type\") == \"message\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n\n    answer_content = match.group(1).strip()\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n\n    if len(lines) != 7:\n        print(f\"Error: Expected 7 lines in answer, got {len(lines)}\", file=sys.stderr)\n       
 return None\n\n    for line in lines:\n        if \"|\" in line:\n            key, value = line.split(\"|\", 1)\n            result[key.strip()] = value.strip()\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key == \"SecondGingerbreadSKU\":\n            # SKU should match exactly (case-insensitive)\n            if model_value.upper() != expected_value.upper():\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n                \n        elif key in [\"CartSubtotalAfterUpdate\"]:\n            # For price fields, only support $XX.XX format\n            # Check if model value has correct format\n            if not model_value.startswith(\"$\"):\n                mismatches.append(\n                    f\"{key}: incorrect format - expected '$XX.XX' format, got '{model_value}'\"\n                )\n        
    else:\n                # Normalize and compare values\n                expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n                model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n                # Allow some tolerance for price calculations (within $0.01)\n                try:\n                    expected_float = float(expected_clean)\n                    model_float = float(model_clean)\n                    if abs(expected_float - model_float) > 0.01:\n                        mismatches.append(\n                            f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                        )\n                except ValueError:\n                    if expected_value != model_value:\n                        mismatches.append(\n                            f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                        )\n                    \n        elif key in [\"TotalCartItems\"]:\n            # Should be a number\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n                \n        elif key in [\"HighestRatedCookieSKURating\", \"CheapestChocolatePriceReviews\", \"Page2ThirdProductSKUPrice\"]:\n            # Colon-separated fields (sku:rating, price:reviews, sku:price)\n            if \":\" in expected_value and \":\" in model_value:\n                expected_parts = expected_value.split(\":\", 1)\n                model_parts = model_value.split(\":\", 1)\n                if len(expected_parts) == 2 and len(model_parts) == 2:\n                    # For price fields, normalize the price part\n                    if key == \"CheapestChocolatePriceReviews\":\n                        # Check if price part has correct format ($XX.XX)\n                        if not model_parts[0].startswith(\"$\"):\n                            
mismatches.append(\n                                f\"{key}: incorrect format - price part should start with '$', got '{model_value}'\"\n                            )\n                        else:\n                            expected_price = expected_parts[0].replace(\"$\", \"\").replace(\",\", \"\")\n                            model_price = model_parts[0].replace(\"$\", \"\").replace(\",\", \"\")\n                            try:\n                                if abs(float(expected_price) - float(model_price)) > 0.01 or expected_parts[1] != model_parts[1]:\n                                    mismatches.append(\n                                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                                    )\n                            except ValueError:\n                                if expected_value != model_value:\n                                    mismatches.append(\n                                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                                    )\n                    elif key == \"Page2ThirdProductSKUPrice\":\n                        # Check if price part has correct format ($XX.XX)\n                        if not model_parts[1].startswith(\"$\"):\n                            mismatches.append(\n                                f\"{key}: incorrect format - price part should start with '$', got '{model_value}'\"\n                            )\n                        else:\n                            expected_price = expected_parts[1].replace(\"$\", \"\").replace(\",\", \"\")\n                            model_price = model_parts[1].replace(\"$\", \"\").replace(\",\", \"\")\n                            try:\n                                if expected_parts[0] != model_parts[0] or abs(float(expected_price) - float(model_price)) > 0.01:\n                                    mismatches.append(\n                                        f\"{key}: expected 
'{expected_value}', got '{model_value}'\"\n                                    )\n                            except ValueError:\n                                if expected_value != model_value:\n                                    mismatches.append(\n                                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                                    )\n                    else:\n                        # For rating fields, exact match\n                        if expected_value != model_value:\n                            mismatches.append(\n                                f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                            )\n                else:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n            else:\n                if expected_value != model_value:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n        else:\n            # Exact match for other fields (like ProductManufacturer)\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the holiday baking competition task has been completed correctly.\n    Checks the model's answer against the expected label.\n    \"\"\"\n    # Get the label file path\n    label_path = 
Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n\n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n\n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\n                \"Warning: Could not parse answer format from model response\",\n                file=sys.stderr,\n            )\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/description.md",
    "content": "Complete the following tasks on One Stop Market (http://localhost:7770):\n\n**Task Requirements:**\n\n1. In Chocolate subcategory, sort by price (ascending):\n   - Record price and SKU of first 3 products\n\n2. Search for 'tabletop' with price range $100.00-$200.00:\n   - Find the cheapest tabletop that has the highest review rating with at least 3 reviews.\n   - Record search results count\n   - Record price of required tabletop\n\n3. In \"Computers & Accessories\" subcategory with price filter $0.00-$9,999.99:\n   - Sort by price (ascending)\n   - Record price of cheapest item\n\n4. Add these products to comparison:\n   - \"Little Secrets Chocolate Pieces, Peanut Butter Flavor\"\n   - \"Multi Accessory Hub Adapter By JOBY\"\n   - \"SanDisk Cruzer Glide 32GB (5 Pack) USB 2.0 Flash Drive\"\n   - Count total items on comparison page\n\n5. In cart:\n   - Add the cheapest chocolate product (from step 1) with \"Peanut flavor\" if available\n   - Add cheapest computer accessory (from step 3)\n   - Record cart subtotal and item count\n\n6. Calculate:\n   - Sum of 3 chocolate product prices\n   - Price difference: cheapest tabletop minus cheapest computer accessory\n   - Whether sum of 3 comparison items < $60\n\n**Output Format:**\n\n```\n<answer>\nchocolate_products|Price1:SKU1;Price2:SKU2;Price3:SKU3\nchocolate_sum|Total\ntabletop_search_count|Count\ntabletop_product|Price:SKU\ntabletop_reviews|NumbersOfReviews:Rating\ncheapest_computer_accessory|Price\nprice_difference|Amount\ncomparison_count|Count\ncart_subtotal|Amount\ncart_item_count|Count\nunder_60_budget|YES/NO\n</answer>\n```\n\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/label.txt",
    "content": "chocolate_products|$1.04:B071954ZDC;$1.89:B07ND598N3;$2.50:B01G26DMSC\nchocolate_sum|$5.43\ntabletop_search_count|60\ntabletop_product|$169.99:B09NPX5CDP\ntabletop_reviews|4:95%\ncheapest_computer_accessory|$1.17\nprice_difference|$168.82\ncomparison_count|3\ncart_subtotal|$2.21\ncart_item_count|2\nunder_60_budget|YES"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/meta.json",
    "content": "{\n  \"task_id\": \"multi_category_budget_analysis\",\n  \"task_name\": \"Multi Category Budget Analysis\",\n  \"category_id\": \"shopping\",\n  \"category_name\": \"Shopping\",\n  \"description\": \"Analyze spending patterns across multiple product categories, optimize budget allocation, identify cost-saving opportunities, and generate comprehensive financial planning report with purchase recommendations.\",\n  \"author\": \"Yaoqi Ye\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"search aggregation\",\n    \"content submission\",\n    \"comparative analysis\",\n    \"inventory management\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/one-stop-market.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/multi_category_budget_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n                and message.get(\"type\") == \"message\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n\n    answer_content = match.group(1).strip()\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n\n    if len(lines) != 11:\n        print(f\"Error: Expected 11 lines in answer, got {len(lines)}\", file=sys.stderr)\n     
   return None\n\n    for line in lines:\n        if \"|\" in line:\n            key, value = line.split(\"|\", 1)\n            result[key.strip()] = value.strip()\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key == \"chocolate_products\":\n            # Parse and compare chocolate products with price:SKU format\n            expected_products = expected_value.split(\";\")\n            model_products = model_value.split(\";\")\n            \n            if len(expected_products) != len(model_products):\n                mismatches.append(f\"{key}: expected {len(expected_products)} products, got {len(model_products)}\")\n            else:\n                for i, (exp, mod) in enumerate(zip(expected_products, model_products)):\n                    exp_parts = exp.strip().split(\":\")\n                    mod_parts = mod.strip().split(\":\")\n                    if len(exp_parts) != 2 or 
len(mod_parts) != 2:\n                        mismatches.append(f\"{key}: product {i+1} format error - expected 'price:SKU'\")\n                    else:\n                        # Check price format (should start with $)\n                        if not mod_parts[0].startswith(\"$\"):\n                            mismatches.append(f\"{key}: product {i+1} price format error - expected '$XX.XX' format, got '{mod_parts[0]}'\")\n                        elif exp_parts[0] != mod_parts[0] or exp_parts[1] != mod_parts[1]:\n                            mismatches.append(f\"{key}: product {i+1} mismatch - expected '{exp}', got '{mod}'\")\n\n        elif key == \"tabletop_product\":\n            # Parse and compare tabletop product with price:SKU format\n            exp_parts = expected_value.strip().split(\":\")\n            mod_parts = model_value.strip().split(\":\")\n            if len(exp_parts) != 2 or len(mod_parts) != 2:\n                mismatches.append(f\"{key}: format error - expected 'price:SKU', got '{model_value}'\")\n            else:\n                # Check price format (should start with $)\n                if not mod_parts[0].startswith(\"$\"):\n                    mismatches.append(f\"{key}: price format error - expected '$XX.XX' format, got '{mod_parts[0]}'\")\n                elif exp_parts[0] != mod_parts[0] or exp_parts[1] != mod_parts[1]:\n                    mismatches.append(f\"{key}: mismatch - expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == \"tabletop_reviews\":\n            # Parse and compare tabletop reviews with NumberOfReviews:Rating format\n            exp_parts = expected_value.strip().split(\":\")\n            mod_parts = model_value.strip().split(\":\")\n            if len(exp_parts) != 2 or len(mod_parts) != 2:\n                mismatches.append(f\"{key}: format error - expected 'NumberOfReviews:Rating', got '{model_value}'\")\n            else:\n                # Check if both parts match\n             
   if exp_parts[0] != mod_parts[0] or exp_parts[1] != mod_parts[1]:\n                    mismatches.append(f\"{key}: mismatch - expected '{expected_value}', got '{model_value}'\")\n\n        elif key in [\"chocolate_sum\", \"price_difference\", \"cart_subtotal\", \"cheapest_computer_accessory\"]:\n            # For price fields, only support $XX.XX format\n            # Check if model value has correct format\n            if not model_value.startswith(\"$\"):\n                mismatches.append(\n                    f\"{key}: incorrect format - expected '$XX.XX' format, got '{model_value}'\"\n                )\n            else:\n                # Normalize and compare values\n                expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n                model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n                if expected_clean != model_clean:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"under_60_budget\":\n            # Compare YES/NO value (case-insensitive)\n            if expected_value.upper() != model_value.upper():\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n\n        elif key in [\"tabletop_search_count\", \"comparison_count\", \"cart_item_count\"]:\n            # Numeric fields - exact match\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for 
mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the multi-category budget analysis task has been completed correctly.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n    \n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n        \n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n            \n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\"Warning: Could not parse answer format from model response\", file=sys.stderr)\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if 
result else 1)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/printer_keyboard_search/description.md",
    "content": "\n\n1. Search for a `printer capable of reducing blue light` that:\n   - Is pink or purple (must be stated in product details, not from image)\n   - Manufactured in Asia\n   Record SKU ID and price\n\n2. Find a keyboard with:\n   - Bluetooth mode (must be stated either stated in details or title)\n   - Price between $50.00-$100.00\n   - Highest review rating among matching products\n   Record SKU ID, price, number of reviews, and review rating\n\n**Output Format:**\n\n```\n<answer>\nPrinterSKUID|id\nPrinterPrice|$XX.XX\nKeyboardSKUID|id\nKeyboardPrice|$XX.XX\nKeyboardReviews|XX\nKeyboardRating|XX%\n</answer>\n```\n\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/printer_keyboard_search/label.txt",
    "content": "PrinterSKUID|B09J8KQX6V\nPrinterPrice|$248.04\nKeyboardSKUID|B08JD7F3F5\nKeyboardPrice|$85.99\nKeyboardReviews|12\nKeyboardRating|77%"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/printer_keyboard_search/meta.json",
    "content": "{\n  \"task_id\": \"printer_keyboard_search\",\n  \"task_name\": \"Printer Keyboard Search\",\n  \"category_id\": \"shopping\",\n  \"category_name\": \"Shopping\",\n  \"description\": \"Search and evaluate office equipment by comparing printer specifications, keyboard ergonomics, analyzing user reviews, tracking prices, and generating detailed purchase recommendations report.\",\n  \"author\": \"Yaoqi Ye\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"search aggregation\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/one-stop-market.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/printer_keyboard_search/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n                and message.get(\"type\") == \"message\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n\n    answer_content = match.group(1).strip()\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n\n    if len(lines) != 6:\n        print(f\"Error: Expected 6 lines in answer, got {len(lines)}\", file=sys.stderr)\n       
 return None\n\n    for line in lines:\n        if \"|\" in line:\n            key, value = line.split(\"|\", 1)\n            result[key.strip()] = value.strip()\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key in [\"PrinterPrice\", \"KeyboardPrice\"]:\n            # For price fields, only support $XX.XX format\n            # Check if model value has correct format\n            if not model_value.startswith(\"$\"):\n                mismatches.append(\n                    f\"{key}: incorrect format - expected '$XX.XX' format, got '{model_value}'\"\n                )\n            else:\n                # Normalize and compare values\n                expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n                model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n                if expected_clean != model_clean:\n                    
mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key in [\"PrinterSKUID\", \"KeyboardSKUID\"]:\n            # SKU should match exactly (case-insensitive)\n            if model_value.upper() != expected_value.upper():\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"KeyboardReviews\":\n            # Number of reviews should match exactly\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"KeyboardRating\":\n            # Rating should match exactly (including % sign)\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the electronic products task has been completed correctly.\n    Checks the model's answer against the expected label.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    expected_answer = 
load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n\n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n\n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\n                \"Warning: Could not parse answer format from model response\",\n                file=sys.stderr,\n            )\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/running_shoes_purchase/description.md",
    "content": "\n\n1. Find running shoes:\n   - Price between $50.00-$60.00\n   - \"running shoe\" must appear in product name\n   - Choose the one with highest number of reviews\n   - Select black or white color, size 10\n   - Add to cart with quantity 2\n\n2. Record from product page: SKU ID, price, number of reviews, review rating\n\n3. Record cart subtotal\n\n**Output Format:**\n\n```\n<answer>\nSKUID|id\nPrice|$XX.XX\nNumberOfReviews|XX\nReviewRating|XX%\nSubtotal|$XX.XX\n</answer>\n```\n\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/running_shoes_purchase/label.txt",
    "content": "SKUID|B08KKX1WXQ\nPrice|$56.21\nNumberOfReviews|46\nReviewRating|86%\nSubtotal|$112.42"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/running_shoes_purchase/meta.json",
    "content": "{\n  \"task_id\": \"running_shoes_purchase\",\n  \"task_name\": \"Running Shoes Purchase\",\n  \"category_id\": \"shopping\",\n  \"category_name\": \"Shopping\",\n  \"description\": \"Research running footwear by analyzing biomechanical features, comparing cushioning technologies, evaluating durability ratings, considering user preferences, and recommending optimal shoe selections.\",\n  \"author\": \"Yaoqi Ye\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"search aggregation\",\n    \"comparative analysis\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/one-stop-market.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping/running_shoes_purchase/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n                and message.get(\"type\") == \"message\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n\n    answer_content = match.group(1).strip()\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n\n    if len(lines) != 5:\n        print(f\"Error: Expected 5 lines in answer, got {len(lines)}\", file=sys.stderr)\n       
 return None\n\n    for line in lines:\n        if \"|\" in line:\n            key, value = line.split(\"|\", 1)\n            result[key.strip()] = value.strip()\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key in [\"Price\", \"Subtotal\"]:\n            # For price fields, only support $XX.XX format\n            # Check if model value has correct format\n            if not model_value.startswith(\"$\"):\n                mismatches.append(\n                    f\"{key}: incorrect format - expected '$XX.XX' format, got '{model_value}'\"\n                )\n            else:\n                # Normalize and compare values\n                expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n                model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n                \n                # Allow small tolerance for price calculations (within 
$0.01)\n                try:\n                    expected_float = float(expected_clean)\n                    model_float = float(model_clean)\n                    if abs(expected_float - model_float) > 0.01:\n                        mismatches.append(\n                            f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                        )\n                except ValueError:\n                    if expected_clean != model_clean:\n                        mismatches.append(\n                            f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                        )\n\n        elif key == \"SKUID\":\n            # SKU should match exactly (case-insensitive)\n            if model_value.upper() != expected_value.upper():\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"NumberOfReviews\":\n            # Number of reviews should match exactly\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"ReviewRating\":\n            # Rating should match exactly (including % sign)\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", 
file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the running shoes shopping task has been completed correctly.\n    Checks the model's answer against the expected label.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n\n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n\n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\n                \"Warning: Could not parse answer format from model response\",\n                file=sys.stderr,\n            )\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/description.md",
    "content": "Perform customer segmentation setup and analysis in the Magento Admin panel to establish new customer groups and manage customer profiles.\n\n**Task Requirements:**\n\n1. Access the Magento Admin panel to begin customer segmentation setup. if need to login, login with username 'admin' and password 'admin1234'\n\n2. Establish baseline metrics for customer groups:\n   - Record the exact number shown in \"records found\" at the top of the grid\n   - This will be your initial groups count\n\n3. Create a specialized customer group for European premium customers:\n   - Group Name: Premium Europe\n   - Tax Class: Retail Customer\n   - Save the group\n\n4. Verify the customer group creation was successful:\n   - After saving, return to Customer Groups list\n   - Record the new total shown in \"records found\"\n\n5. Establish baseline metrics for all customers database:\n   - Record the exact number shown in \"records found\" at the top of the grid\n   - This will be your initial customers count\n\n6. Add a representative customer to the new premium group:\n   - Create a new customer with the following details:\n   - First Name: Isabella\n   - Last Name: Romano\n   - Email: isabella.romano@premium.eu\n   - Associate to Website: Main Website\n   - Group: The group you just created\n   - Save the customer\n\n7. Verify the customer creation was successful:\n   - After saving, return to All Customers list\n   - Record the new total shown in \"records found\"\n\n8. Analyze recent customer activity patterns:\n   - Navigate to Dashboard\n   - Look at the \"Last Orders\" section\n   - Record the customer name in the last row of the table\n\n9. 
Compile all your findings and output them in the following exact format:\n\n```\n<answer>\nInitialGroups|count\nFinalGroups|count  \nInitialCustomers|count\nFinalCustomers|count\nLastOrderCustomer|name\n</answer>\n```\n\n**Example Output:**\n```\n<answer>\nInitialGroups|XX\nFinalGroups|XX\nInitialCustomers|XXX\nFinalCustomers|XXX\nLastOrderCustomer|XXX\n</answer>\n```"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/label.txt",
    "content": "InitialGroups|4\nFinalGroups|5\nInitialCustomers|70\nFinalCustomers|71\nLastOrderCustomer|Ava Brown"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/meta.json",
    "content": "{\n  \"task_id\": \"customer_segmentation_setup\",\n  \"task_name\": \"Customer Segmentation Setup\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Configure customer segmentation system in admin panel by defining demographic criteria, creating behavior-based segments, implementing targeting rules, and setting up automated marketing workflows.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content submission\",\n    \"inventory management\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/customer_segmentation_setup/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\nfrom playwright.async_api import (\n    async_playwright,\n    TimeoutError as PlaywrightTimeoutError,\n)\n\n# 从环境变量读取 base_url（shopping_admin 会注入 http://localhost:7780/admin），默认回退到本地\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:7780/admin\").rstrip(\"/\")\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n\n    answer_content = 
match.group(1).strip()\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n\n    if len(lines) != 5:\n        print(f\"Error: Expected 5 lines in answer, got {len(lines)}\", file=sys.stderr)\n        return None\n\n    for line in lines:\n        if \"|\" in line:\n            key, value = line.split(\"|\", 1)\n            result[key.strip()] = value.strip()\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Exact match for all fields\n        if model_value != expected_value:\n            mismatches.append(\n                f\"{key}: expected '{expected_value}', got '{model_value}'\"\n            )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches 
the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the customer segmentation setup task has been completed correctly.\n    First checks the model's answer against the expected label,\n    then verifies the actual state in the Magento Admin.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n\n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n\n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n        else:\n            print(\n                \"Warning: Could not parse answer format from model response\",\n                file=sys.stderr,\n            )\n            print(\"Will proceed with browser verification only\", file=sys.stderr)\n    else:\n        print(\n            \"No model response found, proceeding with browser verification\",\n            file=sys.stderr,\n        )\n\n    # Browser verification for actual state\n    print(\"\\n=== Starting Browser Verification ===\", file=sys.stderr)\n    async with 
async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            # Navigate to Magento Admin\n            print(\"Navigating to Magento Admin...\", file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/\", wait_until=\"networkidle\"\n            )\n\n            # Check if already logged in, if not, login\n            if \"dashboard\" not in page.url.lower():\n                print(\"Logging into Magento Admin...\", file=sys.stderr)\n                await page.fill('input[name=\"login[username]\"]', \"admin\")\n                await page.fill('input[name=\"login[password]\"]', \"admin1234\")\n                await page.click('button:has-text(\"Sign in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                if \"dashboard\" not in page.url.lower():\n                    print(\"Error: Login failed\", file=sys.stderr)\n                    return False\n\n            print(\"Successfully logged into Magento Admin\", file=sys.stderr)\n\n            # 1. 
Verify Customer Groups\n            print(\"\\nVerifying Customer Groups...\", file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/customer/group/\",\n                wait_until=\"networkidle\",\n            )\n            await page.wait_for_timeout(2000)  # Wait for grid to load\n\n            # Check for Premium Europe group\n            premium_europe_exists = (\n                await page.locator(\"text=Premium Europe\").count() > 0\n            )\n            if premium_europe_exists:\n                print(\"✓ Found 'Premium Europe' customer group\", file=sys.stderr)\n\n                # Check if it has Retail Customer tax class\n                # Look for Premium Europe row and check its tax class\n                premium_row = page.locator('tr:has-text(\"Premium Europe\")')\n                if await premium_row.count() > 0:\n                    tax_class_text = await premium_row.locator(\"td\").nth(2).inner_text()\n                    if \"Retail Customer\" in tax_class_text:\n                        print(\n                            \"✓ Premium Europe has 'Retail Customer' tax class\",\n                            file=sys.stderr,\n                        )\n                    else:\n                        print(\n                            f\"Warning: Premium Europe tax class is '{tax_class_text}'\",\n                            file=sys.stderr,\n                        )\n            else:\n                print(\"✗ 'Premium Europe' customer group not found\", file=sys.stderr)\n                return False\n\n            # Check total groups count\n            records_found = page.locator(\"text=records found\").first\n            if await records_found.count() > 0:\n                count_text = await records_found.inner_text()\n                print(f\"Customer Groups count: {count_text}\", file=sys.stderr)\n\n                # Extract number\n                import re\n\n                match = 
re.search(r\"(\\d+)\\s+records found\", count_text)\n                if match:\n                    groups_count = int(match.group(1))\n                    print(f\"✓ Customer groups count is {groups_count}\", file=sys.stderr)\n\n            # 2. Verify Customer\n            print(\"\\nVerifying Customer Isabella Romano...\", file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/customer/index/\",\n                wait_until=\"networkidle\",\n            )\n            await page.wait_for_timeout(3000)  # Wait for grid to load\n\n            # Check total customers count\n            customer_records = page.locator(\"text=records found\").first\n            if await customer_records.count() > 0:\n                count_text = await customer_records.inner_text()\n                print(f\"Customers count: {count_text}\", file=sys.stderr)\n\n                # Extract number\n                match = re.search(r\"(\\d+)\\s+records found\", count_text)\n                if match:\n                    customers_count = int(match.group(1))\n                    print(\n                        f\"✓ Total customers count is {customers_count}\", file=sys.stderr\n                    )\n\n                    # Verify against expected answer if available\n                    if expected_answer and \"FinalCustomers\" in expected_answer:\n                        expected_final = int(expected_answer[\"FinalCustomers\"])\n                        if customers_count == expected_final:\n                            print(\n                                f\"✓ Customer count matches expected: {customers_count}\",\n                                file=sys.stderr,\n                            )\n                        else:\n                            print(\n                                f\"✗ Customer count mismatch: Expected {expected_final} customers, found {customers_count}\",\n                                file=sys.stderr,\n                            )\n    
                        return False\n\n            # Wait for the customer grid to load properly\n            await page.wait_for_timeout(5000)\n            \n            # Check if Isabella Romano exists - first wait for grid to load\n            grid_loaded = False\n            for i in range(3):\n                # Look for grid container and wait for it to populate\n                grid_container = page.locator(\".admin__data-grid-outer-wrap, .data-grid, table\").first\n                if await grid_container.count() > 0:\n                    # Check if there are customer rows loaded\n                    customer_rows = page.locator(\"td[data-column='email'], td:has-text('@')\")\n                    if await customer_rows.count() > 0:\n                        grid_loaded = True\n                        break\n                await page.wait_for_timeout(2000)\n            \n            if not grid_loaded:\n                print(\"✗ Customer grid failed to load properly\", file=sys.stderr)\n                return False\n            \n            # Now check if Isabella Romano exists in the loaded grid\n            isabella_exists = (\n                await page.locator(\"text=isabella.romano@premium.eu\").count() > 0\n            )\n            \n            if not isabella_exists:\n                # Try searching for the customer to be more thorough\n                try:\n                    search_box = page.locator('input[placeholder*=\"Search by keyword\"], input[name=\"search\"], [data-role=\"search\"]').first\n                    if await search_box.count() > 0:\n                        await search_box.clear()\n                        await search_box.fill(\"isabella.romano@premium.eu\")\n                        await page.keyboard.press(\"Enter\")\n                        await page.wait_for_load_state(\"networkidle\")\n                        await page.wait_for_timeout(3000)\n                        \n                        # Check again after search\n 
                       isabella_exists = (\n                            await page.locator(\"text=isabella.romano@premium.eu\").count() > 0\n                        )\n                        \n                        # Also check for \"No records found\" message\n                        no_records = await page.locator(\"text=We couldn't find any records., text=No records found\").count() > 0\n                        if no_records:\n                            print(\n                                \"✗ Customer 'isabella.romano@premium.eu' not found - search returned no results\",\n                                file=sys.stderr,\n                            )\n                            return False\n                except Exception as e:\n                    print(f\"✗ Search failed: {str(e)}\", file=sys.stderr)\n            \n            if isabella_exists:\n                print(\n                    \"✓ Found customer with email 'isabella.romano@premium.eu'\",\n                    file=sys.stderr,\n                )\n            else:\n                print(\n                    \"✗ Customer 'isabella.romano@premium.eu' not found\",\n                    file=sys.stderr,\n                )\n                return False\n\n            # 3. 
Verify Dashboard Last Orders\n            print(\"\\nVerifying Dashboard Last Orders...\", file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/admin/dashboard/\",\n                wait_until=\"networkidle\",\n            )\n            await page.wait_for_timeout(2000)\n\n            # Check for Last Orders section\n            last_orders_exists = await page.locator(\"text=Last Orders\").count() > 0\n            if last_orders_exists:\n                print(\"✓ Found 'Last Orders' section on dashboard\", file=sys.stderr)\n\n                # Find the first customer in the table\n                # Look for the table after \"Last Orders\" heading\n                orders_table = (\n                    page.locator(\"text=Last Orders\")\n                    .locator(\"..\")\n                    .locator(\"table\")\n                    .first\n                )\n                if await orders_table.count() > 0:\n                    # Get the last row in tbody\n                    last_row = orders_table.locator(\"tbody tr\").last\n                    if await last_row.count() > 0:\n                        last_customer = await last_row.locator(\n                            \"td\"\n                        ).first.inner_text()\n                        print(\n                            f\"✓ Last customer in Last Orders: {last_customer}\",\n                            file=sys.stderr,\n                        )\n\n                        # Verify against expected answer if available\n                        if expected_answer and \"LastOrderCustomer\" in expected_answer:\n                            if last_customer == expected_answer[\"LastOrderCustomer\"]:\n                                print(\n                                    f\"✓ Last Order Customer matches expected: {last_customer}\",\n                                    file=sys.stderr,\n                                )\n                            else:\n                            
    print(\n                                    f\"✗ Last Order Customer mismatch: Expected '{expected_answer['LastOrderCustomer']}' but actual is '{last_customer}'\",\n                                    file=sys.stderr,\n                                )\n                                return False\n            else:\n                print(\n                    \"Warning: 'Last Orders' section not found on dashboard\",\n                    file=sys.stderr,\n                )\n\n            # Summary of verification - only print if we reach this point (all checks passed)\n            print(\"\\n=== Browser Verification Summary ===\", file=sys.stderr)\n            print(\"✓ Magento Admin login successful\", file=sys.stderr)\n            print(\n                \"✓ Customer group 'Premium Europe' exists with correct tax class\",\n                file=sys.stderr,\n            )\n            print(\"✓ Customer 'isabella.romano@premium.eu' found in system\", file=sys.stderr)\n            print(\"✓ Customer counts verified\", file=sys.stderr)\n            print(\"✓ Dashboard Last Orders section accessible\", file=sys.stderr)\n\n            return True\n\n        except PlaywrightTimeoutError as e:\n            print(f\"Error: Timeout occurred - {str(e)}\", file=sys.stderr)\n            return False\n        except Exception as e:\n            print(f\"Error: Unexpected error - {str(e)}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/description.md",
    "content": "Our marketing team is planning a new promotion for our bestselling fitness products. We need to analyze the current performance of our top-selling items and their related promotions to optimize our strategy.\n\n**Task Requirements:**\n\n1. If you need to log in, log in with username 'admin' and password 'admin1234'\n\n2. Start by checking our current bestsellers:\n   - Identify the top 3 bestselling products based on their Price and Quantity - record their names, prices, and quantities sold\n   - Note the total Revenue amount displayed\n   - Check if any of these bestsellers appear in the Top Search Terms table - if yes, record the search term and its usage count, else output 'No:0'\n\n3. Investigate these bestselling products in detail:\n   - For each of the top 3 bestsellers identified, search for them by name and record:\n     - Their SKU\n     - Current inventory quantity\n     - Whether they are 'Enabled' or 'Disabled'\n\n4. Check if we have existing promotions for these products:\n   - Look for any active rules that might apply to fitness/yoga products\n   - Find if there's a rule offering a percentage discount - record the rule name and discount percentage\n   - Count total number of active rules\n\n5. Analyze customer purchasing patterns:\n   - Count total number of orders in the system\n   - Note the ID of the most recent order\n\n6. Review our top customers who might be interested:\n   - Find the customer who appears in the Last Orders section of the dashboard with the highest total\n   - Look up this customer in the All Customers list and record their email and customer group\n   - Count how many other customers are in the same group\n\n7. 
Compile your findings and output them in the following exact format:\n\n```\n<answer>\nBestseller1|name:price:quantity:sku:inventory:status\nBestseller2|name:price:quantity:sku:inventory:status\nBestseller3|name:price:quantity:sku:inventory:status\nTotalRevenue|amount\nBestsellerInSearch|term:count\nPercentageDiscountRule|name:percentage\nActiveRulesCount|count\nTotalOrders|count\nMostRecentOrderID|id\nTopCustomer|name:email:group\nSameGroupCustomers|count\n</answer>\n```\n\n**Example Output:**\n```\n<answer>\nBestseller1|Product Name:$XX.XX:X:XXX(SKU):X:Enabled/Disabled\nBestseller2|Product Name:$XX.XX:X:XXX(SKU):X:Enabled/Disabled\nBestseller3|Product Name:$XX.XX:X:XXX(SKU):X:Enabled/Disabled\nTotalRevenue|$XX.XX\nBestsellerInSearch|Term:X or No:0\nPercentageDiscountRule|Rule Name:XX%\nActiveRulesCount|X\nTotalOrders|X\nMostRecentOrderID|X or None\nTopCustomer|Customer Name:email@example.com:Group Name\nSameGroupCustomers|X\n</answer>\n```\n\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/label.txt",
    "content": "Bestseller1|Sprite Stasis Ball 65 cm:$27.00:6:24-WG082-blue:100:Enabled\nBestseller2|Quest Lumaflex™ Band:$19.00:6:24-UG01:100:Enabled\nBestseller3|Sprite Yoga Strap 6 foot:$14.00:6:24-WG085:100:Enabled\nTotalRevenue|$0.00\nBestsellerInSearch|No:0\nPercentageDiscountRule|20% OFF Ever $200-plus purchase!*:20%\nActiveRulesCount|4\nTotalOrders|308\nMostRecentOrderID|000000299\nTopCustomer|Sarah Miller:sarah.miller@example.com:General\nSameGroupCustomers|70"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/meta.json",
    "content": "{\n  \"task_id\": \"fitness_promotion_strategy\",\n  \"task_name\": \"Fitness Promotion Strategy\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Develop fitness product promotion campaigns by analyzing sales data, creating targeted offers, configuring promotional rules, and implementing cross-selling strategies in admin dashboard.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"inventory management\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/fitness_promotion_strategy/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n    \n    try:\n        with open(messages_path, 'r') as f:\n            messages = json.load(f)\n        \n        # Find the last assistant message\n        for message in reversed(messages):\n            if message.get('role') == 'assistant' and message.get('status') == 'completed':\n                content = message.get('content', [])\n                for item in content:\n                    if item.get('type') == 'output_text':\n                        return item.get('text', '')\n        \n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n    \n    # Look for <answer>...</answer> pattern\n    match = re.search(r'<answer>(.*?)</answer>', text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n    \n    answer_content = match.group(1).strip()\n    \n    # Parse each line\n    result = {}\n    lines = answer_content.split('\\n')\n    \n    # Skip the check for exact number of lines - just parse what we have\n    # if len(lines) != 13:\n    #     print(f\"Error: Expected 13 lines in answer, got {len(lines)}\", file=sys.stderr)\n    #     return None\n    
\n    for line in lines:\n        if '|' in line:\n            key, value = line.split('|', 1)\n            result[key.strip()] = value.strip()\n    \n    return result\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, 'r') as f:\n            lines = f.read().strip().split('\\n')\n        \n        expected = {}\n        for line in lines:\n            if '|' in line:\n                key, value = line.split('|', 1)\n                expected[key.strip()] = value.strip()\n        \n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n    \n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, '')\n        \n        # Special handling for different types of values\n        if key in ['Bestseller1', 'Bestseller2', 'Bestseller3']:\n            # Check if all parts match (name:price:quantity:sku:inventory:status)\n            if ':' in expected_value and ':' in model_value:\n                expected_parts = expected_value.split(':')\n                model_parts = model_value.split(':')\n                if len(expected_parts) == 6 and len(model_parts) == 6:\n                    # Compare each part\n                    for i, (exp, mod) in enumerate(zip(expected_parts, model_parts)):\n                        if i == 1:  # Price field\n                            exp_clean = exp.replace('$', '').replace(',', '')\n                            mod_clean = 
mod.replace('$', '').replace(',', '')\n                            if exp_clean != mod_clean:\n                                mismatches.append(f\"{key} price: expected '{exp}', got '{mod}'\")\n                        elif i == 4:  # Inventory field (may have decimal places)\n                            exp_float = float(exp.replace(',', ''))\n                            mod_float = float(mod.replace(',', ''))\n                            if abs(exp_float - mod_float) > 0.0001:\n                                mismatches.append(f\"{key} inventory: expected '{exp}', got '{mod}'\")\n                        else:\n                            if exp.lower() != mod.lower():\n                                mismatches.append(f\"{key} part {i}: expected '{exp}', got '{mod}'\")\n                else:\n                    mismatches.append(f\"{key}: format mismatch - expected '{expected_value}', got '{model_value}'\")\n            else:\n                if expected_value != model_value:\n                    mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'LowestInventoryProduct':\n            # Check product name and inventory\n            if ':' in expected_value and ':' in model_value:\n                expected_name, expected_inv = expected_value.rsplit(':', 1)\n                model_name, model_inv = model_value.rsplit(':', 1)\n                if expected_name.lower() != model_name.lower():\n                    mismatches.append(f\"{key} name: expected '{expected_name}', got '{model_name}'\")\n                exp_float = float(expected_inv.replace(',', ''))\n                mod_float = float(model_inv.replace(',', ''))\n                if abs(exp_float - mod_float) > 0.0001:\n                    mismatches.append(f\"{key} inventory: expected '{expected_inv}', got '{model_inv}'\")\n            else:\n                if expected_value != model_value:\n                    mismatches.append(f\"{key}: expected 
'{expected_value}', got '{model_value}'\")\n        \n        elif key in ['TotalRevenue', 'MinimumPurchaseRule']:\n            # For price/amount fields, normalize format\n            expected_clean = expected_value.replace('$', '').replace(',', '')\n            model_clean = model_value.replace('$', '').replace(',', '')\n            if expected_clean != model_clean:\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'BestsellerInSearch':\n            # Check search term and count\n            if expected_value.lower() != model_value.lower():\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'PercentageDiscountRule':\n            # Check rule name and percentage\n            if ':' in expected_value and ':' in model_value:\n                expected_name, expected_pct = expected_value.rsplit(':', 1)\n                model_name, model_pct = model_value.rsplit(':', 1)\n                if expected_name != model_name:\n                    mismatches.append(f\"{key} name: expected '{expected_name}', got '{model_name}'\")\n                # Normalize percentage (20% vs 20 vs 0.20)\n                exp_pct_clean = expected_pct.replace('%', '').strip()\n                mod_pct_clean = model_pct.replace('%', '').strip()\n                if exp_pct_clean != mod_pct_clean:\n                    mismatches.append(f\"{key} percentage: expected '{expected_pct}', got '{model_pct}'\")\n            else:\n                if expected_value != model_value:\n                    mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'TopCustomer':\n            # Check name:email:group\n            if ':' in expected_value and ':' in model_value:\n                expected_parts = expected_value.split(':')\n                model_parts = model_value.split(':')\n                if 
len(expected_parts) == 3 and len(model_parts) == 3:\n                    exp_name, exp_email, exp_group = expected_parts\n                    mod_name, mod_email, mod_group = model_parts\n                    if exp_name != mod_name:\n                        mismatches.append(f\"{key} name: expected '{exp_name}', got '{mod_name}'\")\n                    if exp_email.lower() != mod_email.lower():\n                        mismatches.append(f\"{key} email: expected '{exp_email}', got '{mod_email}'\")\n                    if exp_group.lower() != mod_group.lower():\n                        mismatches.append(f\"{key} group: expected '{exp_group}', got '{mod_group}'\")\n                else:\n                    mismatches.append(f\"{key}: format mismatch - expected '{expected_value}', got '{model_value}'\")\n            else:\n                if expected_value != model_value:\n                    mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'MostRecentOrderDate':\n            # Date format may vary, do flexible comparison\n            if expected_value.lower() == 'none' and model_value.lower() == 'none':\n                continue\n            elif expected_value != model_value:\n                # Could add more flexible date parsing here if needed\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        else:\n            # Exact match for other fields (counts, etc.)\n            if str(model_value) != str(expected_value):\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n    \n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n    \n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the 
expected answer\", file=sys.stderr)\n    return True\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the bestseller analysis and promotion task has been completed correctly.\n    First checks the model's answer against the expected label,\n    then optionally verifies the actual state in the Magento Admin.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n    \n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n    \n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n        \n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n            \n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\"Warning: Could not parse answer format from model response\", file=sys.stderr)\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/description.md",
    "content": "Perform a comprehensive marketing and customer analysis workflow in the Magento Admin panel to understand search behavior patterns and promotional effectiveness.\n\n**Task Requirements:**\n\n1. First, we need to access the system to begin our comprehensive analysis:\n   If you need to log in, use username 'admin' and password 'admin1234'\n\n2. Let's start by analyzing customer search behavior to understand what customers are looking for:\n   Go to Search Terms in Reports and analyze the search data:\n   - Identify the TOP 2 search terms with the highest number of hits (record exact terms and hit counts)\n   - Find a search term that has 0 results but still has search hits (record exact term and hit count)\n   - Count the total number of search terms displayed in the report\n\n3. Next, we'll examine our promotional strategies to understand current marketing efforts:\n   Navigate to Cart Price Rules and identify:\n   - Find ALL rules that contain a coupon code\n   - Record the exact coupon codes and the complete rule names for each\n   - Count how many active rules exist in total\n\n4. Now let's analyze our email marketing reach and subscriber engagement:\n   Go to Newsletter Subscribers:\n   - Apply a filter to show only 'Subscribed' status\n   - Count the total number of subscribed users shown after filtering\n   - Verify whether these TWO emails appear in the subscribed list:\n     * john.smith.xyz@gmail.com\n     * admin@magento.com\n\n5. To support our analysis, we need to create test customer profiles for different segments:\n   Create TWO new customers with the following details:\n   \n   Customer 1:\n   - First Name: Marketing1\n   - Last Name: Analy\n   - Email: marketdata1.analysis@magento.com\n   - Associate to Website: Main Website\n   - Group: General\n   \n   Customer 2:\n   - First Name: Analytics1\n   - Last Name: Report\n   - Email: analytics1.report@magento.com\n   - Associate to Website: Main Website\n   - Group: Wholesale\n\n
6. Finally, let's review overall business performance metrics from the main dashboard:\n   Go to Dashboard and identify:\n   - The names and sales quantities of the products that are both the best-selling and most expensive\n   - The total revenue displayed on the dashboard\n\n7. Compile all your findings and, at the very end, output them in the following exact format:\n\n```\n<answer>\nTop2SearchTerms|term1:hits1,term2:hits2\nZeroResultTerm|term:hits\nTotalSearchTerms|count\nCouponCodes|code1:rulename1,code2:rulename2\nActiveRulesCount|count\nSubscribedCount|count\nEmailVerification|john.smith.xyz@gmail.com:yes/no,admin@magento.com:yes/no\nTopProduct|name:quantity\nTotalRevenue|amount\n</answer>\n```\n\n**Example Output:**\n```\n<answer>\nTop2SearchTerms|term1:XX,term2:XX\nZeroResultTerm|term:XX\nTotalSearchTerms|XX\nCouponCodes|CODE:Rule Name Here\nActiveRulesCount|X\nSubscribedCount|XX\nEmailVerification|john.smith.xyz@gmail.com:yes/no,admin@magento.com:yes/no\nTopProduct|Product Name:XX\nTotalRevenue|$XX.XX\n</answer>\n```\n\n**Success Criteria:**\n- Successfully logged into Magento Admin\n- Navigated to Search Terms Report and identified top 2 terms\n- Found search term with 0 results but has hits\n- Counted total search terms in report\n- Located all Cart Price Rules with coupon codes\n- Extracted exact coupon codes and rule names\n- Counted active rules\n- Filtered Newsletter Subscribers by 'Subscribed' status\n- Counted total subscribed users\n- Verified presence of two specific email addresses\n- Created two new customers successfully\n- Found top bestselling product from dashboard\n- Identified total revenue from dashboard\n- Output answer in exact format with 9 data lines\n- Answer wrapped in <answer> tags"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/label.txt",
    "content": "Top2SearchTerms|hollister:19,Joust Bag:4\nZeroResultTerm|nike:3\nTotalSearchTerms|7\nCouponCodes|H20:$4 Luma water bottle (save 70%)\nActiveRulesCount|4\nSubscribedCount|1\nEmailVerification|john.smith.xyz@gmail.com:yes,admin@magento.com:no\nTopProduct|Sprite Stasis Ball 65 cm:6\nTotalRevenue|$0.00"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/meta.json",
    "content": "{\n  \"task_id\": \"marketing_customer_analysis\",\n  \"task_name\": \"Marketing Customer Analysis\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Analyze customer behavior patterns using admin analytics, segment user demographics, track purchase histories, evaluate campaign effectiveness, and generate comprehensive marketing intelligence reports.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/marketing_customer_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\nfrom playwright.async_api import (\n    async_playwright,\n    TimeoutError as PlaywrightTimeoutError,\n)\n\n# 从环境变量读取 base_url（shopping_admin 会注入 http://localhost:7780/admin），默认回退到本地\nBASE_URL = os.getenv(\"WEBARENA_BASE_URL\", \"http://localhost:7780/admin\").rstrip(\"/\")\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the new multi-line <answer>xxx</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        return None\n\n    answer_content 
= match.group(1).strip()\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n\n    if len(lines) != 9:\n        print(f\"Error: Expected 9 lines in answer, got {len(lines)}\", file=sys.stderr)\n        return None\n\n    for line in lines:\n        if \"|\" in line:\n            key, value = line.split(\"|\", 1)\n            result[key.strip()] = value.strip()\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key == \"Top2SearchTerms\":\n            # Check if both search terms are present with correct counts\n            expected_terms = expected_value.split(\",\")\n            model_terms = model_value.split(\",\")\n            if set(expected_terms) != set(model_terms):\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"EmailVerification\":\n  
          # Check email verification status\n            expected_emails = dict(\n                item.split(\":\") for item in expected_value.split(\",\")\n            )\n            model_emails = dict(\n                item.split(\":\") for item in model_value.split(\",\") if \":\" in item\n            )\n            if expected_emails != model_emails:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"CouponCodes\":\n            # Check if coupon code and rule name are present\n            if \"H20\" not in model_value or \"Luma water bottle\" not in model_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"TopProduct\":\n            # Check if product name and quantity match\n            if expected_value != model_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the marketing analysis task has been completed correctly.\n    First checks the model's answer against the expected label,\n    then optionally verifies the actual state in the Magento Admin.\n    \"\"\"\n    # 
Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n\n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n\n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n        else:\n            print(\n                \"Warning: Could not parse answer format from model response\",\n                file=sys.stderr,\n            )\n            print(\"Will proceed with browser verification only\", file=sys.stderr)\n    else:\n        print(\n            \"No model response found, proceeding with browser verification\",\n            file=sys.stderr,\n        )\n\n    # Browser verification - only check customer creation (the critical task requirement)\n    print(\"\\n=== Starting Browser Verification ===\", file=sys.stderr)\n    async with async_playwright() as p:\n        browser = await p.chromium.launch(headless=True)\n        context = await browser.new_context()\n        page = await context.new_page()\n\n        try:\n            # Navigate to Magento Admin\n            print(\"Navigating to Magento Admin...\", 
file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/\", wait_until=\"networkidle\"\n            )\n\n            # Check if already logged in, if not, login\n            if \"dashboard\" not in page.url.lower():\n                print(\"Logging into Magento Admin...\", file=sys.stderr)\n                await page.fill('input[name=\"login[username]\"]', \"admin\")\n                await page.fill('input[name=\"login[password]\"]', \"admin1234\")\n                await page.click('button:has-text(\"Sign in\")')\n                await page.wait_for_load_state(\"networkidle\")\n\n                if \"dashboard\" not in page.url.lower():\n                    print(\"Error: Login failed\", file=sys.stderr)\n                    return False\n\n            print(\"Successfully logged into Magento Admin\", file=sys.stderr)\n\n            # Verify Customer Creation (the only critical check for task completion)\n            print(\"Verifying Customer Creation...\", file=sys.stderr)\n            await page.goto(\n                f\"{BASE_URL}/customer/index/\",\n                wait_until=\"networkidle\",\n            )\n\n            # Wait for the customer grid to load\n            try:\n                await page.wait_for_selector(\"table\", timeout=15000)\n            except PlaywrightTimeoutError:\n                print(\"Table not found, trying to proceed anyway...\", file=sys.stderr)\n\n            # Define customer requirements\n            customer1_requirements = {\n                \"email\": \"marketdata1.analysis@magento.com\",\n                \"first_name\": \"Marketing1\",\n                \"last_name\": \"Analy\",\n                \"group\": \"General\",\n                \"website\": \"Main Website\"\n            }\n            \n            customer2_requirements = {\n                \"email\": \"analytics1.report@magento.com\", \n                \"first_name\": \"Analytics1\",\n                \"last_name\": \"Report\",\n             
   \"group\": \"Wholesale\",\n                \"website\": \"Main Website\"\n            }\n\n            async def check_customer_exists(customer_requirements):\n                \"\"\"Check if a customer exists by looking for their details in the customer grid\"\"\"\n                email = customer_requirements[\"email\"]\n                first_name = customer_requirements[\"first_name\"]\n                last_name = customer_requirements[\"last_name\"]\n                group = customer_requirements[\"group\"]\n                \n                # First check if email exists in current page without searching\n                email_found = await page.locator(f\"*:has-text('{email}')\").count() > 0\n                \n                if not email_found:\n                    # Try searching for the customer\n                    try:\n                        search_box = page.locator('input[placeholder*=\"Search by keyword\"]').first\n                        await search_box.clear()\n                        await search_box.fill(email)\n                        await page.keyboard.press(\"Enter\")\n                        await page.wait_for_load_state(\"networkidle\")\n                        await page.wait_for_timeout(2000)\n                        \n                        # Check again after search\n                        email_found = await page.locator(f\"*:has-text('{email}')\").count() > 0\n                    except:\n                        pass\n                \n                if not email_found:\n                    return False, f\"Email {email} not found\"\n                \n                # More precise validation: find the row containing this customer's email\n                # Then check if the required fields are in the same row or nearby context\n                try:\n                    # Find the specific row containing this email\n                    email_cell = page.locator(f\"td:has-text('{email}')\").first\n                    if await 
email_cell.count() == 0:\n                        # Fall back to broader search\n                        email_cell = page.locator(f\"*:has-text('{email}')\").first\n                    \n                    # Get the parent row or container\n                    row = email_cell.locator(\"xpath=ancestor::tr[1]\")\n                    if await row.count() == 0:\n                        # Fall back to getting nearby content\n                        row = email_cell.locator(\"xpath=..\")\n                    \n                    # Get the text content of the row/container\n                    row_text = await row.text_content() if await row.count() > 0 else \"\"\n                    \n                    # If we can't get a specific row, fall back to broader validation\n                    if not row_text or len(row_text.strip()) < 10:\n                        # Search in nearby cells or elements\n                        nearby_elements = page.locator(f\"*:has-text('{email}')\").locator(\"xpath=../following-sibling::* | xpath=../preceding-sibling::*\")\n                        nearby_count = await nearby_elements.count()\n                        nearby_text = \"\"\n                        for i in range(min(nearby_count, 5)):  # Check up to 5 nearby elements\n                            element_text = await nearby_elements.nth(i).text_content()\n                            if element_text:\n                                nearby_text += element_text + \" \"\n                        row_text = row_text + \" \" + nearby_text\n                    \n                    # Check if required fields are present in the row/context\n                    required_fields = [first_name, last_name, group]\n                    found_fields = [email]  # Email is already confirmed\n                    missing_fields = []\n                    \n                    for field in required_fields:\n                        if field in row_text:\n                            
found_fields.append(field)\n                        else:\n                            missing_fields.append(field)\n                    \n                    if missing_fields:\n                        return False, f\"Customer found but missing fields in row context: {', '.join(missing_fields)}. Row text: {row_text[:100]}...\"\n                    \n                    return True, f\"Customer verified with all required fields: {', '.join(found_fields)}\"\n                    \n                except Exception as e:\n                    # Fall back to original simple validation\n                    page_content = await page.content()\n                    required_fields = [first_name, last_name, group, email]\n                    found_fields = []\n                    missing_fields = []\n                    \n                    for field in required_fields:\n                        if field in page_content:\n                            found_fields.append(field)\n                        else:\n                            missing_fields.append(field)\n                    \n                    if missing_fields:\n                        return False, f\"Customer found but missing fields (fallback): {', '.join(missing_fields)}\"\n                    \n                    return True, f\"Customer verified with all required fields (fallback): {', '.join(found_fields)}\"\n\n            # Check both customers\n            customer1_exists, customer1_msg = await check_customer_exists(customer1_requirements)\n            customer2_exists, customer2_msg = await check_customer_exists(customer2_requirements)\n\n            print(\n                f\"Customer 1 (marketdata1.analysis@magento.com): {'Found' if customer1_exists else 'Not Found'} - {customer1_msg}\",\n                file=sys.stderr,\n            )\n            print(\n                f\"Customer 2 (analytics1.report@magento.com): {'Found' if customer2_exists else 'Not Found'} - {customer2_msg}\",\n             
   file=sys.stderr,\n            )\n\n            if not (customer1_exists and customer2_exists):\n                print(\"Error: Required customers were not found in the system\", file=sys.stderr)\n                return False\n\n            print(\"✓ Both required customers found in the system\", file=sys.stderr)\n            return True\n\n        except PlaywrightTimeoutError as e:\n            print(f\"Error: Timeout occurred - {str(e)}\", file=sys.stderr)\n            return False\n        except Exception as e:\n            print(f\"Error: Unexpected error - {str(e)}\", file=sys.stderr)\n            return False\n        finally:\n            await browser.close()\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/description.md",
    "content": "Our company is planning to expand sales operations to New York state and needs a comprehensive analysis of our current sales performance and tax implications. Please help me gather critical data for our expansion feasibility report.\n\n**Task Requirements:**\n\n1. Log in with username 'admin' and password 'admin1234'\n\n2. First, analyze our current sales performance on the dashboard:\n   - Check the 'Lifetime Sales' amount displayed\n   - In the Bestsellers table, identify which product has the lowest price and record its exact name, price, and quantity sold\n   - Check whether this same product appears in the 'Last Orders' table; if so, note which customer(s) ordered it; if not, note 'No'\n\n3. Since we're expanding to New York, we need to check the tax configuration:\n   - Find and record the exact tax rate for New York state\n   - Compare it with California's tax rate - record which state has a higher rate\n   - Count how many different US states currently have tax configurations\n\n4. You need to understand our order status configuration for processing NY market orders:\n   - Filter orders to show only statuses that are 'Visible On Storefront = Yes'\n   - Among these visible statuses, identify whether one of them has the status code 'processing' (Yes or No)\n   - Check if this 'processing' status is set as a 'Default Status' (Yes or No)\n\n\n5. Since New York orders might need special handling, check all stores:\n   - Note the number of websites configured\n   - Record the store code for the first Main Website Store\n\n6. For inventory planning, check the inventory sources:\n   - Check if the Default Source is currently 'Enabled' or shows as 'Disabled' for Pickup Location\n   - Click the 'Edit' link for the Default Source and check if there's a 'State/Province' field (Yes or No)\n\n
7. Finally, return to the Dashboard and examine the revenue metrics:\n   - Record the current Revenue amount shown\n   - Check if Tax and Shipping amounts are both $0.00 (Yes or No)\n\n**Please provide your findings in the following exact format:**\n\n```\n<answer>\nLifetime_Sales_Amount|amount\nCheap_Bestseller_Name|name\nSecond_Bestseller_Price|price\nSecond_Bestseller_Quantity|quantity\nProduct_In_Last_Orders|yes_or_no\nNY_Tax_Rate|rate\nCA_Tax_Rate|rate\nHigher_Tax_State|state\nTotal_States_With_Tax|count\nProcessing_Visible_Storefront|Yes_or_No\nProcessing_Default_Status|Yes_or_No\nNumber_Of_Websites|count\nMain_Store_Code|code\nDefault_Source_Pickup_Status|status\nDefault_Source_State|state_or_none\nDashboard_Revenue|amount\nTax_Shipping_Zero|yes_or_no\n</answer>\n```\n\n**Example Output:**\n```\n<answer>\nLifetime_Sales_Amount|$XX.XX\nCheap_Bestseller_Name|Product Name Here\nSecond_Bestseller_Price|$XX.XX\nSecond_Bestseller_Quantity|XX\nProduct_In_Last_Orders|Yes/No\nNY_Tax_Rate|X.XXXX\nCA_Tax_Rate|X.XXXX\nHigher_Tax_State|XX\nTotal_States_With_Tax|XX\nProcessing_Visible_Storefront|Yes/No\nProcessing_Default_Status|Yes/No\nNumber_Of_Websites|X\nMain_Store_Code|code_here\nDefault_Source_Pickup_Status|Enabled/Disabled\nDefault_Source_State|State or None\nDashboard_Revenue|$XX.XX\nTax_Shipping_Zero|Yes/No\n</answer>\n```"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/label.txt",
    "content": "Lifetime_Sales_Amount|$0.00\nCheap_Bestseller_Name|Sprite Yoga Strap 6 foot\nSecond_Bestseller_Price|$14.00\nSecond_Bestseller_Quantity|6\nProduct_In_Last_Orders|No\nNY_Tax_Rate|8.3750\nCA_Tax_Rate|8.2500\nHigher_Tax_State|NY\nTotal_States_With_Tax|2\nProcessing_Visible_Storefront|Yes\nProcessing_Default_Status|Yes\nNumber_Of_Websites|1\nMain_Store_Code|main_website_store\nDefault_Source_Pickup_Status|Enabled\nDefault_Source_State|No\nDashboard_Revenue|$0.00\nTax_Shipping_Zero|Yes"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/meta.json",
    "content": "{\n  \"task_id\": \"ny_expansion_analysis\",\n  \"task_name\": \"NY Expansion Analysis\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Prepare New York market expansion strategy by analyzing regional demographics, evaluating competitor presence, assessing logistics requirements, and creating detailed market entry plan.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/ny_expansion_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"ERROR: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n    \n    # Check if file exists\n    if not Path(messages_path).exists():\n        print(f\"ERROR: Messages file not found at path: {messages_path}\", file=sys.stderr)\n        return None\n    \n    try:\n        with open(messages_path, 'r') as f:\n            content = f.read()\n            \n        # Check if file is empty\n        if not content or content.strip() == '\"\"':\n            print(\"ERROR: Messages file is empty or contains only empty string\", file=sys.stderr)\n            return None\n            \n        messages = json.loads(content)\n        \n        # Check if messages is a list\n        if not isinstance(messages, list):\n            print(f\"ERROR: Messages file should contain a list, got {type(messages).__name__}\", file=sys.stderr)\n            return None\n        \n        # Find the last assistant message\n        for message in reversed(messages):\n            if message.get('role') == 'assistant' and message.get('status') == 'completed':\n                content = message.get('content', [])\n                if not content:\n                    print(\"WARNING: Assistant message has empty content\", file=sys.stderr)\n                    continue\n                    \n                for item in content:\n                    if item.get('type') == 'output_text':\n                        text = item.get('text', '')\n                        if not text:\n                            print(\"WARNING: Output text is empty\", 
file=sys.stderr)\n                            continue\n                        return text\n        \n        print(\"ERROR: No assistant response with output_text found in messages\", file=sys.stderr)\n        return None\n    except json.JSONDecodeError as e:\n        print(f\"ERROR: Invalid JSON in messages file: {str(e)}\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"ERROR: Unexpected error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        print(\"ERROR: No text provided to parse\", file=sys.stderr)\n        return None\n    \n    # Look for <answer>...</answer> pattern\n    match = re.search(r'<answer>(.*?)</answer>', text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        print(\"ERROR: No <answer> tags found in the response\", file=sys.stderr)\n        print(f\"  Response preview: {text[:200]}...\", file=sys.stderr)\n        return None\n    \n    answer_content = match.group(1).strip()\n    \n    if not answer_content:\n        print(\"ERROR: Empty content between <answer> tags\", file=sys.stderr)\n        return None\n    \n    # Parse each line\n    result = {}\n    lines = answer_content.split('\\n')\n    \n    # Expected keys that should be present\n    expected_keys = [\n        'Lifetime_Sales_Amount', 'Cheap_Bestseller_Name', 'Second_Bestseller_Price',\n        'Second_Bestseller_Quantity', 'Product_In_Last_Orders', 'NY_Tax_Rate',\n        'CA_Tax_Rate', 'Higher_Tax_State', 'Total_States_With_Tax',\n        'Processing_Visible_Storefront', 'Processing_Default_Status',\n        'Number_Of_Websites', 'Main_Store_Code', 'Default_Source_Pickup_Status',\n        'Default_Source_State', 'Dashboard_Revenue', 'Tax_Shipping_Zero'\n    ]\n    \n    parsed_keys = []\n    for line in lines:\n    
    line = line.strip()\n        if not line:\n            continue\n            \n        if '|' not in line:\n            print(f\"ERROR: Line missing pipe separator '|': {line}\", file=sys.stderr)\n            continue\n            \n        parts = line.split('|', 1)\n        if len(parts) != 2:\n            print(f\"ERROR: Invalid line format: {line}\", file=sys.stderr)\n            continue\n            \n        key, value = parts\n        key = key.strip()\n        value = value.strip()\n        \n        if not key:\n            print(f\"ERROR: Empty key in line: {line}\", file=sys.stderr)\n            continue\n            \n        result[key] = value\n        parsed_keys.append(key)\n    \n    # Check for missing expected keys\n    missing_keys = set(expected_keys) - set(parsed_keys)\n    if missing_keys:\n        print(f\"ERROR: Missing expected keys: {', '.join(sorted(missing_keys))}\", file=sys.stderr)\n        \n    # Check for unexpected keys\n    unexpected_keys = set(parsed_keys) - set(expected_keys)\n    if unexpected_keys:\n        print(f\"WARNING: Unexpected keys found: {', '.join(sorted(unexpected_keys))}\", file=sys.stderr)\n    \n    if not result:\n        print(\"ERROR: No valid key-value pairs parsed from answer\", file=sys.stderr)\n        return None\n    \n    return result\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, 'r') as f:\n            lines = f.read().strip().split('\\n')\n        \n        expected = {}\n        for line in lines:\n            if '|' in line:\n                key, value = line.split('|', 1)\n                expected[key.strip()] = value.strip()\n        \n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\ndef compare_answers(model_answer, 
expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n    \n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, '')\n        \n        # Special handling for different types of values\n        if key in ['Lifetime_Sales_Amount', 'Second_Bestseller_Price', 'Dashboard_Revenue']:\n            # For price/amount fields, normalize format\n            expected_clean = expected_value.replace('$', '').replace(',', '')\n            model_clean = model_value.replace('$', '').replace(',', '')\n            if expected_clean != model_clean:\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key in ['NY_Tax_Rate', 'CA_Tax_Rate']:\n            # Tax rates - allow different decimal formats\n            expected_clean = expected_value.replace('%', '').strip()\n            model_clean = model_value.replace('%', '').strip()\n            # Convert to float for comparison\n            try:\n                if float(expected_clean) != float(model_clean):\n                    mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n            except ValueError:\n                if expected_clean != model_clean:\n                    mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key in ['Product_In_Last_Orders', 'Processing_Visible_Storefront', 'Processing_Default_Status', 'Tax_Shipping_Zero']:\n            # Yes/No fields - case insensitive\n            if model_value.lower() != expected_value.lower():\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'Empty_Rows_Yes_Effect':\n     
       # Allow flexible descriptions for this field\n            # Just check if model provided some reasonable description\n            if not model_value or len(model_value) < 5:\n                mismatches.append(f\"{key}: expected meaningful description, got '{model_value}'\")\n        \n        elif key == 'Order_Status_Options':\n            # Check if main options are mentioned\n            expected_options = set(opt.strip() for opt in expected_value.split(','))\n            model_options = set(opt.strip() for opt in model_value.split(','))\n            if expected_options != model_options:\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        elif key == 'Chart_Disabled_Message':\n            # Allow some flexibility in message text\n            # Check for key words\n            if 'disabled' not in model_value.lower() and 'enable' not in model_value.lower():\n                mismatches.append(f\"{key}: expected message about chart being disabled, got '{model_value}'\")\n        \n        elif key == 'Default_Source_State':\n            # Handle 'None' or empty state\n            expected_normalized = expected_value.lower() if expected_value.lower() != 'none' else ''\n            model_normalized = model_value.lower() if model_value.lower() != 'none' else ''\n            if expected_normalized != model_normalized:\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n        \n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(f\"{key}: expected '{expected_value}', got '{model_value}'\")\n    \n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n    \n    print(\"\\n=== Answer Comparison ===\", 
file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the NY expansion analysis task has been completed correctly.\n    First checks the model's answer against the expected label,\n    then optionally verifies the actual state in the Magento Admin.\n    \"\"\"\n    print(\"\\n=== Starting Verification ===\", file=sys.stderr)\n    \n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n    \n    # Load expected answer\n    print(\"Loading expected answer from label.txt...\", file=sys.stderr)\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"FATAL ERROR: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n    \n    print(f\"Expected answer loaded with {len(expected_answer)} keys\", file=sys.stderr)\n    \n    # Get model's response from MCP_MESSAGES\n    print(\"\\nReading model response from MCP_MESSAGES...\", file=sys.stderr)\n    model_response = get_model_response()\n    \n    if not model_response:\n        print(\"FATAL ERROR: No valid model response found\", file=sys.stderr)\n        return False\n    \n    print(f\"Model response found (length: {len(model_response)} chars)\", file=sys.stderr)\n    print(\"\\nParsing answer format from model response...\", file=sys.stderr)\n    \n    model_answer = parse_answer_format(model_response)\n    \n    if not model_answer:\n        print(\"FATAL ERROR: Could not parse answer format from model response\", file=sys.stderr)\n        return False\n    \n    print(f\"\\n=== Model Answer Parsed Successfully ===\", file=sys.stderr)\n    print(f\"Parsed {len(model_answer)} key-value pairs\", file=sys.stderr)\n    \n    for key, value in model_answer.items():\n        print(f\"  {key}: {value}\", file=sys.stderr)\n    \n    # Compare answers\n    print(\"\\n=== Comparing Model Answer 
with Expected Answer ===\", file=sys.stderr)\n    answer_match = compare_answers(model_answer, expected_answer)\n    \n    if not answer_match:\n        print(\"\\nFATAL ERROR: Model answer does not match expected answer\", file=sys.stderr)\n        print(\"Verification FAILED\", file=sys.stderr)\n        return False\n    \n    print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n    print(\"Verification PASSED\", file=sys.stderr)\n    return True\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/description.md",
    "content": "Perform a comprehensive products and sales analysis in the Magento Admin panel to identify inventory status and sales performance metrics.\n\n**Task Requirements:**\n\n1. If you need to log in, log in with username 'admin' and password 'admin1234'\n\n2. To analyze product inventory and catalog details, perform the following:\n   - Search for all products containing 'Yoga' in their name - count the exact number of results\n   - Clear the search and find the product with SKU 'WH11' - record its exact price\n   - Apply a filter to show only products with Quantity = 0.0000 - count how many products match\n\n3. To identify top-selling products and revenue metrics, navigate to the Dashboard and from the Bestsellers table:\n   - Identify the product with the lowest price and lowest quantity - record the product name and quantity sold\n   - Find the second cheapest product in the table - record its exact quantity sold\n   - Note the total Revenue amount displayed in the dashboard\n\n4. Gather all customers' information and demographics:\n   - Find customer 'Sarah Miller' - record her exact email address\n   - Count the total number of customers shown in the grid\n\n5. To review order status and customer purchase history, go to the Orders page under Sales:\n   - Count the total number of orders with 'Pending' status\n   - Find the order ID (starting with \"000\") of Grace Nguyen's most expensive order with the completed status\n\n6. To provide a comprehensive report of all gathered data, compile all your findings and output them in the following exact format:\n\n```\n<answer>\nYogaProducts|count\nWH11Price|price\nZeroQuantityProducts|count\nLowestProduct|name:quantity\nQuestLumaflexQuantity|quantity\nDashboardRevenue|amount\nSarahMillerEmail|email\nTotalCustomers|count\nPendingOrders|count\nGraceNguyenOrderID|orderid\n</answer>\n```\n\n**Example Output:**\n```\n<answer>\nYogaProducts|XX\nWH11Price|$XX.XX\nZeroQuantityProducts|XX\nLowestProduct|Product Name Here:XX\nQuestLumaflexQuantity|XX\nDashboardRevenue|$XX.XX\nSarahMillerEmail|email@example.com\nTotalCustomers|XX\nPendingOrders|X\nGraceNguyenOrderID|00000XXXX\n</answer>\n```"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/label.txt",
    "content": "YogaProducts|171\nWH11Price|$54.00\nZeroQuantityProducts|150\nLowestProduct|Sprite Stasis Ball 55 cm foot:5\nQuestLumaflexQuantity|6\nDashboardRevenue|$0.00\nSarahMillerEmail|helloworld@yahoo.com\nTotalCustomers|72\nPendingOrders|10\nGraceNguyenOrderID|000000189"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/meta.json",
    "content": "{\n  \"task_id\": \"products_sales_analysis\",\n  \"task_name\": \"Products Sales Analysis\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Generate comprehensive sales performance reports by extracting product metrics, analyzing revenue trends, identifying top performers, evaluating inventory turnover, and creating actionable insights.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/products_sales_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    if item.get(\"type\") == \"output_text\":\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        print(\"Error: No text provided to parse\", file=sys.stderr)\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        print(\"Error: No <answer>...</answer> tags found in response\", file=sys.stderr)\n        return None\n\n    answer_content = match.group(1).strip()\n    if not answer_content:\n        print(\"Error: Empty answer content\", file=sys.stderr)\n        
return None\n\n    # Parse each line\n    result = {}\n    lines = [line.strip() for line in answer_content.split(\"\\n\") if line.strip()]\n\n    if len(lines) != 10:\n        print(f\"Error: Expected 10 lines in answer, got {len(lines)}\", file=sys.stderr)\n        print(f\"Lines found: {lines}\", file=sys.stderr)\n        return None\n\n    # Expected keys for validation\n    expected_keys = [\n        \"YogaProducts\", \"WH11Price\", \"ZeroQuantityProducts\", \"LowestProduct\",\n        \"QuestLumaflexQuantity\", \"DashboardRevenue\", \"SarahMillerEmail\",\n        \"TotalCustomers\", \"PendingOrders\", \"GraceNguyenOrderID\"\n    ]\n\n    for line in lines:\n        if \"|\" not in line:\n            print(f\"Error: Line missing '|' separator: {line}\", file=sys.stderr)\n            return None\n        \n        parts = line.split(\"|\", 1)\n        if len(parts) != 2:\n            print(f\"Error: Invalid line format: {line}\", file=sys.stderr)\n            return None\n            \n        key, value = parts[0].strip(), parts[1].strip()\n        \n        if not key or not value:\n            print(f\"Error: Empty key or value in line: {line}\", file=sys.stderr)\n            return None\n            \n        result[key] = value\n\n    # Validate all expected keys are present\n    missing_keys = set(expected_keys) - set(result.keys())\n    if missing_keys:\n        print(f\"Error: Missing required keys: {missing_keys}\", file=sys.stderr)\n        return None\n\n    return result\n\n\ndef load_expected_answer(label_path):\n    \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return 
expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key == \"LowestProduct\":\n            # Check if product name and quantity match (format: \"Product Name:quantity\")\n            if \":\" in expected_value and \":\" in model_value:\n                expected_name, expected_qty = expected_value.rsplit(\":\", 1)\n                model_name, model_qty = model_value.rsplit(\":\", 1)\n                if expected_name != model_name or expected_qty != model_qty:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n            else:\n                if expected_value != model_value:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key in [\"WH11Price\", \"DashboardRevenue\"]:\n            # For price/amount fields, normalize format\n            expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n            model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n            if expected_clean != model_clean:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"SarahMillerEmail\":\n            # Email should match exactly\n            if 
model_value.lower() != expected_value.lower():\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        else:\n            # Exact match for other fields\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the products and sales analysis task has been completed correctly.\n    First checks the model's answer against the expected label,\n    then optionally verifies the actual state in the Magento Admin.\n    \"\"\"\n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"Error: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n\n    # Get model's response from MCP_MESSAGES\n    model_response = get_model_response()\n    if model_response:\n        print(\"Found model response, parsing answer format...\", file=sys.stderr)\n        model_answer = parse_answer_format(model_response)\n\n        if model_answer:\n            print(\"\\n=== Model Answer Parsed ===\", file=sys.stderr)\n            for key, value in model_answer.items():\n                print(f\"{key}: {value}\", file=sys.stderr)\n\n            # Compare answers\n            answer_match = compare_answers(model_answer, expected_answer)\n            if not 
answer_match:\n                print(\"\\nModel answer does not match expected answer\", file=sys.stderr)\n                return False\n            print(\"\\n✓ Model answer matches expected answer\", file=sys.stderr)\n            return True\n        else:\n            print(\n                \"Warning: Could not parse answer format from model response\",\n                file=sys.stderr,\n            )\n            return False\n    else:\n        print(\"No model response found\", file=sys.stderr)\n        return False\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/description.md",
    "content": "Perform a comprehensive sales and inventory analysis by extracting specific metrics from multiple sections of the Magento Admin panel.\n\n**Task Requirements:**\n\n1. Log in with username 'admin' and password 'admin1234'\n\n2. To analyze product inventory and identify key items, check all products:\n   - Search for all products containing 'Sprite' in their name - count the exact number of results\n   - Clear the search and filter products by Quantity = 100.0000 - count how many products match\n   - Find the product with SKU 'WS12' - record its exact name and price\n\n3. To understand sales performance and order status, check all orders:\n   - Search for all orders with 'Pending' status - count the total number\n   - Find Grace Nguyen's cheapest order with 'Complete' status - record the order ID (starts with \"000\")\n   - Find the order with the highest Grand Total - record the customer name and amount\n\n4. To examine bestselling products and search trends, do the following from the main page:\n   - In the Bestsellers table, identify the product with the highest quantity and the lowest price - record its name and quantity sold\n   - Find 'Overnight Duffle' and record its exact price\n   - In the Top Search Terms table, find 'hollister' and record its position number (1st, 2nd, etc.)\n\n5. To analyze customer demographics and account information, go to All Customers:\n   - Search for customers whose email address contains 'costello' - count the results\n   - Find Sarah Miller's customer record - record her Group and extract her Customer Since date\n\n6. To review payment status and billing information, navigate to Invoices:\n   - Find all invoices with 'Paid' status - count them\n   - Find the invoice for order #000000002 - record the Bill-to Name\n\n7. To provide a comprehensive report of all gathered data, compile all findings and output them in the following exact format:\n\n```\n<answer>\nSpriteProducts|count\nQuantity100Products|count\nWS12Info|name:price\nPendingOrders|count\nGraceOrderID|orderid\nHighestOrderInfo|customer:amount\nCheapProduct|name:quantity\nOvernightDufflePrice|price\nHollisterPosition|position\nCostelloCustomers|count\nSarahMillerInfo|group:date\nPaidInvoices|count\nInvoice002BillTo|name\n</answer>\n```\n\n**Example Output:**\n```\n<answer>\nSpriteProducts|XX\nQuantity100Products|XX\nWS12Info|Product Name Here:$XX.XX\nPendingOrders|X\nGraceOrderID|00000XXXX\nHighestOrderInfo|Customer Name:$XXX.XX\nCheapProduct|Product Name:XX\nOvernightDufflePrice|$XX.XX\nHollisterPosition|Xth\nCostelloCustomers|X\nSarahMillerInfo|Group Name:MMM DD, YYYY\nPaidInvoices|X\nInvoice002BillTo|Customer Name\n</answer>\n```"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/label.txt",
    "content": "SpriteProducts|16\nQuantity100Products|1886\nWS12Info|Radiant Tee:$22.00\nPendingOrders|10\nGraceOrderID|000000114\nHighestOrderInfo|Samantha Jones:$292.40\nCheapProduct|Sprite Yoga Strap 6 foot:6\nOvernightDufflePrice|$45.00\nHollisterPosition|1st\nCostelloCustomers|0\nSarahMillerInfo|General:Apr 19, 2023 5:45:07 PM\nPaidInvoices|2\nInvoice002BillTo|Veronica Costello"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/meta.json",
    "content": "{\n  \"task_id\": \"sales_inventory_analysis\",\n  \"task_name\": \"Sales Inventory Analysis\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Analyze sales patterns and inventory levels to optimize stock management, identify slow-moving items, predict demand trends, and generate restocking recommendations.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data extraction\",\n    \"comparative analysis\",\n    \"inventory management\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/sales_inventory_analysis/verify.py",
    "content": "import asyncio\nimport sys\nimport re\nimport os\nimport json\nfrom pathlib import Path\n\n\ndef get_model_response():\n    \"\"\"\n    Get the model's response from the MCP_MESSAGES environment variable.\n    Returns the last assistant message text.\n    \"\"\"\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    print(f\"MCP_MESSAGES: {messages_path}\")\n    if not messages_path:\n        print(\"Warning: MCP_MESSAGES environment variable not set\", file=sys.stderr)\n        return None\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n\n        # Find the last assistant message with type='message', status='completed'\n        for message in reversed(messages):\n            if (\n                message.get(\"role\") == \"assistant\"\n                and message.get(\"status\") == \"completed\"\n                and message.get(\"type\") == \"message\"\n            ):\n                content = message.get(\"content\", [])\n                for item in content:\n                    # Check for both 'text' and 'output_text' types\n                    if item.get(\"type\") in [\"text\", \"output_text\"]:\n                        return item.get(\"text\", \"\")\n\n        print(\"Warning: No assistant response found in messages\", file=sys.stderr)\n        return None\n    except Exception as e:\n        print(f\"Error reading messages file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef parse_answer_format(text):\n    \"\"\"\n    Parse the <answer>...</answer> format from the agent's output.\n    Returns a dictionary with the parsed values.\n    \"\"\"\n    if not text:\n        print(\"ERROR: No text provided to parse\", file=sys.stderr)\n        return None\n\n    # Look for <answer>...</answer> pattern\n    match = re.search(r\"<answer>(.*?)</answer>\", text, re.IGNORECASE | re.DOTALL)\n    if not match:\n        print(\"ERROR: No <answer>...</answer> tags found in the response\", 
file=sys.stderr)\n        print(\"Response text preview (first 200 chars):\", text[:200], file=sys.stderr)\n        return None\n\n    answer_content = match.group(1).strip()\n    print(f\"Found answer content with {len(answer_content)} characters\", file=sys.stderr)\n\n    # Parse each line\n    result = {}\n    lines = answer_content.split(\"\\n\")\n    \n    # Expected keys for this task\n    expected_keys = [\n        \"SpriteProducts\", \"Quantity100Products\", \"WS12Info\", \"PendingOrders\",\n        \"GraceOrderID\", \"HighestOrderInfo\", \"CheapProduct\", \"OvernightDufflePrice\",\n        \"HollisterPosition\", \"CostelloCustomers\", \"SarahMillerInfo\", \n        \"PaidInvoices\", \"Invoice002BillTo\"\n    ]\n\n    if len(lines) != 13:\n        print(f\"ERROR: Expected 13 lines in answer, got {len(lines)}\", file=sys.stderr)\n        print(f\"Lines found: {lines}\", file=sys.stderr)\n        return None\n\n    for i, line in enumerate(lines, 1):\n        if \"|\" not in line:\n            print(f\"ERROR: Line {i} does not contain pipe separator '|': '{line}'\", file=sys.stderr)\n            return None\n        \n        parts = line.split(\"|\", 1)\n        if len(parts) != 2:\n            print(f\"ERROR: Line {i} could not be split into key|value: '{line}'\", file=sys.stderr)\n            return None\n            \n        key, value = parts\n        result[key.strip()] = value.strip()\n    \n    # Check if all expected keys are present\n    missing_keys = set(expected_keys) - set(result.keys())\n    if missing_keys:\n        print(f\"ERROR: Missing expected keys: {missing_keys}\", file=sys.stderr)\n        print(f\"Keys found: {list(result.keys())}\", file=sys.stderr)\n        return None\n    \n    # Check for unexpected keys\n    extra_keys = set(result.keys()) - set(expected_keys)\n    if extra_keys:\n        print(f\"WARNING: Unexpected keys found: {extra_keys}\", file=sys.stderr)\n\n    return result\n\n\ndef load_expected_answer(label_path):\n   
 \"\"\"\n    Load the expected answer from label.txt file.\n    Returns a dictionary with the expected values.\n    \"\"\"\n    try:\n        with open(label_path, \"r\") as f:\n            lines = f.read().strip().split(\"\\n\")\n\n        expected = {}\n        for line in lines:\n            if \"|\" in line:\n                key, value = line.split(\"|\", 1)\n                expected[key.strip()] = value.strip()\n\n        return expected\n    except Exception as e:\n        print(f\"Error reading label file: {str(e)}\", file=sys.stderr)\n        return None\n\n\ndef compare_answers(model_answer, expected_answer):\n    \"\"\"\n    Compare the model's answer with the expected answer.\n    Returns True if all key information matches, False otherwise.\n    \"\"\"\n    if not model_answer or not expected_answer:\n        return False\n\n    # Check each expected key\n    mismatches = []\n    for key, expected_value in expected_answer.items():\n        model_value = model_answer.get(key, \"\")\n\n        # Special handling for different types of values\n        if key == \"WS12Info\":\n            # Check if product name and price match (format: name:price)\n            if \":\" in expected_value and \":\" in model_value:\n                expected_name, expected_price = expected_value.rsplit(\":\", 1)\n                model_name, model_price = model_value.rsplit(\":\", 1)\n                # Normalize price format\n                expected_price_clean = expected_price.replace(\"$\", \"\").replace(\",\", \"\")\n                model_price_clean = model_price.replace(\"$\", \"\").replace(\",\", \"\")\n                if (\n                    expected_name != model_name\n                    or expected_price_clean != model_price_clean\n                ):\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n            else:\n                if expected_value != 
model_value:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"GraceOrderID\":\n            # Order ID should start with \"000\" and match exactly\n            if not model_value.startswith(\"000\"):\n                mismatches.append(\n                    f\"{key}: expected to start with '000', got '{model_value}'\"\n                )\n            elif model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"HighestOrderInfo\":\n            # Check format customer:amount\n            if \":\" in expected_value and \":\" in model_value:\n                expected_customer, expected_amount = expected_value.rsplit(\":\", 1)\n                model_customer, model_amount = model_value.rsplit(\":\", 1)\n                # Normalize amount format\n                expected_amount_clean = expected_amount.replace(\"$\", \"\").replace(\n                    \",\", \"\"\n                )\n                model_amount_clean = model_amount.replace(\"$\", \"\").replace(\",\", \"\")\n                if (\n                    expected_customer != model_customer\n                    or expected_amount_clean != model_amount_clean\n                ):\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n            else:\n                if expected_value != model_value:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"Position2Product\":\n            # Check if product name and quantity match\n            if \":\" in expected_value and \":\" in model_value:\n                expected_name, expected_qty = 
expected_value.rsplit(\":\", 1)\n                model_name, model_qty = model_value.rsplit(\":\", 1)\n                if expected_name != model_name or expected_qty != model_qty:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n            else:\n                if expected_value != model_value:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"OvernightDufflePrice\":\n            # Normalize price format\n            expected_clean = expected_value.replace(\"$\", \"\").replace(\",\", \"\")\n            model_clean = model_value.replace(\"$\", \"\").replace(\",\", \"\")\n            if expected_clean != model_clean:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"HollisterPosition\":\n            # Position format (1st, 2nd, 3rd, etc.)\n            if model_value.lower() != expected_value.lower():\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        elif key == \"SarahMillerInfo\":\n            # Format: group:date\n            if \":\" in expected_value and \":\" in model_value:\n                expected_group, expected_date = expected_value.split(\":\", 1)\n                model_group, model_date = model_value.split(\":\", 1)\n                # Allow some flexibility in date format\n                if expected_group != model_group:\n                    mismatches.append(\n                        f\"{key}: expected group '{expected_group}', got '{model_group}'\"\n                    )\n                # For date, check if key parts match\n                if not (expected_date in model_date or model_date in expected_date):\n               
     mismatches.append(\n                        f\"{key}: expected date '{expected_date}', got '{model_date}'\"\n                    )\n            else:\n                if expected_value != model_value:\n                    mismatches.append(\n                        f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                    )\n\n        elif key == \"Invoice002BillTo\":\n            # Name should match exactly\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n        else:\n            # Exact match for count fields and other numeric values\n            if model_value != expected_value:\n                mismatches.append(\n                    f\"{key}: expected '{expected_value}', got '{model_value}'\"\n                )\n\n    if mismatches:\n        print(\"\\n=== Answer Comparison Mismatches ===\", file=sys.stderr)\n        for mismatch in mismatches:\n            print(f\"✗ {mismatch}\", file=sys.stderr)\n        return False\n\n    print(\"\\n=== Answer Comparison ===\", file=sys.stderr)\n    print(\"✓ All key information matches the expected answer\", file=sys.stderr)\n    return True\n\n\nasync def verify() -> bool:\n    \"\"\"\n    Verifies that the sales and inventory analysis task has been completed correctly.\n    First checks the model's answer against the expected label,\n    then optionally verifies the actual state in the Magento Admin.\n    \"\"\"\n    print(\"\\n\" + \"=\"*60, file=sys.stderr)\n    print(\"Starting verification of Task 5\", file=sys.stderr)\n    print(\"=\"*60, file=sys.stderr)\n    \n    # Get the label file path\n    label_path = Path(__file__).parent / \"label.txt\"\n\n    # Load expected answer\n    print(\"\\n--- Loading Expected Answer ---\", file=sys.stderr)\n    expected_answer = load_expected_answer(label_path)\n    if not expected_answer:\n        print(\"FATAL 
ERROR: Could not load expected answer from label.txt\", file=sys.stderr)\n        return False\n    print(f\"Successfully loaded {len(expected_answer)} expected values\", file=sys.stderr)\n\n    # Get model's response from MCP_MESSAGES\n    print(\"\\n--- Loading Model Response ---\", file=sys.stderr)\n    model_response = get_model_response()\n    if not model_response:\n        print(\"FATAL ERROR: No model response found in MCP_MESSAGES\", file=sys.stderr)\n        return False\n    \n    print(f\"Found model response ({len(model_response)} characters)\", file=sys.stderr)\n    \n    print(\"\\n--- Parsing Answer Format ---\", file=sys.stderr)\n    model_answer = parse_answer_format(model_response)\n    \n    if not model_answer:\n        print(\"\\nFATAL ERROR: Could not parse answer format from model response\", file=sys.stderr)\n        print(\"Verification FAILED\", file=sys.stderr)\n        return False\n    \n    print(\"\\n=== Model Answer Successfully Parsed ===\", file=sys.stderr)\n    for key, value in model_answer.items():\n        print(f\"  {key}: {value}\", file=sys.stderr)\n\n    # Compare answers\n    print(\"\\n--- Comparing Answers ---\", file=sys.stderr)\n    answer_match = compare_answers(model_answer, expected_answer)\n    \n    if not answer_match:\n        print(\"\\n\" + \"=\"*60, file=sys.stderr)\n        print(\"VERIFICATION FAILED: Model answer does not match expected answer\", file=sys.stderr)\n        print(\"=\"*60, file=sys.stderr)\n        return False\n    \n    print(\"\\n\" + \"=\"*60, file=sys.stderr)\n    print(\"✓ VERIFICATION PASSED: Model answer matches expected answer\", file=sys.stderr)\n    print(\"=\"*60, file=sys.stderr)\n    return True\n\n\ndef main():\n    \"\"\"\n    Executes the verification process and exits with a status code.\n    \"\"\"\n    result = asyncio.run(verify())\n    sys.exit(0 if result else 1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/description.md",
    "content": "Perform comprehensive search and filtering operations in the Magento Admin panel to extract specific business insights using advanced search techniques.\n\n**Task Requirements:**\n\n1. Login with username 'admin' and password 'admin1234'\n\n2. To analyze search behavior and term effectiveness, check the Search Terms of Marketing and perform complex filtering:\n   - Search for all terms containing 'tank' in their name - count the exact number of results\n   - Clear filters and find terms with exactly 0 results - count how many such terms exist\n   - Apply a filter to show only terms with more than 10 uses - record the term with highest uses and its count (You need to see how many there are and record them all.)\n   - Find the search term that has results between 20-30 - record its name and exact result count\n\n3. To gather detailed marketing insights from search data, go to Search Terms in Reports:\n   - Apply filter for terms with more than 15 hits - count total filtered results\n   - Find the term with ID between 10-15 that has the most results - record term name and result count (You need to see how many there are and record them all.)\n   - Filter to show only terms from \"Default Store View\" - count total results\n\n4. To examine real-time search trends and top performers, from the Dashboard, perform targeted searches:\n   - In the 'Top Search Terms' table, find the term with exactly 1 result - record its name and uses\n   - In the 'Last Search Terms' table, identify the term with both the highest number of results and uses - record name and the number of results\n   - In the 'Bestsellers' tab, find the product at position #3 - record name and quantity\n\n5. To identify patterns in search usage and results, navigate to Search Terms (main grid) in step 2:\n   - Sort by 'Uses' column (descending) - record the top term and its uses count\n   - Sort by 'Results' column (ascending) - record the first non-zero result term and its count\n   - Count total number of unique search terms in the system\n\n6. To provide a comprehensive report of all gathered data, compile all findings and output in the following exact format:\n\n```\n<answer>\nTankSearchCount|count\nZeroResultsCount|count\nHighestUseTerm|term:uses\nResults20to30Term|term1:results1|term2:results2|term3:results3|...\nHits15PlusCount|count\nID10to15MaxResults|term:results\nDefaultStoreViewCount|count\nOneResultTerm|term1:uses1|term2:uses2|term3:uses3|...\nHighestResultLastSearch|term:results\nPosition3Bestseller|product:quantity\nTopUseTerm|term:uses\nFirstNonZeroResult|term:results\nTotalUniqueTerms|count\n</answer>\n```\n\n**Example Output:**\n```\n<answer>\nTankSearchCount|X\nZeroResultsCount|X\nHighestUseTerm|search_term:XX\nResults20to30Term|search_term1:XX1|search_term2:XX2|search_term3:XX3|...\nHits15PlusCount|X\nID10to15MaxResults|Product Name:XX\nDefaultStoreViewCount|X\nOneResultTerm|search_term1:XX1|search_term2:XX2|search_term3:XX3|...\nHighestResultLastSearch|search_term:XX\nPosition3Bestseller|Product Name:X\nTopUseTerm|search_term:XX\nFirstNonZeroResult|search_term:X\nTotalUniqueTerms|X\n</answer>\n```\n\n**Success Criteria:**\n- Successfully logged into Magento Admin\n- Applied complex search filters in Search Terms section\n- Used range filters for results and hits\n- Sorted columns to find specific records\n- Navigated between different report views\n- Extracted data from filtered and sorted results\n- Counted records accurately after applying filters\n- Output answer in exact format with 13 data lines\n- Answer wrapped in <answer> tags"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/label.txt",
    "content": "TankSearchCount|2\nZeroResultsCount|1\nHighestUseTerm|hollister:19\nResults20to30Term|Antonia Racer Tank:23|tanks:23\nHits15PlusCount|1\nID10to15MaxResults|Antonia Racer Tank:23\nDefaultStoreViewCount|7\nOneResultTerm|hollister:19|WP10:1\nHighestResultLastSearch|Antonia Racer Tank:23\nPosition3Bestseller|Sprite Stasis Ball 65 cm:6\nTopUseTerm|hollister:19\nFirstNonZeroResult|WP10:1\nTotalUniqueTerms|7"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/meta.json",
    "content": "{\n  \"task_id\": \"search_filtering_operations\",\n  \"task_name\": \"Search Filtering Operations\",\n  \"category_id\": \"shopping_admin\",\n  \"category_name\": \"Shopping Admin\",\n  \"description\": \"Configure advanced search and filtering systems in admin interface, implement category hierarchies, set up attribute filters, and optimize search algorithms for user experience.\",\n  \"author\": \"Fanqing Meng\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"content submission\"\n  ],\n  \"mcp\": [\n    \"playwright\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"video\",\n    \"stateContent\": null,\n    \"stateUrl\": \"https://storage.mcpmark.ai/tasks_state/playwright_video/magento-admin.mp4\",\n    \"stateOriginalUrl\": \"https://github.com/web-arena-x/webarena/tree/main/environment_docker\"\n  }\n}"
  },
  {
    "path": "tasks/playwright_webarena/standard/shopping_admin/search_filtering_operations/verify.py",
    "content": "import re\nimport json\nimport os\nimport sys\n\n\ndef verify(messages):\n    \"\"\"\n    Verify that the agent has successfully performed complex search and filtering operations\n    in the Magento Admin panel and extracted all required information correctly.\n\n    Args:\n        messages: List of message dictionaries containing the conversation\n\n    Returns:\n        Dictionary with 'valid' boolean and 'reason' string\n    \"\"\"\n\n    # Find the last assistant message with status \"completed\" and type \"message\"\n    answer_content = None\n    for message in reversed(messages):\n        if (\n            message.get(\"role\") == \"assistant\"\n            and message.get(\"status\") == \"completed\"\n            and message.get(\"type\") == \"message\"\n            and message.get(\"content\")\n        ):\n            # Extract text from content structure\n            content = message[\"content\"]\n            if isinstance(content, list):\n                for item in content:\n                    if isinstance(item, dict) and item.get(\"type\") == \"output_text\":\n                        text = item.get(\"text\", \"\")\n                        # Look for answer tags with case-insensitive search\n                        answer_match = re.search(\n                            r\"<answer>(.*?)</answer>\", text, re.DOTALL | re.IGNORECASE\n                        )\n                        if answer_match:\n                            answer_content = answer_match.group(1).strip()\n                            break\n            elif isinstance(content, str):\n                # Look for answer tags in string content\n                answer_match = re.search(r\"<answer>(.*?)</answer>\", content, re.DOTALL | re.IGNORECASE)\n                if answer_match:\n                    answer_content = answer_match.group(1).strip()\n                    break\n\n            if answer_content:\n                break\n\n    if not answer_content:\n        
return {\"valid\": False, \"reason\": \"No answer found in <answer> tags\"}\n\n    # Expected format - each line should have a key|value pair\n    expected_keys = [\n        \"TankSearchCount\",\n        \"ZeroResultsCount\",\n        \"HighestUseTerm\",\n        \"Results20to30Term\",\n        \"Hits15PlusCount\",\n        \"ID10to15MaxResults\",\n        \"DefaultStoreViewCount\",\n        \"OneResultTerm\",\n        \"HighestResultLastSearch\",\n        \"Position3Bestseller\",\n        \"TopUseTerm\",\n        \"FirstNonZeroResult\",\n        \"TotalUniqueTerms\",\n    ]\n\n    # Parse the answer\n    lines = answer_content.strip().split(\"\\n\")\n\n    # Check if we have exactly 13 lines\n    if len(lines) != 13:\n        return {\"valid\": False, \"reason\": f\"Expected 13 data lines, found {len(lines)}\"}\n\n    # Parse each line and validate format\n    extracted_data = {}\n    for line in lines:\n        if \"|\" not in line:\n            return {\n                \"valid\": False,\n                \"reason\": f\"Invalid format in line: {line}. Expected 'key|value' format\",\n            }\n\n        parts = line.split(\"|\", 1)\n        if len(parts) != 2:\n            return {\"valid\": False, \"reason\": f\"Invalid format in line: {line}\"}\n\n        key, value = parts\n        extracted_data[key] = value\n\n    # Check all required keys are present\n    missing_keys = set(expected_keys) - set(extracted_data.keys())\n    if missing_keys:\n        return {\n            \"valid\": False,\n            \"reason\": f\"Missing required keys: {', '.join(missing_keys)}\",\n        }\n\n    # Validate specific data formats and expected values based on the current data\n\n    # 1. 
TankSearchCount should be a number (2 terms containing 'tank')\n    if not extracted_data[\"TankSearchCount\"].isdigit():\n        return {\n            \"valid\": False,\n            \"reason\": f\"TankSearchCount should be a number, got: {extracted_data['TankSearchCount']}\",\n        }\n\n    # Expected: \"Antonia Racer Tank\" and \"tanks\" contain 'tank'\n    if extracted_data[\"TankSearchCount\"] != \"2\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"TankSearchCount should be '2', got: {extracted_data['TankSearchCount']}\",\n        }\n\n    # 2. ZeroResultsCount should be a number (nike has 0 results)\n    if not extracted_data[\"ZeroResultsCount\"].isdigit():\n        return {\n            \"valid\": False,\n            \"reason\": f\"ZeroResultsCount should be a number, got: {extracted_data['ZeroResultsCount']}\",\n        }\n\n    if extracted_data[\"ZeroResultsCount\"] != \"1\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"ZeroResultsCount should be '1', got: {extracted_data['ZeroResultsCount']}\",\n        }\n\n    # 3. HighestUseTerm should be in format \"term:uses\"\n    if \":\" not in extracted_data[\"HighestUseTerm\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"HighestUseTerm should be in format 'term:uses', got: {extracted_data['HighestUseTerm']}\",\n        }\n\n    # hollister has 19 uses (highest among terms with > 10 uses)\n    if extracted_data[\"HighestUseTerm\"] != \"hollister:19\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"HighestUseTerm should be 'hollister:19', got: {extracted_data['HighestUseTerm']}\",\n        }\n\n    # 4. 
Results20to30Term should be in format \"term:results\"\n    if \":\" not in extracted_data[\"Results20to30Term\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"Results20to30Term should be in format 'term:results', got: {extracted_data['Results20to30Term']}\",\n        }\n\n    # Both \"tanks\" and \"Antonia Racer Tank\" have 23 results (between 20-30)\n    valid_results20to30 = [\"tanks:23\", \"Antonia Racer Tank:23\"]\n    # Check if answer contains one of the valid values or both separated by |\n    if not any(\n        val in extracted_data[\"Results20to30Term\"] for val in valid_results20to30\n    ):\n        return {\n            \"valid\": False,\n            \"reason\": f\"Results20to30Term should contain 'tanks:23' or 'Antonia Racer Tank:23', got: {extracted_data['Results20to30Term']}\",\n        }\n\n    # 5. Hits15PlusCount should be a number (only hollister has 19 hits > 15)\n    if not extracted_data[\"Hits15PlusCount\"].isdigit():\n        return {\n            \"valid\": False,\n            \"reason\": f\"Hits15PlusCount should be a number, got: {extracted_data['Hits15PlusCount']}\",\n        }\n\n    if extracted_data[\"Hits15PlusCount\"] != \"1\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"Hits15PlusCount should be '1', got: {extracted_data['Hits15PlusCount']}\",\n        }\n\n    # 6. 
ID10to15MaxResults should be in format \"term:results\"\n    if \":\" not in extracted_data[\"ID10to15MaxResults\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"ID10to15MaxResults should be in format 'term:results', got: {extracted_data['ID10to15MaxResults']}\",\n        }\n\n    # ID 11 is hollister (1 result), ID 13 is Antonia Racer Tank (23 results)\n    if extracted_data[\"ID10to15MaxResults\"] != \"Antonia Racer Tank:23\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"ID10to15MaxResults should be 'Antonia Racer Tank:23', got: {extracted_data['ID10to15MaxResults']}\",\n        }\n\n    # 7. DefaultStoreViewCount should be a number (all 7 terms are from Default Store View)\n    if not extracted_data[\"DefaultStoreViewCount\"].isdigit():\n        return {\n            \"valid\": False,\n            \"reason\": f\"DefaultStoreViewCount should be a number, got: {extracted_data['DefaultStoreViewCount']}\",\n        }\n\n    if extracted_data[\"DefaultStoreViewCount\"] != \"7\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"DefaultStoreViewCount should be '7', got: {extracted_data['DefaultStoreViewCount']}\",\n        }\n\n    # 8. OneResultTerm should be in format \"term:uses\"\n    if \":\" not in extracted_data[\"OneResultTerm\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"OneResultTerm should be in format 'term:uses', got: {extracted_data['OneResultTerm']}\",\n        }\n\n    # Both hollister and WP10 have exactly 1 result\n    valid_one_result = [\"hollister:19\", \"WP10:1\"]\n    if not any(val in extracted_data[\"OneResultTerm\"] for val in valid_one_result):\n        return {\n            \"valid\": False,\n            \"reason\": f\"OneResultTerm should contain 'hollister:19' or 'WP10:1', got: {extracted_data['OneResultTerm']}\",\n        }\n\n    # 9. 
HighestResultLastSearch should be in format \"term:results\"\n    if \":\" not in extracted_data[\"HighestResultLastSearch\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"HighestResultLastSearch should be in format 'term:results', got: {extracted_data['HighestResultLastSearch']}\",\n        }\n\n    # In Last Search Terms: tanks and Antonia Racer Tank both have 23 results (highest)\n    valid_highest_last = [\"tanks:23\", \"Antonia Racer Tank:23\"]\n    if not any(\n        val in extracted_data[\"HighestResultLastSearch\"] for val in valid_highest_last\n    ):\n        return {\n            \"valid\": False,\n            \"reason\": f\"HighestResultLastSearch should contain 'tanks:23' or 'Antonia Racer Tank:23', got: {extracted_data['HighestResultLastSearch']}\",\n        }\n\n    # 10. Position3Bestseller should be in format \"product:quantity\"\n    if \":\" not in extracted_data[\"Position3Bestseller\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"Position3Bestseller should be in format 'product:quantity', got: {extracted_data['Position3Bestseller']}\",\n        }\n\n    # Position 3 in Bestsellers is \"Sprite Stasis Ball 65 cm\" with quantity 6\n    if extracted_data[\"Position3Bestseller\"] != \"Sprite Stasis Ball 65 cm:6\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"Position3Bestseller should be 'Sprite Stasis Ball 65 cm:6', got: {extracted_data['Position3Bestseller']}\",\n        }\n\n    # 11. 
TopUseTerm should be in format \"term:uses\"\n    if \":\" not in extracted_data[\"TopUseTerm\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"TopUseTerm should be in format 'term:uses', got: {extracted_data['TopUseTerm']}\",\n        }\n\n    # hollister has 19 uses (highest)\n    if extracted_data[\"TopUseTerm\"] != \"hollister:19\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"TopUseTerm should be 'hollister:19', got: {extracted_data['TopUseTerm']}\",\n        }\n\n    # 12. FirstNonZeroResult should be in format \"term:results\"\n    if \":\" not in extracted_data[\"FirstNonZeroResult\"]:\n        return {\n            \"valid\": False,\n            \"reason\": f\"FirstNonZeroResult should be in format 'term:results', got: {extracted_data['FirstNonZeroResult']}\",\n        }\n\n    # When sorted by results ascending, first non-zero is WP10 (has 1 result)\n    if extracted_data[\"FirstNonZeroResult\"] != \"WP10:1\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"FirstNonZeroResult should be 'WP10:1', got: {extracted_data['FirstNonZeroResult']}\",\n        }\n\n    # 13. 
TotalUniqueTerms should be a number\n    if not extracted_data[\"TotalUniqueTerms\"].isdigit():\n        return {\n            \"valid\": False,\n            \"reason\": f\"TotalUniqueTerms should be a number, got: {extracted_data['TotalUniqueTerms']}\",\n        }\n\n    # There are 7 unique search terms in the system\n    if extracted_data[\"TotalUniqueTerms\"] != \"7\":\n        return {\n            \"valid\": False,\n            \"reason\": f\"TotalUniqueTerms should be '7', got: {extracted_data['TotalUniqueTerms']}\",\n        }\n\n    # All validations passed\n    return {\n        \"valid\": True,\n        \"reason\": \"All complex search and filtering operations completed successfully\",\n    }\n\n\nif __name__ == \"__main__\":\n    # Load messages from environment variable\n    messages_path = os.getenv(\"MCP_MESSAGES\")\n    if not messages_path:\n        print(\n            json.dumps(\n                {\"valid\": False, \"reason\": \"MCP_MESSAGES environment variable not set\"}\n            )\n        )\n        exit(1)\n\n    try:\n        with open(messages_path, \"r\") as f:\n            messages = json.load(f)\n    except Exception as e:\n        print(\n            json.dumps({\"valid\": False, \"reason\": f\"Failed to load messages: {str(e)}\"})\n        )\n        exit(1)\n\n    # Run verification\n    result = verify(messages)\n    print(json.dumps(result))\n    # Exit with appropriate code based on verification result\n    sys.exit(0 if result[\"valid\"] else 1)\n"
  },
  {
    "path": "tasks/postgres/easy/.gitkeep",
    "content": ""
  },
  {
    "path": "tasks/postgres/easy/chinook/customer_data_migration_basic/description.md",
    "content": "Migrate customer data from an acquired company to PostgreSQL using efficient bulk operations.\n\n## Your Mission:\n\nChinook Music Store has recently acquired \"MelodyMart,\" a competing music retailer. Their customer database needs to be migrated into Chinook's PostgreSQL database.\n\n## Migration Requirements:\n\n1. **Process all customer records from the data table below** and migrate them into the `Customer` table \n2. **Apply business logic during migration**:\n   - Assign `CustomerId` values starting from the next available ID\n   - Assign all customers to the support representative with EmployeeId 3\n   - Set `Fax` field to NULL for all migrated customers\n\n## Customer Data to Migrate:\n\n| FirstName | LastName | Company | Address | City | State | Country | PostalCode | Phone | Email |\n|-----------|----------|---------|---------|------|-------|---------|------------|-------|--------|\n| Danielle | Johnson | Sanchez-Taylor | 819 Johnson Course | East William | AK | USA | 74064 | 386-3794 | danielle.johnson@sancheztaylor.com |\n| Katherine | Moore | Peterson-Moore | 16155 Roman Stream Suite 816 | New Kellystad | OK | USA | 25704 | 103-4131 | katherine_moore@petersonmoore.org |\n| Joshua | Reid | Martin-Kelly | 192 Frank Light Suite 835 | East Lydiamouth | MO | USA | 35594 | 139-5376 | joshua_reid@martinkelly.org |\n"
  },
  {
    "path": "tasks/postgres/easy/chinook/customer_data_migration_basic/meta.json",
    "content": "{\n  \"task_id\": \"customer_data_migration_basic\",\n  \"task_name\": \"Customer Data Migration Basic\",\n  \"category_id\": \"chinook\",\n  \"category_name\": \"Chinook\",\n  \"description\": \"Load the MelodyMart customer rows into the Customer table with new ids, SupportRepId = 3, and Fax values set to NULL.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data migration\",\n    \"transactional operations\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"Album\\\" {\\n  \\\"AlbumId\\\" int4 [pk, not null]\\n  \\\"Title\\\" varchar(160) [not null]\\n  \\\"ArtistId\\\" int4 [not null]\\n\\n  Indexes {\\n    ArtistId [type: btree, name: \\\"IFK_AlbumArtistId\\\"]\\n  }\\n}\\n\\nTable \\\"Artist\\\" {\\n  \\\"ArtistId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Customer\\\" {\\n  \\\"CustomerId\\\" int4 [pk, not null]\\n  \\\"FirstName\\\" varchar(40) [not null]\\n  \\\"LastName\\\" varchar(20) [not null]\\n  \\\"Company\\\" varchar(80)\\n  \\\"Address\\\" varchar(70)\\n  \\\"City\\\" varchar(40)\\n  \\\"State\\\" varchar(40)\\n  \\\"Country\\\" varchar(40)\\n  \\\"PostalCode\\\" varchar(10)\\n  \\\"Phone\\\" varchar(24)\\n  \\\"Fax\\\" varchar(24)\\n  \\\"Email\\\" varchar(60) [not null]\\n  \\\"SupportRepId\\\" int4\\n\\n  Indexes {\\n    SupportRepId [type: btree, name: \\\"IFK_CustomerSupportRepId\\\"]\\n  }\\n}\\n\\nTable \\\"Employee\\\" {\\n  \\\"EmployeeId\\\" int4 [pk, not null]\\n  \\\"LastName\\\" varchar(20) [not null]\\n  \\\"FirstName\\\" varchar(20) [not null]\\n  \\\"Title\\\" varchar(30)\\n  \\\"ReportsTo\\\" int4\\n  \\\"BirthDate\\\" timestamp\\n  \\\"HireDate\\\" timestamp\\n  \\\"Address\\\" varchar(70)\\n  \\\"City\\\" varchar(40)\\n  \\\"State\\\" varchar(40)\\n  \\\"Country\\\" varchar(40)\\n  \\\"PostalCode\\\" varchar(10)\\n  \\\"Phone\\\" 
varchar(24)\\n  \\\"Fax\\\" varchar(24)\\n  \\\"Email\\\" varchar(60)\\n\\n  Indexes {\\n    ReportsTo [type: btree, name: \\\"IFK_EmployeeReportsTo\\\"]\\n  }\\n}\\n\\nTable \\\"Genre\\\" {\\n  \\\"GenreId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Invoice\\\" {\\n  \\\"InvoiceId\\\" int4 [pk, not null]\\n  \\\"CustomerId\\\" int4 [not null]\\n  \\\"InvoiceDate\\\" timestamp [not null]\\n  \\\"BillingAddress\\\" varchar(70)\\n  \\\"BillingCity\\\" varchar(40)\\n  \\\"BillingState\\\" varchar(40)\\n  \\\"BillingCountry\\\" varchar(40)\\n  \\\"BillingPostalCode\\\" varchar(10)\\n  \\\"Total\\\" numeric(10,2) [not null]\\n\\n  Indexes {\\n    CustomerId [type: btree, name: \\\"IFK_InvoiceCustomerId\\\"]\\n  }\\n}\\n\\nTable \\\"InvoiceLine\\\" {\\n  \\\"InvoiceLineId\\\" int4 [pk, not null]\\n  \\\"InvoiceId\\\" int4 [not null]\\n  \\\"TrackId\\\" int4 [not null]\\n  \\\"UnitPrice\\\" numeric(10,2) [not null]\\n  \\\"Quantity\\\" int4 [not null]\\n\\n  Indexes {\\n    InvoiceId [type: btree, name: \\\"IFK_InvoiceLineInvoiceId\\\"]\\n    TrackId [type: btree, name: \\\"IFK_InvoiceLineTrackId\\\"]\\n  }\\n}\\n\\nTable \\\"MediaType\\\" {\\n  \\\"MediaTypeId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Playlist\\\" {\\n  \\\"PlaylistId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"PlaylistTrack\\\" {\\n  \\\"PlaylistId\\\" int4 [not null]\\n  \\\"TrackId\\\" int4 [not null]\\n\\n  Indexes {\\n    (PlaylistId, TrackId) [type: btree, name: \\\"PK_PlaylistTrack\\\"]\\n    TrackId [type: btree, name: \\\"IFK_PlaylistTrackTrackId\\\"]\\n  }\\n}\\n\\nTable \\\"Track\\\" {\\n  \\\"TrackId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(200) [not null]\\n  \\\"AlbumId\\\" int4\\n  \\\"MediaTypeId\\\" int4 [not null]\\n  \\\"GenreId\\\" int4\\n  \\\"Composer\\\" varchar(220)\\n  \\\"Milliseconds\\\" int4 [not null]\\n  \\\"Bytes\\\" int4\\n  \\\"UnitPrice\\\" numeric(10,2) [not null]\\n\\n  
Indexes {\\n    AlbumId [type: btree, name: \\\"IFK_TrackAlbumId\\\"]\\n    GenreId [type: btree, name: \\\"IFK_TrackGenreId\\\"]\\n    MediaTypeId [type: btree, name: \\\"IFK_TrackMediaTypeId\\\"]\\n  }\\n}\\n\\nRef \\\"FK_AlbumArtistId\\\":\\\"Artist\\\".\\\"ArtistId\\\" < \\\"Album\\\".\\\"ArtistId\\\"\\n\\nRef \\\"FK_CustomerSupportRepId\\\":\\\"Employee\\\".\\\"EmployeeId\\\" < \\\"Customer\\\".\\\"SupportRepId\\\"\\n\\nRef \\\"FK_EmployeeReportsTo\\\":\\\"Employee\\\".\\\"EmployeeId\\\" < \\\"Employee\\\".\\\"ReportsTo\\\"\\n\\nRef \\\"FK_InvoiceCustomerId\\\":\\\"Customer\\\".\\\"CustomerId\\\" < \\\"Invoice\\\".\\\"CustomerId\\\"\\n\\nRef \\\"FK_InvoiceLineInvoiceId\\\":\\\"Invoice\\\".\\\"InvoiceId\\\" < \\\"InvoiceLine\\\".\\\"InvoiceId\\\"\\n\\nRef \\\"FK_InvoiceLineTrackId\\\":\\\"Track\\\".\\\"TrackId\\\" < \\\"InvoiceLine\\\".\\\"TrackId\\\"\\n\\nRef \\\"FK_PlaylistTrackPlaylistId\\\":\\\"Playlist\\\".\\\"PlaylistId\\\" < \\\"PlaylistTrack\\\".\\\"PlaylistId\\\"\\n\\nRef \\\"FK_PlaylistTrackTrackId\\\":\\\"Track\\\".\\\"TrackId\\\" < \\\"PlaylistTrack\\\".\\\"TrackId\\\"\\n\\nRef \\\"FK_TrackAlbumId\\\":\\\"Album\\\".\\\"AlbumId\\\" < \\\"Track\\\".\\\"AlbumId\\\"\\n\\nRef \\\"FK_TrackGenreId\\\":\\\"Genre\\\".\\\"GenreId\\\" < \\\"Track\\\".\\\"GenreId\\\"\\n\\nRef \\\"FK_TrackMediaTypeId\\\":\\\"MediaType\\\".\\\"MediaTypeId\\\" < \\\"Track\\\".\\\"MediaTypeId\\\"\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/chinook.sql\"\n  }\n}\n"
  },
  {
    "path": "tasks/postgres/easy/chinook/customer_data_migration_basic/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 2: Customer Data Migration\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nimport pickle\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef load_expected_customers():\n    \"\"\"Load the expected customer data from pickle file.\"\"\"\n    import os\n    script_dir = os.path.dirname(os.path.abspath(__file__))\n    pkl_path = os.path.join(script_dir, 'customer_data.pkl')\n    \n    try:\n        with open(pkl_path, 'rb') as f:\n            return pickle.load(f)\n    except FileNotFoundError:\n        print(f\"❌ customer_data.pkl not found at {pkl_path}. Please generate customer data first.\")\n        return None\n    except Exception as e:\n        print(f\"❌ Error loading customer data: {e}\")\n        return None\n\ndef verify_migrated_customers(conn, expected_customers) -> bool:\n    \"\"\"Verify migrated customers by comparing with expected data as sets.\"\"\"\n    with conn.cursor() as cur:\n        # Get all customers with ID > 59 (the migrated ones)\n        cur.execute('''\n            SELECT \"FirstName\", \"LastName\", \"Company\", \"Address\", \"City\", \n                   \"State\", \"Country\", \"PostalCode\", \"Phone\", \"Email\", \n                   \"SupportRepId\", \"Fax\"\n            FROM \"Customer\" \n            WHERE \"CustomerId\" > 59\n        ''')\n        \n        actual_customers = cur.fetchall()\n        \n        if len(actual_customers) != len(expected_customers):\n            print(f\"❌ Expected {len(expected_customers)} migrated customers, found {len(actual_customers)}\")\n            return False\n        \n    
    # Convert expected customers to tuples for set comparison\n        expected_tuples = set()\n        for expected in expected_customers:\n            expected_tuple = (\n                expected['FirstName'], expected['LastName'], expected['Company'],\n                expected['Address'], expected['City'], expected['State'],\n                expected['Country'], expected['PostalCode'], expected['Phone'], \n                expected['Email'], 3, None  # SupportRepId=3, Fax=None\n            )\n            expected_tuples.add(expected_tuple)\n        \n        # Convert actual customers to set with proper type conversion\n        actual_tuples = set()\n        for row in actual_customers:\n            # Convert all fields to strings for consistent comparison\n            actual_tuple = (\n                str(row[0]) if row[0] is not None else '',  # FirstName\n                str(row[1]) if row[1] is not None else '',  # LastName  \n                str(row[2]) if row[2] is not None else '',  # Company\n                str(row[3]) if row[3] is not None else '',  # Address\n                str(row[4]) if row[4] is not None else '',  # City\n                str(row[5]) if row[5] is not None else '',  # State\n                str(row[6]) if row[6] is not None else '',  # Country\n                str(row[7]) if row[7] is not None else '',  # PostalCode\n                str(row[8]) if row[8] is not None else '',  # Phone\n                str(row[9]) if row[9] is not None else '',  # Email\n                int(row[10]) if row[10] is not None else None,  # SupportRepId\n                row[11]  # Fax (should be None)\n            )\n            actual_tuples.add(actual_tuple)\n        \n        # Check if sets are equal\n        if actual_tuples != expected_tuples:\n            missing_in_actual = expected_tuples - actual_tuples\n            extra_in_actual = actual_tuples - expected_tuples\n            \n            print(f\"❌ Customer data sets don't match!\")\n          
  if missing_in_actual:\n                print(f\"   Missing {len(missing_in_actual)} expected customers\")\n                for missing in list(missing_in_actual)[:3]:  # Show first 3\n                    print(f\"   Missing: {missing[0]} {missing[1]} - {missing[2]}\")\n                if len(missing_in_actual) > 3:\n                    print(f\"   ... and {len(missing_in_actual) - 3} more\")\n            \n            if extra_in_actual:\n                print(f\"   Found {len(extra_in_actual)} unexpected customers\")\n                for extra in list(extra_in_actual)[:3]:  # Show first 3\n                    print(f\"   Extra: {extra[0]} {extra[1]} - {extra[2]}\")\n                if len(extra_in_actual) > 3:\n                    print(f\"   ... and {len(extra_in_actual) - 3} more\")\n            \n            return False\n        \n        print(f\"✅ All {len(expected_customers)} customers migrated correctly\")\n        print(f\"✅ All customers assigned to SupportRepId 3\")\n        print(f\"✅ All customers have Fax field set to NULL\")\n        print(f\"✅ Customer data sets match exactly (order-independent)\")\n        \n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 60)\n    print(\"Verifying Customer Data Migration Task\")\n    print(\"=\" * 60)\n\n    # Load expected customer data\n    expected_customers = load_expected_customers()\n    if not expected_customers:\n        sys.exit(1)\n    \n    print(f\"Loaded {len(expected_customers)} expected customer records\")\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify migration\n        success = verify_migrated_customers(conn, expected_customers)\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 
Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/easy/chinook/update_employee_info/description.md",
    "content": "Update employee information and reorganize the reporting structure in the Chinook database to reflect organizational changes.\n\n## Your Tasks:\n\n### **UPDATE: Modify Existing Employee Information**\n- Change Andrew Adams (EmployeeId = 1) title from 'General Manager' to 'CEO'\n- Update Nancy Edwards (EmployeeId = 2) phone number to '+1 (403) 555-9999'\n- Change all employees with Title = 'IT Staff' to have Title = 'IT Specialist'\n\n\n## Requirements:\n\n- Use UPDATE statements to modify the existing records\n- The title update for 'IT Staff' should affect all matching employees\n\n## Expected Results:\n\nAfter completing the updates:\n- Andrew Adams should have Title = 'CEO'\n- Nancy Edwards should have Phone = '+1 (403) 555-9999'\n- All employees previously with Title = 'IT Staff' should now have Title = 'IT Specialist'\n\nThis task practices UPDATE operations for both employee information and organizational hierarchy management.\n"
  },
  {
    "path": "tasks/postgres/easy/chinook/update_employee_info/meta.json",
    "content": "{\n  \"task_id\": \"update_employee_info\",\n  \"task_name\": \"Update Employee Info\",\n  \"category_id\": \"chinook\",\n  \"category_name\": \"Chinook\",\n  \"description\": \"Update Chinook employee records so Andrew Adams becomes CEO, Nancy Edwards receives the new phone number, and every \\\"IT Staff\\\" title becomes \\\"IT Specialist.\\\"\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data updates\",\n    \"organizational change\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"Album\\\" {\\n  \\\"AlbumId\\\" int4 [pk, not null]\\n  \\\"Title\\\" varchar(160) [not null]\\n  \\\"ArtistId\\\" int4 [not null]\\n\\n  Indexes {\\n    ArtistId [type: btree, name: \\\"IFK_AlbumArtistId\\\"]\\n  }\\n}\\n\\nTable \\\"Artist\\\" {\\n  \\\"ArtistId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Customer\\\" {\\n  \\\"CustomerId\\\" int4 [pk, not null]\\n  \\\"FirstName\\\" varchar(40) [not null]\\n  \\\"LastName\\\" varchar(20) [not null]\\n  \\\"Company\\\" varchar(80)\\n  \\\"Address\\\" varchar(70)\\n  \\\"City\\\" varchar(40)\\n  \\\"State\\\" varchar(40)\\n  \\\"Country\\\" varchar(40)\\n  \\\"PostalCode\\\" varchar(10)\\n  \\\"Phone\\\" varchar(24)\\n  \\\"Fax\\\" varchar(24)\\n  \\\"Email\\\" varchar(60) [not null]\\n  \\\"SupportRepId\\\" int4\\n\\n  Indexes {\\n    SupportRepId [type: btree, name: \\\"IFK_CustomerSupportRepId\\\"]\\n  }\\n}\\n\\nTable \\\"Employee\\\" {\\n  \\\"EmployeeId\\\" int4 [pk, not null]\\n  \\\"LastName\\\" varchar(20) [not null]\\n  \\\"FirstName\\\" varchar(20) [not null]\\n  \\\"Title\\\" varchar(30)\\n  \\\"ReportsTo\\\" int4\\n  \\\"BirthDate\\\" timestamp\\n  \\\"HireDate\\\" timestamp\\n  \\\"Address\\\" varchar(70)\\n  \\\"City\\\" varchar(40)\\n  \\\"State\\\" varchar(40)\\n  \\\"Country\\\" varchar(40)\\n  \\\"PostalCode\\\" 
varchar(10)\\n  \\\"Phone\\\" varchar(24)\\n  \\\"Fax\\\" varchar(24)\\n  \\\"Email\\\" varchar(60)\\n\\n  Indexes {\\n    ReportsTo [type: btree, name: \\\"IFK_EmployeeReportsTo\\\"]\\n  }\\n}\\n\\nTable \\\"Genre\\\" {\\n  \\\"GenreId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Invoice\\\" {\\n  \\\"InvoiceId\\\" int4 [pk, not null]\\n  \\\"CustomerId\\\" int4 [not null]\\n  \\\"InvoiceDate\\\" timestamp [not null]\\n  \\\"BillingAddress\\\" varchar(70)\\n  \\\"BillingCity\\\" varchar(40)\\n  \\\"BillingState\\\" varchar(40)\\n  \\\"BillingCountry\\\" varchar(40)\\n  \\\"BillingPostalCode\\\" varchar(10)\\n  \\\"Total\\\" numeric(10,2) [not null]\\n\\n  Indexes {\\n    CustomerId [type: btree, name: \\\"IFK_InvoiceCustomerId\\\"]\\n  }\\n}\\n\\nTable \\\"InvoiceLine\\\" {\\n  \\\"InvoiceLineId\\\" int4 [pk, not null]\\n  \\\"InvoiceId\\\" int4 [not null]\\n  \\\"TrackId\\\" int4 [not null]\\n  \\\"UnitPrice\\\" numeric(10,2) [not null]\\n  \\\"Quantity\\\" int4 [not null]\\n\\n  Indexes {\\n    InvoiceId [type: btree, name: \\\"IFK_InvoiceLineInvoiceId\\\"]\\n    TrackId [type: btree, name: \\\"IFK_InvoiceLineTrackId\\\"]\\n  }\\n}\\n\\nTable \\\"MediaType\\\" {\\n  \\\"MediaTypeId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Playlist\\\" {\\n  \\\"PlaylistId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"PlaylistTrack\\\" {\\n  \\\"PlaylistId\\\" int4 [not null]\\n  \\\"TrackId\\\" int4 [not null]\\n\\n  Indexes {\\n    (PlaylistId, TrackId) [type: btree, name: \\\"PK_PlaylistTrack\\\"]\\n    TrackId [type: btree, name: \\\"IFK_PlaylistTrackTrackId\\\"]\\n  }\\n}\\n\\nTable \\\"Track\\\" {\\n  \\\"TrackId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(200) [not null]\\n  \\\"AlbumId\\\" int4\\n  \\\"MediaTypeId\\\" int4 [not null]\\n  \\\"GenreId\\\" int4\\n  \\\"Composer\\\" varchar(220)\\n  \\\"Milliseconds\\\" int4 [not null]\\n  \\\"Bytes\\\" int4\\n  \\\"UnitPrice\\\" 
numeric(10,2) [not null]\\n\\n  Indexes {\\n    AlbumId [type: btree, name: \\\"IFK_TrackAlbumId\\\"]\\n    GenreId [type: btree, name: \\\"IFK_TrackGenreId\\\"]\\n    MediaTypeId [type: btree, name: \\\"IFK_TrackMediaTypeId\\\"]\\n  }\\n}\\n\\nRef \\\"FK_AlbumArtistId\\\":\\\"Artist\\\".\\\"ArtistId\\\" < \\\"Album\\\".\\\"ArtistId\\\"\\n\\nRef \\\"FK_CustomerSupportRepId\\\":\\\"Employee\\\".\\\"EmployeeId\\\" < \\\"Customer\\\".\\\"SupportRepId\\\"\\n\\nRef \\\"FK_EmployeeReportsTo\\\":\\\"Employee\\\".\\\"EmployeeId\\\" < \\\"Employee\\\".\\\"ReportsTo\\\"\\n\\nRef \\\"FK_InvoiceCustomerId\\\":\\\"Customer\\\".\\\"CustomerId\\\" < \\\"Invoice\\\".\\\"CustomerId\\\"\\n\\nRef \\\"FK_InvoiceLineInvoiceId\\\":\\\"Invoice\\\".\\\"InvoiceId\\\" < \\\"InvoiceLine\\\".\\\"InvoiceId\\\"\\n\\nRef \\\"FK_InvoiceLineTrackId\\\":\\\"Track\\\".\\\"TrackId\\\" < \\\"InvoiceLine\\\".\\\"TrackId\\\"\\n\\nRef \\\"FK_PlaylistTrackPlaylistId\\\":\\\"Playlist\\\".\\\"PlaylistId\\\" < \\\"PlaylistTrack\\\".\\\"PlaylistId\\\"\\n\\nRef \\\"FK_PlaylistTrackTrackId\\\":\\\"Track\\\".\\\"TrackId\\\" < \\\"PlaylistTrack\\\".\\\"TrackId\\\"\\n\\nRef \\\"FK_TrackAlbumId\\\":\\\"Album\\\".\\\"AlbumId\\\" < \\\"Track\\\".\\\"AlbumId\\\"\\n\\nRef \\\"FK_TrackGenreId\\\":\\\"Genre\\\".\\\"GenreId\\\" < \\\"Track\\\".\\\"GenreId\\\"\\n\\nRef \\\"FK_TrackMediaTypeId\\\":\\\"MediaType\\\".\\\"MediaTypeId\\\" < \\\"Track\\\".\\\"MediaTypeId\\\"\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/chinook.sql\"\n  }\n}\n"
  },
  {
    "path": "tasks/postgres/easy/chinook/update_employee_info/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 3: Employee Hierarchy Management\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.01 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.01:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_employee_count_and_titles(conn) -> bool:\n    \"\"\"Verify the final employee count and title changes.\"\"\"\n    with conn.cursor() as cur:\n        # Check the final verification query results\n        cur.execute(\"\"\"\n            SELECT \n                COUNT(*) as total_employees,\n                COUNT(CASE WHEN \"Title\" = 'CEO' THEN 1 END) as ceo_count,\n                COUNT(CASE WHEN \"Title\" = 'IT Specialist' THEN 1 END) as it_specialist_count\n            FROM \"Employee\"\n        \"\"\")\n        result = cur.fetchone()\n        \n        total_employees, ceo_count, it_specialist_count = result\n        \n        if total_employees != 8:\n            print(f\"❌ Expected 8 total employees, got {total_employees}\")\n            return False\n            \n        if ceo_count != 1:\n     
       print(f\"❌ Expected 1 CEO, got {ceo_count}\")\n            return False\n            \n        if it_specialist_count != 2:\n            print(f\"❌ Expected 2 IT Specialists, got {it_specialist_count}\")\n            return False\n            \n        print(\"✅ Employee count and title verification passed\")\n        return True\n\ndef verify_specific_employees(conn) -> bool:\n    \"\"\"Verify specific employee records and modifications.\"\"\"\n    with conn.cursor() as cur:\n        # Check all employee fields in one query\n        cur.execute(\"\"\"\n            SELECT \"EmployeeId\", \"LastName\", \"FirstName\", \"Title\", \"ReportsTo\", \"BirthDate\", \n                   \"HireDate\", \"Address\", \"City\", \"State\", \"Country\", \"PostalCode\", \n                   \"Phone\", \"Fax\", \"Email\"\n            FROM \"Employee\" \n            WHERE \"EmployeeId\" IN (1, 2)\n            ORDER BY \"EmployeeId\"\n        \"\"\")\n        employees = cur.fetchall()\n        \n        from datetime import datetime\n        \n        expected = [\n            # Andrew Adams (ID 1) - Title changes to 'CEO', phone stays original, ReportsTo stays None\n            (1, 'Adams', 'Andrew', 'CEO', None, datetime(1962, 2, 18), datetime(2002, 8, 14),\n             '11120 Jasper Ave NW', 'Edmonton', 'AB', 'Canada', 'T5K 2N1', '+1 (780) 428-9482', '+1 (780) 428-3457', 'andrew@chinookcorp.com'),\n            # Nancy Edwards (ID 2) - Phone changes, title stays 'Sales Manager', ReportsTo stays 1\n            (2, 'Edwards', 'Nancy', 'Sales Manager', 1, datetime(1958, 12, 8), datetime(2002, 5, 1),\n             '825 8 Ave SW', 'Calgary', 'AB', 'Canada', 'T2P 2T3', '+1 (403) 555-9999', '+1 (403) 262-3322', 'nancy@chinookcorp.com'),\n        ]\n        \n        if len(employees) != 2:\n            print(f\"❌ Expected 2 key employees, found {len(employees)}\")\n            return False\n            \n        # Full field comparison for all employees using rows_match\n        
for actual, expected_emp in zip(employees, expected):\n            if not rows_match(actual, expected_emp):\n                print(f\"❌ Employee {actual[0]} row mismatch: expected {expected_emp}, got {actual}\")\n                return False\n        \n        print(\"✅ Specific employee verification passed - all fields match exactly\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n    print(\"Verifying Task 3: Employee Hierarchy Management\")\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Run verification checks with short-circuit evaluation\n        success = (\n            verify_employee_count_and_titles(conn) and\n            verify_specific_employees(conn)\n                  )\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            print(\"All employee hierarchy management operations completed correctly!\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/easy/dvdrental/create_payment_index/description.md",
    "content": "Create an index to optimize customer payment queries in the DVD rental database.\n\n## Your Task:\n\nCreate an index on the `customer_id` column of the `payment` table to improve query performance.\n\n## Requirements:\n\n- Create an index on the `payment` table's `customer_id` column\n- The index name can be anything you choose (e.g., `idx_payment_customer_id`)\n- Use the standard CREATE INDEX syntax\n\n## Why This Helps:\n\nThe `customer_id` column is frequently used in:\n- JOIN operations between customer and payment tables\n- WHERE clauses filtering by customer\n- Subqueries that look up payments for specific customers\n\nAdding an index will significantly speed up these operations.\n\n"
  },
  {
    "path": "tasks/postgres/easy/dvdrental/create_payment_index/meta.json",
    "content": "{\n  \"task_id\": \"create_payment_index\",\n  \"task_name\": \"Create Payment Index\",\n  \"category_id\": \"dvdrental\",\n  \"category_name\": \"DVD Rental\",\n  \"description\": \"Add an index on payment.customer_id to speed up the customer payment lookups in the DVD Rental database.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"performance optimization\",\n    \"indexing\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"mpaa_rating\\\" {\\n  \\\"G\\\"\\n  \\\"PG\\\"\\n  \\\"PG-13\\\"\\n  \\\"R\\\"\\n  \\\"NC-17\\\"\\n}\\n\\nTable \\\"customer\\\" {\\n  \\\"customer_id\\\" int4 [pk, not null, increment]\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  \\\"last_name\\\" varchar(45) [not null]\\n  \\\"email\\\" varchar(50)\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"activebool\\\" bool [not null, default: true]\\n  \\\"create_date\\\" date [not null, default: `('now'::text)::date`]\\n  \\\"last_update\\\" timestamp [default: `now()`]\\n  \\\"active\\\" int4\\n\\n  Indexes {\\n    address_id [type: btree, name: \\\"idx_fk_address_id\\\"]\\n    store_id [type: btree, name: \\\"idx_fk_store_id\\\"]\\n    last_name [type: btree, name: \\\"idx_last_name\\\"]\\n  }\\n}\\n\\nTable \\\"actor\\\" {\\n  \\\"actor_id\\\" int4 [pk, not null, increment]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  \\\"last_name\\\" varchar(45) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    last_name [type: btree, name: \\\"idx_actor_last_name\\\"]\\n  }\\n}\\n\\nTable \\\"category\\\" {\\n  \\\"category_id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(25) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"film\\\" {\\n  \\\"film_id\\\" int4 [pk, not null, increment]\\n  
\\\"title\\\" varchar(255) [not null]\\n  \\\"description\\\" text\\n  \\\"release_year\\\" int4\\n  \\\"language_id\\\" int2 [not null]\\n  \\\"rental_duration\\\" int2 [not null, default: 3]\\n  \\\"rental_rate\\\" numeric(4,2) [not null, default: 4.99]\\n  \\\"length\\\" int2\\n  \\\"replacement_cost\\\" numeric(5,2) [not null, default: 19.99]\\n  \\\"rating\\\" mpaa_rating [default: 'G']\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n  \\\"special_features\\\" \\\"text[]\\\"\\n  \\\"fulltext\\\" tsvector [not null]\\n\\n  Indexes {\\n    fulltext [type: gist, name: \\\"film_fulltext_idx\\\"]\\n    language_id [type: btree, name: \\\"idx_fk_language_id\\\"]\\n    title [type: btree, name: \\\"idx_title\\\"]\\n  }\\n}\\n\\nTable \\\"film_actor\\\" {\\n  \\\"actor_id\\\" int2 [not null]\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (actor_id, film_id) [type: btree, name: \\\"film_actor_pkey\\\"]\\n    film_id [type: btree, name: \\\"idx_fk_film_id\\\"]\\n  }\\n}\\n\\nTable \\\"film_category\\\" {\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"category_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (film_id, category_id) [type: btree, name: \\\"film_category_pkey\\\"]\\n  }\\n}\\n\\nTable \\\"address\\\" {\\n  \\\"address_id\\\" int4 [pk, not null, increment]\\n  \\\"address\\\" varchar(50) [not null]\\n  \\\"address2\\\" varchar(50)\\n  \\\"district\\\" varchar(20) [not null]\\n  \\\"city_id\\\" int2 [not null]\\n  \\\"postal_code\\\" varchar(10)\\n  \\\"phone\\\" varchar(20) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    city_id [type: btree, name: \\\"idx_fk_city_id\\\"]\\n  }\\n}\\n\\nTable \\\"city\\\" {\\n  \\\"city_id\\\" int4 [pk, not null, increment]\\n  \\\"city\\\" varchar(50) [not null]\\n  \\\"country_id\\\" int2 [not null]\\n  \\\"last_update\\\" 
timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    country_id [type: btree, name: \\\"idx_fk_country_id\\\"]\\n  }\\n}\\n\\nTable \\\"country\\\" {\\n  \\\"country_id\\\" int4 [pk, not null, increment]\\n  \\\"country\\\" varchar(50) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"inventory\\\" {\\n  \\\"inventory_id\\\" int4 [pk, not null, increment]\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (store_id, film_id) [type: btree, name: \\\"idx_store_id_film_id\\\"]\\n  }\\n}\\n\\nTable \\\"language\\\" {\\n  \\\"language_id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" bpchar(20) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"payment\\\" {\\n  \\\"payment_id\\\" int4 [pk, not null, increment]\\n  \\\"customer_id\\\" int2 [not null]\\n  \\\"staff_id\\\" int2 [not null]\\n  \\\"rental_id\\\" int4 [not null]\\n  \\\"amount\\\" numeric(5,2) [not null]\\n  \\\"payment_date\\\" timestamp [not null]\\n\\n  Indexes {\\n    rental_id [type: btree, name: \\\"idx_fk_rental_id\\\"]\\n    staff_id [type: btree, name: \\\"idx_fk_staff_id\\\"]\\n  }\\n}\\n\\nTable \\\"rental\\\" {\\n  \\\"rental_id\\\" int4 [pk, not null, increment]\\n  \\\"rental_date\\\" timestamp [not null]\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"customer_id\\\" int2 [not null]\\n  \\\"return_date\\\" timestamp\\n  \\\"staff_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (rental_date, inventory_id, customer_id) [type: btree, name: \\\"idx_unq_rental_rental_date_inventory_id_customer_id\\\"]\\n    inventory_id [type: btree, name: \\\"idx_fk_inventory_id\\\"]\\n  }\\n}\\n\\nTable \\\"staff\\\" {\\n  \\\"staff_id\\\" int4 [pk, not null, increment]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  \\\"last_name\\\" varchar(45) 
[not null]\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"email\\\" varchar(50)\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"active\\\" bool [not null, default: true]\\n  \\\"username\\\" varchar(16) [not null]\\n  \\\"password\\\" varchar(40)\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n  \\\"picture\\\" bytea\\n}\\n\\nTable \\\"store\\\" {\\n  \\\"store_id\\\" int4 [pk, not null, increment]\\n  \\\"manager_staff_id\\\" int2 [unique, not null]\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nRef \\\"fk_address_city\\\":\\\"city\\\".\\\"city_id\\\" < \\\"address\\\".\\\"city_id\\\"\\n\\nRef \\\"fk_city\\\":\\\"country\\\".\\\"country_id\\\" < \\\"city\\\".\\\"country_id\\\"\\n\\nRef \\\"customer_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"customer\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_language_id_fkey\\\":\\\"language\\\".\\\"language_id\\\" < \\\"film\\\".\\\"language_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_actor_actor_id_fkey\\\":\\\"actor\\\".\\\"actor_id\\\" < \\\"film_actor\\\".\\\"actor_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_actor_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"film_actor\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_category_category_id_fkey\\\":\\\"category\\\".\\\"category_id\\\" < \\\"film_category\\\".\\\"category_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_category_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"film_category\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"inventory_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"inventory\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"payment_customer_id_fkey\\\":\\\"customer\\\".\\\"customer_id\\\" < \\\"payment\\\".\\\"customer_id\\\" [update: cascade, delete: restrict]\\n\\nRef 
\\\"payment_rental_id_fkey\\\":\\\"rental\\\".\\\"rental_id\\\" < \\\"payment\\\".\\\"rental_id\\\" [update: cascade, delete: set null]\\n\\nRef \\\"payment_staff_id_fkey\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"payment\\\".\\\"staff_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_customer_id_fkey\\\":\\\"customer\\\".\\\"customer_id\\\" < \\\"rental\\\".\\\"customer_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_inventory_id_fkey\\\":\\\"inventory\\\".\\\"inventory_id\\\" < \\\"rental\\\".\\\"inventory_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_staff_id_key\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"rental\\\".\\\"staff_id\\\"\\n\\nRef \\\"staff_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"staff\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"store_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"store\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"store_manager_staff_id_fkey\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"store\\\".\\\"manager_staff_id\\\" [update: cascade, delete: restrict]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/gordonkwokkwok/DVD-Rental-PostgreSQL-Project\"\n  }\n}\n"
  },
  {
    "path": "tasks/postgres/easy/dvdrental/create_payment_index/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 1: Customer Payment Query Optimization\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef check_payment_customer_id_index(conn) -> bool:\n    \"\"\"Check if there's any index on payment.customer_id column.\"\"\"\n    with conn.cursor() as cur:\n        cur.execute(\"\"\"\n            SELECT indexname, indexdef \n            FROM pg_indexes \n            WHERE schemaname = 'public' \n            AND tablename = 'payment'\n            AND indexdef LIKE '%customer_id%'\n        \"\"\")\n        indexes = cur.fetchall()\n        print(indexes)\n        return len(indexes) > 0, indexes\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 60)\n    print(\"PostgreSQL Task 1 Verification: Customer Payment Query Optimization\")\n    print(\"=\" * 60)\n    \n    # Get connection parameters\n    conn_params = get_connection_params()\n    \n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n    \n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n        \n        print(\"\\n🔍 Checking for customer_id index on payment table...\")\n        \n        # Check if any index exists on payment.customer_id\n        has_index, indexes = check_payment_customer_id_index(conn)\n        \n        if has_index:\n            print(\"✅ Found index(es) on payment.customer_id:\")\n            for index_name, index_def in indexes:\n                print(f\"   - {index_name}: {index_def}\")\n        else:\n           
 print(\"❌ No index found on payment.customer_id column\")\n        \n        conn.close()\n        \n        if has_index:\n            print(f\"\\n🎉 Task verification: PASS\")\n            print(f\"   - Index on payment.customer_id exists\")\n            sys.exit(0)\n        else:\n            print(f\"\\n❌ Task verification: FAIL\")\n            print(f\"   - No index found on payment.customer_id\")\n            print(f\"   - Create an index on payment(customer_id) to optimize the queries\")\n            sys.exit(1)\n            \n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/easy/employees/department_summary_view/description.md",
    "content": "Create an executive department summary view to provide quick insights into departmental metrics for leadership dashboards. This view will consolidate key department statistics in one easily accessible place.\n\n## Your Task:\n\n**Create the executive department summary view** — build a materialized view called `exec_department_summary` in the `employees` schema with these exact columns:\n\n* `department_name` (varchar) — department name\n* `total_employees` (integer) — current active employee count (employees with active salary where to_date = '9999-01-01')\n* `avg_salary` (decimal) — average current salary for active employees\n* `total_payroll` (bigint) — total monthly payroll cost (sum of all current salaries in the department)\n* `manager_name` (varchar) — current department manager's full name (first_name and last_name concatenated)\n\n## Requirements:\n\n1. Use materialized view to cache results for better performance\n2. Join the following tables:\n   - `departments` - for department information\n   - `dept_emp` - for employee-department relationships\n   - `employees` - for employee details\n   - `salaries` - for current salary information\n   - `dept_manager` - for current manager information\n3. Only include current active employees (those with to_date = '9999-01-01' in both `dept_emp` and `salaries`)\n4. Only include current managers (to_date = '9999-01-01' in `dept_manager`)\n5. Order results by department_name\n\n## After Creation:\n\nRefresh the materialized view to populate it with current data.\n\nThis view will provide executives with a real-time snapshot of departmental workforce metrics and costs.\n"
  },
  {
    "path": "tasks/postgres/easy/employees/department_summary_view/meta.json",
    "content": "{\n  \"task_id\": \"department_summary_view\",\n  \"task_name\": \"Department Summary View\",\n  \"category_id\": \"employees\",\n  \"category_name\": \"Employees\",\n  \"description\": \"Build the exec_department_summary materialized view showing department name, active headcount, payroll totals, and the manager name.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"reporting and analytics\",\n    \"materialized views\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"employees\\\".\\\"employee_gender\\\" {\\n  \\\"M\\\"\\n  \\\"F\\\"\\n}\\n\\nTable \\\"employees\\\".\\\"department\\\" {\\n  \\\"id\\\" bpchar(4) [pk, not null]\\n  \\\"dept_name\\\" varchar(40) [unique, not null]\\n}\\n\\nTable \\\"employees\\\".\\\"department_employee\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16982_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16982_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"department_manager\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16985_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16985_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"employee\\\" {\\n  \\\"id\\\" int8 [pk, not null, increment]\\n  \\\"birth_date\\\" date [not null]\\n  \\\"first_name\\\" varchar(14) [not null]\\n  \\\"last_name\\\" varchar(16) [not null]\\n  \\\"gender\\\" employees.employee_gender [not null]\\n  \\\"hire_date\\\" date [not null]\\n}\\n\\nTable 
\\\"employees\\\".\\\"salary\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"amount\\\" int8 [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, from_date) [type: btree, name: \\\"idx_16991_primary\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"title\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"title\\\" varchar(50) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date\\n\\n  Indexes {\\n    (employee_id, title, from_date) [type: btree, name: \\\"idx_16994_primary\\\"]\\n  }\\n}\\n\\nRef \\\"dept_emp_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_emp_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"salaries_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"salary\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"titles_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"title\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz\"\n  }\n}\n"
  },
  {
    "path": "tasks/postgres/easy/employees/department_summary_view/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 6: Reporting and Automation System\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.1 tolerance\n    For date types: convert to string for comparison\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, (Decimal, float, int)):\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif hasattr(actual, 'strftime'):  # datetime.date or datetime.datetime\n            if str(actual) != str(expected):\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_materialized_views(conn) -> bool:\n    \"\"\"Verify that materialized views were created and populated correctly.\"\"\"\n    with conn.cursor() as cur:\n        # Check all departments' data accuracy\n        cur.execute(\"\"\"\n            SELECT department_name, total_employees, avg_salary, total_payroll, manager_name\n            FROM employees.exec_department_summary\n            ORDER BY department_name\n        \"\"\")\n        view_data = cur.fetchall()\n        \n        # Get actual data for all departments\n        cur.execute(\"\"\"\n            WITH current_salary AS (\n 
           SELECT employee_id, amount\n            FROM (\n                SELECT s.*,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY s.employee_id\n                        ORDER BY s.from_date DESC, s.amount DESC\n                    ) AS rn\n                FROM employees.salary s\n                WHERE s.to_date = DATE '9999-01-01'\n            ) x\n            WHERE rn = 1\n            ),\n            current_dept AS (\n            SELECT DISTINCT de.employee_id, de.department_id\n            FROM employees.department_employee de\n            WHERE de.to_date = DATE '9999-01-01'\n            ),\n            current_manager AS (\n            SELECT department_id,\n                    CONCAT(e.first_name, ' ', e.last_name) AS manager_name\n            FROM (\n                SELECT dm.*,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY dm.department_id\n                        ORDER BY dm.from_date DESC, dm.employee_id\n                    ) AS rn\n                FROM employees.department_manager dm\n                WHERE dm.to_date = DATE '9999-01-01'\n            ) dm\n            JOIN employees.employee e ON e.id = dm.employee_id\n            WHERE dm.rn = 1\n            )\n            SELECT\n            d.dept_name AS department_name,\n            COUNT(cd.employee_id)::INT AS total_employees,\n            AVG(cs.amount)::DECIMAL   AS avg_salary,\n            COALESCE(SUM(cs.amount), 0)::BIGINT AS total_payroll,\n            cm.manager_name\n            FROM employees.department d\n            LEFT JOIN current_dept   cd ON cd.department_id = d.id\n            LEFT JOIN current_salary cs ON cs.employee_id = cd.employee_id\n            LEFT JOIN current_manager cm ON cm.department_id = d.id\n            GROUP BY d.id, d.dept_name, cm.manager_name\n            ORDER BY d.dept_name;\n        \"\"\")\n        actual_data = cur.fetchall()\n        \n        if len(view_data) != len(actual_data):\n     
       print(f\"❌ Department count mismatch: view={len(view_data)}, actual={len(actual_data)}\")\n            return False\n            \n        for view_row, actual_row in zip(view_data, actual_data):\n            if not rows_match(view_row, actual_row):\n                print(f\"❌ Department summary data incorrect for {view_row[0]}: view={view_row}, actual={actual_row}\")\n                return False\n            \n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all components\n        success = verify_materialized_views(conn)\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/easy/employees/employee_gender_statistics/description.md",
    "content": "Create a gender statistics summary table for the HR team's annual workforce composition report. This is a simple analysis to understand the gender distribution in our employee database.\n\n## Your Task:\n\n**Create the gender statistics table** — build a table called `gender_statistics` in the `employees` schema with these exact columns:\n\n* `gender` (varchar) — gender ('M' or 'F')\n* `total_employees` (integer) — total number of employees of this gender\n* `current_employees` (integer) — current employees of this gender (have active salary where to_date = '9999-01-01')\n* `percentage_of_workforce` (decimal) — percentage of current workforce (current_employees / total current employees * 100)\n\n## Requirements:\n\n1. Calculate total employees by counting all employees of each gender from the `employees` table\n2. Calculate current employees by counting employees with active salary records (to_date = '9999-01-01' in the `salaries` table)\n3. Calculate the percentage based on current workforce only\n4. The table should contain exactly 2 rows (one for 'M' and one for 'F')\n\nThis analysis will help HR understand the basic gender composition of our workforce for diversity reporting.\n"
  },
  {
    "path": "tasks/postgres/easy/employees/employee_gender_statistics/meta.json",
    "content": "{\n  \"task_id\": \"employee_gender_statistics\",\n  \"task_name\": \"Employee Gender Statistics\",\n  \"category_id\": \"employees\",\n  \"category_name\": \"Employees\",\n  \"description\": \"Aggregate the employees dataset into a gender_statistics table with counts of total/current staff by gender plus workforce percentage.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"reporting and analytics\",\n    \"data aggregation\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"employees\\\".\\\"employee_gender\\\" {\\n  \\\"M\\\"\\n  \\\"F\\\"\\n}\\n\\nTable \\\"employees\\\".\\\"department\\\" {\\n  \\\"id\\\" bpchar(4) [pk, not null]\\n  \\\"dept_name\\\" varchar(40) [unique, not null]\\n}\\n\\nTable \\\"employees\\\".\\\"department_employee\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16982_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16982_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"department_manager\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16985_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16985_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"employee\\\" {\\n  \\\"id\\\" int8 [pk, not null, increment]\\n  \\\"birth_date\\\" date [not null]\\n  \\\"first_name\\\" varchar(14) [not null]\\n  \\\"last_name\\\" varchar(16) [not null]\\n  \\\"gender\\\" employees.employee_gender [not null]\\n  \\\"hire_date\\\" date [not null]\\n}\\n\\nTable 
\\\"employees\\\".\\\"salary\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"amount\\\" int8 [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, from_date) [type: btree, name: \\\"idx_16991_primary\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"title\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"title\\\" varchar(50) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date\\n\\n  Indexes {\\n    (employee_id, title, from_date) [type: btree, name: \\\"idx_16994_primary\\\"]\\n  }\\n}\\n\\nRef \\\"dept_emp_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_emp_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"salaries_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"salary\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"titles_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"title\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz\"\n  }\n}\n"
  },
  {
    "path": "tasks/postgres/easy/employees/employee_gender_statistics/verify.py",
    "content": "import os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.1 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_gender_statistics_results(conn) -> bool:\n    \"\"\"Verify the gender statistics results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT gender, total_employees, current_employees, percentage_of_workforce\n            FROM employees.gender_statistics\n            ORDER BY gender\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH current_emp AS (\n            SELECT DISTINCT s.employee_id\n            FROM employees.salary s\n            WHERE s.to_date = DATE '9999-01-01'\n            ),\n            total_current AS (\n            SELECT COUNT(*) AS cnt\n            FROM current_emp\n            )\n            SELECT\n            e.gender::varchar AS gender,\n            COUNT(*) AS total_employees,\n     
       COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL) AS current_employees,\n            (COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL))::DECIMAL\n                / NULLIF((SELECT cnt FROM total_current), 0) * 100 AS percentage_of_workforce\n            FROM employees.employee e\n            LEFT JOIN current_emp ce ON ce.employee_id = e.id\n            WHERE e.gender IN ('M','F')\n            GROUP BY e.gender\n            ORDER BY gender;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} gender statistics results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Gender statistics results are correct ({len(actual_results)} records)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all four analysis results\n        success = verify_gender_statistics_results(conn)\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except 
psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/easy/employees/employee_projects_basic/description.md",
    "content": "Create and manage a basic employee projects table to track company projects. The IT team needs you to build the database table structure and populate it with initial project data.\n\n## Your Tasks:\n\n1. **Create the employee_projects table** — build a new table in the `employees` schema:\n\n   **Table: `employee_projects`**\n   * `project_id` (integer, primary key, auto-increment)\n   * `project_name` (varchar(100), not null)\n   * `start_date` (date, not null)\n   * `end_date` (date)\n   * `budget` (decimal(10,2))\n   * `status` (varchar(20), default 'active')\n\n2. **Insert exactly this initial data into `employee_projects`**:\n   * Project 1: name='Database Modernization', start_date='2024-01-15', end_date='2024-06-30', budget=250000.00, status='active'\n   * Project 2: name='Employee Portal Upgrade', start_date='2024-02-01', end_date='2024-05-15', budget=180000.00, status='active'\n   * Project 3: name='HR Analytics Dashboard', start_date='2023-11-01', end_date='2024-01-31', budget=120000.00, status='active'\n\nThis will establish the basic project tracking foundation for the company.\n"
  },
  {
    "path": "tasks/postgres/easy/employees/employee_projects_basic/meta.json",
    "content": "{\n  \"task_id\": \"employee_projects_basic\",\n  \"task_name\": \"Employee Projects Basic\",\n  \"category_id\": \"employees\",\n  \"category_name\": \"Employees\",\n  \"description\": \"Create the employee_projects table with the specified schema and insert the three starter projects for modernization, portal upgrade, and analytics.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"schema design\",\n    \"data loading\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"employees\\\".\\\"employee_gender\\\" {\\n  \\\"M\\\"\\n  \\\"F\\\"\\n}\\n\\nTable \\\"employees\\\".\\\"department\\\" {\\n  \\\"id\\\" bpchar(4) [pk, not null]\\n  \\\"dept_name\\\" varchar(40) [unique, not null]\\n}\\n\\nTable \\\"employees\\\".\\\"department_employee\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16982_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16982_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"department_manager\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16985_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16985_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"employee\\\" {\\n  \\\"id\\\" int8 [pk, not null, increment]\\n  \\\"birth_date\\\" date [not null]\\n  \\\"first_name\\\" varchar(14) [not null]\\n  \\\"last_name\\\" varchar(16) [not null]\\n  \\\"gender\\\" employees.employee_gender [not null]\\n  \\\"hire_date\\\" date [not null]\\n}\\n\\nTable 
\\\"employees\\\".\\\"salary\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"amount\\\" int8 [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, from_date) [type: btree, name: \\\"idx_16991_primary\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"title\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"title\\\" varchar(50) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date\\n\\n  Indexes {\\n    (employee_id, title, from_date) [type: btree, name: \\\"idx_16994_primary\\\"]\\n  }\\n}\\n\\nRef \\\"dept_emp_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_emp_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"salaries_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"salary\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"titles_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"title\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz\"\n  }\n}\n"
  },
  {
    "path": "tasks/postgres/easy/employees/employee_projects_basic/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 5: Database Schema and Data Operations\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.1 tolerance\n    For date types: convert to string for comparison\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, (Decimal, float, int)):\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif hasattr(actual, 'strftime'):  # datetime.date or datetime.datetime\n            if str(actual) != str(expected):\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\n\ndef verify_project_data(conn) -> bool:\n    \"\"\"Verify that project data was inserted and updated correctly.\"\"\"\n    with conn.cursor() as cur:\n        # Check project data after updates\n        cur.execute(\"\"\"\n            SELECT project_name, start_date, end_date, budget, status\n            FROM employees.employee_projects\n            ORDER BY project_name\n        \"\"\")\n        projects = cur.fetchall()\n        \n        if len(projects) != 3:\n            print(f\"❌ Expected 3 projects, found {len(projects)}\")\n            return False\n            \n        # 
Expected final state after all updates\n        expected = {\n            'Database Modernization': ('2024-01-15', '2024-06-30', 250000.00, 'active'),\n            'Employee Portal Upgrade': ('2024-02-01', '2024-05-15', 180000.00, 'active'),\n            'HR Analytics Dashboard': ('2023-11-01', '2024-01-31', 120000.00, 'active')\n        }\n        \n        for project in projects:\n            name = project[0]\n            if name not in expected:\n                print(f\"❌ Unexpected project: {name}\")\n                return False\n                \n            exp = expected[name]\n            # Use rows_match for comparison\n            expected_row = (name,) + exp\n            if not rows_match(project, expected_row):\n                print(f\"❌ Project {name} data mismatch: expected {expected_row}, got {project}\")\n                return False\n                \n        print(\"✅ Project data is correct\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all components\n        success = verify_project_data(conn)\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/easy/employees/hiring_year_summary/description.md",
    "content": "Create a hiring year summary table to help HR track employee retention trends over the years. This analysis shows how many employees were hired each year and how many are still with the company.\n\n## Your Task:\n\n**Create the hiring year summary table** — build a table called `hiring_year_summary` in the `employees` schema with these exact columns:\n\n* `hire_year` (integer) — year employees were hired\n* `employees_hired` (integer) — number of employees hired that year\n* `still_employed` (integer) — how many from that year are still employed (have active salary where to_date = '9999-01-01')\n* `retention_rate` (decimal) — percentage still employed (still_employed / employees_hired * 100)\n\n## Requirements:\n\n1. Extract the hire year from the `hire_date` column in the `employees` table\n2. Count total employees hired in each year\n3. Determine which employees are still employed by checking for active salary records (to_date = '9999-01-01' in the `salaries` table)\n4. Order results by hire_year in ascending order\n\nThis analysis will help HR understand retention patterns and identify years with particularly high or low retention rates.\n"
  },
  {
    "path": "tasks/postgres/easy/employees/hiring_year_summary/meta.json",
    "content": "{\n  \"task_id\": \"hiring_year_summary\",\n  \"task_name\": \"Hiring Year Summary\",\n  \"category_id\": \"employees\",\n  \"category_name\": \"Employees\",\n  \"description\": \"Summarize hires per year into hiring_year_summary, including still-employed counts and retention percentages using active salary rows.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"reporting and analytics\",\n    \"retention analysis\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"employees\\\".\\\"employee_gender\\\" {\\n  \\\"M\\\"\\n  \\\"F\\\"\\n}\\n\\nTable \\\"employees\\\".\\\"department\\\" {\\n  \\\"id\\\" bpchar(4) [pk, not null]\\n  \\\"dept_name\\\" varchar(40) [unique, not null]\\n}\\n\\nTable \\\"employees\\\".\\\"department_employee\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16982_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16982_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"department_manager\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16985_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16985_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"employee\\\" {\\n  \\\"id\\\" int8 [pk, not null, increment]\\n  \\\"birth_date\\\" date [not null]\\n  \\\"first_name\\\" varchar(14) [not null]\\n  \\\"last_name\\\" varchar(16) [not null]\\n  \\\"gender\\\" employees.employee_gender [not null]\\n  \\\"hire_date\\\" date [not null]\\n}\\n\\nTable 
\\\"employees\\\".\\\"salary\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"amount\\\" int8 [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, from_date) [type: btree, name: \\\"idx_16991_primary\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"title\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"title\\\" varchar(50) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date\\n\\n  Indexes {\\n    (employee_id, title, from_date) [type: btree, name: \\\"idx_16994_primary\\\"]\\n  }\\n}\\n\\nRef \\\"dept_emp_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_emp_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"salaries_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"salary\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"titles_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"title\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz\"\n  }\n}\n"
  },
  {
    "path": "tasks/postgres/easy/employees/hiring_year_summary/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 3: Employee Demographics Report\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.1 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_hiring_year_results(conn) -> bool:\n    \"\"\"Verify the hiring year summary results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT hire_year, employees_hired, still_employed, retention_rate\n            FROM employees.hiring_year_summary\n            ORDER BY hire_year\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH current_emp AS (\n            SELECT DISTINCT s.employee_id\n            FROM employees.salary s\n            WHERE s.to_date = DATE '9999-01-01'\n            ),\n            base AS (\n            SELECT e.id, EXTRACT(YEAR FROM e.hire_date)::INT AS hire_year\n            FROM employees.employee e\n  
          WHERE e.hire_date IS NOT NULL\n            )\n            SELECT\n            b.hire_year,\n            COUNT(*)::INT AS employees_hired,\n            COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL)::INT AS still_employed,\n            (COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL))::DECIMAL\n                / NULLIF(COUNT(*), 0) * 100 AS retention_rate\n            FROM base b\n            LEFT JOIN current_emp ce ON ce.employee_id = b.id\n            GROUP BY b.hire_year\n            ORDER BY b.hire_year;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} hiring year results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Hiring year summary results are correct ({len(actual_results)} records)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all four analysis results\n        success = verify_hiring_year_results(conn)\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task 
verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/easy/lego/basic_security_setup/description.md",
    "content": "Set up basic database security with role-based access control and Row-Level Security (RLS) for the LEGO database.\n\n## Your Tasks:\n\n### 1. Create Database Role and Permissions\n\nCreate a new database role called `theme_analyst` with the following permissions:\n\n* `SELECT` permissions on all reference tables: `lego_themes`, `lego_colors`, `lego_parts`, `lego_part_categories`\n* `SELECT` permissions on main data tables: `lego_sets`, `lego_inventories`, `lego_inventory_parts`\n* No `INSERT`, `UPDATE`, or `DELETE` permissions on any tables\n\n### 2. Enable Row-Level Security\n\nEnable RLS on the following tables:\n\n* `lego_sets`\n* `lego_inventories`\n* `lego_inventory_parts`\n\n## Requirements:\n\n- Use `CREATE ROLE` to create the `theme_analyst` role\n- Use `GRANT SELECT` statements to assign the appropriate permissions\n- Use `ALTER TABLE ... ENABLE ROW LEVEL SECURITY` to enable RLS on each table\n\n## Expected Outcome:\n\nAfter completing these tasks:\n- The `theme_analyst` role should exist with read-only access to specified tables\n- Row-Level Security should be enabled (but not yet enforced with policies) on the three main data tables\n- The role should have no write permissions on any table\n\nThis sets up the foundation for implementing theme-based data isolation policies.\n"
  },
  {
    "path": "tasks/postgres/easy/lego/basic_security_setup/meta.json",
    "content": "{\n  \"task_id\": \"basic_security_setup\",\n  \"task_name\": \"Basic Security Setup\",\n  \"category_id\": \"lego\",\n  \"category_name\": \"Lego\",\n  \"description\": \"Create the read-only theme_analyst role with SELECT rights on LEGO reference tables and enable row-level security on sets and inventory tables.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"security\",\n    \"access control\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"lego_colors\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"rgb\\\" varchar(6) [not null]\\n  \\\"is_trans\\\" bpchar(1) [not null]\\n}\\n\\nTable \\\"lego_inventories\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"version\\\" int4 [not null]\\n  \\\"set_num\\\" varchar(255) [not null]\\n}\\n\\nTable \\\"lego_inventory_parts\\\" {\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"part_num\\\" varchar(255) [not null]\\n  \\\"color_id\\\" int4 [not null]\\n  \\\"quantity\\\" int4 [not null]\\n  \\\"is_spare\\\" bool [not null]\\n}\\n\\nTable \\\"lego_inventory_sets\\\" {\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"set_num\\\" varchar(255) [not null]\\n  \\\"quantity\\\" int4 [not null]\\n}\\n\\nTable \\\"lego_part_categories\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n}\\n\\nTable \\\"lego_parts\\\" {\\n  \\\"part_num\\\" varchar(255) [pk, not null]\\n  \\\"name\\\" text [not null]\\n  \\\"part_cat_id\\\" int4 [not null]\\n}\\n\\nTable \\\"lego_sets\\\" {\\n  \\\"set_num\\\" varchar(255) [pk, not null]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"year\\\" int4\\n  \\\"theme_id\\\" int4\\n  \\\"num_parts\\\" int4\\n}\\n\\nTable \\\"lego_themes\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n  
\\\"parent_id\\\" int4\\n}\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/lego.sql\"\n  }\n}\n"
  },
  {
    "path": "tasks/postgres/easy/lego/basic_security_setup/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL LEGO Task 4: Database Security and RLS Implementation\n(Version 2 - Improved Robustness)\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nimport psycopg2.errors\nfrom typing import Dict\n\ndef get_connection_params() -> Dict[str, any]:\n    \"\"\"Get database connection parameters from environment variables.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\"),\n    }\n\ndef verify_role_creation(conn) -> bool:\n    \"\"\"\n    TASK 1 VERIFICATION: Check if theme_analyst role was created with proper permissions.\n    \"\"\"\n    print(\"\\n-- Verifying Task 1: Role Creation and Permissions --\")\n    with conn.cursor() as cur:\n        # Check if role exists\n        cur.execute(\"SELECT 1 FROM pg_roles WHERE rolname = 'theme_analyst';\")\n        if not cur.fetchone():\n            print(\"❌ FAIL: The 'theme_analyst' role was not created.\")\n            return False\n        print(\"✅ OK: Role 'theme_analyst' exists.\")\n\n        # Check SELECT permissions on reference and main tables\n        all_tables = [\n            'lego_themes', 'lego_colors', 'lego_parts', 'lego_part_categories',\n            'lego_sets', 'lego_inventories', 'lego_inventory_parts'\n        ]\n        for table in all_tables:\n            cur.execute(\n                \"\"\"\n                SELECT has_table_privilege('theme_analyst', %s, 'SELECT');\n                \"\"\",\n                (table,)\n            )\n            if not cur.fetchone()[0]:\n                print(f\"❌ FAIL: 'theme_analyst' role is missing SELECT permission on '{table}'.\")\n                return False\n        print(\"✅ OK: Role has correct SELECT permissions on all required tables.\")\n\n        # 
Check that no INSERT/UPDATE/DELETE permissions exist\n        for table in all_tables:\n            cur.execute(\n                \"\"\"\n                SELECT \n                    has_table_privilege('theme_analyst', %s, 'INSERT') OR\n                    has_table_privilege('theme_analyst', %s, 'UPDATE') OR\n                    has_table_privilege('theme_analyst', %s, 'DELETE');\n                \"\"\",\n                (table, table, table)\n            )\n            if cur.fetchone()[0]:\n                print(f\"❌ FAIL: 'theme_analyst' role has unauthorized INSERT, UPDATE, or DELETE permission on '{table}'.\")\n                return False\n        print(\"✅ OK: Role does not have modification permissions.\")\n        \n        print(\"✅ PASS: 'theme_analyst' role created with correct permissions.\")\n        return True\n\ndef verify_rls_enabled(conn) -> bool:\n    \"\"\"\n    TASK 2 VERIFICATION: Check if Row-Level Security is enabled on required tables.\n    \"\"\"\n    print(\"\\n-- Verifying Task 2: Row-Level Security Enablement --\")\n    tables_to_check = ['lego_sets', 'lego_inventories', 'lego_inventory_parts']\n    with conn.cursor() as cur:\n        for table in tables_to_check:\n            cur.execute(\n                \"SELECT relrowsecurity FROM pg_class WHERE relname = %s;\", (table,)\n            )\n            rls_enabled = cur.fetchone()\n            if not rls_enabled or not rls_enabled[0]:\n                print(f\"❌ FAIL: RLS is not enabled on table '{table}'.\")\n                return False\n            print(f\"✅ OK: RLS is enabled on table '{table}'.\")\n    \n    print(\"✅ PASS: Row-Level Security is enabled on all required tables.\")\n    return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 60)\n    print(\"LEGO Database Security and RLS Verification Script\")\n    print(\"=\" * 60)\n\n    conn_params = get_connection_params()\n    if not conn_params.get(\"database\"):\n        print(\"❌ 
CRITICAL: POSTGRES_DATABASE environment variable not set.\")\n        sys.exit(1)\n\n    conn = None\n    try:\n        conn = psycopg2.connect(**conn_params)\n        \n        results = [\n            verify_role_creation(conn),\n            verify_rls_enabled(conn),\n        ]\n\n        if all(results):\n            print(\"\\n🎉 Overall Result: PASS - All security tasks verified successfully!\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Overall Result: FAIL - One or more verification steps failed.\")\n            sys.exit(1)\n\n    except psycopg2.OperationalError as e:\n        print(f\"❌ CRITICAL: Could not connect to the database. Check credentials and host. Details: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ CRITICAL: An unexpected error occurred. Details: {e}\")\n        sys.exit(1)\n    finally:\n        if conn:\n            conn.close()\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/postgres/easy/lego/fix_data_inconsistencies/description.md",
    "content": "Fix data inconsistencies in the LEGO database where the reported part count in the `lego_sets` table does not match the actual sum of non-spare parts in the latest inventory version.\n\n## Consistency Rule\n\nFor any given `set_num`, the following must be true:\n`lego_sets.num_parts = SUM(quantity)` FROM `lego_inventory_parts` WHERE `inventory_id` IN (latest inventory for that set) AND `is_spare` = false\n\n**Important**: If a set has no inventory records, the consistency check should be skipped.\n\n## Your Tasks:\n\n### Task 1: Identify Data Inconsistencies\n\n**Objective**: Write a single `SELECT` query to find all sets where the stored `num_parts` does not match the actual calculated number of parts from the latest inventory.\n\n1. **Find the Latest Inventory**: For each `set_num`, find its latest inventory id by getting the `MAX(version)` from the `lego_inventories` table.\n2. **Calculate Actual Part Count**: For these latest inventories, join with `lego_inventory_parts` and calculate the `SUM(quantity)`, but only for parts where `is_spare` is false.\n3. 
**Compare and Filter**: Join this calculated result back to the `lego_sets` table and return the rows where `lego_sets.num_parts` is different from your calculated sum.\n\n### Task 2: Fix Existing Inconsistencies\n\n**Objective**: Correct all mismatched `num_parts` values using a clear, multi-step process with a temporary table.\n\n#### Step 1: Create a Temporary Table\nCreate a temporary table (e.g., `correct_counts`) with two columns: `set_num` (text) and `actual_parts` (integer).\n\n#### Step 2: Populate the Temporary Table\nWrite an `INSERT` statement that calculates the correct part count for every single set listed in the `lego_sets` table.\n\n- The query must start by selecting from `public.lego_sets`.\n- It must then `LEFT JOIN` to a subquery that contains the part-counting logic (finding the latest inventory version and summing the non-spare parts).\n- Use `COALESCE` on the final result from the subquery to ensure that any set without parts or without an inventory record gets a value of `0`, not `NULL`.\n\n#### Step 3: Update from the Temporary Table\nWrite a final, simple `UPDATE` statement that joins the `lego_sets` table with your temporary table on `set_num` and sets `num_parts` to the `actual_parts` value.\n\n## Expected Outcome:\n\nAfter completing these tasks, all sets in the `lego_sets` table should have their `num_parts` correctly reflecting the sum of non-spare parts from their latest inventory version.\n"
  },
  {
    "path": "tasks/postgres/easy/lego/fix_data_inconsistencies/meta.json",
    "content": "{\n  \"task_id\": \"fix_data_inconsistencies\",\n  \"task_name\": \"Fix Data Inconsistencies\",\n  \"category_id\": \"lego\",\n  \"category_name\": \"Lego\",\n  \"description\": \"Recalculate each LEGO set's part count from the latest inventory, stage the results, and update lego_sets.num_parts to remove mismatches.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"data integrity enforcement\",\n    \"data reconciliation\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"lego_colors\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"rgb\\\" varchar(6) [not null]\\n  \\\"is_trans\\\" bpchar(1) [not null]\\n}\\n\\nTable \\\"lego_inventories\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"version\\\" int4 [not null]\\n  \\\"set_num\\\" varchar(255) [not null]\\n}\\n\\nTable \\\"lego_inventory_parts\\\" {\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"part_num\\\" varchar(255) [not null]\\n  \\\"color_id\\\" int4 [not null]\\n  \\\"quantity\\\" int4 [not null]\\n  \\\"is_spare\\\" bool [not null]\\n}\\n\\nTable \\\"lego_inventory_sets\\\" {\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"set_num\\\" varchar(255) [not null]\\n  \\\"quantity\\\" int4 [not null]\\n}\\n\\nTable \\\"lego_part_categories\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n}\\n\\nTable \\\"lego_parts\\\" {\\n  \\\"part_num\\\" varchar(255) [pk, not null]\\n  \\\"name\\\" text [not null]\\n  \\\"part_cat_id\\\" int4 [not null]\\n}\\n\\nTable \\\"lego_sets\\\" {\\n  \\\"set_num\\\" varchar(255) [pk, not null]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"year\\\" int4\\n  \\\"theme_id\\\" int4\\n  \\\"num_parts\\\" int4\\n}\\n\\nTable \\\"lego_themes\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" 
varchar(255) [not null]\\n  \\\"parent_id\\\" int4\\n}\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/lego.sql\"\n  }\n}\n"
  },
  {
    "path": "tasks/postgres/easy/lego/fix_data_inconsistencies/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL LEGO Task 1: Parts Consistency Fix & Constraints\nVersion 2.1: Relaxed consistency check to allow for one known corner case mismatch.\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nimport psycopg2.errors\nfrom typing import Optional, Tuple, List\n\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters from environment variables.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\"),\n    }\n\n\ndef fetch_candidate_part_row(cur) -> Optional[Tuple[int, str, str, int]]:\n    \"\"\"\n    Picks a concrete, non-spare inventory part from the latest inventory of any set.\n    This provides a reliable target for testing update and insert triggers.\n\n    Returns a tuple: (inventory_id, set_num, part_num, color_id) or None.\n    \"\"\"\n    cur.execute(\n        \"\"\"\n        WITH latest_inv AS (\n            SELECT set_num, MAX(version) AS max_version\n            FROM public.lego_inventories\n            GROUP BY set_num\n        ), inv AS (\n            SELECT li.id, li.set_num\n            FROM public.lego_inventories li\n            JOIN latest_inv lv ON lv.set_num = li.set_num AND lv.max_version = li.version\n        )\n        SELECT i.id AS inventory_id, i.set_num, lip.part_num, lip.color_id\n        FROM inv i\n        JOIN public.lego_inventory_parts lip ON lip.inventory_id = i.id\n        WHERE lip.is_spare = false AND lip.quantity > 0\n        LIMIT 1;\n        \"\"\"\n    )\n    return cur.fetchone()\n\n\ndef get_mismatch_count(cur) -> int:\n    \"\"\"Returns the number of sets where num_parts mismatches the computed actual sum.\"\"\"\n    cur.execute(\n        \"\"\"\n        WITH latest_inv AS (\n            SELECT 
set_num, MAX(version) AS max_version\n            FROM public.lego_inventories\n            GROUP BY set_num\n        ), inv_latest AS (\n            SELECT li.set_num, li.id\n            FROM public.lego_inventories li\n            JOIN latest_inv lv ON lv.set_num = li.set_num AND lv.max_version = li.version\n        ), parts_agg AS (\n            SELECT\n                i.set_num,\n                SUM(lip.quantity) AS actual_parts\n            FROM inv_latest i\n            JOIN public.lego_inventory_parts lip ON lip.inventory_id = i.id\n            WHERE lip.is_spare = false\n            GROUP BY i.set_num\n        )\n        SELECT COUNT(*)\n        FROM public.lego_sets s\n        LEFT JOIN parts_agg pa ON s.set_num = pa.set_num\n        WHERE s.num_parts <> COALESCE(pa.actual_parts, 0);\n        \"\"\"\n    )\n    return cur.fetchone()[0]\n\n\ndef verify_data_consistency(conn) -> bool:\n    \"\"\"\n    TASK 1 VERIFICATION: Checks if the initial data fix was successful.\n    (Relaxed: Allows for one corner-case mismatch).\n    \"\"\"\n    print(\"\\n-- Verifying Task 1: Data Consistency Fix (Relaxed) --\")\n    with conn.cursor() as cur:\n        count = get_mismatch_count(cur)\n        # RELAXED CONDITION: Allow 0 or 1 mismatch to pass.\n        if count > 1:\n            print(f\"❌ FAIL: Found {count} sets with inconsistent part counts. 
Expected 0 or 1 after fix.\")\n            return False\n        \n        print(\"✅ PASS: Data consistency check passed (allowing for one known mismatch).\")\n        return True\n\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 60)\n    print(\"LEGO Database Consistency Verification Script\")\n    print(\"=\" * 60)\n\n    conn_params = get_connection_params()\n    if not conn_params.get(\"database\"):\n        print(\"❌ CRITICAL: POSTGRES_DATABASE environment variable not set.\")\n        sys.exit(1)\n\n    try:\n        with psycopg2.connect(**conn_params) as conn:\n            conn.autocommit = False # Ensure we control transactions\n\n            # Run all verification steps\n            results = [\n                verify_data_consistency(conn),\n            ]\n\n            if all(results):\n                print(\"\\n🎉 Overall Result: PASS - All tasks verified successfully!\")\n                sys.exit(0)\n            else:\n                print(\"\\n❌ Overall Result: FAIL - One or more verification steps failed.\")\n                sys.exit(1)\n\n    except psycopg2.OperationalError as e:\n        print(f\"❌ CRITICAL: Could not connect to the database. Details: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ CRITICAL: An unexpected error occurred during verification. Details: {e}\")\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/postgres/easy/sports/create_performance_indexes/description.md",
    "content": "Create indexes to optimize participant and statistics queries in the sports database.\n\n## Your Task:\n\nCreate two indexes to improve query performance:\n\n1. **Index on participants_events table**: Create an index on the `participant_id` column of the `participants_events` table\n2. **Composite index on stats table**: Create a composite index on the `stats` table using columns `stat_holder_type` and `stat_holder_id` (in that order)\n\n## Requirements:\n\n- Create an index on `participants_events(participant_id)`\n- Create a composite index on `stats(stat_holder_type, stat_holder_id)`\n- Index names can be anything you choose (e.g., `idx_participants_events_participant_id`, `idx_stats_holder`)\n- Use the standard CREATE INDEX syntax\n\n## Expected Outcome:\n\nAfter creating these indexes, queries that involve participant filtering and statistics lookups will run significantly faster.\n"
  },
  {
    "path": "tasks/postgres/easy/sports/create_performance_indexes/meta.json",
    "content": "{\n  \"task_id\": \"create_performance_indexes\",\n  \"task_name\": \"Create Performance Indexes\",\n  \"category_id\": \"sports\",\n  \"category_name\": \"Sports\",\n  \"description\": \"Create indexes on participants_events.participant_id and stats(stat_holder_type, stat_holder_id) to accelerate performance reporting.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-11-15\",\n  \"difficulty\": \"L1\",\n  \"tags\": [\n    \"performance optimization\",\n    \"indexing\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"addresses\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"location_id\\\" int4 [not null]\\n  \\\"language\\\" varchar(100)\\n  \\\"suite\\\" varchar(100)\\n  \\\"floor\\\" varchar(100)\\n  \\\"building\\\" varchar(100)\\n  \\\"street_number\\\" varchar(100)\\n  \\\"street_prefix\\\" varchar(100)\\n  \\\"street\\\" varchar(100)\\n  \\\"street_suffix\\\" varchar(100)\\n  \\\"neighborhood\\\" varchar(100)\\n  \\\"district\\\" varchar(100)\\n  \\\"locality\\\" varchar(100)\\n  \\\"county\\\" varchar(100)\\n  \\\"region\\\" varchar(100)\\n  \\\"postal_code\\\" varchar(100)\\n  \\\"country\\\" varchar(100)\\n}\\n\\nTable \\\"affiliation_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"ancestor_affiliation_id\\\" int4\\n  \\\"start_season_id\\\" int4\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_season_id\\\" int4\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"affiliations\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_key\\\" varchar(100) [not null]\\n  \\\"affiliation_type\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_documents\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_events\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  
\\\"event_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_media\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"american_football_action_participants\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"american_football_action_play_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"participant_role\\\" varchar(100) [not null]\\n  \\\"score_type\\\" varchar(100)\\n  \\\"field_line\\\" int4\\n  \\\"yardage\\\" int4\\n  \\\"score_credit\\\" int4\\n  \\\"yards_gained\\\" int4\\n}\\n\\nTable \\\"american_football_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"american_football_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"score_attempt_type\\\" varchar(100)\\n  \\\"drive_result\\\" varchar(100)\\n  \\\"points\\\" int4\\n  \\\"comment\\\" varchar(255)\\n}\\n\\nTable \\\"american_football_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"tackles_total\\\" varchar(100)\\n  \\\"tackles_solo\\\" varchar(100)\\n  \\\"tackles_assists\\\" varchar(100)\\n  \\\"interceptions_total\\\" varchar(100)\\n  \\\"interceptions_yards\\\" varchar(100)\\n  \\\"interceptions_average\\\" varchar(100)\\n  \\\"interceptions_longest\\\" varchar(100)\\n  \\\"interceptions_touchdown\\\" varchar(100)\\n  \\\"quarterback_hurries\\\" varchar(100)\\n  \\\"sacks_total\\\" varchar(100)\\n  \\\"sacks_yards\\\" varchar(100)\\n  \\\"passes_defensed\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_down_progress_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"first_downs_total\\\" varchar(100)\\n  \\\"first_downs_pass\\\" varchar(100)\\n  \\\"first_downs_run\\\" varchar(100)\\n  \\\"first_downs_penalty\\\" varchar(100)\\n  \\\"conversions_third_down\\\" varchar(100)\\n  \\\"conversions_third_down_attempts\\\" varchar(100)\\n  \\\"conversions_third_down_percentage\\\" varchar(100)\\n  \\\"conversions_fourth_down\\\" varchar(100)\\n 
 \\\"conversions_fourth_down_attempts\\\" varchar(100)\\n  \\\"conversions_fourth_down_percentage\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int2\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" int4\\n  \\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"clock_state\\\" varchar(100)\\n  \\\"down\\\" int4\\n  \\\"team_in_possession_id\\\" int4\\n  \\\"distance_for_1st_down\\\" int4\\n  \\\"field_side\\\" varchar(100)\\n  \\\"field_line\\\" int4\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"american_football_fumbles_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fumbles_committed\\\" varchar(100)\\n  \\\"fumbles_forced\\\" varchar(100)\\n  \\\"fumbles_recovered\\\" varchar(100)\\n  \\\"fumbles_lost\\\" varchar(100)\\n  \\\"fumbles_yards_gained\\\" varchar(100)\\n  \\\"fumbles_own_committed\\\" varchar(100)\\n  \\\"fumbles_own_recovered\\\" varchar(100)\\n  \\\"fumbles_own_lost\\\" varchar(100)\\n  \\\"fumbles_own_yards_gained\\\" varchar(100)\\n  \\\"fumbles_opposing_committed\\\" varchar(100)\\n  \\\"fumbles_opposing_recovered\\\" varchar(100)\\n  \\\"fumbles_opposing_lost\\\" varchar(100)\\n  \\\"fumbles_opposing_yards_gained\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"offensive_plays_yards\\\" varchar(100)\\n  \\\"offensive_plays_number\\\" varchar(100)\\n  \\\"offensive_plays_average_yards_per\\\" varchar(100)\\n  \\\"possession_duration\\\" varchar(100)\\n  \\\"turnovers_giveaway\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_passing_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"passes_attempts\\\" varchar(100)\\n  \\\"passes_completions\\\" varchar(100)\\n  \\\"passes_percentage\\\" varchar(100)\\n  \\\"passes_yards_gross\\\" varchar(100)\\n  
\\\"passes_yards_net\\\" varchar(100)\\n  \\\"passes_yards_lost\\\" varchar(100)\\n  \\\"passes_touchdowns\\\" varchar(100)\\n  \\\"passes_touchdowns_percentage\\\" varchar(100)\\n  \\\"passes_interceptions\\\" varchar(100)\\n  \\\"passes_interceptions_percentage\\\" varchar(100)\\n  \\\"passes_longest\\\" varchar(100)\\n  \\\"passes_average_yards_per\\\" varchar(100)\\n  \\\"passer_rating\\\" varchar(100)\\n  \\\"receptions_total\\\" varchar(100)\\n  \\\"receptions_yards\\\" varchar(100)\\n  \\\"receptions_touchdowns\\\" varchar(100)\\n  \\\"receptions_first_down\\\" varchar(100)\\n  \\\"receptions_longest\\\" varchar(100)\\n  \\\"receptions_average_yards_per\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_penalties_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"penalties_total\\\" varchar(100)\\n  \\\"penalty_yards\\\" varchar(100)\\n  \\\"penalty_first_downs\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_rushing_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"rushes_attempts\\\" varchar(100)\\n  \\\"rushes_yards\\\" varchar(100)\\n  \\\"rushes_touchdowns\\\" varchar(100)\\n  \\\"rushing_average_yards_per\\\" varchar(100)\\n  \\\"rushes_first_down\\\" varchar(100)\\n  \\\"rushes_longest\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_sacks_against_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sacks_against_yards\\\" varchar(100)\\n  \\\"sacks_against_total\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_scoring_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"touchdowns_total\\\" varchar(100)\\n  \\\"touchdowns_passing\\\" varchar(100)\\n  \\\"touchdowns_rushing\\\" varchar(100)\\n  \\\"touchdowns_special_teams\\\" varchar(100)\\n  \\\"touchdowns_defensive\\\" varchar(100)\\n  \\\"extra_points_attempts\\\" varchar(100)\\n  \\\"extra_points_made\\\" varchar(100)\\n  \\\"extra_points_missed\\\" varchar(100)\\n  \\\"extra_points_blocked\\\" varchar(100)\\n  \\\"field_goal_attempts\\\" 
varchar(100)\\n  \\\"field_goals_made\\\" varchar(100)\\n  \\\"field_goals_missed\\\" varchar(100)\\n  \\\"field_goals_blocked\\\" varchar(100)\\n  \\\"safeties_against\\\" varchar(100)\\n  \\\"two_point_conversions_attempts\\\" varchar(100)\\n  \\\"two_point_conversions_made\\\" varchar(100)\\n  \\\"touchbacks_total\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_special_teams_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"returns_punt_total\\\" varchar(100)\\n  \\\"returns_punt_yards\\\" varchar(100)\\n  \\\"returns_punt_average\\\" varchar(100)\\n  \\\"returns_punt_longest\\\" varchar(100)\\n  \\\"returns_punt_touchdown\\\" varchar(100)\\n  \\\"returns_kickoff_total\\\" varchar(100)\\n  \\\"returns_kickoff_yards\\\" varchar(100)\\n  \\\"returns_kickoff_average\\\" varchar(100)\\n  \\\"returns_kickoff_longest\\\" varchar(100)\\n  \\\"returns_kickoff_touchdown\\\" varchar(100)\\n  \\\"returns_total\\\" varchar(100)\\n  \\\"returns_yards\\\" varchar(100)\\n  \\\"punts_total\\\" varchar(100)\\n  \\\"punts_yards_gross\\\" varchar(100)\\n  \\\"punts_yards_net\\\" varchar(100)\\n  \\\"punts_longest\\\" varchar(100)\\n  \\\"punts_inside_20\\\" varchar(100)\\n  \\\"punts_inside_20_percentage\\\" varchar(100)\\n  \\\"punts_average\\\" varchar(100)\\n  \\\"punts_blocked\\\" varchar(100)\\n  \\\"touchbacks_total\\\" varchar(100)\\n  \\\"touchbacks_total_percentage\\\" varchar(100)\\n  \\\"touchbacks_kickoffs\\\" varchar(100)\\n  \\\"touchbacks_kickoffs_percentage\\\" varchar(100)\\n  \\\"touchbacks_punts\\\" varchar(100)\\n  \\\"touchbacks_punts_percentage\\\" varchar(100)\\n  \\\"touchbacks_interceptions\\\" varchar(100)\\n  \\\"touchbacks_interceptions_percentage\\\" varchar(100)\\n  \\\"fair_catches\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_contact_details\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_action_pitch_id\\\" int4 [not null]\\n  \\\"location\\\" varchar(100)\\n  \\\"strength\\\" varchar(100)\\n  
\\\"velocity\\\" int4\\n  \\\"comment\\\" text\\n  \\\"trajectory_coordinates\\\" varchar(100)\\n  \\\"trajectory_formula\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_pitches\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_action_play_id\\\" int4 [not null]\\n  \\\"sequence_number\\\" int4\\n  \\\"baseball_defensive_group_id\\\" int4\\n  \\\"umpire_call\\\" varchar(100)\\n  \\\"pitch_location\\\" varchar(100)\\n  \\\"pitch_type\\\" varchar(100)\\n  \\\"pitch_velocity\\\" int4\\n  \\\"comment\\\" text\\n  \\\"trajectory_coordinates\\\" varchar(100)\\n  \\\"trajectory_formula\\\" varchar(100)\\n  \\\"ball_type\\\" varchar(40)\\n  \\\"strike_type\\\" varchar(40)\\n}\\n\\nTable \\\"baseball_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"notation\\\" varchar(100)\\n  \\\"notation_yaml\\\" text\\n  \\\"baseball_defensive_group_id\\\" int4\\n  \\\"comment\\\" varchar(255)\\n  \\\"runner_on_first_advance\\\" int4\\n  \\\"runner_on_second_advance\\\" int4\\n  \\\"runner_on_third_advance\\\" int4\\n  \\\"outs_recorded\\\" int4\\n  \\\"rbi\\\" int4\\n  \\\"runs_scored\\\" int4\\n  \\\"earned_runs_scored\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_substitutions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_event_state_id\\\" int4 [not null]\\n  \\\"sequence_number\\\" int4\\n  \\\"person_type\\\" varchar(100)\\n  \\\"person_original_id\\\" int4\\n  \\\"person_original_position_id\\\" int4\\n  \\\"person_original_lineup_slot\\\" int4\\n  \\\"person_replacing_id\\\" int4\\n  \\\"person_replacing_position_id\\\" int4\\n  \\\"person_replacing_lineup_slot\\\" int4\\n  \\\"substitution_reason\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_defensive_group\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n}\\n\\nTable \\\"baseball_defensive_players\\\" {\\n  \\\"id\\\" int4 [not null, 
increment]\\n  \\\"baseball_defensive_group_id\\\" int4 [not null]\\n  \\\"player_id\\\" int4 [not null]\\n  \\\"position_id\\\" int4 [not null]\\n}\\n\\nTable \\\"baseball_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"double_plays\\\" int4\\n  \\\"triple_plays\\\" int4\\n  \\\"putouts\\\" int4\\n  \\\"assists\\\" int4\\n  \\\"errors\\\" int4\\n  \\\"fielding_percentage\\\" numeric\\n  \\\"defensive_average\\\" numeric\\n  \\\"errors_passed_ball\\\" int4\\n  \\\"errors_catchers_interference\\\" int4\\n}\\n\\nTable \\\"baseball_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int2\\n  \\\"sequence_number\\\" int4\\n  \\\"at_bat_number\\\" int4\\n  \\\"inning_value\\\" int4\\n  \\\"inning_half\\\" varchar(100)\\n  \\\"outs\\\" int4\\n  \\\"balls\\\" int4\\n  \\\"strikes\\\" int4\\n  \\\"runner_on_first_id\\\" int4\\n  \\\"runner_on_second_id\\\" int4\\n  \\\"runner_on_third_id\\\" int4\\n  \\\"runner_on_first\\\" int2\\n  \\\"runner_on_second\\\" int2\\n  \\\"runner_on_third\\\" int2\\n  \\\"runs_this_inning_half\\\" int4\\n  \\\"pitcher_id\\\" int4\\n  \\\"batter_id\\\" int4\\n  \\\"batter_side\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"baseball_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"average\\\" numeric\\n  \\\"runs_scored\\\" int4\\n  \\\"at_bats\\\" int4\\n  \\\"hits\\\" int4\\n  \\\"rbi\\\" int4\\n  \\\"total_bases\\\" int4\\n  \\\"slugging_percentage\\\" numeric\\n  \\\"bases_on_balls\\\" int4\\n  \\\"strikeouts\\\" int4\\n  \\\"left_on_base\\\" int4\\n  \\\"left_in_scoring_position\\\" int4\\n  \\\"singles\\\" int4\\n  \\\"doubles\\\" int4\\n  \\\"triples\\\" int4\\n  \\\"home_runs\\\" int4\\n  \\\"grand_slams\\\" int4\\n  \\\"at_bats_per_rbi\\\" numeric\\n  \\\"plate_appearances_per_rbi\\\" numeric\\n  \\\"at_bats_per_home_run\\\" numeric\\n  \\\"plate_appearances_per_home_run\\\" numeric\\n  
\\\"sac_flies\\\" int4\\n  \\\"sac_bunts\\\" int4\\n  \\\"grounded_into_double_play\\\" int4\\n  \\\"moved_up\\\" int4\\n  \\\"on_base_percentage\\\" numeric\\n  \\\"stolen_bases\\\" int4\\n  \\\"stolen_bases_caught\\\" int4\\n  \\\"stolen_bases_average\\\" numeric\\n  \\\"hit_by_pitch\\\" int4\\n  \\\"defensive_interferance_reaches\\\" int4\\n  \\\"on_base_plus_slugging\\\" numeric\\n  \\\"plate_appearances\\\" int4\\n  \\\"hits_extra_base\\\" int4\\n}\\n\\nTable \\\"baseball_pitching_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"runs_allowed\\\" int4\\n  \\\"singles_allowed\\\" int4\\n  \\\"doubles_allowed\\\" int4\\n  \\\"triples_allowed\\\" int4\\n  \\\"home_runs_allowed\\\" int4\\n  \\\"innings_pitched\\\" varchar(20)\\n  \\\"hits\\\" int4\\n  \\\"earned_runs\\\" int4\\n  \\\"unearned_runs\\\" int4\\n  \\\"bases_on_balls\\\" int4\\n  \\\"bases_on_balls_intentional\\\" int4\\n  \\\"strikeouts\\\" int4\\n  \\\"strikeout_to_bb_ratio\\\" numeric\\n  \\\"number_of_pitches\\\" int4\\n  \\\"era\\\" numeric\\n  \\\"inherited_runners_scored\\\" int4\\n  \\\"pick_offs\\\" int4\\n  \\\"errors_hit_with_pitch\\\" int4\\n  \\\"errors_wild_pitch\\\" int4\\n  \\\"balks\\\" int4\\n  \\\"wins\\\" int4\\n  \\\"losses\\\" int4\\n  \\\"saves\\\" int4\\n  \\\"shutouts\\\" int4\\n  \\\"games_complete\\\" int4\\n  \\\"games_finished\\\" int4\\n  \\\"winning_percentage\\\" numeric\\n  \\\"event_credit\\\" varchar(40)\\n  \\\"save_credit\\\" varchar(40)\\n}\\n\\nTable \\\"basketball_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"steals_total\\\" varchar(100)\\n  \\\"steals_per_game\\\" varchar(100)\\n  \\\"blocks_total\\\" varchar(100)\\n  \\\"blocks_per_game\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" varchar(100)\\n  \\\"period_time_elapsed\\\" 
varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"basketball_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"field_goals_made\\\" int4\\n  \\\"field_goals_attempted\\\" int4\\n  \\\"field_goals_percentage\\\" varchar(100)\\n  \\\"field_goals_per_game\\\" varchar(100)\\n  \\\"field_goals_attempted_per_game\\\" varchar(100)\\n  \\\"field_goals_percentage_adjusted\\\" varchar(100)\\n  \\\"three_pointers_made\\\" int4\\n  \\\"three_pointers_attempted\\\" int4\\n  \\\"three_pointers_percentage\\\" varchar(100)\\n  \\\"three_pointers_per_game\\\" varchar(100)\\n  \\\"three_pointers_attempted_per_game\\\" varchar(100)\\n  \\\"free_throws_made\\\" varchar(100)\\n  \\\"free_throws_attempted\\\" varchar(100)\\n  \\\"free_throws_percentage\\\" varchar(100)\\n  \\\"free_throws_per_game\\\" varchar(100)\\n  \\\"free_throws_attempted_per_game\\\" varchar(100)\\n  \\\"points_scored_total\\\" varchar(100)\\n  \\\"points_scored_per_game\\\" varchar(100)\\n  \\\"assists_total\\\" varchar(100)\\n  \\\"assists_per_game\\\" varchar(100)\\n  \\\"turnovers_total\\\" varchar(100)\\n  \\\"turnovers_per_game\\\" varchar(100)\\n  \\\"points_scored_off_turnovers\\\" varchar(100)\\n  \\\"points_scored_in_paint\\\" varchar(100)\\n  \\\"points_scored_on_second_chance\\\" varchar(100)\\n  \\\"points_scored_on_fast_break\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_rebounding_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"rebounds_total\\\" varchar(100)\\n  \\\"rebounds_per_game\\\" varchar(100)\\n  \\\"rebounds_defensive\\\" varchar(100)\\n  \\\"rebounds_offensive\\\" varchar(100)\\n  \\\"team_rebounds_total\\\" varchar(100)\\n  \\\"team_rebounds_per_game\\\" varchar(100)\\n  \\\"team_rebounds_defensive\\\" varchar(100)\\n  \\\"team_rebounds_offensive\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_team_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"timeouts_left\\\" varchar(100)\\n 
 \\\"largest_lead\\\" varchar(100)\\n  \\\"fouls_total\\\" varchar(100)\\n  \\\"turnover_margin\\\" varchar(100)\\n}\\n\\nTable \\\"bookmakers\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_key\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"location_id\\\" int4\\n}\\n\\nTable \\\"core_person_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"time_played_event\\\" varchar(40)\\n  \\\"time_played_total\\\" varchar(40)\\n  \\\"time_played_event_average\\\" varchar(40)\\n  \\\"events_played\\\" int4\\n  \\\"events_started\\\" int4\\n  \\\"position_id\\\" int4\\n}\\n\\nTable \\\"core_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"score\\\" varchar(100)\\n  \\\"score_opposing\\\" varchar(100)\\n  \\\"score_attempts\\\" varchar(100)\\n  \\\"score_attempts_opposing\\\" varchar(100)\\n  \\\"score_percentage\\\" varchar(100)\\n  \\\"score_percentage_opposing\\\" varchar(100)\\n}\\n\\nTable \\\"db_info\\\" {\\n  \\\"version\\\" varchar(100) [not null, default: 16]\\n}\\n\\nTable \\\"display_names\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"language\\\" varchar(100) [not null]\\n  \\\"entity_type\\\" varchar(100) [not null]\\n  \\\"entity_id\\\" int4 [not null]\\n  \\\"full_name\\\" varchar(100)\\n  \\\"first_name\\\" varchar(100)\\n  \\\"middle_name\\\" varchar(100)\\n  \\\"last_name\\\" varchar(100)\\n  \\\"alias\\\" varchar(100)\\n  \\\"abbreviation\\\" varchar(100)\\n  \\\"short_name\\\" varchar(100)\\n  \\\"prefix\\\" varchar(20)\\n  \\\"suffix\\\" varchar(20)\\n}\\n\\nTable \\\"document_classes\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"name\\\" varchar(100)\\n}\\n\\nTable \\\"document_contents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"sportsml\\\" varchar(200)\\n  \\\"abstract\\\" text\\n}\\n\\nTable \\\"document_fixtures\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fixture_key\\\" varchar(100)\\n  \\\"publisher_id\\\" 
int4 [not null]\\n  \\\"name\\\" varchar(100)\\n  \\\"document_class_id\\\" int4 [not null]\\n}\\n\\nTable \\\"document_fixtures_events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_fixture_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"latest_document_id\\\" int4 [not null]\\n  \\\"last_update\\\" timestamp\\n}\\n\\nTable \\\"document_package_entry\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_package_id\\\" int4 [not null]\\n  \\\"rank\\\" varchar(100)\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"headline\\\" varchar(100)\\n  \\\"short_headline\\\" varchar(100)\\n}\\n\\nTable \\\"document_packages\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"package_key\\\" varchar(100)\\n  \\\"package_name\\\" varchar(100)\\n  \\\"date_time\\\" date\\n}\\n\\nTable \\\"documents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"doc_id\\\" varchar(75) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"title\\\" varchar(255)\\n  \\\"language\\\" varchar(100)\\n  \\\"priority\\\" varchar(100)\\n  \\\"revision_id\\\" varchar(75)\\n  \\\"stats_coverage\\\" varchar(100)\\n  \\\"document_fixture_id\\\" int4 [not null]\\n  \\\"source_id\\\" int4\\n  \\\"db_loading_date_time\\\" timestamp\\n}\\n\\nTable \\\"documents_media\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"media_caption_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"site_id\\\" int4\\n  \\\"site_alignment\\\" varchar(100)\\n  \\\"event_status\\\" varchar(100)\\n  \\\"duration\\\" varchar(100)\\n  \\\"attendance\\\" varchar(100)\\n  \\\"last_update\\\" timestamp\\n}\\n\\nTable \\\"events_documents\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  
\\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events_media\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events_sub_seasons\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"sub_season_id\\\" int4 [not null]\\n}\\n\\nTable \\\"ice_hockey_action_participants\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"ice_hockey_action_play_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"participant_role\\\" varchar(100) [not null]\\n  \\\"point_credit\\\" int4\\n}\\n\\nTable \\\"ice_hockey_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"ice_hockey_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"score_attempt_type\\\" varchar(100)\\n  \\\"play_result\\\" varchar(100)\\n  \\\"comment\\\" varchar(255)\\n}\\n\\nTable \\\"ice_hockey_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"shots_power_play_allowed\\\" varchar(100)\\n  \\\"shots_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_power_play_allowed\\\" varchar(100)\\n  \\\"goals_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_against_average\\\" varchar(100)\\n  \\\"saves\\\" varchar(100)\\n  \\\"save_percentage\\\" varchar(100)\\n  \\\"penalty_killing_amount\\\" varchar(100)\\n  \\\"penalty_killing_percentage\\\" varchar(100)\\n  \\\"shots_blocked\\\" varchar(100)\\n  \\\"takeaways\\\" varchar(100)\\n  \\\"shutouts\\\" varchar(100)\\n  \\\"minutes_penalty_killing\\\" varchar(100)\\n  \\\"hits\\\" varchar(100)\\n  \\\"goals_empty_net_allowed\\\" varchar(100)\\n  \\\"goals_short_handed_allowed\\\" varchar(100)\\n  \\\"goals_shootout_allowed\\\" varchar(100)\\n  \\\"shots_shootout_allowed\\\" varchar(100)\\n}\\n\\nTable \\\"ice_hockey_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" varchar(100)\\n  
\\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"ice_hockey_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"goals_game_winning\\\" varchar(100)\\n  \\\"goals_game_tying\\\" varchar(100)\\n  \\\"goals_power_play\\\" varchar(100)\\n  \\\"goals_short_handed\\\" varchar(100)\\n  \\\"goals_even_strength\\\" varchar(100)\\n  \\\"goals_empty_net\\\" varchar(100)\\n  \\\"goals_overtime\\\" varchar(100)\\n  \\\"goals_shootout\\\" varchar(100)\\n  \\\"goals_penalty_shot\\\" varchar(100)\\n  \\\"assists\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"power_play_amount\\\" varchar(100)\\n  \\\"power_play_percentage\\\" varchar(100)\\n  \\\"shots_penalty_shot_taken\\\" varchar(100)\\n  \\\"shots_penalty_shot_missed\\\" varchar(100)\\n  \\\"shots_penalty_shot_percentage\\\" varchar(100)\\n  \\\"giveaways\\\" varchar(100)\\n  \\\"minutes_power_play\\\" varchar(100)\\n  \\\"faceoff_wins\\\" varchar(100)\\n  \\\"faceoff_losses\\\" varchar(100)\\n  \\\"faceoff_win_percentage\\\" varchar(100)\\n  \\\"scoring_chances\\\" varchar(100)\\n}\\n\\nTable \\\"ice_hockey_player_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"plus_minus\\\" varchar(100)\\n}\\n\\nTable \\\"injury_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"injury_status\\\" varchar(100)\\n  \\\"injury_type\\\" varchar(100)\\n  \\\"injury_comment\\\" varchar(100)\\n  \\\"disabled_list\\\" varchar(100)\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n  \\\"season_id\\\" int4\\n  \\\"phase_type\\\" varchar(100)\\n  \\\"injury_side\\\" varchar(100)\\n}\\n\\nTable \\\"key_aliases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"key_id\\\" int4 [not null]\\n  \\\"key_root_id\\\" int4 [not null]\\n}\\n\\nTable \\\"key_roots\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"key_type\\\" 
varchar(100)\\n}\\n\\nTable \\\"latest_revisions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"revision_id\\\" varchar(75) [not null]\\n  \\\"latest_document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"locations\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"timezone\\\" varchar(100)\\n  \\\"latitude\\\" varchar(100)\\n  \\\"longitude\\\" varchar(100)\\n  \\\"country_code\\\" varchar(100)\\n}\\n\\nTable \\\"media\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"object_id\\\" int4\\n  \\\"source_id\\\" int4\\n  \\\"revision_id\\\" int4\\n  \\\"media_type\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"date_time\\\" varchar(100)\\n  \\\"credit_id\\\" int4 [not null]\\n  \\\"db_loading_date_time\\\" timestamp\\n  \\\"creation_location_id\\\" int4 [not null]\\n}\\n\\nTable \\\"media_captions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"caption_type\\\" varchar(100)\\n  \\\"caption\\\" varchar(100)\\n  \\\"caption_author_id\\\" int4 [not null]\\n  \\\"language\\\" varchar(100)\\n  \\\"caption_size\\\" varchar(100)\\n}\\n\\nTable \\\"media_contents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"object\\\" varchar(100)\\n  \\\"format\\\" varchar(100)\\n  \\\"mime_type\\\" varchar(100)\\n  \\\"height\\\" varchar(100)\\n  \\\"width\\\" varchar(100)\\n  \\\"duration\\\" varchar(100)\\n  \\\"file_size\\\" varchar(100)\\n  \\\"resolution\\\" varchar(100)\\n}\\n\\nTable \\\"media_keywords\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"keyword\\\" varchar(100)\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"motor_racing_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"lap\\\" varchar(100)\\n  \\\"laps_remaining\\\" varchar(100)\\n  \\\"time_elapsed\\\" varchar(100)\\n  \\\"flag_state\\\" varchar(100)\\n  
\\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"motor_racing_qualifying_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"grid\\\" varchar(100)\\n  \\\"pole_position\\\" varchar(100)\\n  \\\"pole_wins\\\" varchar(100)\\n  \\\"qualifying_speed\\\" varchar(100)\\n  \\\"qualifying_speed_units\\\" varchar(100)\\n  \\\"qualifying_time\\\" varchar(100)\\n  \\\"qualifying_position\\\" varchar(100)\\n}\\n\\nTable \\\"motor_racing_race_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"time_behind_leader\\\" varchar(100)\\n  \\\"laps_behind_leader\\\" varchar(100)\\n  \\\"time_ahead_follower\\\" varchar(100)\\n  \\\"laps_ahead_follower\\\" varchar(100)\\n  \\\"time\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"points_rookie\\\" varchar(100)\\n  \\\"bonus\\\" varchar(100)\\n  \\\"laps_completed\\\" varchar(100)\\n  \\\"laps_leading_total\\\" varchar(100)\\n  \\\"distance_leading\\\" varchar(100)\\n  \\\"distance_completed\\\" varchar(100)\\n  \\\"distance_units\\\" varchar(40)\\n  \\\"speed_average\\\" varchar(40)\\n  \\\"speed_units\\\" varchar(40)\\n  \\\"status\\\" varchar(40)\\n  \\\"finishes_top_5\\\" varchar(40)\\n  \\\"finishes_top_10\\\" varchar(40)\\n  \\\"starts\\\" varchar(40)\\n  \\\"finishes\\\" varchar(40)\\n  \\\"non_finishes\\\" varchar(40)\\n  \\\"wins\\\" varchar(40)\\n  \\\"races_leading\\\" varchar(40)\\n  \\\"money\\\" varchar(40)\\n  \\\"money_units\\\" varchar(40)\\n  \\\"leads_total\\\" varchar(40)\\n}\\n\\nTable \\\"outcome_totals\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"standing_subgroup_id\\\" int4 [not null]\\n  \\\"outcome_holder_type\\\" varchar(100)\\n  \\\"outcome_holder_id\\\" int4\\n  \\\"rank\\\" varchar(100)\\n  \\\"wins\\\" varchar(100)\\n  \\\"losses\\\" varchar(100)\\n  \\\"ties\\\" varchar(100)\\n  \\\"undecideds\\\" varchar(100)\\n  \\\"winning_percentage\\\" varchar(100)\\n  \\\"points_scored_for\\\" varchar(100)\\n  \\\"points_scored_against\\\" varchar(100)\\n  
\\\"points_difference\\\" varchar(100)\\n  \\\"standing_points\\\" varchar(100)\\n  \\\"streak_type\\\" varchar(100)\\n  \\\"streak_duration\\\" varchar(100)\\n  \\\"streak_total\\\" varchar(100)\\n  \\\"streak_start\\\" date\\n  \\\"streak_end\\\" date\\n}\\n\\nTable \\\"participants_events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"participant_type\\\" varchar(100) [not null]\\n  \\\"participant_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"alignment\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n  \\\"event_outcome\\\" varchar(100)\\n  \\\"rank\\\" int4\\n}\\n\\nTable \\\"periods\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"participant_event_id\\\" int4 [not null]\\n  \\\"period_value\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n}\\n\\nTable \\\"person_event_metadata\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"status\\\" varchar(100)\\n  \\\"health\\\" varchar(100)\\n  \\\"weight\\\" varchar(100)\\n  \\\"role_id\\\" int4\\n  \\\"position_id\\\" int4\\n  \\\"team_id\\\" int4\\n  \\\"lineup_slot\\\" int4\\n  \\\"lineup_slot_sequence\\\" int4\\n}\\n\\nTable \\\"person_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"membership_type\\\" varchar(40) [not null]\\n  \\\"membership_id\\\" int4 [not null]\\n  \\\"role_id\\\" int4\\n  \\\"role_status\\\" varchar(40)\\n  \\\"phase_status\\\" varchar(40)\\n  \\\"uniform_number\\\" varchar(20)\\n  \\\"regular_position_id\\\" int4\\n  \\\"regular_position_depth\\\" varchar(40)\\n  \\\"height\\\" varchar(100)\\n  \\\"weight\\\" varchar(100)\\n  \\\"start_date_time\\\" timestamp\\n  \\\"start_season_id\\\" int4\\n  \\\"end_date_time\\\" timestamp\\n  \\\"end_season_id\\\" int4\\n  \\\"entry_reason\\\" varchar(40)\\n  \\\"exit_reason\\\" varchar(40)\\n  \\\"selection_level\\\" int4\\n  \\\"selection_sublevel\\\" int4\\n  
\\\"selection_overall\\\" int4\\n}\\n\\nTable \\\"persons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"gender\\\" varchar(20)\\n  \\\"birth_date\\\" varchar(30)\\n  \\\"death_date\\\" varchar(30)\\n  \\\"birth_location_id\\\" int4\\n  \\\"hometown_location_id\\\" int4\\n  \\\"residence_location_id\\\" int4\\n  \\\"death_location_id\\\" int4\\n}\\n\\nTable \\\"persons_documents\\\" {\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"persons_media\\\" {\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"positions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"abbreviation\\\" varchar(100) [not null]\\n}\\n\\nTable \\\"publishers\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"publisher_key\\\" varchar(100) [not null]\\n  \\\"publisher_name\\\" varchar(100)\\n}\\n\\nTable \\\"roles\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"role_key\\\" varchar(100) [not null]\\n  \\\"role_name\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n}\\n\\nTable \\\"seasons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"season_key\\\" int4 [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"league_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"sites\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"site_key\\\" int4 [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"location_id\\\" int4\\n}\\n\\nTable \\\"soccer_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"shots_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_against_average\\\" varchar(100)\\n  \\\"goals_against_total\\\" varchar(100)\\n  \\\"saves\\\" varchar(100)\\n  \\\"save_percentage\\\" 
varchar(100)\\n  \\\"catches_punches\\\" varchar(100)\\n  \\\"shots_on_goal_total\\\" varchar(100)\\n  \\\"shots_shootout_total\\\" varchar(100)\\n  \\\"shots_shootout_allowed\\\" varchar(100)\\n  \\\"shots_blocked\\\" varchar(100)\\n  \\\"shutouts\\\" varchar(100)\\n}\\n\\nTable \\\"soccer_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" varchar(100)\\n  \\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"minutes_elapsed\\\" varchar(100)\\n  \\\"period_minute_elapsed\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"soccer_foul_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fouls_suffered\\\" varchar(100)\\n  \\\"fouls_commited\\\" varchar(100)\\n  \\\"cautions_total\\\" varchar(100)\\n  \\\"cautions_pending\\\" varchar(100)\\n  \\\"caution_points_total\\\" varchar(100)\\n  \\\"caution_points_pending\\\" varchar(100)\\n  \\\"ejections_total\\\" varchar(100)\\n}\\n\\nTable \\\"soccer_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"goals_game_winning\\\" varchar(100)\\n  \\\"goals_game_tying\\\" varchar(100)\\n  \\\"goals_overtime\\\" varchar(100)\\n  \\\"goals_shootout\\\" varchar(100)\\n  \\\"goals_total\\\" varchar(100)\\n  \\\"assists_game_winning\\\" varchar(100)\\n  \\\"assists_game_tying\\\" varchar(100)\\n  \\\"assists_overtime\\\" varchar(100)\\n  \\\"assists_total\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"shots_total\\\" varchar(100)\\n  \\\"shots_on_goal_total\\\" varchar(100)\\n  \\\"shots_hit_frame\\\" varchar(100)\\n  \\\"shots_penalty_shot_taken\\\" varchar(100)\\n  \\\"shots_penalty_shot_scored\\\" varchar(100)\\n  \\\"shots_penalty_shot_missed\\\" varchar(40)\\n  \\\"shots_penalty_shot_percentage\\\" varchar(40)\\n  \\\"shots_shootout_taken\\\" varchar(40)\\n  \\\"shots_shootout_scored\\\" 
varchar(40)\\n  \\\"shots_shootout_missed\\\" varchar(40)\\n  \\\"shots_shootout_percentage\\\" varchar(40)\\n  \\\"giveaways\\\" varchar(40)\\n  \\\"offsides\\\" varchar(40)\\n  \\\"corner_kicks\\\" varchar(40)\\n  \\\"hat_tricks\\\" varchar(40)\\n}\\n\\nTable \\\"standing_subgroups\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"standing_id\\\" int4 [not null]\\n  \\\"affiliation_id\\\" int4 [not null]\\n}\\n\\nTable \\\"standings\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"standing_type\\\" varchar(100)\\n  \\\"sub_season_id\\\" int4 [not null]\\n  \\\"last_updated\\\" varchar(100)\\n  \\\"duration_scope\\\" varchar(100)\\n  \\\"competition_scope\\\" varchar(100)\\n  \\\"competition_scope_id\\\" varchar(100)\\n  \\\"alignment_scope\\\" varchar(100)\\n  \\\"site_scope\\\" varchar(100)\\n  \\\"scoping_label\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"source\\\" varchar(100)\\n}\\n\\nTable \\\"stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"stat_repository_type\\\" varchar(100)\\n  \\\"stat_repository_id\\\" int4 [not null]\\n  \\\"stat_holder_type\\\" varchar(100)\\n  \\\"stat_holder_id\\\" int4\\n  \\\"stat_coverage_type\\\" varchar(100)\\n  \\\"stat_coverage_id\\\" int4\\n  \\\"context\\\" varchar(40) [not null]\\n}\\n\\nTable \\\"sub_periods\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"period_id\\\" int4 [not null]\\n  \\\"sub_period_value\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n}\\n\\nTable \\\"sub_seasons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sub_season_key\\\" varchar(100) [not null]\\n  \\\"season_id\\\" int4 [not null]\\n  \\\"sub_season_type\\\" varchar(100) [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"team_american_football_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"yards_per_attempt\\\" varchar(100)\\n  \\\"average_starting_position\\\" 
varchar(100)\\n  \\\"timeouts\\\" varchar(100)\\n  \\\"time_of_possession\\\" varchar(100)\\n  \\\"turnover_ratio\\\" varchar(100)\\n}\\n\\nTable \\\"team_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"start_season_id\\\" int4\\n  \\\"end_season_id\\\" int4\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" varchar(100)\\n  \\\"end_date_time\\\" varchar(100)\\n  \\\"phase_status\\\" varchar(40)\\n  \\\"role_id\\\" int4\\n}\\n\\nTable \\\"teams\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"team_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"home_site_id\\\" int4\\n}\\n\\nTable \\\"teams_documents\\\" {\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"teams_media\\\" {\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"tennis_action_points\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sub_period_id\\\" varchar(100)\\n  \\\"sequence_number\\\" varchar(100)\\n  \\\"win_type\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_action_volleys\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sequence_number\\\" varchar(100)\\n  \\\"tennis_action_points_id\\\" int4\\n  \\\"landing_location\\\" varchar(100)\\n  \\\"swing_type\\\" varchar(100)\\n  \\\"result\\\" varchar(100)\\n  \\\"spin_type\\\" varchar(100)\\n  \\\"trajectory_details\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"tennis_set\\\" varchar(100)\\n  \\\"game\\\" varchar(100)\\n  \\\"server_person_id\\\" int4\\n  \\\"server_score\\\" varchar(100)\\n  \\\"receiver_person_id\\\" int4\\n  \\\"receiver_score\\\" varchar(100)\\n  \\\"service_number\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"tennis_return_stats\\\" {\\n  
\\\"id\\\" int4 [not null, increment]\\n  \\\"returns_played\\\" varchar(100)\\n  \\\"matches_played\\\" varchar(100)\\n  \\\"first_service_return_points_won\\\" varchar(100)\\n  \\\"first_service_return_points_won_pct\\\" varchar(100)\\n  \\\"second_service_return_points_won\\\" varchar(100)\\n  \\\"second_service_return_points_won_pct\\\" varchar(100)\\n  \\\"return_games_played\\\" varchar(100)\\n  \\\"return_games_won\\\" varchar(100)\\n  \\\"return_games_won_pct\\\" varchar(100)\\n  \\\"break_points_played\\\" varchar(100)\\n  \\\"break_points_converted\\\" varchar(100)\\n  \\\"break_points_converted_pct\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_service_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"services_played\\\" varchar(100)\\n  \\\"matches_played\\\" varchar(100)\\n  \\\"aces\\\" varchar(100)\\n  \\\"first_services_good\\\" varchar(100)\\n  \\\"first_services_good_pct\\\" varchar(100)\\n  \\\"first_service_points_won\\\" varchar(100)\\n  \\\"first_service_points_won_pct\\\" varchar(100)\\n  \\\"second_service_points_won\\\" varchar(100)\\n  \\\"second_service_points_won_pct\\\" varchar(100)\\n  \\\"service_games_played\\\" varchar(100)\\n  \\\"service_games_won\\\" varchar(100)\\n  \\\"service_games_won_pct\\\" varchar(100)\\n  \\\"break_points_played\\\" varchar(100)\\n  \\\"break_points_saved\\\" varchar(100)\\n  \\\"break_points_saved_pct\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_moneylines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line\\\" varchar(100)\\n  \\\"line_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_odds_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 
[not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"numerator\\\" varchar(100)\\n  \\\"denominator\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n  \\\"payout_calculation\\\" varchar(100)\\n  \\\"payout_amount\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_runlines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line\\\" varchar(100)\\n  \\\"line_opening\\\" varchar(100)\\n  \\\"line_value\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_straight_spread_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line_value\\\" varchar(100)\\n  \\\"line_value_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_total_score_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line_over\\\" varchar(100)\\n  \\\"line_under\\\" varchar(100)\\n  \\\"total\\\" varchar(100)\\n  \\\"total_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"weather_conditions\\\" {\\n  
\\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"temperature\\\" varchar(100)\\n  \\\"temperature_units\\\" varchar(40)\\n  \\\"humidity\\\" varchar(100)\\n  \\\"clouds\\\" varchar(100)\\n  \\\"wind_direction\\\" varchar(100)\\n  \\\"wind_velocity\\\" varchar(100)\\n  \\\"weather_code\\\" varchar(100)\\n}\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/yugabyte/yugabyte-db/blob/master/sample/sportsdb_tables.sql\"\n  }\n}\n"
  },
  {
    "path": "tasks/postgres/easy/sports/create_performance_indexes/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Sports Task 3: Query Performance Optimization\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.001 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.001:\n                return False\n        elif isinstance(actual, float) and isinstance(expected, float):\n            if abs(actual - expected) > 0.001:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\", \"sports\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\", \"postgres\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\", \"postgres\")\n    }\n\n\ndef verify_performance_optimization(conn) -> bool:\n    \"\"\"Verify that key performance optimization indexes have been implemented.\"\"\"\n    with conn.cursor() as cur:\n        print(\"\\n🔍 Checking for critical performance indexes...\")\n        \n        # Check 1: participants_events.participant_id index (critical for subqueries)\n        cur.execute(\"\"\"\n            SELECT indexname, indexdef \n            FROM pg_indexes \n            WHERE schemaname = 'public' \n            AND tablename = 'participants_events'\n            AND indexdef LIKE '%participant_id%'\n        \"\"\")\n        
participant_indexes = cur.fetchall()\n        has_participant_index = len(participant_indexes) > 0\n        \n        # Check 2: stats table optimization (critical for subquery filtering)\n        cur.execute(\"\"\"\n            SELECT indexname, indexdef \n            FROM pg_indexes \n            WHERE schemaname = 'public' \n            AND tablename = 'stats'\n            AND indexdef LIKE '%stat_holder_type%'\n            AND indexdef LIKE '%stat_holder_id%'\n        \"\"\")\n        stats_indexes = cur.fetchall()\n        has_stats_index = len(stats_indexes) > 0\n        \n        # Report findings\n        critical_indexes_found = 0\n        \n        if has_participant_index:\n            print(\"✅ Found participant filtering index on participants_events.participant_id\")\n            critical_indexes_found += 1\n        else:\n            print(\"❌ Missing critical index on participants_events.participant_id\")\n            \n        if has_stats_index:\n            print(\"✅ Found subquery optimization index on stats table\")\n            critical_indexes_found += 1\n        else:\n            print(\"❌ Missing critical index on stats table\")\n        \n        # Must have both critical indexes for this subquery-heavy query\n        if critical_indexes_found >= 2:\n            print(f\"\\n✅ Performance optimization: PASS ({critical_indexes_found}/2 critical indexes found)\")\n            return True\n        else:\n            print(f\"\\n❌ Performance optimization: FAIL ({critical_indexes_found}/2 critical indexes found)\")\n            print(\"   Create these critical indexes:\")\n            print(\"   - CREATE INDEX ON participants_events(participant_id);\")\n            print(\"   - CREATE INDEX ON stats(stat_holder_type, stat_holder_id);\")\n            return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n    print(\"Verifying Sports Task 3: Query Performance Optimization\")\n    print(\"=\" * 50)\n\n    
# Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all components\n        success = verify_performance_optimization(conn)\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/chinook/customer_data_migration/description.md",
    "content": "Migrate customer data from an acquired company to PostgreSQL using efficient bulk operations.\n\n## Your Mission:\n\nChinook Music Store has recently acquired \"MelodyMart,\" a competing music retailer. Their customer database needs to be migrated into Chinook's PostgreSQL database.\n\n## Migration Requirements:\n\n1. **Process all customer records from the data table below** and migrate them into the `Customer` table \n2. **Apply business logic during migration**:\n   - Assign `CustomerID` values starting from the next available ID\n   - Assign all customers to support representative with EmployeeId 3\n   - Set `Fax` field to NULL for all migrated customers\n3. **Avoid individual INSERT statements**\n\n## Customer Data to Migrate:\n\n| FirstName | LastName | Company | Address | City | State | Country | PostalCode | Phone | Email |\n|-----------|----------|---------|---------|------|-------|---------|------------|-------|--------|\n| Danielle | Johnson | Sanchez-Taylor | 819 Johnson Course | East William | AK | USA | 74064 | 386-3794 | danielle.johnson@sancheztaylor.com |\n| Katherine | Moore | Peterson-Moore | 16155 Roman Stream Suite 816 | New Kellystad | OK | USA | 25704 | 103-4131 | katherine_moore@petersonmoore.org |\n| Joshua | Reid | Martin-Kelly | 192 Frank Light Suite 835 | East Lydiamouth | MO | USA | 35594 | 139-5376 | joshua_reid@martinkelly.org |\n| Douglas | Taylor | Hoffman, Baker and Richards | 3287 Katelyn Wall Apt. 
226 | South Patrickmouth | NC | USA | 33454 | 801-8451 | douglast@hoffmanbakerand.net |\n| Ryan | Chavez | Liu, Baker and Mason | 148 Eric Track | New Stephanie | NC | USA | 00575 | 957-0154 | r.chavez@liubakerandmaso.com |\n| Brian | Humphrey | Miller Group | 227 Joseph Well | Brandtside | WV | USA | 96174 | 346-5787 | brian.humphrey@millergroup.com |\n| John | Brown | Chapman and Sons | 10310 Jones Freeway | Elizabethborough | ND | USA | 17843 | 997-3763 | john.brown@chapmanandsons.com |\n| Collin | Jordan | Jenkins-Shields | 106 Mcbride Coves | East James | NV | USA | 18874 | 624-7317 | collin.jordan@jenkinsshields.com |\n| Brent | Kidd | Novak and Sons | 7736 Franklin Alley | Bakermouth | LA | USA | 55945 | 872-3430 | brent.kidd@novakandsons.com |\n| Julie | Brown | Woods, Calhoun and Schmidt | 121 Emma Freeway | Wilsonshire | IA | USA | 76381 | 909-1699 | julieb@woodscalhounand.net |\n| Sarah | Harris | Edwards, Baker and Anderson | 5107 Charles Forest Suite 251 | West Justin | NV | USA | 71701 | 498-0841 | s.harris@edwardsbakerand.com |\n| Joseph | Preston | Tran, Nelson and Jacobs | 48740 Cynthia Village Suite 005 | Lake Tina | GA | USA | 97655 | 786-8011 | j.preston@trannelsonandja.com |\n| Amy | Davenport | Tran, Jordan and Williams | 53315 Dickson Summit Apt. 322 | Johnsonmouth | WY | USA | 54465 | 342-1607 | a.davenport@tranjordanandwi.com |\n| James | Sellers | Torres-Pope | 03654 Tammy Harbors | Darlenefurt | TX | USA | 70783 | 501-4294 | james.sellers@torrespope.com |\n| Daniel | Hamilton | Hartman, Graham and Joyce | 9340 Smith Valley | West Ryan | TN | USA | 43780 | 951-4846 | danielh@hartmangrahaman.net |\n| Richard | Phillips | Lee Ltd | 299 Sullivan Village Apt. 
443 | Floydmouth | NH | USA | 58406 | 738-7214 | richardp@leeltd.net |\n| Clarence | Crane | Chambers and Sons | 00379 Stanley Roads | Lake Heather | NM | USA | 52884 | 320-1632 | clarence_crane@chambersandsons.org |\n| Brent | Wright | Bryant Group | 9868 Merritt Summit Suite 743 | Katiehaven | NM | USA | 82650 | 347-1434 | brentw@bryantgroup.net |\n| Luis | Fernandez | Hernandez Group | 316 Rivera Mountain | Brownchester | MS | USA | 77057 | 096-7054 | luis_fernandez@hernandezgroup.org |\n| Melissa | Ashley | Medina-Navarro | 3467 Paul Skyway | Ramseymouth | PW | USA | 17229 | 980-6990 | melissa.ashley@medinanavarro.com |\n| Dawn | Taylor | White-Green | 75564 King Common Suite 080 | Jeffreyland | WI | USA | 85927 | 003-3092 | d.taylor@whitegreen.com |\n| David | Caldwell | Gould, Marshall and Scott | 99124 Beth Inlet Suite 631 | North Heidi | ME | USA | 90188 | 919-0586 | davidc@gouldmarshallan.net |\n| Casey | Holland | Atkinson Group | 5726 Jessica Run | Christinaside | WI | USA | 63873 | 769-4531 | caseyh@atkinsongroup.net |\n| Nicole | Sanchez | Hudson-Barnett | 75273 Salinas Junctions Suite 948 | New Stacyland | IA | USA | 94882 | 678-3777 | nicole.sanchez@hudsonbarnett.com |\n| Christopher | Walker | Sanchez, Beck and Wood | 8557 Parker Fort Apt. 351 | East Javier | NJ | USA | 36742 | 989-4134 | c.walker@sanchezbeckandw.com |\n| Michael | Turner | Ferguson, Hill and Mccann | 271 Audrey Mountains Suite 752 | West Shelleyfort | DE | USA | 09065 | 671-9022 | michaelt@fergusonhilland.net |\n| Christopher | Wright | Duran, Obrien and Gibbs | 677 Dalton Meadow | Ashleyton | RI | USA | 97505 | 133-4123 | c.wright@duranobrienandg.com |\n| Andrea | Moore | Hayes-Wheeler | 34471 Sandra Turnpike Apt. 618 | Lake Edward | KY | USA | 19144 | 102-4994 | andrea_moore@hayeswheeler.org |\n| David | Barker | Powell, Nelson and Fernandez | 90659 Johnson Forks Apt. 
490 | South April | NV | USA | 36959 | 296-7175 | david_barker@powellnelsonand.org |\n| Mathew | Santiago | Rivera Ltd | 6807 Leonard Islands Apt. 680 | Gutierrezborough | NC | USA | 47920 | 977-0348 | m.santiago@riveraltd.com |\n| Sara | Kim | Washington, Johnson and Mccoy | 248 Andrea Course | Port Robin | NH | USA | 15897 | 274-8467 | sara_kim@washingtonjohns.org |\n| John | Arnold | Lee-Greene | 46584 Justin Hills | Grimesmouth | ND | USA | 63984 | 558-8675 | j.arnold@leegreene.com |\n| Tina | Allen | Hall-Rowe | 7662 Hanna Crossroad | Mollymouth | CT | USA | 69438 | 702-6217 | tinaa@hallrowe.net |\n| Matthew | Schwartz | Miller, Murphy and Craig | 7809 Jimmy Spur Suite 316 | Port Cynthiaville | NV | USA | 22306 | 400-5045 | matthews@millermurphyand.net |\n| Ryan | Sanchez | Knight-Sparks | 19693 Durham Divide | South Dana | NH | USA | 33967 | 074-8217 | ryans@knightsparks.net |\n| Vanessa | Evans | Vaughn-Bryant | 67136 Andrews Squares Suite 064 | New Michelleton | PW | USA | 79983 | 743-9533 | vanessae@vaughnbryant.net |\n| Erica | Le | Becker, Taylor and Davis | 7095 Christopher Hill | Julieburgh | ID | USA | 17823 | 858-8424 | erica_le@beckertaylorand.org |\n| Tammy | Phillips | Brock-Mcdonald | 36851 Smith Plain | South Miguelview | OR | USA | 50442 | 513-7098 | tammyp@brockmcdonald.net |\n| Rose | Walker | Reid Group | 612 Sophia Hollow Suite 113 | South Shawn | TN | USA | 97905 | 869-2617 | rose_walker@reidgroup.org |\n| Sheila | Ramirez | Wood, Ramos and Sampson | 58506 Lopez Crossing Suite 139 | North Kristinbury | DC | USA | 74501 | 318-3933 | sheilar@woodramosandsam.net |\n| Kim | Kramer | Smith, Garrison and Thomas | 421 David Knolls | New Mario | HI | USA | 35283 | 026-8117 | kim_kramer@smithgarrisonan.org |\n| Kimberly | Palmer | Hayes and Sons | 847 Bruce Neck | Simmonsville | NM | USA | 93876 | 711-5921 | k.palmer@hayesandsons.com |\n| Joshua | Schultz | Joseph, James and Harper | 8961 Melissa Run Apt. 
673 | Morganmouth | MO | USA | 55025 | 156-5452 | joshua_schultz@josephjamesandh.org |\n| Carlos | Decker | Reynolds Ltd | 80988 Santiago Loop Suite 604 | Michaelshire | NY | USA | 28385 | 273-1585 | carlos.decker@reynoldsltd.com |\n| Kathryn | Andrews | Bruce-Villegas | 402 Park Inlet | Michaelburgh | VI | USA | 19277 | 961-2018 | k.andrews@brucevillegas.com |\n| Nicholas | Chavez | Wood Ltd | 910 Eric River Apt. 147 | Tuckermouth | MT | USA | 36305 | 381-5614 | nicholas_chavez@woodltd.org |\n| Alison | Parker | Foster PLC | 34324 Murphy Avenue | Burgessburgh | DC | USA | 50335 | 838-8516 | alison.parker@fosterplc.com |\n| Ryan | Stevens | Atkins PLC | 664 Richard Islands Apt. 975 | South Meganbury | NE | USA | 77685 | 681-6453 | ryans@atkinsplc.net |\n| Kimberly | Jones | Wilson, Hicks and Bullock | 2312 Gonzalez Rapids Apt. 127 | Webstershire | NV | USA | 89778 | 995-5271 | kimberly_jones@wilsonhicksandb.org |\n| Scott | Turner | Vargas-Bell | 7700 Decker Club | New Brookefurt | NH | USA | 76565 | 807-9359 | scott_turner@vargasbell.org |\n| Walter | Rosario | Garcia-Nolan | 182 John Mill Suite 889 | West Nathan | LA | USA | 51280 | 659-0515 | walter.rosario@garcianolan.com |\n| Angela | Hughes | Cummings-Douglas | 1925 Ponce Square | Andersonland | ME | USA | 73760 | 652-8168 | angelah@cummingsdouglas.net |\n| Andrew | Parker | Peterson Group | 22141 Ebony Wells | New Nicholas | GA | USA | 24204 | 927-0653 | andrew_parker@petersongroup.org |\n| Cheryl | Goodwin | Young-Allen | 59774 Shaw Manor Apt. 
392 | Brettfort | VI | USA | 49156 | 818-1412 | cherylg@youngallen.net |\n| Shannon | Palmer | Davis-Lozano | 0606 Young Common Suite 305 | Port Jennifermouth | WY | USA | 19643 | 204-7277 | shannon.palmer@davislozano.com |\n| Rebecca | Smith | Conley PLC | 43410 Robert Underpass Suite 117 | Lake Zacharybury | VT | USA | 19319 | 460-9539 | rebecca_smith@conleyplc.org |\n| Jacob | Barnett | Villegas, Jones and Fox | 7065 Burgess Knolls | West Johnville | WI | USA | 76772 | 520-5852 | jacob_barnett@villegasjonesan.org |\n| Tina | Mendoza | Cain Inc | 43030 Mahoney Passage Suite 874 | Port Deborahport | MI | USA | 06766 | 541-5667 | tina_mendoza@caininc.org |\n| Matthew | Lopez | Jimenez, Glass and Stone | 616 Amy Islands | North Markport | ME | USA | 58948 | 962-7570 | matthewl@jimenezglassand.net |\n| Christina | Graham | Whitney, Gould and Jones | 8202 Johnson Cliff Apt. 556 | New Ericmouth | MN | USA | 49261 | 719-2856 | christinag@whitneygouldand.net |\n| Debra | Wright | Johnson and Sons | 681 Hampton Squares Suite 394 | Gonzalezberg | PR | USA | 10207 | 727-1551 | debraw@johnsonandsons.net |\n| Patricia | York | Mckinney, Graves and Thompson | 313 Joel Park Apt. 589 | Tannerside | DC | USA | 80710 | 114-6786 | patricia_york@mckinneygravesa.org |\n| Madeline | Jones | Day-Cole | 89226 Marie Path Apt. 422 | Sarahbury | MI | USA | 68513 | 414-3842 | madelinej@daycole.net |\n| Christina | Davis | Jackson, David and Moore | 001 Stacy Trail Suite 396 | South Pamelaside | LA | USA | 84637 | 473-6471 | christina.davis@jacksondavidand.com |\n| Eric | Perry | Harris-Lawson | 556 Kathleen Passage Apt. 
537 | West Shannonberg | CT | USA | 07133 | 469-6325 | ericp@harrislawson.net |\n| James | Moore | Owens, Koch and Jimenez | 8733 Williams Haven | Harperfort | LA | USA | 70846 | 016-2456 | jamesm@owenskochandjim.net |\n| Brandon | Williams | Lee, Tran and Jones | 499 David Court Suite 558 | Kariborough | PA | USA | 67232 | 680-0025 | brandon_williams@leetranandjones.org |\n| April | Hernandez | Taylor, Velazquez and Flores | 495 Erickson Hills Suite 055 | South Brandytown | PA | USA | 62706 | 499-3097 | a.hernandez@taylorvelazquez.com |\n| Alexandria | Griffith | Hernandez-Becker | 130 Edwards Drive | Vaughnchester | NY | USA | 80648 | 702-8385 | alexandria_griffith@hernandezbecker.org |\n| Alicia | Edwards | Stevens PLC | 549 Lee Gateway Suite 843 | Kellieborough | UT | USA | 92905 | 757-5844 | alicia.edwards@stevensplc.com |\n| Ashley | Daniels | Cardenas-Blevins | 0415 Douglas Summit | Lewisside | KY | USA | 74165 | 421-9933 | ashley.daniels@cardenasblevins.com |\n| Elizabeth | Schmidt | Hall, Garcia and Rivera | 20826 Woods Flats Suite 540 | Lake Audreyside | WA | USA | 95281 | 026-2067 | e.schmidt@hallgarciaandri.com |\n| Sharon | Hayden | Mcdowell-Smith | 4788 Small Dale | Nelsonville | MA | USA | 21799 | 742-0549 | s.hayden@mcdowellsmith.com |\n| Gregory | Chase | Wilcox-Robertson | 1227 Boyle Avenue | Patrickmouth | WV | USA | 35496 | 549-9045 | g.chase@wilcoxrobertson.com |\n| Bryan | Wilson | Moore-Parks | 145 Jeffrey Dale Suite 279 | Robertside | PW | USA | 62213 | 833-9187 | bryanw@mooreparks.net |\n| Christian | Elliott | Poole PLC | 822 Bond Mills | Lake Jamieshire | NM | USA | 12420 | 870-7286 | christian_elliott@pooleplc.org |\n| Anne | Hansen | Roman, Cummings and Foster | 391 Rodney Squares | New Virginialand | NJ | USA | 04660 | 462-2656 | anne_hansen@romancummingsan.org |\n| Molly | Knox | Miller-Brandt | 512 Rice Stream | Port Adam | AK | USA | 39608 | 786-8633 | molly_knox@millerbrandt.org |\n| Michael | Hill | Cannon, Johnson and Keller | 
31190 Harper Squares | East Joyfurt | NV | USA | 31216 | 830-2843 | michaelh@cannonjohnsonan.net |\n| Barbara | Barton | Young-Walter | 4408 Connie Meadow | Williamsstad | SD | USA | 88495 | 685-6624 | barbara_barton@youngwalter.org |\n| Ivan | Medina | Atkinson LLC | 0866 Paul Glens | West Deborah | NV | USA | 49138 | 183-0469 | ivan.medina@atkinsonllc.com |\n| Morgan | Lopez | Ramsey, Hansen and Mendoza | 0331 Rocha Square Apt. 638 | Kimberlyfurt | NH | USA | 70447 | 544-5877 | morgan.lopez@ramseyhansenand.com |\n| Leah | Bowen | Rocha-Wood | 93204 Phillips Flat Suite 369 | South Andrea | TX | USA | 44746 | 477-7252 | l.bowen@rochawood.com |\n| Jennifer | Freeman | Mooney, Bernard and Warren | 006 Megan Fort | Lake Edwardborough | NY | USA | 60271 | 509-9770 | jennifer.freeman@mooneybernardan.com |\n| Amanda | Jenkins | Moreno LLC | 86211 John River Suite 546 | West Susanmouth | OK | USA | 32378 | 341-0166 | amanda_jenkins@morenollc.org |\n| Angela | Brown | Warner Inc | 5918 Jerry Ways Suite 401 | Rachelshire | TN | USA | 04813 | 250-3926 | angela.brown@warnerinc.com |\n| Kevin | Elliott | Davenport, Price and Mosley | 2185 Connor Fort Apt. 
599 | Novakmouth | AK | USA | 83616 | 477-3586 | kevin_elliott@davenportpricea.org |\n| Jacob | Willis | Miller-Montgomery | 114 Norman Tunnel | Lake Peter | MN | USA | 14466 | 104-7541 | j.willis@millermontgomer.com |\n| Christopher | Jordan | Peters, Russell and Johnson | 199 Shields Bridge Suite 661 | New Adriana | TX | USA | 50404 | 224-4472 | christopher.jordan@petersrussellan.com |\n| Gary | Hill | Washington-Jones | 79937 Derek Avenue Suite 596 | Scottchester | GU | USA | 85833 | 924-5937 | garyh@washingtonjones.net |\n| Gregory | Sanders | Carter-Neal | 356 Velasquez Lock Suite 193 | Lake Katrina | AK | USA | 95818 | 737-4167 | g.sanders@carterneal.com |\n| Cynthia | Allen | Moore, Henderson and Bennett | 796 Stephens Turnpike Suite 891 | Port Johnstad | GA | USA | 85304 | 909-6561 | cynthia.allen@moorehendersona.com |\n| Corey | Walker | Stone, Carpenter and Johnston | 6798 Michael Burg Suite 146 | North Marieberg | MI | USA | 41381 | 573-8757 | corey.walker@stonecarpentera.com |\n| Samuel | Horton | Jones-Williams | 51238 Andrea Isle | Mullenbury | AS | USA | 53591 | 226-6093 | samuel_horton@joneswilliams.org |\n| Brittany | Price | Lewis, Ramirez and Padilla | 182 Nguyen Mount | West Emilyfort | NC | USA | 84270 | 596-9691 | brittanyp@lewisramirezand.net |\n| Michael | Ellis | Cervantes Ltd | 912 Wilson Inlet Apt. 252 | Barnesberg | OK | USA | 50794 | 627-8282 | michael_ellis@cervantesltd.org |\n| Keith | Lopez | Harvey-Glenn | 2368 Ortiz Overpass | Mckinneymouth | NM | USA | 22423 | 190-3404 | k.lopez@harveyglenn.com |\n| Amanda | Jackson | Cunningham-Barton | 819 Joseph Plains Suite 807 | South Curtis | MP | USA | 86179 | 340-7451 | amanda_jackson@cunninghambarto.org |\n| Michelle | Wilson | Clark Ltd | 962 Kristen Via Apt. 095 | Candiceburgh | MD | USA | 92782 | 449-4812 | michelle_wilson@clarkltd.org |\n| Samantha | Riddle | Martinez, Cline and Wright | 67294 Brooks Club Apt. 
684 | Shawnfort | MD | USA | 76779 | 017-5186 | s.riddle@martinezclinean.com |\n| Tammy | Summers | Adams-Clayton | 929 Kramer Springs Apt. 017 | North Sarahburgh | NV | USA | 60337 | 063-2424 | tammy.summers@adamsclayton.com |\n| Diamond | Wright | Beck-Banks | 4361 Aaron Neck | East Brittneyhaven | TX | USA | 58836 | 005-1627 | diamond.wright@beckbanks.com |\n| Jeremy | Davis | Garcia LLC | 62218 Chelsey Expressway Suite 532 | Jensenmouth | VI | USA | 28975 | 112-1965 | jeremy_davis@garciallc.org |\n| Leonard | Taylor | Newman-Wright | 043 Julie Hill Apt. 376 | East Victorland | NC | USA | 02082 | 552-6965 | l.taylor@newmanwright.com |\n| Kathryn | Best | Smith Inc | 3006 Fuller Parkway | Hendersonfurt | CO | USA | 84457 | 889-2414 | kathryn.best@smithinc.com |\n| William | Harris | Herrera Group | 6303 Sandy Crescent | Salazarton | ME | USA | 87805 | 210-2027 | williamh@herreragroup.net |\n| Alexandra | Logan | Green, Watson and Brady | 105 Nelson Circles Suite 917 | Dixonton | NM | USA | 74803 | 252-4191 | a.logan@greenwatsonandb.com |\n| Joyce | Smith | Sanchez Group | 2208 Walker Gateway Suite 541 | Davidton | HI | USA | 29754 | 806-1744 | joyces@sanchezgroup.net |\n| Christopher | Bryant | Gonzalez-Elliott | 937 Vargas Park Apt. 832 | South Andrewside | MI | USA | 83855 | 050-6413 | c.bryant@gonzalezelliott.com |\n| Robert | Woodward | Dawson Inc | 86571 William Route | Jonesshire | AR | USA | 57515 | 234-4565 | robertw@dawsoninc.net |\n| Shawn | Hall | Taylor PLC | 12775 Martinez Knolls | South Kyle | KS | USA | 16218 | 124-9035 | s.hall@taylorplc.com |\n| Christopher | Wright | Foster-Williams | 2067 Cody Cove Apt. 
100 | East James | MO | USA | 49291 | 199-4101 | c.wright@fosterwilliams.com |\n| Rachel | Ramos | Davis LLC | 70296 Crawford Light | Thompsonborough | PW | USA | 25031 | 447-2099 | r.ramos@davisllc.com |\n| Deborah | Porter | Mendoza, Miller and Reyes | 83806 Castillo Tunnel Suite 598 | Paulburgh | AK | USA | 42296 | 930-4078 | deborahp@mendozamilleran.net |\n| Katie | Key | Garcia Ltd | 8039 Kelly Villages | East Joel | MD | USA | 97245 | 590-5992 | k.key@garcialtd.com |\n| Mary | Cochran | Weaver-Thompson | 03930 Smith Ridges | Port David | VT | USA | 23761 | 500-2921 | maryc@weaverthompson.net |\n| Susan | Brooks | Foster, Garcia and Turner | 67528 Walker Radial | South Kurt | UT | USA | 39103 | 220-9690 | s.brooks@fostergarciaand.com |\n| Carrie | Mccall | Walker, Cunningham and Zuniga | 1355 Daisy Corners | Seanview | IL | USA | 33208 | 154-1006 | carrie_mccall@walkercunningha.org |\n| Jessica | Costa | Snyder-Gray | 79327 Lauren Bypass Suite 054 | North Matthewfurt | GA | USA | 96443 | 181-5997 | jessica.costa@snydergray.com |\n| Ryan | Valdez | Preston, Moore and Garcia | 68844 Young Causeway | Armstrongfort | FL | USA | 07645 | 506-1497 | r.valdez@prestonmooreand.com |\n| Collin | Clark | Carter, Miller and Anthony | 7741 Lopez Light Suite 270 | Scottview | IN | USA | 35701 | 902-1158 | collin_clark@cartermillerand.org |\n| Tara | Lawrence | Brown, Hughes and Mills | 374 Ralph Walk Apt. 898 | North Stacy | NV | USA | 23160 | 233-2061 | tara_lawrence@brownhughesandm.org |\n| James | Carson | Flowers LLC | 116 Arnold Walks Suite 870 | Rodriguezberg | FL | USA | 74765 | 991-1914 | jamesc@flowersllc.net |\n| Natalie | Baker | Washington, Lynch and Johnson | 2996 Randy Isle Apt. 074 | Andrewport | ME | USA | 37246 | 713-2475 | natalieb@washingtonlynch.net |\n| Jessica | Jacobs | Lopez and Sons | 785 Zachary Estate Apt. 486 | Port Melissa | FM | USA | 75038 | 023-3030 | jessica_jacobs@lopezandsons.org |\n| Brent | Ward | Hill Group | 103 Burns Mission Apt. 
798 | Maxview | WA | USA | 90790 | 140-6029 | b.ward@hillgroup.com |\n| Mercedes | Holland | Clark, Pearson and Palmer | 2290 Johnny Valley | Jenniferview | NE | USA | 49846 | 574-3748 | mercedes_holland@clarkpearsonand.org |\n| Breanna | Smith | Levy, Franco and Hoffman | 1715 Davidson Wall Suite 443 | New Kathy | MH | USA | 07942 | 965-2074 | breannas@levyfrancoandho.net |\n| Rebecca | Sullivan | Johnson, Erickson and Armstrong | 3875 Bruce Ville | West Connor | DC | USA | 97614 | 482-5135 | r.sullivan@johnsonerickson.com |\n| Julie | Parker | Watson-Richards | 70999 Thomas Fields Apt. 684 | Brownberg | DC | USA | 26754 | 569-7252 | julie.parker@watsonrichards.com |\n| Tony | Welch | Edwards Inc | 4329 Tracy Track | East Christinachester | MO | USA | 56734 | 760-0835 | tony.welch@edwardsinc.com |\n| Patricia | Sherman | Lee, Rhodes and Sims | 54216 Jackson View | West Stacymouth | VA | USA | 68696 | 985-6257 | patricias@leerhodesandsim.net |\n| Karen | Martin | Smith-Walker | 09821 Dawson Turnpike | South Nancyview | WI | USA | 70589 | 909-0100 | karen.martin@smithwalker.com |\n| Robert | James | King, Miles and Harris | 6184 Robert Cove | West Danielville | NM | USA | 26538 | 934-8356 | robertj@kingmilesandhar.net |\n| Ethan | Kelley | Watts Group | 00119 Hernandez Course Apt. 143 | Hintonport | KS | USA | 61354 | 012-0455 | ethan_kelley@wattsgroup.org |\n| Joanna | Davis | Smith and Sons | 5794 Nathan Junctions | North Richard | NH | USA | 36130 | 676-2120 | j.davis@smithandsons.com |\n| Dale | Pruitt | Pham-Gregory | 659 Michelle Villages | South Samantha | DE | USA | 54408 | 701-4508 | d.pruitt@phamgregory.com |\n| Tiffany | Santiago | Stone-Watts | 3756 Mary Point | North Dawnburgh | NY | USA | 62011 | 721-7535 | tiffanys@stonewatts.net |\n| Brent | Walker | Gray, Montoya and Miller | 717 Stewart Parks Apt. 166 | New Andrealand | WY | USA | 79695 | 948-8375 | brentw@graymontoyaandm.net |\n| Marcia | Velasquez | Rivera-Saunders | 571 Katherine Forges Apt. 
554 | Jacquelineton | MH | USA | 22017 | 726-1493 | m.velasquez@riverasaunders.com |\n| David | Phelps | Bryant and Sons | 60917 Barrett Parkways Apt. 708 | New Savannahshire | NJ | USA | 67129 | 292-2169 | davidp@bryantandsons.net |\n| William | Cruz | Moon, Farmer and Hill | 7226 Cameron Plaza Suite 833 | New Jennifer | TX | USA | 45759 | 228-8515 | william_cruz@moonfarmerandhi.org |\n| Brandi | Bender | Butler, Adkins and Skinner | 0810 Thomas Skyway Apt. 342 | Francesberg | MP | USA | 08631 | 438-0571 | b.bender@butleradkinsand.com |\n| Julia | Hoffman | Dixon Ltd | 066 Frye Spur Suite 800 | Jamesmouth | MP | USA | 30064 | 598-9334 | julia_hoffman@dixonltd.org |\n| Gregory | Fleming | Rivers Ltd | 0648 Anderson Prairie | Adammouth | VT | USA | 20791 | 025-9094 | gregory_fleming@riversltd.org |\n| Kristy | Pierce | Bowers LLC | 81826 Davis Forges | Lake Martin | MN | USA | 38980 | 398-7801 | kristyp@bowersllc.net |\n| Sean | Conway | Sellers, Sanchez and Williams | 1648 Johnson Path Suite 887 | Williamsborough | MD | USA | 67858 | 112-8801 | s.conway@sellerssancheza.com |\n| Ellen | Ayala | Coleman, Garcia and Medina | 120 Love Camp Apt. 102 | Angelashire | GU | USA | 30338 | 466-7665 | ellen.ayala@colemangarciaan.com |\n| Perry | Wilson | May PLC | 901 Reilly Coves | Kristinport | PA | USA | 11839 | 476-6072 | p.wilson@mayplc.com |\n| Derek | Myers | Phillips, Walters and Evans | 88210 Ashley Lock Apt. 
435 | South Rebecca | PR | USA | 67682 | 222-3943 | derek.myers@phillipswalters.com |\n| Howard | Marsh | York PLC | 814 John Flat Suite 552 | North Justin | CA | USA | 25863 | 577-5949 | h.marsh@yorkplc.com |\n| Ariana | Diaz | Benjamin-Jackson | 36452 Humphrey Mountain Suite 547 | East Debbieland | MP | USA | 37281 | 283-4110 | ariana.diaz@benjaminjackson.com |\n| Lisa | Riley | Lewis, Johnson and Green | 256 Patricia Radial Suite 278 | South Michaeltown | TN | USA | 31811 | 928-2722 | l.riley@lewisjohnsonand.com |\n| Jill | Webb | Williams-Juarez | 45303 Hughes Motorway | North Tinamouth | CT | USA | 92741 | 844-9892 | jill_webb@williamsjuarez.org |\n| Desiree | Diaz | Villanueva, Miller and King | 655 Sparks Rapids | New Nicolemouth | GA | USA | 30646 | 184-3222 | desireed@villanuevamille.net |\n| Carolyn | Montoya | Hall, Shepherd and Cortez | 773 Deborah Loop Apt. 302 | East Crystal | AZ | USA | 75509 | 202-4286 | carolyn.montoya@hallshepherdand.com |\n| Natalie | Luna | Valentine-Robinson | 2369 Laura View Apt. 984 | Lake Gina | NH | USA | 78689 | 913-6621 | natalie.luna@valentinerobins.com |\n| James | Heath | Cohen, Serrano and Jacobs | 9908 Christopher Shoals | New Amber | AL | USA | 89441 | 686-5086 | j.heath@cohenserranoand.com |\n| Shawna | Olson | Bell-Ballard | 2473 Justin Wells | Scotttown | VT | USA | 97972 | 098-1806 | s.olson@bellballard.com |\n| Gwendolyn | Stewart | Rodriguez-Simmons | 8695 Braun Locks Apt. 688 | Whiteside | OH | USA | 63908 | 449-5621 | g.stewart@rodriguezsimmon.com |\n| Sean | Lyons | Garcia PLC | 8902 Oconnell Avenue Apt. 279 | Davisview | IN | USA | 49107 | 190-6698 | seanl@garciaplc.net |\n| Jennifer | Harper | Bowman Group | 84309 Christina Spring | West Johntown | GA | USA | 11883 | 465-6693 | jennifer.harper@bowmangroup.com |\n| Jillian | Jones | Dunn Ltd | 4393 Spears Ports Apt. 
426 | New Charlesport | MA | USA | 15837 | 848-9476 | jillian_jones@dunnltd.org |\n| Kayla | Todd | Maldonado-Mosley | 1416 Erica Forks | Robertstad | NC | USA | 70709 | 043-4165 | kayla.todd@maldonadomosley.com |\n| Angela | White | Gomez-Shannon | 37333 Clark Flats Apt. 952 | North Samanthafort | RI | USA | 01369 | 807-5957 | angelaw@gomezshannon.net |\n| Travis | Joyce | Ramirez, Walker and Ray | 678 Wayne Lock | South Tiffany | UT | USA | 68423 | 750-0369 | travis.joyce@ramirezwalkeran.com |\n| Mark | Salazar | Lopez-Baker | 9552 Coleman Manor Suite 564 | Whiteberg | OK | USA | 90417 | 314-3866 | m.salazar@lopezbaker.com |\n| Dustin | Haley | Kennedy Inc | 7288 Floyd Hills | Annashire | AR | USA | 52720 | 120-3471 | dustin_haley@kennedyinc.org |\n| Julie | Green | Castro-Frederick | 0615 Barbara Run Apt. 455 | Hamptonmouth | FM | USA | 10778 | 694-7225 | julie_green@castrofrederick.org |\n| Crystal | Duncan | Miller LLC | 5449 Nelson Mills | Juliehaven | NV | USA | 54763 | 220-2341 | c.duncan@millerllc.com |\n| Garrett | Garcia | Zuniga Group | 68114 Christopher Loaf | Jeromeport | NV | USA | 82615 | 228-2005 | garrettg@zunigagroup.net |\n| Michelle | Mcdonald | Donovan, Dunn and Taylor | 979 Mills Route | Reginafort | ND | USA | 30271 | 174-5642 | michellem@donovandunnandt.net |\n| Alex | Mills | Cooper Group | 774 Katie Union | Carlatown | OH | USA | 49475 | 368-6632 | alex_mills@coopergroup.org |\n| Maria | Walker | Henderson and Sons | 8463 Ian Highway Apt. 
797 | Jackiefort | ID | USA | 42528 | 020-8021 | mariaw@hendersonandson.net |\n| Joseph | Espinoza | Smith, Davis and Smith | 6475 Terry Bypass | Christopherberg | AR | USA | 35432 | 618-7234 | joseph_espinoza@smithdavisandsm.org |\n| Maria | Martinez | Wright, Wise and Ramos | 71837 Maldonado Inlet | Ericton | WA | USA | 72535 | 814-7435 | maria.martinez@wrightwiseandra.com |\n| Michelle | Robinson | Young Group | 24916 Albert Canyon Suite 925 | East Ericland | TX | USA | 81588 | 500-5281 | m.robinson@younggroup.com |\n| Tony | Stewart | Kramer, Sherman and Trujillo | 306 Ramsey Glen Apt. 778 | Amyfort | ID | USA | 74779 | 285-5749 | t.stewart@kramershermanan.com |\n| Casey | Moore | Weiss-Weaver | 86209 Parsons Garden Suite 186 | New Felicia | WI | USA | 72782 | 294-5651 | casey.moore@weissweaver.com |\n| Alexandra | Jones | White Inc | 73109 Barrett Pine | Brandonbury | PA | USA | 94590 | 103-7170 | alexandraj@whiteinc.net |\n| Angela | Hurley | Short-Bauer | 480 Mary Club | New Colton | VA | USA | 30780 | 863-3839 | a.hurley@shortbauer.com |\n| Angela | Grant | Garcia, Fowler and Howard | 612 Andrea Parkways Suite 289 | Mahoneymouth | OH | USA | 43054 | 566-5939 | a.grant@garciafowlerand.com |\n| Nicholas | Pierce | King, Nixon and West | 04908 Victoria Hollow Apt. 433 | Andrewview | PW | USA | 73070 | 889-9210 | nicholas_pierce@kingnixonandwes.org |\n| Michael | Taylor | Preston-Wright | 1969 Jessica Stream Suite 727 | New Dawnton | VA | USA | 76035 | 610-5566 | michael.taylor@prestonwright.com |\n| Molly | Perez | Atkinson, Mcfarland and Walters | 48058 Mark Square Apt. 206 | Mullinsshire | NY | USA | 12308 | 364-6225 | molly.perez@atkinsonmcfarla.com |\n| Thomas | Mcgee | Ross, Miller and Shaw | 78376 Ann Street | East Charles | WI | USA | 56870 | 591-1665 | thomasm@rossmillerandsh.net |\n| James | Cooper | Johnson, Torres and Huerta | 270 James Landing Apt. 
110 | New Sara | VI | USA | 38208 | 051-4770 | jamesc@johnsontorresan.net |\n| Jason | Medina | Payne LLC | 206 Jonathan Circle Suite 394 | South Dianatown | CA | USA | 51441 | 451-0463 | jason_medina@paynellc.org |\n| William | Mckinney | Washington-Harper | 38780 John Pines | Matthewfurt | WA | USA | 21079 | 055-5438 | williamm@washingtonharpe.net |\n| Lisa | Garrett | Zamora-Briggs | 432 Prince Shoals | North Jessica | NC | USA | 89367 | 936-3926 | lisag@zamorabriggs.net |\n| Renee | Murphy | Anderson, Delgado and Carpenter | 48262 Lonnie Point | East Lonnieberg | VA | USA | 04365 | 566-4742 | r.murphy@andersondelgado.com |\n| Daniel | Lopez | Jensen, Obrien and Salazar | 05172 Joseph Landing | Port Paul | NJ | USA | 18525 | 233-0604 | daniel_lopez@jensenobrienand.org |\n| Jeffrey | Powers | Todd Inc | 9757 Ronald Trail | New Jillfurt | VA | USA | 41513 | 699-9880 | jeffrey.powers@toddinc.com |\n| Shannon | Wilcox | Rich and Sons | 086 James Mill Suite 447 | South Kelly | PW | USA | 07650 | 827-7181 | s.wilcox@richandsons.com |\n| Kimberly | Pace | Payne, Long and Morris | 79371 Nguyen Run | Lake Jessica | CO | USA | 15464 | 751-8689 | k.pace@paynelongandmor.com |\n| Nicholas | James | Barr PLC | 22064 Cross Mission | Courtneyville | MH | USA | 17746 | 309-4077 | nicholas_james@barrplc.org |\n| Amy | Smith | Young-Chapman | 6719 John Plaza Suite 983 | East Eddiestad | AZ | USA | 19555 | 099-4510 | amy.smith@youngchapman.com |\n| Robert | Thompson | Mitchell, Guerrero and Graves | 9501 Morris Light | Port Ronaldside | CA | USA | 38883 | 721-4586 | r.thompson@mitchellguerrer.com |\n| Heather | Salazar | Duncan Ltd | 9469 Green Ports | Sarashire | NM | USA | 68619 | 772-9343 | heather.salazar@duncanltd.com |\n| David | Marshall | Mclaughlin and Sons | 0558 Alex Flats Suite 414 | Williammouth | WI | USA | 01304 | 155-6990 | d.marshall@mclaughlinandso.com |"
  },
  {
    "path": "tasks/postgres/standard/chinook/customer_data_migration/meta.json",
    "content": "{\n  \"task_id\": \"customer_data_migration\",\n  \"task_name\": \"Customer Data Migration\",\n  \"category_id\": \"chinook\",\n  \"category_name\": \"Chinook\",\n  \"description\": \"Migrate customer data from acquired company MelodyMart into Chinook database using bulk operations and business logic.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data migration\",\n    \"transactional operations\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"Album\\\" {\\n  \\\"AlbumId\\\" int4 [pk, not null]\\n  \\\"Title\\\" varchar(160) [not null]\\n  \\\"ArtistId\\\" int4 [not null]\\n\\n  Indexes {\\n    ArtistId [type: btree, name: \\\"IFK_AlbumArtistId\\\"]\\n  }\\n}\\n\\nTable \\\"Artist\\\" {\\n  \\\"ArtistId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Customer\\\" {\\n  \\\"CustomerId\\\" int4 [pk, not null]\\n  \\\"FirstName\\\" varchar(40) [not null]\\n  \\\"LastName\\\" varchar(20) [not null]\\n  \\\"Company\\\" varchar(80)\\n  \\\"Address\\\" varchar(70)\\n  \\\"City\\\" varchar(40)\\n  \\\"State\\\" varchar(40)\\n  \\\"Country\\\" varchar(40)\\n  \\\"PostalCode\\\" varchar(10)\\n  \\\"Phone\\\" varchar(24)\\n  \\\"Fax\\\" varchar(24)\\n  \\\"Email\\\" varchar(60) [not null]\\n  \\\"SupportRepId\\\" int4\\n\\n  Indexes {\\n    SupportRepId [type: btree, name: \\\"IFK_CustomerSupportRepId\\\"]\\n  }\\n}\\n\\nTable \\\"Employee\\\" {\\n  \\\"EmployeeId\\\" int4 [pk, not null]\\n  \\\"LastName\\\" varchar(20) [not null]\\n  \\\"FirstName\\\" varchar(20) [not null]\\n  \\\"Title\\\" varchar(30)\\n  \\\"ReportsTo\\\" int4\\n  \\\"BirthDate\\\" timestamp\\n  \\\"HireDate\\\" timestamp\\n  \\\"Address\\\" varchar(70)\\n  \\\"City\\\" varchar(40)\\n  \\\"State\\\" varchar(40)\\n  \\\"Country\\\" varchar(40)\\n  \\\"PostalCode\\\" varchar(10)\\n  \\\"Phone\\\" varchar(24)\\n  
\\\"Fax\\\" varchar(24)\\n  \\\"Email\\\" varchar(60)\\n\\n  Indexes {\\n    ReportsTo [type: btree, name: \\\"IFK_EmployeeReportsTo\\\"]\\n  }\\n}\\n\\nTable \\\"Genre\\\" {\\n  \\\"GenreId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Invoice\\\" {\\n  \\\"InvoiceId\\\" int4 [pk, not null]\\n  \\\"CustomerId\\\" int4 [not null]\\n  \\\"InvoiceDate\\\" timestamp [not null]\\n  \\\"BillingAddress\\\" varchar(70)\\n  \\\"BillingCity\\\" varchar(40)\\n  \\\"BillingState\\\" varchar(40)\\n  \\\"BillingCountry\\\" varchar(40)\\n  \\\"BillingPostalCode\\\" varchar(10)\\n  \\\"Total\\\" numeric(10,2) [not null]\\n\\n  Indexes {\\n    CustomerId [type: btree, name: \\\"IFK_InvoiceCustomerId\\\"]\\n  }\\n}\\n\\nTable \\\"InvoiceLine\\\" {\\n  \\\"InvoiceLineId\\\" int4 [pk, not null]\\n  \\\"InvoiceId\\\" int4 [not null]\\n  \\\"TrackId\\\" int4 [not null]\\n  \\\"UnitPrice\\\" numeric(10,2) [not null]\\n  \\\"Quantity\\\" int4 [not null]\\n\\n  Indexes {\\n    InvoiceId [type: btree, name: \\\"IFK_InvoiceLineInvoiceId\\\"]\\n    TrackId [type: btree, name: \\\"IFK_InvoiceLineTrackId\\\"]\\n  }\\n}\\n\\nTable \\\"MediaType\\\" {\\n  \\\"MediaTypeId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Playlist\\\" {\\n  \\\"PlaylistId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"PlaylistTrack\\\" {\\n  \\\"PlaylistId\\\" int4 [not null]\\n  \\\"TrackId\\\" int4 [not null]\\n\\n  Indexes {\\n    (PlaylistId, TrackId) [type: btree, name: \\\"PK_PlaylistTrack\\\"]\\n    TrackId [type: btree, name: \\\"IFK_PlaylistTrackTrackId\\\"]\\n  }\\n}\\n\\nTable \\\"Track\\\" {\\n  \\\"TrackId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(200) [not null]\\n  \\\"AlbumId\\\" int4\\n  \\\"MediaTypeId\\\" int4 [not null]\\n  \\\"GenreId\\\" int4\\n  \\\"Composer\\\" varchar(220)\\n  \\\"Milliseconds\\\" int4 [not null]\\n  \\\"Bytes\\\" int4\\n  \\\"UnitPrice\\\" numeric(10,2) [not null]\\n\\n  Indexes {\\n    
AlbumId [type: btree, name: \\\"IFK_TrackAlbumId\\\"]\\n    GenreId [type: btree, name: \\\"IFK_TrackGenreId\\\"]\\n    MediaTypeId [type: btree, name: \\\"IFK_TrackMediaTypeId\\\"]\\n  }\\n}\\n\\nRef \\\"FK_AlbumArtistId\\\":\\\"Artist\\\".\\\"ArtistId\\\" < \\\"Album\\\".\\\"ArtistId\\\"\\n\\nRef \\\"FK_CustomerSupportRepId\\\":\\\"Employee\\\".\\\"EmployeeId\\\" < \\\"Customer\\\".\\\"SupportRepId\\\"\\n\\nRef \\\"FK_EmployeeReportsTo\\\":\\\"Employee\\\".\\\"EmployeeId\\\" < \\\"Employee\\\".\\\"ReportsTo\\\"\\n\\nRef \\\"FK_InvoiceCustomerId\\\":\\\"Customer\\\".\\\"CustomerId\\\" < \\\"Invoice\\\".\\\"CustomerId\\\"\\n\\nRef \\\"FK_InvoiceLineInvoiceId\\\":\\\"Invoice\\\".\\\"InvoiceId\\\" < \\\"InvoiceLine\\\".\\\"InvoiceId\\\"\\n\\nRef \\\"FK_InvoiceLineTrackId\\\":\\\"Track\\\".\\\"TrackId\\\" < \\\"InvoiceLine\\\".\\\"TrackId\\\"\\n\\nRef \\\"FK_PlaylistTrackPlaylistId\\\":\\\"Playlist\\\".\\\"PlaylistId\\\" < \\\"PlaylistTrack\\\".\\\"PlaylistId\\\"\\n\\nRef \\\"FK_PlaylistTrackTrackId\\\":\\\"Track\\\".\\\"TrackId\\\" < \\\"PlaylistTrack\\\".\\\"TrackId\\\"\\n\\nRef \\\"FK_TrackAlbumId\\\":\\\"Album\\\".\\\"AlbumId\\\" < \\\"Track\\\".\\\"AlbumId\\\"\\n\\nRef \\\"FK_TrackGenreId\\\":\\\"Genre\\\".\\\"GenreId\\\" < \\\"Track\\\".\\\"GenreId\\\"\\n\\nRef \\\"FK_TrackMediaTypeId\\\":\\\"MediaType\\\".\\\"MediaTypeId\\\" < \\\"Track\\\".\\\"MediaTypeId\\\"\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/chinook.sql\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/chinook/customer_data_migration/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 2: Customer Data Migration\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nimport pickle\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef load_expected_customers():\n    \"\"\"Load the expected customer data from pickle file.\"\"\"\n    import os\n    script_dir = os.path.dirname(os.path.abspath(__file__))\n    pkl_path = os.path.join(script_dir, 'customer_data.pkl')\n    \n    try:\n        with open(pkl_path, 'rb') as f:\n            return pickle.load(f)\n    except FileNotFoundError:\n        print(f\"❌ customer_data.pkl not found at {pkl_path}. Please generate customer data first.\")\n        return None\n    except Exception as e:\n        print(f\"❌ Error loading customer data: {e}\")\n        return None\n\ndef verify_migrated_customers(conn, expected_customers) -> bool:\n    \"\"\"Verify migrated customers by comparing with expected data as sets.\"\"\"\n    with conn.cursor() as cur:\n        # Get all customers with ID > 59 (the migrated ones)\n        cur.execute('''\n            SELECT \"FirstName\", \"LastName\", \"Company\", \"Address\", \"City\", \n                   \"State\", \"Country\", \"PostalCode\", \"Phone\", \"Email\", \n                   \"SupportRepId\", \"Fax\"\n            FROM \"Customer\" \n            WHERE \"CustomerId\" > 59\n        ''')\n        \n        actual_customers = cur.fetchall()\n        \n        if len(actual_customers) != len(expected_customers):\n            print(f\"❌ Expected {len(expected_customers)} migrated customers, found {len(actual_customers)}\")\n            return False\n        \n    
    # Convert expected customers to tuples for set comparison\n        expected_tuples = set()\n        for expected in expected_customers:\n            expected_tuple = (\n                expected['FirstName'], expected['LastName'], expected['Company'],\n                expected['Address'], expected['City'], expected['State'],\n                expected['Country'], expected['PostalCode'], expected['Phone'], \n                expected['Email'], 3, None  # SupportRepId=3, Fax=None\n            )\n            expected_tuples.add(expected_tuple)\n        \n        # Convert actual customers to set with proper type conversion\n        actual_tuples = set()\n        for row in actual_customers:\n            # Convert all fields to strings for consistent comparison\n            actual_tuple = (\n                str(row[0]) if row[0] is not None else '',  # FirstName\n                str(row[1]) if row[1] is not None else '',  # LastName  \n                str(row[2]) if row[2] is not None else '',  # Company\n                str(row[3]) if row[3] is not None else '',  # Address\n                str(row[4]) if row[4] is not None else '',  # City\n                str(row[5]) if row[5] is not None else '',  # State\n                str(row[6]) if row[6] is not None else '',  # Country\n                str(row[7]) if row[7] is not None else '',  # PostalCode\n                str(row[8]) if row[8] is not None else '',  # Phone\n                str(row[9]) if row[9] is not None else '',  # Email\n                int(row[10]) if row[10] is not None else None,  # SupportRepId\n                row[11]  # Fax (should be None)\n            )\n            actual_tuples.add(actual_tuple)\n        \n        # Check if sets are equal\n        if actual_tuples != expected_tuples:\n            missing_in_actual = expected_tuples - actual_tuples\n            extra_in_actual = actual_tuples - expected_tuples\n            \n            print(f\"❌ Customer data sets don't match!\")\n          
  if missing_in_actual:\n                print(f\"   Missing {len(missing_in_actual)} expected customers\")\n                for missing in list(missing_in_actual)[:3]:  # Show first 3\n                    print(f\"   Missing: {missing[0]} {missing[1]} - {missing[2]}\")\n                if len(missing_in_actual) > 3:\n                    print(f\"   ... and {len(missing_in_actual) - 3} more\")\n            \n            if extra_in_actual:\n                print(f\"   Found {len(extra_in_actual)} unexpected customers\")\n                for extra in list(extra_in_actual)[:3]:  # Show first 3\n                    print(f\"   Extra: {extra[0]} {extra[1]} - {extra[2]}\")\n                if len(extra_in_actual) > 3:\n                    print(f\"   ... and {len(extra_in_actual) - 3} more\")\n            \n            return False\n        \n        print(f\"✅ All {len(expected_customers)} customers migrated correctly\")\n        print(f\"✅ All customers assigned to SupportRepId 3\")\n        print(f\"✅ All customers have Fax field set to NULL\")\n        print(f\"✅ Customer data sets match exactly (order-independent)\")\n        \n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 60)\n    print(\"Verifying Customer Data Migration Task\")\n    print(\"=\" * 60)\n\n    # Load expected customer data\n    expected_customers = load_expected_customers()\n    if not expected_customers:\n        sys.exit(1)\n    \n    print(f\"Loaded {len(expected_customers)} expected customer records\")\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify migration\n        success = verify_migrated_customers(conn, expected_customers)\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 
Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/chinook/employee_hierarchy_management/description.md",
    "content": "Manage employee hierarchy and customer assignments through systematic CRUD operations.\n\n## Your Mission:\n\nChinook needs to reorganize their employee structure and reassign customer relationships. Complete a series of precise database modifications to update the employee hierarchy.\n\n## Tasks to Complete:\n\n### 1. **INSERT: Add New Employees**\nInsert exactly 2 new employees into the Employee table:\n- EmployeeId: 9, FirstName: 'Sarah', LastName: 'Johnson', Title: 'Sales Support Agent', ReportsTo: 2, BirthDate: '1985-03-15', HireDate: '2009-01-10', Address: '123 Oak Street', City: 'Calgary', State: 'AB', Country: 'Canada', PostalCode: 'T2P 5G3', Phone: '+1 (403) 555-0123', Fax: '+1 (403) 555-0124', Email: 'sarah.johnson@chinookcorp.com'\n- EmployeeId: 10, FirstName: 'Mike', LastName: 'Chen', Title: 'Sales Support Agent', ReportsTo: 2, BirthDate: '1982-08-22', HireDate: '2009-01-10', Address: '456 Pine Ave', City: 'Calgary', State: 'AB', Country: 'Canada', PostalCode: 'T2P 5G4', Phone: '+1 (403) 555-0125', Fax: '+1 (403) 555-0126', Email: 'mike.chen@chinookcorp.com'\n\n### 2. **UPDATE: Modify Existing Employee Information**\n- Change Andrew Adams (EmployeeId = 1) title from 'General Manager' to 'CEO'\n- Update Nancy Edwards (EmployeeId = 2) phone number to '+1 (403) 555-9999'\n- Change all employees with Title = 'IT Staff' to have Title = 'IT Specialist'\n\n### 3. **UPDATE: Reassign Some Customers to New Employees**\n- Update customers with CustomerId 1, 2, 3 to have SupportRepId = 9 (Sarah Johnson)\n- Update customers with CustomerId 4, 5, 6 to have SupportRepId = 10 (Mike Chen)\n\n\n### 4. **UPDATE: Reorganize Reporting Structure**\n- Change Sarah Johnson (EmployeeId = 9) to report to Andrew Adams (EmployeeId = 1) instead of Nancy Edwards\n- Change Mike Chen (EmployeeId = 10) to also report to Andrew Adams (EmployeeId = 1)\n\n### 5. 
**INSERT: Create Employee Performance Table**\nCreate a new table called `employee_performance`:\n- `employee_id` (integer, foreign key to Employee)\n- `customers_assigned` (integer)\n- `performance_score` (decimal)\n\nInsert records for employees 9 and 10 by calculating their actual customer assignments:\n- Sarah Johnson: calculate actual number of customers assigned to her, performance score 4.5\n- Mike Chen: calculate actual number of customers assigned to him, performance score 4.2\n\n### 6. **DELETE: Remove IT Department Employee**\n- Delete Robert King (EmployeeId = 7) from the Employee table\n- Before deletion, handle all relationships:\n  - Find who Robert reports to and reassign any employees who report to Robert to report to Robert's manager instead\n  - Find all customers assigned to Robert as their support rep and reassign them to Robert's manager\n\n### 7. **UPDATE: Promote Remaining IT Staff**\n- Promote Laura Callahan (EmployeeId = 8) from 'IT Specialist' to 'Senior IT Specialist'  \n- Update her salary information by adding a new column `salary` to Employee table (decimal type)\n- Set Laura's salary to 75000.00 and all other employees to 50000.00\n\n### 8. 
**Final Verification Query**\nExecute this exact query to verify all changes:\n```sql\nSELECT \n    COUNT(*) as total_employees,\n    COUNT(CASE WHEN \"Title\" = 'CEO' THEN 1 END) as ceo_count,\n    COUNT(CASE WHEN \"Title\" = 'IT Specialist' THEN 1 END) as it_specialist_count,\n    COUNT(CASE WHEN \"ReportsTo\" = 1 THEN 1 END) as reports_to_ceo\nFROM \"Employee\";\n```\n\nExpected result: total_employees = 9, ceo_count = 1, it_specialist_count = 0, reports_to_ceo = 4\n\n## Business Rules:\n* Use exact EmployeeId values as specified\n* Maintain referential integrity between Employee and Customer tables\n* All phone numbers must include country code format\n* Email addresses must follow the pattern firstname.lastname@chinookcorp.com\n\n## Expected Outcome:\nThe database should have exactly 9 employees total, with the new hierarchy structure in place and customer assignments updated accordingly."
  },
  {
    "path": "tasks/postgres/standard/chinook/employee_hierarchy_management/meta.json",
    "content": "{\n  \"task_id\": \"employee_hierarchy_management\",\n  \"task_name\": \"Employee Hierarchy Management\",\n  \"category_id\": \"chinook\",\n  \"category_name\": \"Chinook\",\n  \"description\": \"Reorganize employee structure through CRUD operations including inserts, updates, deletes, and customer reassignments.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data migration\",\n    \"schema design\",\n    \"transactional operations\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"Album\\\" {\\n  \\\"AlbumId\\\" int4 [pk, not null]\\n  \\\"Title\\\" varchar(160) [not null]\\n  \\\"ArtistId\\\" int4 [not null]\\n\\n  Indexes {\\n    ArtistId [type: btree, name: \\\"IFK_AlbumArtistId\\\"]\\n  }\\n}\\n\\nTable \\\"Artist\\\" {\\n  \\\"ArtistId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Customer\\\" {\\n  \\\"CustomerId\\\" int4 [pk, not null]\\n  \\\"FirstName\\\" varchar(40) [not null]\\n  \\\"LastName\\\" varchar(20) [not null]\\n  \\\"Company\\\" varchar(80)\\n  \\\"Address\\\" varchar(70)\\n  \\\"City\\\" varchar(40)\\n  \\\"State\\\" varchar(40)\\n  \\\"Country\\\" varchar(40)\\n  \\\"PostalCode\\\" varchar(10)\\n  \\\"Phone\\\" varchar(24)\\n  \\\"Fax\\\" varchar(24)\\n  \\\"Email\\\" varchar(60) [not null]\\n  \\\"SupportRepId\\\" int4\\n\\n  Indexes {\\n    SupportRepId [type: btree, name: \\\"IFK_CustomerSupportRepId\\\"]\\n  }\\n}\\n\\nTable \\\"Employee\\\" {\\n  \\\"EmployeeId\\\" int4 [pk, not null]\\n  \\\"LastName\\\" varchar(20) [not null]\\n  \\\"FirstName\\\" varchar(20) [not null]\\n  \\\"Title\\\" varchar(30)\\n  \\\"ReportsTo\\\" int4\\n  \\\"BirthDate\\\" timestamp\\n  \\\"HireDate\\\" timestamp\\n  \\\"Address\\\" varchar(70)\\n  \\\"City\\\" varchar(40)\\n  \\\"State\\\" varchar(40)\\n  \\\"Country\\\" varchar(40)\\n  \\\"PostalCode\\\" 
varchar(10)\\n  \\\"Phone\\\" varchar(24)\\n  \\\"Fax\\\" varchar(24)\\n  \\\"Email\\\" varchar(60)\\n\\n  Indexes {\\n    ReportsTo [type: btree, name: \\\"IFK_EmployeeReportsTo\\\"]\\n  }\\n}\\n\\nTable \\\"Genre\\\" {\\n  \\\"GenreId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Invoice\\\" {\\n  \\\"InvoiceId\\\" int4 [pk, not null]\\n  \\\"CustomerId\\\" int4 [not null]\\n  \\\"InvoiceDate\\\" timestamp [not null]\\n  \\\"BillingAddress\\\" varchar(70)\\n  \\\"BillingCity\\\" varchar(40)\\n  \\\"BillingState\\\" varchar(40)\\n  \\\"BillingCountry\\\" varchar(40)\\n  \\\"BillingPostalCode\\\" varchar(10)\\n  \\\"Total\\\" numeric(10,2) [not null]\\n\\n  Indexes {\\n    CustomerId [type: btree, name: \\\"IFK_InvoiceCustomerId\\\"]\\n  }\\n}\\n\\nTable \\\"InvoiceLine\\\" {\\n  \\\"InvoiceLineId\\\" int4 [pk, not null]\\n  \\\"InvoiceId\\\" int4 [not null]\\n  \\\"TrackId\\\" int4 [not null]\\n  \\\"UnitPrice\\\" numeric(10,2) [not null]\\n  \\\"Quantity\\\" int4 [not null]\\n\\n  Indexes {\\n    InvoiceId [type: btree, name: \\\"IFK_InvoiceLineInvoiceId\\\"]\\n    TrackId [type: btree, name: \\\"IFK_InvoiceLineTrackId\\\"]\\n  }\\n}\\n\\nTable \\\"MediaType\\\" {\\n  \\\"MediaTypeId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Playlist\\\" {\\n  \\\"PlaylistId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"PlaylistTrack\\\" {\\n  \\\"PlaylistId\\\" int4 [not null]\\n  \\\"TrackId\\\" int4 [not null]\\n\\n  Indexes {\\n    (PlaylistId, TrackId) [type: btree, name: \\\"PK_PlaylistTrack\\\"]\\n    TrackId [type: btree, name: \\\"IFK_PlaylistTrackTrackId\\\"]\\n  }\\n}\\n\\nTable \\\"Track\\\" {\\n  \\\"TrackId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(200) [not null]\\n  \\\"AlbumId\\\" int4\\n  \\\"MediaTypeId\\\" int4 [not null]\\n  \\\"GenreId\\\" int4\\n  \\\"Composer\\\" varchar(220)\\n  \\\"Milliseconds\\\" int4 [not null]\\n  \\\"Bytes\\\" int4\\n  \\\"UnitPrice\\\" 
numeric(10,2) [not null]\\n\\n  Indexes {\\n    AlbumId [type: btree, name: \\\"IFK_TrackAlbumId\\\"]\\n    GenreId [type: btree, name: \\\"IFK_TrackGenreId\\\"]\\n    MediaTypeId [type: btree, name: \\\"IFK_TrackMediaTypeId\\\"]\\n  }\\n}\\n\\nRef \\\"FK_AlbumArtistId\\\":\\\"Artist\\\".\\\"ArtistId\\\" < \\\"Album\\\".\\\"ArtistId\\\"\\n\\nRef \\\"FK_CustomerSupportRepId\\\":\\\"Employee\\\".\\\"EmployeeId\\\" < \\\"Customer\\\".\\\"SupportRepId\\\"\\n\\nRef \\\"FK_EmployeeReportsTo\\\":\\\"Employee\\\".\\\"EmployeeId\\\" < \\\"Employee\\\".\\\"ReportsTo\\\"\\n\\nRef \\\"FK_InvoiceCustomerId\\\":\\\"Customer\\\".\\\"CustomerId\\\" < \\\"Invoice\\\".\\\"CustomerId\\\"\\n\\nRef \\\"FK_InvoiceLineInvoiceId\\\":\\\"Invoice\\\".\\\"InvoiceId\\\" < \\\"InvoiceLine\\\".\\\"InvoiceId\\\"\\n\\nRef \\\"FK_InvoiceLineTrackId\\\":\\\"Track\\\".\\\"TrackId\\\" < \\\"InvoiceLine\\\".\\\"TrackId\\\"\\n\\nRef \\\"FK_PlaylistTrackPlaylistId\\\":\\\"Playlist\\\".\\\"PlaylistId\\\" < \\\"PlaylistTrack\\\".\\\"PlaylistId\\\"\\n\\nRef \\\"FK_PlaylistTrackTrackId\\\":\\\"Track\\\".\\\"TrackId\\\" < \\\"PlaylistTrack\\\".\\\"TrackId\\\"\\n\\nRef \\\"FK_TrackAlbumId\\\":\\\"Album\\\".\\\"AlbumId\\\" < \\\"Track\\\".\\\"AlbumId\\\"\\n\\nRef \\\"FK_TrackGenreId\\\":\\\"Genre\\\".\\\"GenreId\\\" < \\\"Track\\\".\\\"GenreId\\\"\\n\\nRef \\\"FK_TrackMediaTypeId\\\":\\\"MediaType\\\".\\\"MediaTypeId\\\" < \\\"Track\\\".\\\"MediaTypeId\\\"\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/chinook.sql\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/chinook/employee_hierarchy_management/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 3: Employee Hierarchy Management\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.01 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.01:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_employee_count_and_titles(conn) -> bool:\n    \"\"\"Verify the final employee count and title changes.\"\"\"\n    with conn.cursor() as cur:\n        # Check the final verification query results\n        cur.execute(\"\"\"\n            SELECT \n                COUNT(*) as total_employees,\n                COUNT(CASE WHEN \"Title\" = 'CEO' THEN 1 END) as ceo_count,\n                COUNT(CASE WHEN \"Title\" = 'IT Specialist' THEN 1 END) as it_specialist_count,\n                COUNT(CASE WHEN \"ReportsTo\" = 1 THEN 1 END) as reports_to_ceo\n            FROM \"Employee\"\n        \"\"\")\n        result = cur.fetchone()\n        \n        total_employees, ceo_count, it_specialist_count, reports_to_ceo = result\n        \n        # Expected: total_employees = 9, ceo_count = 1, it_specialist_count = 1, 
reports_to_ceo = 4\n        if total_employees != 9:\n            print(f\"❌ Expected 9 total employees, got {total_employees}\")\n            return False\n            \n        if ceo_count != 1:\n            print(f\"❌ Expected 1 CEO, got {ceo_count}\")\n            return False\n            \n        if it_specialist_count != 0:\n            print(f\"❌ Expected 0 IT Specialists, got {it_specialist_count}\")\n            return False\n            \n        if reports_to_ceo != 4:\n            print(f\"❌ Expected 4 employees reporting to CEO, got {reports_to_ceo}\")\n            return False\n        \n        print(\"✅ Employee count and title verification passed\")\n        return True\n\ndef verify_specific_employees(conn) -> bool:\n    \"\"\"Verify specific employee records and modifications.\"\"\"\n    with conn.cursor() as cur:\n        # Check all employee fields in one query\n        cur.execute(\"\"\"\n            SELECT \"EmployeeId\", \"LastName\", \"FirstName\", \"Title\", \"ReportsTo\", \"BirthDate\", \n                   \"HireDate\", \"Address\", \"City\", \"State\", \"Country\", \"PostalCode\", \n                   \"Phone\", \"Fax\", \"Email\"\n            FROM \"Employee\" \n            WHERE \"EmployeeId\" IN (1, 2, 9, 10)\n            ORDER BY \"EmployeeId\"\n        \"\"\")\n        employees = cur.fetchall()\n        \n        from datetime import datetime\n        \n        expected = [\n            # Andrew Adams (ID 1) - Title changes to 'CEO', phone stays original, ReportsTo stays None\n            (1, 'Adams', 'Andrew', 'CEO', None, datetime(1962, 2, 18), datetime(2002, 8, 14),\n             '11120 Jasper Ave NW', 'Edmonton', 'AB', 'Canada', 'T5K 2N1', '+1 (780) 428-9482', '+1 (780) 428-3457', 'andrew@chinookcorp.com'),\n            # Nancy Edwards (ID 2) - Phone changes, title stays 'Sales Manager', ReportsTo stays 1\n            (2, 'Edwards', 'Nancy', 'Sales Manager', 1, datetime(1958, 12, 8), datetime(2002, 5, 1),\n             '825 
8 Ave SW', 'Calgary', 'AB', 'Canada', 'T2P 2T3', '+1 (403) 555-9999', '+1 (403) 262-3322', 'nancy@chinookcorp.com'),\n            # Sarah Johnson - all new data, final ReportsTo = 1 (changed in step 4)\n            (9, 'Johnson', 'Sarah', 'Sales Support Agent', 1, datetime(1985, 3, 15), datetime(2009, 1, 10),\n             '123 Oak Street', 'Calgary', 'AB', 'Canada', 'T2P 5G3', '+1 (403) 555-0123', '+1 (403) 555-0124', 'sarah.johnson@chinookcorp.com'),\n            # Mike Chen - all new data, final ReportsTo = 1 (changed in step 4)\n            (10, 'Chen', 'Mike', 'Sales Support Agent', 1, datetime(1982, 8, 22), datetime(2009, 1, 10),\n             '456 Pine Ave', 'Calgary', 'AB', 'Canada', 'T2P 5G4', '+1 (403) 555-0125', '+1 (403) 555-0126', 'mike.chen@chinookcorp.com')\n        ]\n        \n        if len(employees) != 4:\n            print(f\"❌ Expected 4 key employees, found {len(employees)}\")\n            return False\n            \n        # Full field comparison for all employees using rows_match\n        for actual, expected_emp in zip(employees, expected):\n            if not rows_match(actual, expected_emp):\n                print(f\"❌ Employee {actual[0]} row mismatch: expected {expected_emp}, got {actual}\")\n                return False\n        \n        print(\"✅ Specific employee verification passed - all fields match exactly\")\n        return True\n\ndef verify_customer_assignments(conn) -> bool:\n    \"\"\"Verify customer support representative assignments.\"\"\"\n    with conn.cursor() as cur:\n        # Check customers 1, 2, 3 are assigned to Sarah (ID 9)\n        cur.execute(\"\"\"\n            SELECT COUNT(*)\n            FROM \"Customer\" \n            WHERE \"CustomerId\" IN (1, 2, 3) AND \"SupportRepId\" = 9\n        \"\"\")\n        sarah_customers = cur.fetchone()[0]\n        \n        if sarah_customers != 3:\n            print(f\"❌ Expected 3 customers assigned to Sarah Johnson, got {sarah_customers}\")\n            return False\n    
    \n        # Check customers 4, 5, 6 are assigned to Mike (ID 10)\n        cur.execute(\"\"\"\n            SELECT COUNT(*)\n            FROM \"Customer\" \n            WHERE \"CustomerId\" IN (4, 5, 6) AND \"SupportRepId\" = 10\n        \"\"\")\n        mike_customers = cur.fetchone()[0]\n        \n        if mike_customers != 3:\n            print(f\"❌ Expected 3 customers assigned to Mike Chen, got {mike_customers}\")\n            return False\n        \n        print(\"✅ Customer assignment verification passed\")\n        return True\n\ndef verify_performance_table(conn) -> bool:\n    \"\"\"Verify the employee_performance table exists and has correct data.\"\"\"\n    with conn.cursor() as cur:\n        try:\n            # Get all performance records\n            cur.execute(\"\"\"\n                SELECT employee_id, customers_assigned, performance_score\n                FROM employee_performance \n                ORDER BY employee_id\n            \"\"\")\n            actual_results = cur.fetchall()\n            \n            # Get actual customer counts for verification\n            cur.execute(\"\"\"\n                SELECT \"SupportRepId\", COUNT(*) \n                FROM \"Customer\" \n                WHERE \"SupportRepId\" IN (9, 10)\n                GROUP BY \"SupportRepId\"\n                ORDER BY \"SupportRepId\"\n            \"\"\")\n            customer_counts = dict(cur.fetchall())\n            \n            expected = [\n                (9, customer_counts.get(9, 0), Decimal('4.5')),  # Sarah Johnson\n                (10, customer_counts.get(10, 0), Decimal('4.2'))  # Mike Chen\n            ]\n            \n            if len(actual_results) != 2:\n                print(f\"❌ Expected 2 performance records, got {len(actual_results)}\")\n                return False\n            \n            for actual, expected_row in zip(actual_results, expected):\n                if not rows_match(actual, expected_row):\n                    print(f\"❌ 
Performance record mismatch: expected {expected_row}, got {actual}\")\n                    return False\n            \n            print(\"✅ Employee performance table verification passed\")\n            return True\n            \n        except psycopg2.Error as e:\n            print(f\"❌ Employee performance table verification failed: {e}\")\n            return False\n\ndef verify_employee_deletion_and_promotion(conn) -> bool:\n    \"\"\"Verify Robert King deletion and Laura Callahan promotion.\"\"\"\n    with conn.cursor() as cur:\n        try:\n            # Verify Robert King (ID 7) is deleted\n            cur.execute(\"\"\"\n                SELECT COUNT(*) FROM \"Employee\" WHERE \"EmployeeId\" = 7\n            \"\"\")\n            if cur.fetchone()[0] != 0:\n                print(\"❌ Robert King (EmployeeId = 7) should be deleted\")\n                return False\n            \n            # Verify Laura Callahan (ID 8) promotion\n            cur.execute(\"\"\"\n                SELECT \"Title\" FROM \"Employee\" WHERE \"EmployeeId\" = 8\n            \"\"\")\n            laura_title = cur.fetchone()\n            if not laura_title or laura_title[0] != 'Senior IT Specialist':\n                print(f\"❌ Laura Callahan should have title 'Senior IT Specialist', got: {laura_title[0] if laura_title else None}\")\n                return False\n            \n            print(\"✅ Employee deletion and promotion verification passed\")\n            return True\n            \n        except psycopg2.Error as e:\n            print(f\"❌ Employee deletion/promotion verification failed: {e}\")\n            return False\n\ndef verify_salary_column(conn) -> bool:\n    \"\"\"Verify salary column exists and has correct values.\"\"\"\n    with conn.cursor() as cur:\n        try:\n            # Check if salary column exists and get all salary values\n            cur.execute(\"\"\"\n                SELECT \"EmployeeId\", salary \n                FROM \"Employee\" \n                
ORDER BY \"EmployeeId\"\n            \"\"\")\n            salary_data = cur.fetchall()\n            \n            # Verify Laura (ID 8) has 75000.00, others have 50000.00\n            for emp_id, salary in salary_data:\n                expected_salary = Decimal('75000.00') if emp_id == 8 else Decimal('50000.00')\n                if salary != expected_salary:\n                    print(f\"❌ Employee {emp_id} salary should be {expected_salary}, got {salary}\")\n                    return False\n            \n            print(\"✅ Salary column verification passed\")\n            return True\n            \n        except psycopg2.Error as e:\n            print(f\"❌ Salary column verification failed: {e}\")\n            return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n    print(\"Verifying Task 3: Employee Hierarchy Management\")\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Run verification checks with short-circuit evaluation\n        success = (verify_employee_count_and_titles(conn) and\n                  verify_specific_employees(conn) and\n                  verify_customer_assignments(conn) and\n                  verify_performance_table(conn) and\n                  verify_employee_deletion_and_promotion(conn) and\n                  verify_salary_column(conn))\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            print(\"All employee hierarchy management operations completed correctly!\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        
sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/chinook/sales_and_music_charts/description.md",
    "content": "Create a monthly sales dashboard and top music charts system for Chinook's management team to track business performance and identify trending music content.\n\n## Your Tasks:\n\n1. **Build the monthly sales summary table** — create a table called `monthly_sales_summary` in the `public` schema with:\n   * `year_month` (varchar) — format as 'YYYY-MM' (e.g., '2009-01')\n   * `total_invoices` (integer) — number of invoices in that month\n   * `total_revenue` (decimal) — sum of all invoice totals for the month\n   * `total_tracks_sold` (integer) — total quantity of individual tracks sold\n   * `average_invoice_value` (decimal) — average invoice amount for the month\n   * `unique_customers` (integer) — count of distinct customers who made purchases\n\n2. **Create the music charts table** — build a table called `top_music_charts` in the `public` schema with:\n   * `chart_type` (varchar) — either 'top_tracks', 'top_albums', or 'top_artists'\n   * `rank_position` (integer) — ranking from 1 to 10\n   * `item_id` (integer) — ID of the track, album, or artist\n   * `item_name` (varchar) — name of the track, album, or artist\n   * `total_revenue` (decimal) — total revenue generated by this item\n\n3. **Populate the monthly sales data**:\n   * Calculate metrics for each month that has invoice data\n   * Use invoice date to determine the month\n   * **Note**: Each invoice can contain multiple invoice lines (tracks)\n\n4. **Generate the top 10 charts**:\n   * **Top Tracks**: Rank tracks by total quantity sold across all invoices\n   * **Top Albums**: Rank albums by total revenue generated from their tracks\n   * **Top Artists**: Rank artists by total revenue from all their tracks across all albums\n\n5. 
**Business rules to follow**:\n   * Only include months where at least one invoice exists\n   * For album rankings, sum revenue from all tracks in each album\n   * For artist rankings, sum revenue from all tracks across all their albums\n   * Handle ties by using the item name in ascending alphabetical order as the tiebreaker, then the item ID if the names are also identical\n   * Exclude any items with zero sales\n\nThis system will provide clear, actionable business intelligence for monthly reporting and music trend analysis."
  },
  {
    "path": "tasks/postgres/standard/chinook/sales_and_music_charts/meta.json",
    "content": "{\n  \"task_id\": \"sales_and_music_charts\",\n  \"task_name\": \"Sales and Music Charts\",\n  \"category_id\": \"chinook\",\n  \"category_name\": \"Chinook\",\n  \"description\": \"Create monthly sales dashboard and top music charts system for tracking business performance and trending content.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-12\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"reporting and analytics\",\n    \"statistical aggregation\",\n    \"schema design\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"Album\\\" {\\n  \\\"AlbumId\\\" int4 [pk, not null]\\n  \\\"Title\\\" varchar(160) [not null]\\n  \\\"ArtistId\\\" int4 [not null]\\n\\n  Indexes {\\n    ArtistId [type: btree, name: \\\"IFK_AlbumArtistId\\\"]\\n  }\\n}\\n\\nTable \\\"Artist\\\" {\\n  \\\"ArtistId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Customer\\\" {\\n  \\\"CustomerId\\\" int4 [pk, not null]\\n  \\\"FirstName\\\" varchar(40) [not null]\\n  \\\"LastName\\\" varchar(20) [not null]\\n  \\\"Company\\\" varchar(80)\\n  \\\"Address\\\" varchar(70)\\n  \\\"City\\\" varchar(40)\\n  \\\"State\\\" varchar(40)\\n  \\\"Country\\\" varchar(40)\\n  \\\"PostalCode\\\" varchar(10)\\n  \\\"Phone\\\" varchar(24)\\n  \\\"Fax\\\" varchar(24)\\n  \\\"Email\\\" varchar(60) [not null]\\n  \\\"SupportRepId\\\" int4\\n\\n  Indexes {\\n    SupportRepId [type: btree, name: \\\"IFK_CustomerSupportRepId\\\"]\\n  }\\n}\\n\\nTable \\\"Employee\\\" {\\n  \\\"EmployeeId\\\" int4 [pk, not null]\\n  \\\"LastName\\\" varchar(20) [not null]\\n  \\\"FirstName\\\" varchar(20) [not null]\\n  \\\"Title\\\" varchar(30)\\n  \\\"ReportsTo\\\" int4\\n  \\\"BirthDate\\\" timestamp\\n  \\\"HireDate\\\" timestamp\\n  \\\"Address\\\" varchar(70)\\n  \\\"City\\\" varchar(40)\\n  \\\"State\\\" varchar(40)\\n  \\\"Country\\\" varchar(40)\\n  \\\"PostalCode\\\" varchar(10)\\n  
\\\"Phone\\\" varchar(24)\\n  \\\"Fax\\\" varchar(24)\\n  \\\"Email\\\" varchar(60)\\n\\n  Indexes {\\n    ReportsTo [type: btree, name: \\\"IFK_EmployeeReportsTo\\\"]\\n  }\\n}\\n\\nTable \\\"Genre\\\" {\\n  \\\"GenreId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Invoice\\\" {\\n  \\\"InvoiceId\\\" int4 [pk, not null]\\n  \\\"CustomerId\\\" int4 [not null]\\n  \\\"InvoiceDate\\\" timestamp [not null]\\n  \\\"BillingAddress\\\" varchar(70)\\n  \\\"BillingCity\\\" varchar(40)\\n  \\\"BillingState\\\" varchar(40)\\n  \\\"BillingCountry\\\" varchar(40)\\n  \\\"BillingPostalCode\\\" varchar(10)\\n  \\\"Total\\\" numeric(10,2) [not null]\\n\\n  Indexes {\\n    CustomerId [type: btree, name: \\\"IFK_InvoiceCustomerId\\\"]\\n  }\\n}\\n\\nTable \\\"InvoiceLine\\\" {\\n  \\\"InvoiceLineId\\\" int4 [pk, not null]\\n  \\\"InvoiceId\\\" int4 [not null]\\n  \\\"TrackId\\\" int4 [not null]\\n  \\\"UnitPrice\\\" numeric(10,2) [not null]\\n  \\\"Quantity\\\" int4 [not null]\\n\\n  Indexes {\\n    InvoiceId [type: btree, name: \\\"IFK_InvoiceLineInvoiceId\\\"]\\n    TrackId [type: btree, name: \\\"IFK_InvoiceLineTrackId\\\"]\\n  }\\n}\\n\\nTable \\\"MediaType\\\" {\\n  \\\"MediaTypeId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"Playlist\\\" {\\n  \\\"PlaylistId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(120)\\n}\\n\\nTable \\\"PlaylistTrack\\\" {\\n  \\\"PlaylistId\\\" int4 [not null]\\n  \\\"TrackId\\\" int4 [not null]\\n\\n  Indexes {\\n    (PlaylistId, TrackId) [type: btree, name: \\\"PK_PlaylistTrack\\\"]\\n    TrackId [type: btree, name: \\\"IFK_PlaylistTrackTrackId\\\"]\\n  }\\n}\\n\\nTable \\\"Track\\\" {\\n  \\\"TrackId\\\" int4 [pk, not null]\\n  \\\"Name\\\" varchar(200) [not null]\\n  \\\"AlbumId\\\" int4\\n  \\\"MediaTypeId\\\" int4 [not null]\\n  \\\"GenreId\\\" int4\\n  \\\"Composer\\\" varchar(220)\\n  \\\"Milliseconds\\\" int4 [not null]\\n  \\\"Bytes\\\" int4\\n  \\\"UnitPrice\\\" numeric(10,2) [not 
null]\\n\\n  Indexes {\\n    AlbumId [type: btree, name: \\\"IFK_TrackAlbumId\\\"]\\n    GenreId [type: btree, name: \\\"IFK_TrackGenreId\\\"]\\n    MediaTypeId [type: btree, name: \\\"IFK_TrackMediaTypeId\\\"]\\n  }\\n}\\n\\nRef \\\"FK_AlbumArtistId\\\":\\\"Artist\\\".\\\"ArtistId\\\" < \\\"Album\\\".\\\"ArtistId\\\"\\n\\nRef \\\"FK_CustomerSupportRepId\\\":\\\"Employee\\\".\\\"EmployeeId\\\" < \\\"Customer\\\".\\\"SupportRepId\\\"\\n\\nRef \\\"FK_EmployeeReportsTo\\\":\\\"Employee\\\".\\\"EmployeeId\\\" < \\\"Employee\\\".\\\"ReportsTo\\\"\\n\\nRef \\\"FK_InvoiceCustomerId\\\":\\\"Customer\\\".\\\"CustomerId\\\" < \\\"Invoice\\\".\\\"CustomerId\\\"\\n\\nRef \\\"FK_InvoiceLineInvoiceId\\\":\\\"Invoice\\\".\\\"InvoiceId\\\" < \\\"InvoiceLine\\\".\\\"InvoiceId\\\"\\n\\nRef \\\"FK_InvoiceLineTrackId\\\":\\\"Track\\\".\\\"TrackId\\\" < \\\"InvoiceLine\\\".\\\"TrackId\\\"\\n\\nRef \\\"FK_PlaylistTrackPlaylistId\\\":\\\"Playlist\\\".\\\"PlaylistId\\\" < \\\"PlaylistTrack\\\".\\\"PlaylistId\\\"\\n\\nRef \\\"FK_PlaylistTrackTrackId\\\":\\\"Track\\\".\\\"TrackId\\\" < \\\"PlaylistTrack\\\".\\\"TrackId\\\"\\n\\nRef \\\"FK_TrackAlbumId\\\":\\\"Album\\\".\\\"AlbumId\\\" < \\\"Track\\\".\\\"AlbumId\\\"\\n\\nRef \\\"FK_TrackGenreId\\\":\\\"Genre\\\".\\\"GenreId\\\" < \\\"Track\\\".\\\"GenreId\\\"\\n\\nRef \\\"FK_TrackMediaTypeId\\\":\\\"MediaType\\\".\\\"MediaTypeId\\\" < \\\"Track\\\".\\\"MediaTypeId\\\"\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/chinook.sql\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/chinook/sales_and_music_charts/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 1: Monthly Sales Dashboard and Music Charts\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.01 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.01:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_monthly_sales_results(conn) -> bool:\n    \"\"\"Verify the monthly sales summary results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT year_month, total_invoices, total_revenue, \n                   total_tracks_sold, average_invoice_value, unique_customers\n            FROM monthly_sales_summary \n            ORDER BY year_month\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH invoice_metrics AS (\n            SELECT\n                DATE_TRUNC('month', i.\"InvoiceDate\") AS ym,\n                COUNT(*)::INT                       AS total_invoices,\n                SUM(i.\"Total\")::DECIMAL        
     AS total_revenue,\n                AVG(i.\"Total\")::DECIMAL             AS average_invoice_value,\n                COUNT(DISTINCT i.\"CustomerId\")::INT AS unique_customers\n            FROM \"Invoice\" i\n            GROUP BY 1\n            ),\n            track_metrics AS (         \n            SELECT\n                DATE_TRUNC('month', i.\"InvoiceDate\") AS ym,\n                SUM(il.\"Quantity\")::INT              AS total_tracks_sold\n            FROM \"Invoice\" i\n            JOIN \"InvoiceLine\" il ON il.\"InvoiceId\" = i.\"InvoiceId\"\n            WHERE il.\"Quantity\" > 0                \n            GROUP BY 1\n            )\n            SELECT\n            TO_CHAR(im.ym, 'YYYY-MM')          AS year_month,\n            im.total_invoices,\n            im.total_revenue,\n            COALESCE(tm.total_tracks_sold, 0)  AS total_tracks_sold,\n            im.average_invoice_value,\n            im.unique_customers\n            FROM invoice_metrics im\n            LEFT JOIN track_metrics tm USING (ym)\n            ORDER BY year_month;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} monthly sales records, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Monthly sales row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total monthly sales mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Monthly sales results are correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_music_charts_results(conn) -> bool:\n    \"\"\"Verify the 
music charts results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT chart_type, rank_position, item_id, item_name, total_revenue\n            FROM top_music_charts\n            ORDER BY chart_type, rank_position\n        \"\"\")\n        actual_results = cur.fetchall()\n\n        # Execute ground truth queries for each chart type\n        cur.execute(\"\"\"\n            WITH track_stats AS (\n            SELECT\n                'top_tracks'::varchar AS chart_type,\n                t.\"TrackId\"           AS item_id,\n                t.\"Name\"              AS item_name,\n                SUM(il.\"UnitPrice\" * il.\"Quantity\")::DECIMAL AS total_revenue,\n                SUM(il.\"Quantity\")::INT                      AS total_quantity\n            FROM \"Track\" t\n            JOIN \"InvoiceLine\" il ON il.\"TrackId\" = t.\"TrackId\"\n            GROUP BY t.\"TrackId\", t.\"Name\"\n            HAVING SUM(il.\"Quantity\") > 0\n            ),\n            track_ranked AS (\n            SELECT\n                chart_type, item_id, item_name, total_revenue,\n                ROW_NUMBER() OVER (ORDER BY total_quantity DESC, item_name, item_id) AS rank_position\n            FROM track_stats\n            ),\n            album_rev AS (\n            SELECT\n                'top_albums'::varchar AS chart_type,\n                a.\"AlbumId\"           AS item_id,\n                a.\"Title\"             AS item_name,\n                SUM(il.\"UnitPrice\" * il.\"Quantity\")::DECIMAL AS total_revenue\n            FROM \"Album\" a\n            JOIN \"Track\" t        ON t.\"AlbumId\"  = a.\"AlbumId\"\n            JOIN \"InvoiceLine\" il ON il.\"TrackId\" = t.\"TrackId\"\n            GROUP BY a.\"AlbumId\", a.\"Title\"\n            HAVING SUM(il.\"UnitPrice\" * il.\"Quantity\") > 0\n            ),\n            album_ranked AS (\n            SELECT\n                chart_type, item_id, 
item_name, total_revenue,\n                ROW_NUMBER() OVER (ORDER BY total_revenue DESC, item_name, item_id) AS rank_position\n            FROM album_rev\n            ),\n            artist_rev AS (\n            SELECT\n                'top_artists'::varchar AS chart_type,\n                ar.\"ArtistId\"          AS item_id,\n                ar.\"Name\"              AS item_name,\n                SUM(il.\"UnitPrice\" * il.\"Quantity\")::DECIMAL AS total_revenue\n            FROM \"Artist\" ar\n            JOIN \"Album\"  a       ON a.\"ArtistId\" = ar.\"ArtistId\"\n            JOIN \"Track\"  t       ON t.\"AlbumId\"  = a.\"AlbumId\"\n            JOIN \"InvoiceLine\" il ON il.\"TrackId\" = t.\"TrackId\"\n            GROUP BY ar.\"ArtistId\", ar.\"Name\"\n            HAVING SUM(il.\"UnitPrice\" * il.\"Quantity\") > 0\n            ),\n            artist_ranked AS (\n            SELECT\n                chart_type, item_id, item_name, total_revenue,\n                ROW_NUMBER() OVER (ORDER BY total_revenue DESC, item_name, item_id) AS rank_position\n            FROM artist_rev\n            )\n            SELECT chart_type, rank_position, item_id, item_name, total_revenue\n            FROM (\n            SELECT * FROM track_ranked  WHERE rank_position <= 10\n            UNION ALL\n            SELECT * FROM album_ranked  WHERE rank_position <= 10\n            UNION ALL\n            SELECT * FROM artist_ranked WHERE rank_position <= 10\n            ) x\n            ORDER BY chart_type, rank_position;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} music chart records, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 
mismatches\n                    print(f\"❌ Music chart row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total music chart mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Music chart results are correct ({len(actual_results)} records)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify results\n        success = verify_monthly_sales_results(conn) and verify_music_charts_results(conn)\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/dvdrental/customer_analysis_fix/description.md",
    "content": "Fix the customer analysis query that is producing incorrect results.\n\n## Background\n\nThe data analytics team attempted to create a customer behavior analysis query to identify active customers and analyze their spending patterns and preferences. The requirements are:\n- Only count rentals that have associated payment records (paid rentals)\n- Only include customers with at least 15 paid rentals\n- Only include customers with valid email addresses\n\nHowever, they're getting incorrect results - the query is over-counting rentals and calculating wrong spending amounts. \n\nYour task is to fix this query to produce accurate results.\n\n## The Problematic Query\n\nHere's the buggy query that needs to be fixed:\n\n```sql\nWITH customer_basic_stats AS (\n    SELECT \n        c.customer_id,\n        c.first_name || ' ' || c.last_name as customer_name,\n        ci.city as customer_city,\n        co.country as customer_country,\n        COUNT(r.rental_id) as total_rentals,\n        COUNT(DISTINCT i.film_id) as unique_films,\n        SUM(p.amount) as total_spent,\n        AVG(EXTRACT(days FROM (r.return_date - r.rental_date))) as avg_rental_duration\n    FROM customer c\n    JOIN address a ON c.address_id = a.address_id\n    JOIN city ci ON a.city_id = ci.city_id\n    JOIN country co ON ci.country_id = co.country_id\n    JOIN rental r ON c.customer_id = r.customer_id\n    JOIN inventory i ON r.inventory_id = i.inventory_id\n    JOIN payment p ON r.rental_id = p.rental_id\n    WHERE c.email IS NOT NULL\n    GROUP BY c.customer_id, c.first_name, c.last_name, ci.city, co.country\n    HAVING COUNT(r.rental_id) >= 15\n),\ncustomer_categories AS (\n    SELECT \n        c.customer_id,\n        cat.name as category_name,\n        COUNT(*) as category_count,\n        ROW_NUMBER() OVER (PARTITION BY c.customer_id ORDER BY COUNT(*) DESC, cat.name ASC) as rn\n    FROM customer c\n    JOIN rental r ON c.customer_id = r.customer_id\n    JOIN inventory i ON 
r.inventory_id = i.inventory_id\n    JOIN film f ON i.film_id = f.film_id\n    JOIN film_category fc ON f.film_id = fc.film_id\n    JOIN category cat ON fc.category_id = cat.category_id\n    JOIN payment p ON r.rental_id = p.rental_id\n    WHERE c.email IS NOT NULL\n    GROUP BY c.customer_id, cat.name\n),\ncustomer_actors AS (\n    SELECT \n        c.customer_id,\n        a.first_name || ' ' || a.last_name as actor_name,\n        COUNT(*) as actor_count,\n        ROW_NUMBER() OVER (PARTITION BY c.customer_id ORDER BY COUNT(*) DESC, (a.first_name || ' ' || a.last_name) ASC) as rn\n    FROM customer c\n    JOIN rental r ON c.customer_id = r.customer_id\n    JOIN inventory i ON r.inventory_id = i.inventory_id\n    JOIN film f ON i.film_id = f.film_id\n    JOIN film_actor fa ON f.film_id = fa.film_id\n    JOIN actor a ON fa.actor_id = a.actor_id\n    JOIN payment p ON r.rental_id = p.rental_id\n    WHERE c.email IS NOT NULL\n    GROUP BY c.customer_id, a.first_name, a.last_name\n),\nregional_popular_films AS (\n    SELECT \n        co.country,\n        f.title,\n        COUNT(*) as rental_count,\n        ROW_NUMBER() OVER (PARTITION BY co.country ORDER BY COUNT(*) DESC, f.title ASC) as rn\n    FROM rental r\n    JOIN inventory i ON r.inventory_id = i.inventory_id\n    JOIN film f ON i.film_id = f.film_id\n    JOIN customer c ON r.customer_id = c.customer_id\n    JOIN address a ON c.address_id = a.address_id\n    JOIN city ci ON a.city_id = ci.city_id\n    JOIN country co ON ci.country_id = co.country_id\n    JOIN payment p ON r.rental_id = p.rental_id\n    WHERE c.email IS NOT NULL\n    GROUP BY co.country, f.title\n)\nSELECT \n    cbs.customer_id,\n    cbs.customer_name,\n    cbs.customer_city,\n    cbs.customer_country,\n    cbs.total_rentals,\n    cbs.unique_films,\n    cbs.total_spent,\n    cc.category_name as favorite_category,\n    ca.actor_name as favorite_actor,\n    cbs.avg_rental_duration,\n    CASE \n        WHEN cbs.total_spent >= 150 THEN 'Premium'\n      
  WHEN cbs.total_spent >= 75 THEN 'Standard'\n        ELSE 'Basic'\n    END as customer_tier,\n    rpf.title as most_popular_film_in_region,\n    rpf.rental_count as regional_film_rental_count\nFROM customer_basic_stats cbs\nLEFT JOIN customer_categories cc ON cbs.customer_id = cc.customer_id AND cc.rn = 1\nLEFT JOIN customer_actors ca ON cbs.customer_id = ca.customer_id AND ca.rn = 1\nLEFT JOIN regional_popular_films rpf ON cbs.customer_country = rpf.country AND rpf.rn = 1\nORDER BY cbs.total_spent DESC, cbs.total_rentals DESC, cbs.customer_name ASC;\n```\n\n## Known Issues\n\nWhen comparing the problematic query results with the expected correct values, the following discrepancies are observed:\n\n1. **Rental count discrepancies**: Many customers show higher `total_rentals` counts than expected\n\n2. **Spending amount errors**: The `total_spent` values don't match the correct calculations \n\n3. **Incorrect favorite categories and actors**: Many customers show wrong favorite categories and actors compared to the expected results\n\n4. **Time calculation inconsistencies**: The `avg_rental_duration` values differ significantly from the correct calculations\n    - Example: Customer ID 1 shows 3.90 days instead of the expected 4.27 days\n    - Example: Customer ID 2 shows 5.23 days instead of the expected 5.69 days\n\n## Your Task\n\nDebug and fix the query to produce accurate results. Then create a table with your corrected results.\n\n1. **Fix the query** to ensure:\n   - Accurate customer spending and rental counts\n   - Correct favorite categories and actors\n   - Proper regional popular films\n\n2. **Create a table** called `customer_analysis_fixed` in the `public` schema with your corrected query results. The table should have the same columns as the original query output.\n\n**Important**: The business logic and output columns should remain the same - only fix the data accuracy issues."
  },
  {
    "path": "tasks/postgres/standard/dvdrental/customer_analysis_fix/meta.json",
    "content": "{\n  \"task_id\": \"customer_analysis_fix\",\n  \"task_name\": \"Customer Analysis Fix\",\n  \"category_id\": \"dvdrental\",\n  \"category_name\": \"DVD Rental\",\n  \"description\": \"Debug and fix customer behavior analysis query producing incorrect rental counts and spending calculations.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-20\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"performance optimization\",\n    \"data integrity enforcement\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"mpaa_rating\\\" {\\n  \\\"G\\\"\\n  \\\"PG\\\"\\n  \\\"PG-13\\\"\\n  \\\"R\\\"\\n  \\\"NC-17\\\"\\n}\\n\\nTable \\\"customer\\\" {\\n  \\\"customer_id\\\" int4 [pk, not null, increment]\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  \\\"last_name\\\" varchar(45) [not null]\\n  \\\"email\\\" varchar(50)\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"activebool\\\" bool [not null, default: true]\\n  \\\"create_date\\\" date [not null, default: `('now'::text)::date`]\\n  \\\"last_update\\\" timestamp [default: `now()`]\\n  \\\"active\\\" int4\\n\\n  Indexes {\\n    address_id [type: btree, name: \\\"idx_fk_address_id\\\"]\\n    store_id [type: btree, name: \\\"idx_fk_store_id\\\"]\\n    last_name [type: btree, name: \\\"idx_last_name\\\"]\\n  }\\n}\\n\\nTable \\\"actor\\\" {\\n  \\\"actor_id\\\" int4 [pk, not null, increment]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  \\\"last_name\\\" varchar(45) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    last_name [type: btree, name: \\\"idx_actor_last_name\\\"]\\n  }\\n}\\n\\nTable \\\"category\\\" {\\n  \\\"category_id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(25) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"film\\\" {\\n  \\\"film_id\\\" int4 [pk, not null, 
increment]\\n  \\\"title\\\" varchar(255) [not null]\\n  \\\"description\\\" text\\n  \\\"release_year\\\" int4\\n  \\\"language_id\\\" int2 [not null]\\n  \\\"rental_duration\\\" int2 [not null, default: 3]\\n  \\\"rental_rate\\\" numeric(4,2) [not null, default: 4.99]\\n  \\\"length\\\" int2\\n  \\\"replacement_cost\\\" numeric(5,2) [not null, default: 19.99]\\n  \\\"rating\\\" mpaa_rating [default: 'G']\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n  \\\"special_features\\\" \\\"text[]\\\"\\n  \\\"fulltext\\\" tsvector [not null]\\n\\n  Indexes {\\n    fulltext [type: gist, name: \\\"film_fulltext_idx\\\"]\\n    language_id [type: btree, name: \\\"idx_fk_language_id\\\"]\\n    title [type: btree, name: \\\"idx_title\\\"]\\n  }\\n}\\n\\nTable \\\"film_actor\\\" {\\n  \\\"actor_id\\\" int2 [not null]\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (actor_id, film_id) [type: btree, name: \\\"film_actor_pkey\\\"]\\n    film_id [type: btree, name: \\\"idx_fk_film_id\\\"]\\n  }\\n}\\n\\nTable \\\"film_category\\\" {\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"category_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (film_id, category_id) [type: btree, name: \\\"film_category_pkey\\\"]\\n  }\\n}\\n\\nTable \\\"address\\\" {\\n  \\\"address_id\\\" int4 [pk, not null, increment]\\n  \\\"address\\\" varchar(50) [not null]\\n  \\\"address2\\\" varchar(50)\\n  \\\"district\\\" varchar(20) [not null]\\n  \\\"city_id\\\" int2 [not null]\\n  \\\"postal_code\\\" varchar(10)\\n  \\\"phone\\\" varchar(20) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    city_id [type: btree, name: \\\"idx_fk_city_id\\\"]\\n  }\\n}\\n\\nTable \\\"city\\\" {\\n  \\\"city_id\\\" int4 [pk, not null, increment]\\n  \\\"city\\\" varchar(50) [not null]\\n  \\\"country_id\\\" int2 [not null]\\n  
\\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    country_id [type: btree, name: \\\"idx_fk_country_id\\\"]\\n  }\\n}\\n\\nTable \\\"country\\\" {\\n  \\\"country_id\\\" int4 [pk, not null, increment]\\n  \\\"country\\\" varchar(50) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"inventory\\\" {\\n  \\\"inventory_id\\\" int4 [pk, not null, increment]\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (store_id, film_id) [type: btree, name: \\\"idx_store_id_film_id\\\"]\\n  }\\n}\\n\\nTable \\\"language\\\" {\\n  \\\"language_id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" bpchar(20) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"payment\\\" {\\n  \\\"payment_id\\\" int4 [pk, not null, increment]\\n  \\\"customer_id\\\" int2 [not null]\\n  \\\"staff_id\\\" int2 [not null]\\n  \\\"rental_id\\\" int4 [not null]\\n  \\\"amount\\\" numeric(5,2) [not null]\\n  \\\"payment_date\\\" timestamp [not null]\\n\\n  Indexes {\\n    rental_id [type: btree, name: \\\"idx_fk_rental_id\\\"]\\n    staff_id [type: btree, name: \\\"idx_fk_staff_id\\\"]\\n  }\\n}\\n\\nTable \\\"rental\\\" {\\n  \\\"rental_id\\\" int4 [pk, not null, increment]\\n  \\\"rental_date\\\" timestamp [not null]\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"customer_id\\\" int2 [not null]\\n  \\\"return_date\\\" timestamp\\n  \\\"staff_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (rental_date, inventory_id, customer_id) [type: btree, name: \\\"idx_unq_rental_rental_date_inventory_id_customer_id\\\"]\\n    inventory_id [type: btree, name: \\\"idx_fk_inventory_id\\\"]\\n  }\\n}\\n\\nTable \\\"staff\\\" {\\n  \\\"staff_id\\\" int4 [pk, not null, increment]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  
\\\"last_name\\\" varchar(45) [not null]\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"email\\\" varchar(50)\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"active\\\" bool [not null, default: true]\\n  \\\"username\\\" varchar(16) [not null]\\n  \\\"password\\\" varchar(40)\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n  \\\"picture\\\" bytea\\n}\\n\\nTable \\\"store\\\" {\\n  \\\"store_id\\\" int4 [pk, not null, increment]\\n  \\\"manager_staff_id\\\" int2 [unique, not null]\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nRef \\\"fk_address_city\\\":\\\"city\\\".\\\"city_id\\\" < \\\"address\\\".\\\"city_id\\\"\\n\\nRef \\\"fk_city\\\":\\\"country\\\".\\\"country_id\\\" < \\\"city\\\".\\\"country_id\\\"\\n\\nRef \\\"customer_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"customer\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_language_id_fkey\\\":\\\"language\\\".\\\"language_id\\\" < \\\"film\\\".\\\"language_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_actor_actor_id_fkey\\\":\\\"actor\\\".\\\"actor_id\\\" < \\\"film_actor\\\".\\\"actor_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_actor_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"film_actor\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_category_category_id_fkey\\\":\\\"category\\\".\\\"category_id\\\" < \\\"film_category\\\".\\\"category_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_category_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"film_category\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"inventory_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"inventory\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"payment_customer_id_fkey\\\":\\\"customer\\\".\\\"customer_id\\\" < \\\"payment\\\".\\\"customer_id\\\" [update: cascade, delete: restrict]\\n\\nRef 
\\\"payment_rental_id_fkey\\\":\\\"rental\\\".\\\"rental_id\\\" < \\\"payment\\\".\\\"rental_id\\\" [update: cascade, delete: set null]\\n\\nRef \\\"payment_staff_id_fkey\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"payment\\\".\\\"staff_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_customer_id_fkey\\\":\\\"customer\\\".\\\"customer_id\\\" < \\\"rental\\\".\\\"customer_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_inventory_id_fkey\\\":\\\"inventory\\\".\\\"inventory_id\\\" < \\\"rental\\\".\\\"inventory_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_staff_id_key\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"rental\\\".\\\"staff_id\\\"\\n\\nRef \\\"staff_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"staff\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"store_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"store\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"store_manager_staff_id_fkey\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"store\\\".\\\"manager_staff_id\\\" [update: cascade, delete: restrict]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/gordonkwokkwok/DVD-Rental-PostgreSQL-Project\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/dvdrental/customer_analysis_fix/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 3: Fix Customer Analysis Query\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"Compare two rows with appropriate tolerance for decimals and floats.\"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, (Decimal, float)) and isinstance(expected, (Decimal, float)):\n            # Use higher tolerance for floating point comparisons\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef verify_customer_analysis_fixed_table(conn) -> bool:\n    \"\"\"Verify the customer_analysis_fixed table results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT customer_id, customer_name, customer_city, customer_country,\n                   total_rentals, unique_films, total_spent, favorite_category,\n                   favorite_actor, avg_rental_duration, customer_tier,\n                   most_popular_film_in_region, regional_film_rental_count\n            FROM customer_analysis_fixed\n            ORDER BY total_spent DESC, total_rentals DESC, customer_name ASC\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query (the corrected version)\n        
cur.execute(\"\"\"\n            WITH paid_rentals AS (\n            SELECT DISTINCT\n                    r.rental_id,\n                    r.customer_id,\n                    r.inventory_id,\n                    r.rental_date,\n                    r.return_date\n            FROM rental r\n            JOIN payment p ON p.rental_id = r.rental_id\n            ),\n            payments_by_customer AS (\n            SELECT pr.customer_id, SUM(p.amount) AS total_spent\n            FROM paid_rentals pr\n            JOIN payment p ON p.rental_id = pr.rental_id\n            GROUP BY pr.customer_id\n            ),\n            customer_basic_stats AS (\n            SELECT\n                c.customer_id,\n                c.first_name || ' ' || c.last_name AS customer_name,\n                ci.city AS customer_city,\n                co.country AS customer_country,\n                COUNT(DISTINCT pr.rental_id) AS total_rentals,\n                COUNT(DISTINCT i.film_id) AS unique_films,\n                pbc.total_spent,\n                AVG(EXTRACT(EPOCH FROM (pr.return_date - pr.rental_date)) / 86400.0) AS avg_rental_duration\n            FROM customer c\n            JOIN address a ON c.address_id = a.address_id\n            JOIN city ci ON a.city_id = ci.city_id\n            JOIN country co ON ci.country_id = co.country_id\n            JOIN paid_rentals pr ON pr.customer_id = c.customer_id\n            JOIN inventory i ON pr.inventory_id = i.inventory_id\n            JOIN payments_by_customer pbc ON pbc.customer_id = c.customer_id\n            WHERE c.email IS NOT NULL\n            GROUP BY c.customer_id, c.first_name, c.last_name, ci.city, co.country, pbc.total_spent\n            HAVING COUNT(DISTINCT pr.rental_id) >= 15\n            ),\n            customer_categories AS (\n            SELECT\n                pr.customer_id,\n                cat.name AS category_name,\n                COUNT(*) AS category_count,\n                ROW_NUMBER() OVER (\n                    
PARTITION BY pr.customer_id\n                    ORDER BY COUNT(*) DESC, cat.name ASC\n                ) AS rn\n            FROM paid_rentals pr\n            JOIN inventory i ON pr.inventory_id = i.inventory_id\n            JOIN film f ON i.film_id = f.film_id\n            JOIN film_category fc ON f.film_id = fc.film_id\n            JOIN category cat ON fc.category_id = cat.category_id\n            JOIN customer c ON pr.customer_id = c.customer_id\n            WHERE c.email IS NOT NULL\n            GROUP BY pr.customer_id, cat.name\n            ),\n            customer_actors AS (\n            SELECT\n                pr.customer_id,\n                (a.first_name || ' ' || a.last_name) AS actor_name,\n                COUNT(*) AS actor_count,\n                ROW_NUMBER() OVER (\n                    PARTITION BY pr.customer_id\n                    ORDER BY COUNT(*) DESC, (a.first_name || ' ' || a.last_name) ASC\n                ) AS rn\n            FROM paid_rentals pr\n            JOIN inventory i ON pr.inventory_id = i.inventory_id\n            JOIN film f ON i.film_id = f.film_id\n            JOIN film_actor fa ON f.film_id = fa.film_id\n            JOIN actor a ON fa.actor_id = a.actor_id\n            JOIN customer c ON pr.customer_id = c.customer_id\n            WHERE c.email IS NOT NULL\n            GROUP BY pr.customer_id, a.first_name, a.last_name\n            ),\n            regional_popular_films AS (\n            SELECT\n                co.country,\n                f.title,\n                COUNT(DISTINCT pr.rental_id) AS rental_count,\n                ROW_NUMBER() OVER (\n                    PARTITION BY co.country\n                    ORDER BY COUNT(DISTINCT pr.rental_id) DESC, f.title ASC\n                ) AS rn\n            FROM paid_rentals pr\n            JOIN customer c ON pr.customer_id = c.customer_id\n            JOIN address a ON c.address_id = a.address_id\n            JOIN city ci ON a.city_id = ci.city_id\n            JOIN country co ON 
ci.country_id = co.country_id\n            JOIN inventory i ON pr.inventory_id = i.inventory_id\n            JOIN film f ON i.film_id = f.film_id\n            WHERE c.email IS NOT NULL\n            GROUP BY co.country, f.title\n            )\n            SELECT\n                cbs.customer_id,\n                cbs.customer_name,\n                cbs.customer_city,\n                cbs.customer_country,\n                cbs.total_rentals,\n                cbs.unique_films,\n                cbs.total_spent,\n                cc.category_name AS favorite_category,\n                ca.actor_name AS favorite_actor,\n                cbs.avg_rental_duration,\n                CASE\n                WHEN cbs.total_spent >= 150 THEN 'Premium'\n                WHEN cbs.total_spent >= 75  THEN 'Standard'\n                ELSE 'Basic'\n                END AS customer_tier,\n                rpf.title AS most_popular_film_in_region,\n                rpf.rental_count AS regional_film_rental_count\n            FROM customer_basic_stats cbs\n            LEFT JOIN customer_categories cc\n            ON cbs.customer_id = cc.customer_id AND cc.rn = 1\n            LEFT JOIN customer_actors ca\n            ON cbs.customer_id = ca.customer_id AND ca.rn = 1\n            LEFT JOIN regional_popular_films rpf\n            ON cbs.customer_country = rpf.country AND rpf.rn = 1\n            ORDER BY cbs.total_spent DESC, cbs.total_rentals DESC, cbs.customer_name ASC;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} rows, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch:\")\n               
     print(f\"   Expected: {expected}\")\n                    print(f\"   Actual:   {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Query results are correct ({len(actual_results)} rows)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 70)\n    print(\"PostgreSQL Task 3 Verification: Fix Customer Analysis Query\")\n    print(\"=\" * 70)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n\n        # Verify results\n        success = verify_customer_analysis_fixed_table(conn)\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            print(\"   - Query was successfully debugged and fixed\")\n            print(\"   - All 587 rows match the expected results\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            print(\"   - The query still has issues\")\n            print(\"   - Please review the duplicate counting problem\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/dvdrental/customer_analytics_optimization/description.md",
    "content": "Optimize slow customer analytics query in the DVD rental database.\n\n## Background\n\nThe business intelligence team is running customer analytics reports, but one of their critical queries has become extremely slow. The query that used to run in milliseconds is now taking over a second to complete, causing timeout issues in their reporting dashboard.\n\n## Your Task\n\nAnalyze and optimize the performance of this customer analytics query:\n\n```sql\nSELECT \n    c.customer_id,\n    c.first_name,\n    c.last_name,\n    c.email,\n    COUNT(DISTINCT p.payment_id) as total_payments,\n    SUM(p.amount) as total_spent,\n    AVG(p.amount) as avg_payment,\n    COUNT(DISTINCT EXTRACT(month FROM p.payment_date)) as active_months,\n    MAX(p.payment_date) as last_payment,\n    MIN(p.payment_date) as first_payment,\n    (SELECT COUNT(*) FROM payment p2 WHERE p2.customer_id = c.customer_id AND p2.amount > 5.0) as high_value_payments,\n    (SELECT SUM(amount) FROM payment p3 WHERE p3.customer_id = c.customer_id AND p3.payment_date >= '2007-03-01') as recent_spending\nFROM customer c\nJOIN payment p ON c.customer_id = p.customer_id\nWHERE c.active = 1\nGROUP BY c.customer_id, c.first_name, c.last_name, c.email\nHAVING COUNT(p.payment_id) >= 10\nORDER BY total_spent DESC, total_payments DESC;\n```\n\nThe query is currently taking over 1000ms to execute and has a very high cost in the execution plan. The team needs this optimized urgently as it's blocking their daily reporting processes.\n\n## Requirements\n\n- Use `EXPLAIN ANALYZE` to identify performance bottlenecks\n- Implement appropriate database optimizations  \n- Ensure queries return accurate results after optimization\n- Document your optimization approach and performance improvements"
  },
  {
    "path": "tasks/postgres/standard/dvdrental/customer_analytics_optimization/meta.json",
    "content": "{\n  \"task_id\": \"customer_analytics_optimization\",\n  \"task_name\": \"Customer Analytics Optimization\",\n  \"category_id\": \"dvdrental\",\n  \"category_name\": \"DVD Rental\",\n  \"description\": \"Optimize slow customer analytics query with correlated subqueries causing timeout issues in reporting dashboard.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-20\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"performance optimization\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"mpaa_rating\\\" {\\n  \\\"G\\\"\\n  \\\"PG\\\"\\n  \\\"PG-13\\\"\\n  \\\"R\\\"\\n  \\\"NC-17\\\"\\n}\\n\\nTable \\\"customer\\\" {\\n  \\\"customer_id\\\" int4 [pk, not null, increment]\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  \\\"last_name\\\" varchar(45) [not null]\\n  \\\"email\\\" varchar(50)\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"activebool\\\" bool [not null, default: true]\\n  \\\"create_date\\\" date [not null, default: `('now'::text)::date`]\\n  \\\"last_update\\\" timestamp [default: `now()`]\\n  \\\"active\\\" int4\\n\\n  Indexes {\\n    address_id [type: btree, name: \\\"idx_fk_address_id\\\"]\\n    store_id [type: btree, name: \\\"idx_fk_store_id\\\"]\\n    last_name [type: btree, name: \\\"idx_last_name\\\"]\\n  }\\n}\\n\\nTable \\\"actor\\\" {\\n  \\\"actor_id\\\" int4 [pk, not null, increment]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  \\\"last_name\\\" varchar(45) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    last_name [type: btree, name: \\\"idx_actor_last_name\\\"]\\n  }\\n}\\n\\nTable \\\"category\\\" {\\n  \\\"category_id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(25) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"film\\\" {\\n  \\\"film_id\\\" int4 [pk, not null, 
increment]\\n  \\\"title\\\" varchar(255) [not null]\\n  \\\"description\\\" text\\n  \\\"release_year\\\" int4\\n  \\\"language_id\\\" int2 [not null]\\n  \\\"rental_duration\\\" int2 [not null, default: 3]\\n  \\\"rental_rate\\\" numeric(4,2) [not null, default: 4.99]\\n  \\\"length\\\" int2\\n  \\\"replacement_cost\\\" numeric(5,2) [not null, default: 19.99]\\n  \\\"rating\\\" mpaa_rating [default: 'G']\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n  \\\"special_features\\\" \\\"text[]\\\"\\n  \\\"fulltext\\\" tsvector [not null]\\n\\n  Indexes {\\n    fulltext [type: gist, name: \\\"film_fulltext_idx\\\"]\\n    language_id [type: btree, name: \\\"idx_fk_language_id\\\"]\\n    title [type: btree, name: \\\"idx_title\\\"]\\n  }\\n}\\n\\nTable \\\"film_actor\\\" {\\n  \\\"actor_id\\\" int2 [not null]\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (actor_id, film_id) [type: btree, name: \\\"film_actor_pkey\\\"]\\n    film_id [type: btree, name: \\\"idx_fk_film_id\\\"]\\n  }\\n}\\n\\nTable \\\"film_category\\\" {\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"category_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (film_id, category_id) [type: btree, name: \\\"film_category_pkey\\\"]\\n  }\\n}\\n\\nTable \\\"address\\\" {\\n  \\\"address_id\\\" int4 [pk, not null, increment]\\n  \\\"address\\\" varchar(50) [not null]\\n  \\\"address2\\\" varchar(50)\\n  \\\"district\\\" varchar(20) [not null]\\n  \\\"city_id\\\" int2 [not null]\\n  \\\"postal_code\\\" varchar(10)\\n  \\\"phone\\\" varchar(20) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    city_id [type: btree, name: \\\"idx_fk_city_id\\\"]\\n  }\\n}\\n\\nTable \\\"city\\\" {\\n  \\\"city_id\\\" int4 [pk, not null, increment]\\n  \\\"city\\\" varchar(50) [not null]\\n  \\\"country_id\\\" int2 [not null]\\n  
\\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    country_id [type: btree, name: \\\"idx_fk_country_id\\\"]\\n  }\\n}\\n\\nTable \\\"country\\\" {\\n  \\\"country_id\\\" int4 [pk, not null, increment]\\n  \\\"country\\\" varchar(50) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"inventory\\\" {\\n  \\\"inventory_id\\\" int4 [pk, not null, increment]\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (store_id, film_id) [type: btree, name: \\\"idx_store_id_film_id\\\"]\\n  }\\n}\\n\\nTable \\\"language\\\" {\\n  \\\"language_id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" bpchar(20) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"payment\\\" {\\n  \\\"payment_id\\\" int4 [pk, not null, increment]\\n  \\\"customer_id\\\" int2 [not null]\\n  \\\"staff_id\\\" int2 [not null]\\n  \\\"rental_id\\\" int4 [not null]\\n  \\\"amount\\\" numeric(5,2) [not null]\\n  \\\"payment_date\\\" timestamp [not null]\\n\\n  Indexes {\\n    rental_id [type: btree, name: \\\"idx_fk_rental_id\\\"]\\n    staff_id [type: btree, name: \\\"idx_fk_staff_id\\\"]\\n  }\\n}\\n\\nTable \\\"rental\\\" {\\n  \\\"rental_id\\\" int4 [pk, not null, increment]\\n  \\\"rental_date\\\" timestamp [not null]\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"customer_id\\\" int2 [not null]\\n  \\\"return_date\\\" timestamp\\n  \\\"staff_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (rental_date, inventory_id, customer_id) [type: btree, name: \\\"idx_unq_rental_rental_date_inventory_id_customer_id\\\"]\\n    inventory_id [type: btree, name: \\\"idx_fk_inventory_id\\\"]\\n  }\\n}\\n\\nTable \\\"staff\\\" {\\n  \\\"staff_id\\\" int4 [pk, not null, increment]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  
\\\"last_name\\\" varchar(45) [not null]\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"email\\\" varchar(50)\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"active\\\" bool [not null, default: true]\\n  \\\"username\\\" varchar(16) [not null]\\n  \\\"password\\\" varchar(40)\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n  \\\"picture\\\" bytea\\n}\\n\\nTable \\\"store\\\" {\\n  \\\"store_id\\\" int4 [pk, not null, increment]\\n  \\\"manager_staff_id\\\" int2 [unique, not null]\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nRef \\\"fk_address_city\\\":\\\"city\\\".\\\"city_id\\\" < \\\"address\\\".\\\"city_id\\\"\\n\\nRef \\\"fk_city\\\":\\\"country\\\".\\\"country_id\\\" < \\\"city\\\".\\\"country_id\\\"\\n\\nRef \\\"customer_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"customer\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_language_id_fkey\\\":\\\"language\\\".\\\"language_id\\\" < \\\"film\\\".\\\"language_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_actor_actor_id_fkey\\\":\\\"actor\\\".\\\"actor_id\\\" < \\\"film_actor\\\".\\\"actor_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_actor_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"film_actor\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_category_category_id_fkey\\\":\\\"category\\\".\\\"category_id\\\" < \\\"film_category\\\".\\\"category_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_category_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"film_category\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"inventory_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"inventory\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"payment_customer_id_fkey\\\":\\\"customer\\\".\\\"customer_id\\\" < \\\"payment\\\".\\\"customer_id\\\" [update: cascade, delete: restrict]\\n\\nRef 
\\\"payment_rental_id_fkey\\\":\\\"rental\\\".\\\"rental_id\\\" < \\\"payment\\\".\\\"rental_id\\\" [update: cascade, delete: set null]\\n\\nRef \\\"payment_staff_id_fkey\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"payment\\\".\\\"staff_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_customer_id_fkey\\\":\\\"customer\\\".\\\"customer_id\\\" < \\\"rental\\\".\\\"customer_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_inventory_id_fkey\\\":\\\"inventory\\\".\\\"inventory_id\\\" < \\\"rental\\\".\\\"inventory_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_staff_id_key\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"rental\\\".\\\"staff_id\\\"\\n\\nRef \\\"staff_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"staff\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"store_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"store\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"store_manager_staff_id_fkey\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"store\\\".\\\"manager_staff_id\\\" [update: cascade, delete: restrict]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/gordonkwokkwok/DVD-Rental-PostgreSQL-Project\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/dvdrental/customer_analytics_optimization/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 1: Customer Payment Query Optimization\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef check_payment_customer_id_index(conn) -> bool:\n    \"\"\"Check if there's any index on payment.customer_id column.\"\"\"\n    with conn.cursor() as cur:\n        cur.execute(\"\"\"\n            SELECT indexname, indexdef \n            FROM pg_indexes \n            WHERE schemaname = 'public' \n            AND tablename = 'payment'\n            AND indexdef LIKE '%customer_id%'\n        \"\"\")\n        indexes = cur.fetchall()\n        print(indexes)\n        return len(indexes) > 0, indexes\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 60)\n    print(\"PostgreSQL Task 1 Verification: Customer Payment Query Optimization\")\n    print(\"=\" * 60)\n    \n    # Get connection parameters\n    conn_params = get_connection_params()\n    \n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n    \n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n        \n        print(\"\\n🔍 Checking for customer_id index on payment table...\")\n        \n        # Check if any index exists on payment.customer_id\n        has_index, indexes = check_payment_customer_id_index(conn)\n        \n        if has_index:\n            print(\"✅ Found index(es) on payment.customer_id:\")\n            for index_name, index_def in indexes:\n                print(f\"   - {index_name}: {index_def}\")\n        else:\n           
 print(\"❌ No index found on payment.customer_id column\")\n        \n        conn.close()\n        \n        if has_index:\n            print(f\"\\n🎉 Task verification: PASS\")\n            print(f\"   - Index on payment.customer_id exists\")\n            sys.exit(0)\n        else:\n            print(f\"\\n❌ Task verification: FAIL\")\n            print(f\"   - No index found on payment.customer_id\")\n            print(f\"   - Create an index on payment(customer_id) to optimize the queries\")\n            sys.exit(1)\n            \n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/dvdrental/film_inventory_management/description.md",
    "content": "Manage film inventory operations in the DVD rental database.\n\n## Background\n\nYou are the database administrator for the DVD rental store. The store manager has requested several database operations to manage the film inventory. You need to perform multiple operations including adding new films, updating inventory, querying available films, and cleaning up old records.\n\n## Your Task\n\nComplete the following database operations in sequence:\n\n### 1. Add New Films\nAdd these two new films to the database:\n- **Film 1**: Title \"Data Science Adventures\", Description \"A thrilling journey through machine learning algorithms\", Release Year 2024, Language ID 1, Rental Duration 5 days, Rental Rate $3.99, Length 120 minutes, Replacement Cost $15.99, Rating 'PG-13'\n- **Film 2**: Title \"Cloud Computing Chronicles\", Description \"Exploring the world of distributed systems\", Release Year 2024, Language ID 1, Rental Duration 7 days, Rental Rate $4.99, Length 135 minutes, Replacement Cost $18.99, Rating 'PG'\n\n### 2. Add Inventory Records\nFor each new film, add 3 inventory records for store_id = 1 and 2 inventory records for store_id = 2.\n\n### 3. Update Film Information\nUpdate the rental_rate of all films with rating 'PG-13' to increase by 10% (multiply by 1.1).\n\n### 4. Create Available Films Table\nCreate a table called `available_films` with the following structure:\n- `film_id` (INTEGER, PRIMARY KEY)\n- `title` (VARCHAR(255), NOT NULL)\n- `rental_rate` (NUMERIC(4,2), NOT NULL)\n- `length` (SMALLINT)\n\nPopulate this table with films that meet these criteria:\n- Have rental_rate between $3.00 and $5.00\n- Have length greater than 100 minutes  \n- Are available in store_id = 1 (have at least 1 inventory record)\n\n\n### 5. 
Clean Up Inventory\nDelete inventory records for films that meet ALL of the following criteria:\n- Have a replacement_cost greater than $25.00\n- AND have rental_rate less than $1.00\n- AND have no rental history (no records in the rental table)\n\n\n### 6. Create Summary Report Table\nCreate a table called `film_inventory_summary` with the following structure:\n- `title` (VARCHAR(255), NOT NULL)\n- `rental_rate` (NUMERIC(4,2), NOT NULL)\n- `total_inventory` (INTEGER, NOT NULL)\n- `store1_count` (INTEGER, NOT NULL)\n- `store2_count` (INTEGER, NOT NULL)\n\nPopulate this table with a summary query that shows:\n- Film title\n- Current rental rate (after any updates from step 3)\n- Total count of inventory records across all stores\n- Count of inventory records in store_id = 1\n- Count of inventory records in store_id = 2\n\nRequirements for the summary report:\n- Include only films that currently have at least one inventory record  \n- Insert the results sorted by inventory count from highest to lowest, and then alphabetically by film title\n- Ensure all counts reflect the state after completing the previous operations\n\n## Requirements\n\n- Complete all operations in the specified sequence\n- Ensure data integrity throughout all operations\n- Verify that your operations affect the expected number of records\n- Handle any constraint violations appropriately"
  },
  {
    "path": "tasks/postgres/standard/dvdrental/film_inventory_management/meta.json",
    "content": "{\n  \"task_id\": \"film_inventory_management\",\n  \"task_name\": \"Film Inventory Management\",\n  \"category_id\": \"dvdrental\",\n  \"category_name\": \"DVD Rental\",\n  \"description\": \"Manage film inventory through multiple operations including adding films, updating records, and cleaning old data.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-20\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data migration\",\n    \"transactional operations\",\n    \"schema design\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"mpaa_rating\\\" {\\n  \\\"G\\\"\\n  \\\"PG\\\"\\n  \\\"PG-13\\\"\\n  \\\"R\\\"\\n  \\\"NC-17\\\"\\n}\\n\\nTable \\\"customer\\\" {\\n  \\\"customer_id\\\" int4 [pk, not null, increment]\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  \\\"last_name\\\" varchar(45) [not null]\\n  \\\"email\\\" varchar(50)\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"activebool\\\" bool [not null, default: true]\\n  \\\"create_date\\\" date [not null, default: `('now'::text)::date`]\\n  \\\"last_update\\\" timestamp [default: `now()`]\\n  \\\"active\\\" int4\\n\\n  Indexes {\\n    address_id [type: btree, name: \\\"idx_fk_address_id\\\"]\\n    store_id [type: btree, name: \\\"idx_fk_store_id\\\"]\\n    last_name [type: btree, name: \\\"idx_last_name\\\"]\\n  }\\n}\\n\\nTable \\\"actor\\\" {\\n  \\\"actor_id\\\" int4 [pk, not null, increment]\\n  \\\"first_name\\\" varchar(45) [not null]\\n  \\\"last_name\\\" varchar(45) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    last_name [type: btree, name: \\\"idx_actor_last_name\\\"]\\n  }\\n}\\n\\nTable \\\"category\\\" {\\n  \\\"category_id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(25) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"film\\\" {\\n  
\\\"film_id\\\" int4 [pk, not null, increment]\\n  \\\"title\\\" varchar(255) [not null]\\n  \\\"description\\\" text\\n  \\\"release_year\\\" int4\\n  \\\"language_id\\\" int2 [not null]\\n  \\\"rental_duration\\\" int2 [not null, default: 3]\\n  \\\"rental_rate\\\" numeric(4,2) [not null, default: 4.99]\\n  \\\"length\\\" int2\\n  \\\"replacement_cost\\\" numeric(5,2) [not null, default: 19.99]\\n  \\\"rating\\\" mpaa_rating [default: 'G']\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n  \\\"special_features\\\" \\\"text[]\\\"\\n  \\\"fulltext\\\" tsvector [not null]\\n\\n  Indexes {\\n    fulltext [type: gist, name: \\\"film_fulltext_idx\\\"]\\n    language_id [type: btree, name: \\\"idx_fk_language_id\\\"]\\n    title [type: btree, name: \\\"idx_title\\\"]\\n  }\\n}\\n\\nTable \\\"film_actor\\\" {\\n  \\\"actor_id\\\" int2 [not null]\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (actor_id, film_id) [type: btree, name: \\\"film_actor_pkey\\\"]\\n    film_id [type: btree, name: \\\"idx_fk_film_id\\\"]\\n  }\\n}\\n\\nTable \\\"film_category\\\" {\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"category_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (film_id, category_id) [type: btree, name: \\\"film_category_pkey\\\"]\\n  }\\n}\\n\\nTable \\\"address\\\" {\\n  \\\"address_id\\\" int4 [pk, not null, increment]\\n  \\\"address\\\" varchar(50) [not null]\\n  \\\"address2\\\" varchar(50)\\n  \\\"district\\\" varchar(20) [not null]\\n  \\\"city_id\\\" int2 [not null]\\n  \\\"postal_code\\\" varchar(10)\\n  \\\"phone\\\" varchar(20) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    city_id [type: btree, name: \\\"idx_fk_city_id\\\"]\\n  }\\n}\\n\\nTable \\\"city\\\" {\\n  \\\"city_id\\\" int4 [pk, not null, increment]\\n  \\\"city\\\" varchar(50) [not null]\\n  
\\\"country_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    country_id [type: btree, name: \\\"idx_fk_country_id\\\"]\\n  }\\n}\\n\\nTable \\\"country\\\" {\\n  \\\"country_id\\\" int4 [pk, not null, increment]\\n  \\\"country\\\" varchar(50) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"inventory\\\" {\\n  \\\"inventory_id\\\" int4 [pk, not null, increment]\\n  \\\"film_id\\\" int2 [not null]\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (store_id, film_id) [type: btree, name: \\\"idx_store_id_film_id\\\"]\\n  }\\n}\\n\\nTable \\\"language\\\" {\\n  \\\"language_id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" bpchar(20) [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nTable \\\"payment\\\" {\\n  \\\"payment_id\\\" int4 [pk, not null, increment]\\n  \\\"customer_id\\\" int2 [not null]\\n  \\\"staff_id\\\" int2 [not null]\\n  \\\"rental_id\\\" int4 [not null]\\n  \\\"amount\\\" numeric(5,2) [not null]\\n  \\\"payment_date\\\" timestamp [not null]\\n\\n  Indexes {\\n    rental_id [type: btree, name: \\\"idx_fk_rental_id\\\"]\\n    staff_id [type: btree, name: \\\"idx_fk_staff_id\\\"]\\n  }\\n}\\n\\nTable \\\"rental\\\" {\\n  \\\"rental_id\\\" int4 [pk, not null, increment]\\n  \\\"rental_date\\\" timestamp [not null]\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"customer_id\\\" int2 [not null]\\n  \\\"return_date\\\" timestamp\\n  \\\"staff_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n\\n  Indexes {\\n    (rental_date, inventory_id, customer_id) [type: btree, name: \\\"idx_unq_rental_rental_date_inventory_id_customer_id\\\"]\\n    inventory_id [type: btree, name: \\\"idx_fk_inventory_id\\\"]\\n  }\\n}\\n\\nTable \\\"staff\\\" {\\n  \\\"staff_id\\\" int4 [pk, not null, increment]\\n  
\\\"first_name\\\" varchar(45) [not null]\\n  \\\"last_name\\\" varchar(45) [not null]\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"email\\\" varchar(50)\\n  \\\"store_id\\\" int2 [not null]\\n  \\\"active\\\" bool [not null, default: true]\\n  \\\"username\\\" varchar(16) [not null]\\n  \\\"password\\\" varchar(40)\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n  \\\"picture\\\" bytea\\n}\\n\\nTable \\\"store\\\" {\\n  \\\"store_id\\\" int4 [pk, not null, increment]\\n  \\\"manager_staff_id\\\" int2 [unique, not null]\\n  \\\"address_id\\\" int2 [not null]\\n  \\\"last_update\\\" timestamp [not null, default: `now()`]\\n}\\n\\nRef \\\"fk_address_city\\\":\\\"city\\\".\\\"city_id\\\" < \\\"address\\\".\\\"city_id\\\"\\n\\nRef \\\"fk_city\\\":\\\"country\\\".\\\"country_id\\\" < \\\"city\\\".\\\"country_id\\\"\\n\\nRef \\\"customer_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"customer\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_language_id_fkey\\\":\\\"language\\\".\\\"language_id\\\" < \\\"film\\\".\\\"language_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_actor_actor_id_fkey\\\":\\\"actor\\\".\\\"actor_id\\\" < \\\"film_actor\\\".\\\"actor_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_actor_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"film_actor\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_category_category_id_fkey\\\":\\\"category\\\".\\\"category_id\\\" < \\\"film_category\\\".\\\"category_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"film_category_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"film_category\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"inventory_film_id_fkey\\\":\\\"film\\\".\\\"film_id\\\" < \\\"inventory\\\".\\\"film_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"payment_customer_id_fkey\\\":\\\"customer\\\".\\\"customer_id\\\" < 
\\\"payment\\\".\\\"customer_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"payment_rental_id_fkey\\\":\\\"rental\\\".\\\"rental_id\\\" < \\\"payment\\\".\\\"rental_id\\\" [update: cascade, delete: set null]\\n\\nRef \\\"payment_staff_id_fkey\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"payment\\\".\\\"staff_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_customer_id_fkey\\\":\\\"customer\\\".\\\"customer_id\\\" < \\\"rental\\\".\\\"customer_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_inventory_id_fkey\\\":\\\"inventory\\\".\\\"inventory_id\\\" < \\\"rental\\\".\\\"inventory_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"rental_staff_id_key\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"rental\\\".\\\"staff_id\\\"\\n\\nRef \\\"staff_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"staff\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"store_address_id_fkey\\\":\\\"address\\\".\\\"address_id\\\" < \\\"store\\\".\\\"address_id\\\" [update: cascade, delete: restrict]\\n\\nRef \\\"store_manager_staff_id_fkey\\\":\\\"staff\\\".\\\"staff_id\\\" < \\\"store\\\".\\\"manager_staff_id\\\" [update: cascade, delete: restrict]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/gordonkwokkwok/DVD-Rental-PostgreSQL-Project\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/dvdrental/film_inventory_management/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 4: Film Inventory Management\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"Compare two rows with appropriate tolerance for decimals and floats.\"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, (Decimal, float)) and isinstance(expected, (Decimal, float)):\n            # Use higher tolerance for floating point comparisons\n            if abs(float(actual) - float(expected)) > 0.01:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef check_new_films(conn) -> bool:\n    \"\"\"Check if the two new films were added correctly.\"\"\"\n    with conn.cursor() as cur:\n        cur.execute(\"\"\"\n            SELECT title, description, release_year, language_id, \n                   rental_duration, rental_rate, length, replacement_cost, \n                   rating\n            FROM film \n            WHERE title IN ('Data Science Adventures', 'Cloud Computing Chronicles')\n            ORDER BY title\n        \"\"\")\n        actual_films = cur.fetchall()\n        \n        expected_films = [\n            ('Cloud Computing Chronicles', 'Exploring the world of distributed systems', 2024, 1, 7, Decimal('4.99'), 135, Decimal('18.99'), 'PG'),\n            ('Data Science Adventures', 'A thrilling journey through machine learning algorithms', 2024, 
1, 5, Decimal('4.389'), 120, Decimal('15.99'), 'PG-13')\n        ]\n        \n        if len(actual_films) != len(expected_films):\n            print(f\"❌ Expected {len(expected_films)} new films, found {len(actual_films)}\")\n            return False\n            \n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_films, expected_films)):\n            if not rows_match(actual, expected):\n                print(f\"❌ Film {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n                \n        if mismatches > 0:\n            print(f\"❌ Total film mismatches: {mismatches}\")\n            return False\n            \n        print(\"✅ Both new films added correctly\")\n        return True\n\ndef check_inventory_records(conn) -> bool:\n    \"\"\"Check if inventory records were added for new films.\"\"\"\n    with conn.cursor() as cur:\n        cur.execute(\"\"\"\n            SELECT f.title, i.store_id, COUNT(*) as count\n            FROM film f\n            JOIN inventory i ON f.film_id = i.film_id\n            WHERE f.title IN ('Data Science Adventures', 'Cloud Computing Chronicles')\n            GROUP BY f.title, i.store_id\n            ORDER BY f.title, i.store_id\n        \"\"\")\n        actual_inventory = cur.fetchall()\n        \n        expected_inventory = [\n            ('Cloud Computing Chronicles', 1, 3),\n            ('Cloud Computing Chronicles', 2, 2), \n            ('Data Science Adventures', 1, 3),\n            ('Data Science Adventures', 2, 2)\n        ]\n        \n        if len(actual_inventory) != len(expected_inventory):\n            print(f\"❌ Expected {len(expected_inventory)} inventory groups, found {len(actual_inventory)}\")\n            return False\n            \n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_inventory, expected_inventory)):\n            if not rows_match(actual, expected):\n                print(f\"❌ Inventory group 
{i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n                \n        if mismatches > 0:\n            print(f\"❌ Total inventory mismatches: {mismatches}\")\n            return False\n                \n        print(\"✅ Inventory records added correctly\")\n        return True\n\ndef check_available_films_table(conn) -> bool:\n    \"\"\"Check if available_films table was created and populated correctly.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT film_id, title, rental_rate, length\n            FROM available_films\n            ORDER BY rental_rate DESC, length DESC, title ASC\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            SELECT DISTINCT f.film_id, f.title, f.rental_rate, f.length\n            FROM film f\n            JOIN inventory i ON f.film_id = i.film_id\n            WHERE f.rental_rate >= 3.00 AND f.rental_rate <= 5.00\n            AND f.length > 100\n            AND i.store_id = 1\n            ORDER BY f.rental_rate DESC, f.length DESC, f.title ASC\n        \"\"\")\n        expected_results = cur.fetchall()\n        \n        if len(actual_results) != len(expected_results):\n            print(f\"❌ available_films table has {len(actual_results)} records, expected {len(expected_results)}\")\n            return False\n            \n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ available_films row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n                \n        if mismatches > 0:\n            print(f\"❌ Total available_films mismatches: {mismatches}\")\n            
return False\n            \n        print(f\"✅ available_films table created and populated correctly ({len(actual_results)} records)\")\n        return True\n\ndef check_inventory_cleanup(conn) -> bool:\n    \"\"\"Check if inventory cleanup was performed correctly.\"\"\"\n    with conn.cursor() as cur:\n        # Check that no inventory exists for films with replacement_cost > 25 AND rental_rate < 1\n        # that also don't have rental records (safe to delete)\n        cur.execute(\"\"\"\n            SELECT COUNT(*)\n            FROM inventory i\n            JOIN film f ON i.film_id = f.film_id\n            WHERE f.replacement_cost > 25.00 AND f.rental_rate < 1.00\n            AND NOT EXISTS (SELECT 1 FROM rental r WHERE r.inventory_id = i.inventory_id)\n        \"\"\")\n        \n        remaining_count = cur.fetchone()[0]\n        \n        if remaining_count > 0:\n            print(f\"❌ Found {remaining_count} inventory records that should have been deleted (no rental history)\")\n            return False\n            \n        print(\"✅ Inventory cleanup completed correctly\")\n        return True\n\ndef check_summary_table(conn) -> bool:\n    \"\"\"Check if film_inventory_summary table was created and populated correctly.\"\"\"\n    with conn.cursor() as cur:\n            \n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT title, rental_rate, total_inventory, store1_count, store2_count\n            FROM film_inventory_summary\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            SELECT f.title, f.rental_rate,\n                   COUNT(i.inventory_id) as total_inventory,\n                   COUNT(CASE WHEN i.store_id = 1 THEN 1 END) as store1_count,\n                   COUNT(CASE WHEN i.store_id = 2 THEN 1 END) as store2_count\n            FROM film f\n            JOIN inventory i ON f.film_id = i.film_id\n            
GROUP BY f.film_id, f.title, f.rental_rate\n            ORDER BY total_inventory DESC, f.title ASC\n        \"\"\")\n        expected_results = cur.fetchall()\n        \n        if len(actual_results) != len(expected_results):\n            print(f\"❌ film_inventory_summary table has {len(actual_results)} records, expected {len(expected_results)}\")\n            return False\n            \n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Summary row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n                \n        if mismatches > 0:\n            print(f\"❌ Total summary table mismatches: {mismatches}\")\n            return False\n                \n        print(f\"✅ film_inventory_summary table created and populated correctly ({len(actual_results)} records)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 70)\n    print(\"PostgreSQL Task 4 Verification: Film Inventory Management\")\n    print(\"=\" * 70)\n    \n    # Get connection parameters\n    conn_params = get_connection_params()\n    \n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n    \n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n        \n        # Verify all operations with short-circuit evaluation\n        success = (\n            check_new_films(conn) and \n            check_inventory_records(conn) and\n            check_available_films_table(conn) and \n            check_inventory_cleanup(conn) and\n            check_summary_table(conn)\n        )\n        \n        conn.close()\n        \n        if success:\n            print(f\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        
else:\n            print(f\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n            \n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_demographics_report/description.md",
    "content": "Generate a comprehensive employee demographics and basic statistics report for the annual company overview. The HR team needs simple, clear statistical summaries about our workforce composition to include in the annual report and diversity initiatives.\n\n## Your Tasks:\n\n1. **Create the gender statistics table** — build a table called `gender_statistics` in the `employees` schema with these exact columns:\n   * `gender` (varchar) — gender ('M' or 'F')\n   * `total_employees` (integer) — total number of employees of this gender\n   * `current_employees` (integer) — current employees of this gender (have active salary)\n   * `percentage_of_workforce` (decimal) — percentage of current workforce\n\n2. **Create the age group analysis table** — build a table called `age_group_analysis` in the `employees` schema with:\n   * `age_group` (varchar) — age range ('20-29', '30-39', '40-49', '50-59', '60+')\n   * `employee_count` (integer) — number of current employees in age group\n   * `avg_salary` (decimal) — average current salary for age group\n   * `avg_tenure_days` (decimal) — average days of service\n\n3. **Create the birth month distribution table** — build a table called `birth_month_distribution` in the `employees` schema with:\n   * `birth_month` (integer) — month number (1-12)\n   * `month_name` (varchar) — month name ('January', 'February', etc.)\n   * `employee_count` (integer) — total employees born in this month\n   * `current_employee_count` (integer) — current employees born in this month\n\n4. **Create the hiring year summary table** — build a table called `hiring_year_summary` in the `employees` schema with:\n   * `hire_year` (integer) — year employees were hired\n   * `employees_hired` (integer) — number of employees hired that year\n   * `still_employed` (integer) — how many from that year are still employed\n   * `retention_rate` (decimal) — percentage still employed (still_employed/employees_hired * 100)\n\n5. 
**Apply age group classification** based on each employee's current age in completed years (as of today):\n   * **20-29**: Ages 20-29\n   * **30-39**: Ages 30-39\n   * **40-49**: Ages 40-49\n   * **50-59**: Ages 50-59\n   * **60+**: Ages 60 and above\n\n6. **Calculate workforce composition** — determine current workforce demographics using employees with active salary records (to_date = '9999-01-01').\n\n7. **Focus on basic statistics** — create simple counts, averages, and percentages that are easy to understand and verify.\n\nThe analysis will provide clear demographic insights for HR reporting and workforce planning.\n"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_demographics_report/meta.json",
    "content": "{\n  \"task_id\": \"employee_demographics_report\",\n  \"task_name\": \"Employee Demographics Report\",\n  \"category_id\": \"employees\",\n  \"category_name\": \"Employees\",\n  \"description\": \"Generate comprehensive employee demographics report with gender statistics, age groups, birth months, and hiring trends.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-14\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"reporting and analytics\",\n    \"statistical aggregation\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"employees\\\".\\\"employee_gender\\\" {\\n  \\\"M\\\"\\n  \\\"F\\\"\\n}\\n\\nTable \\\"employees\\\".\\\"department\\\" {\\n  \\\"id\\\" bpchar(4) [pk, not null]\\n  \\\"dept_name\\\" varchar(40) [unique, not null]\\n}\\n\\nTable \\\"employees\\\".\\\"department_employee\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16982_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16982_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"department_manager\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16985_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16985_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"employee\\\" {\\n  \\\"id\\\" int8 [pk, not null, increment]\\n  \\\"birth_date\\\" date [not null]\\n  \\\"first_name\\\" varchar(14) [not null]\\n  \\\"last_name\\\" varchar(16) [not null]\\n  \\\"gender\\\" employees.employee_gender [not null]\\n  \\\"hire_date\\\" date [not null]\\n}\\n\\nTable 
\\\"employees\\\".\\\"salary\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"amount\\\" int8 [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, from_date) [type: btree, name: \\\"idx_16991_primary\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"title\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"title\\\" varchar(50) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date\\n\\n  Indexes {\\n    (employee_id, title, from_date) [type: btree, name: \\\"idx_16994_primary\\\"]\\n  }\\n}\\n\\nRef \\\"dept_emp_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_emp_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"salaries_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"salary\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"titles_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"title\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_demographics_report/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 3: Employee Demographics Report\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.1 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_gender_statistics_results(conn) -> bool:\n    \"\"\"Verify the gender statistics results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT gender, total_employees, current_employees, percentage_of_workforce\n            FROM employees.gender_statistics\n            ORDER BY gender\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH current_emp AS (\n            SELECT DISTINCT s.employee_id\n            FROM employees.salary s\n            WHERE s.to_date = DATE '9999-01-01'\n            ),\n            total_current AS (\n            SELECT COUNT(*) AS cnt\n            FROM current_emp\n            )\n            
SELECT\n            e.gender::varchar AS gender,\n            COUNT(*) AS total_employees,\n            COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL) AS current_employees,\n            (COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL))::DECIMAL\n                / NULLIF((SELECT cnt FROM total_current), 0) * 100 AS percentage_of_workforce\n            FROM employees.employee e\n            LEFT JOIN current_emp ce ON ce.employee_id = e.id\n            WHERE e.gender IN ('M','F')\n            GROUP BY e.gender\n            ORDER BY gender;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} gender statistics results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Gender statistics results are correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_age_group_results(conn) -> bool:\n    \"\"\"Verify the age group analysis results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT age_group, employee_count, avg_salary, avg_tenure_days\n            FROM employees.age_group_analysis\n            ORDER BY age_group\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\nWITH current_salary AS (\n  SELECT employee_id, amount\n  FROM (\n    SELECT s.*,\n           
ROW_NUMBER() OVER (\n             PARTITION BY s.employee_id\n             ORDER BY s.from_date DESC, s.amount DESC\n           ) AS rn\n    FROM employees.salary s\n    WHERE s.to_date = DATE '9999-01-01'\n  ) x\n  WHERE rn = 1\n),\nemp_age AS (\n  SELECT\n    e.id AS employee_id,\n    e.hire_date,\n    EXTRACT(YEAR FROM AGE(CURRENT_DATE, e.birth_date))::INT AS age_years\n  FROM employees.employee e\n  WHERE e.birth_date IS NOT NULL\n)\nSELECT\n  CASE\n    WHEN a.age_years BETWEEN 20 AND 29 THEN '20-29'\n    WHEN a.age_years BETWEEN 30 AND 39 THEN '30-39'\n    WHEN a.age_years BETWEEN 40 AND 49 THEN '40-49'\n    WHEN a.age_years BETWEEN 50 AND 59 THEN '50-59'\n    WHEN a.age_years >= 60 THEN '60+'\n  END AS age_group,\n  COUNT(*)::INT AS employee_count,\n  AVG(cs.amount) AS avg_salary,\n  AVG((CURRENT_DATE - a.hire_date)::INT) AS avg_tenure_days\nFROM emp_age a\nJOIN current_salary cs ON cs.employee_id = a.employee_id\nWHERE a.age_years >= 20\nGROUP BY 1\nORDER BY 1;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} age group results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Age group analysis results are correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_birth_month_results(conn) -> bool:\n    \"\"\"Verify the birth month distribution results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from 
the created table\n        cur.execute(\"\"\"\n            SELECT birth_month, month_name, employee_count, current_employee_count\n            FROM employees.birth_month_distribution\n            ORDER BY birth_month\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH current_emp AS (\n            SELECT DISTINCT s.employee_id\n            FROM employees.salary s\n            WHERE s.to_date = DATE '9999-01-01'\n            ),\n            months AS (\n            SELECT gs AS birth_month\n            FROM generate_series(1, 12) AS gs\n            )\n            SELECT\n            m.birth_month::INTEGER AS birth_month,\n            CASE m.birth_month\n                WHEN 1 THEN 'January'   WHEN 2 THEN 'February' WHEN 3 THEN 'March'\n                WHEN 4 THEN 'April'     WHEN 5 THEN 'May'      WHEN 6 THEN 'June'\n                WHEN 7 THEN 'July'      WHEN 8 THEN 'August'   WHEN 9 THEN 'September'\n                WHEN 10 THEN 'October'  WHEN 11 THEN 'November'WHEN 12 THEN 'December'\n            END AS month_name,\n            COUNT(e.id)::INTEGER AS employee_count,\n            COUNT(ce.employee_id)::INTEGER AS current_employee_count\n            FROM months m\n            LEFT JOIN employees.employee e\n            ON EXTRACT(MONTH FROM e.birth_date) = m.birth_month\n            LEFT JOIN current_emp ce\n            ON ce.employee_id = e.id\n            GROUP BY m.birth_month\n            ORDER BY m.birth_month;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} birth month results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # 
Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Birth month distribution results are correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_hiring_year_results(conn) -> bool:\n    \"\"\"Verify the hiring year summary results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT hire_year, employees_hired, still_employed, retention_rate\n            FROM employees.hiring_year_summary\n            ORDER BY hire_year\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH current_emp AS (\n            SELECT DISTINCT s.employee_id\n            FROM employees.salary s\n            WHERE s.to_date = DATE '9999-01-01'\n            ),\n            base AS (\n            SELECT e.id, EXTRACT(YEAR FROM e.hire_date)::INT AS hire_year\n            FROM employees.employee e\n            WHERE e.hire_date IS NOT NULL\n            )\n            SELECT\n            b.hire_year,\n            COUNT(*)::INT AS employees_hired,\n            COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL)::INT AS still_employed,\n            (COUNT(*) FILTER (WHERE ce.employee_id IS NOT NULL))::DECIMAL\n                / NULLIF(COUNT(*), 0) * 100 AS retention_rate\n            FROM base b\n            LEFT JOIN current_emp ce ON ce.employee_id = b.id\n            GROUP BY b.hire_year\n            ORDER BY b.hire_year;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} hiring year results, got {len(actual_results)}\")\n            return 
False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Hiring year summary results are correct ({len(actual_results)} records)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all four analysis results\n        success = (\n            verify_gender_statistics_results(conn) and \n            verify_age_group_results(conn) and \n            verify_birth_month_results(conn) and\n            verify_hiring_year_results(conn)\n        )\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_performance_analysis/description.md",
    "content": "Create a comprehensive employee performance evaluation system that analyzes career progression patterns and salary equity across our organization. The executive team needs data-driven insights for upcoming promotion decisions and salary adjustment planning.\n\n## Your Tasks:\n\n1. **Create the employee performance analysis table** — build a table called `employee_performance_analysis` in the `employees` schema with these exact columns:\n   * `employee_id` (bigint) — the employee's ID\n   * `performance_category` (varchar) — classification of employee performance ('high_achiever', 'steady_performer', 'needs_attention')\n   * `salary_growth_rate` (decimal) — percentage salary increase from first salary record to current\n   * `days_of_service` (integer) — total days with the company\n   * `promotion_count` (integer) — number of different titles held\n\n2. **Analyze only current employees** — focus on employees who currently have active salary records (to_date = '9999-01-01').\n\n3. **Apply performance classification rules**:\n   * **High achievers**: Salary growth rate > 40% AND more than 1 title held\n   * **Needs attention**: Salary growth rate < 15% AND more than 3650 days of service (10 years)\n   * **Steady performers**: All other current employees (default category)\n\n4. **Create the department salary analysis table** — build a table called `department_salary_analysis` in the `employees` schema with:\n   * `department_name` (varchar) — the department name\n   * `avg_current_salary` (decimal) — average current salary in the department (only current employees)\n   * `employee_count` (integer) — total current employees in the department\n   * `salary_range_spread` (integer) — difference between max and min salary (current employees only)\n\n5. 
**Calculate salary equity metrics** — populate the department table with current salary statistics for active employees only to identify potential pay equity issues across departments.\n\nThe analysis should help leadership make informed decisions about promotions, salary adjustments, and talent retention strategies."
  },
  {
    "path": "tasks/postgres/standard/employees/employee_performance_analysis/meta.json",
    "content": "{\n  \"task_id\": \"employee_performance_analysis\",\n  \"task_name\": \"Employee Performance Analysis\",\n  \"category_id\": \"employees\",\n  \"category_name\": \"Employees\",\n  \"description\": \"Create performance evaluation system analyzing career progression patterns and salary equity for promotion and compensation decisions.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-14\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"reporting and analytics\",\n    \"statistical aggregation\",\n    \"schema design\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"employees\\\".\\\"employee_gender\\\" {\\n  \\\"M\\\"\\n  \\\"F\\\"\\n}\\n\\nTable \\\"employees\\\".\\\"department\\\" {\\n  \\\"id\\\" bpchar(4) [pk, not null]\\n  \\\"dept_name\\\" varchar(40) [unique, not null]\\n}\\n\\nTable \\\"employees\\\".\\\"department_employee\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16982_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16982_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"department_manager\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16985_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16985_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"employee\\\" {\\n  \\\"id\\\" int8 [pk, not null, increment]\\n  \\\"birth_date\\\" date [not null]\\n  \\\"first_name\\\" varchar(14) [not null]\\n  \\\"last_name\\\" varchar(16) [not null]\\n  \\\"gender\\\" employees.employee_gender [not null]\\n  \\\"hire_date\\\" date 
[not null]\\n}\\n\\nTable \\\"employees\\\".\\\"salary\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"amount\\\" int8 [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, from_date) [type: btree, name: \\\"idx_16991_primary\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"title\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"title\\\" varchar(50) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date\\n\\n  Indexes {\\n    (employee_id, title, from_date) [type: btree, name: \\\"idx_16994_primary\\\"]\\n  }\\n}\\n\\nRef \\\"dept_emp_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_emp_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"salaries_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"salary\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"titles_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"title\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_performance_analysis/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 1: Employee Performance Analysis\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.1 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_performance_results(conn) -> bool:\n    \"\"\"Verify the employee performance analysis results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT employee_id, performance_category, salary_growth_rate, \n                   days_of_service, promotion_count\n            FROM employees.employee_performance_analysis \n            ORDER BY employee_id\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query - use first salary record as starting salary\n        cur.execute(\"\"\"\n            WITH current_salary AS (\n            SELECT employee_id, amount AS current_amount\n            FROM (\n                SELECT s.*,\n                    ROW_NUMBER() OVER (PARTITION BY 
s.employee_id\n                                        ORDER BY s.from_date DESC, s.amount DESC) AS rn\n                FROM employees.salary s\n                WHERE s.to_date = DATE '9999-01-01'\n            ) x\n            WHERE rn = 1\n            ),\n            first_salary AS (\n            SELECT employee_id, amount AS first_amount\n            FROM (\n                SELECT s.*,\n                    ROW_NUMBER() OVER (PARTITION BY s.employee_id\n                                        ORDER BY s.from_date ASC, s.amount ASC) AS rn\n                FROM employees.salary s\n            ) x\n            WHERE rn = 1\n            ),\n            title_counts AS (\n            SELECT t.employee_id, COUNT(DISTINCT t.title) AS promotion_count\n            FROM employees.title t\n            GROUP BY t.employee_id\n            ),\n            base AS (\n            SELECT e.id AS employee_id,\n                    e.hire_date,\n                    cs.current_amount,\n                    fs.first_amount,\n                    COALESCE(tc.promotion_count, 0) AS promotion_count\n            FROM employees.employee e\n            JOIN current_salary cs ON cs.employee_id = e.id\n            JOIN first_salary  fs ON fs.employee_id = e.id\n            LEFT JOIN title_counts tc ON tc.employee_id = e.id\n            ),\n            scored AS (\n            SELECT\n                employee_id,\n                ((current_amount - first_amount) / NULLIF(first_amount, 0)::NUMERIC) * 100 AS salary_growth_rate,\n                (CURRENT_DATE - hire_date)::INTEGER AS days_of_service,\n                promotion_count\n            FROM base\n            )\n            SELECT\n            s.employee_id,\n            CASE\n                WHEN s.salary_growth_rate > 40 AND s.promotion_count > 1 THEN 'high_achiever'\n                WHEN s.salary_growth_rate < 15 AND s.days_of_service > 3650 THEN 'needs_attention'\n                ELSE 'steady_performer'\n            END AS 
performance_category,\n            s.salary_growth_rate,\n            s.days_of_service,\n            s.promotion_count AS promotion_count\n            FROM scored s\n            ORDER BY s.employee_id;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} performance results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Employee performance results are correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_department_results(conn) -> bool:\n    \"\"\"Verify the department salary analysis results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT department_name, avg_current_salary, employee_count, salary_range_spread\n            FROM employees.department_salary_analysis\n            ORDER BY department_name\n        \"\"\")\n        actual_results = cur.fetchall()\n\n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH current_salary AS (\n            SELECT employee_id, amount\n            FROM (\n                SELECT s.*,\n                    ROW_NUMBER() OVER (PARTITION BY s.employee_id\n                                        ORDER BY s.from_date DESC, s.amount DESC) AS rn\n                FROM employees.salary s\n                WHERE s.to_date = DATE '9999-01-01'\n            ) x\n            WHERE rn = 
1\n            ),\n            current_dept AS (\n            SELECT DISTINCT de.employee_id, de.department_id\n            FROM employees.department_employee de\n            WHERE de.to_date = DATE '9999-01-01'\n            )\n            SELECT \n            d.dept_name AS department_name,\n            AVG(cs.amount)::DECIMAL AS avg_current_salary,\n            COUNT(DISTINCT cd.employee_id) AS employee_count,\n            (MAX(cs.amount) - MIN(cs.amount)) AS salary_range_spread\n            FROM employees.department d\n            JOIN current_dept cd ON cd.department_id = d.id\n            JOIN current_salary cs ON cs.employee_id = cd.employee_id\n            GROUP BY d.id, d.dept_name\n            ORDER BY d.dept_name;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} department results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Department salary results are correct ({len(actual_results)} records)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify results\n        success = verify_performance_results(conn) and verify_department_results(conn)\n\n        conn.close()\n\n 
       if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_project_tracking/description.md",
    "content": "Create and manage a comprehensive employee project tracking system using database schema design and data manipulation operations. The IT team needs you to build the database structure from scratch and populate it with specific initial data to support project management workflows.\n\n## Your Tasks:\n\n1. **Create the project tracking tables** — build three new tables in the `employees` schema:\n   \n   **Table 1: `employee_projects`**\n   * `project_id` (integer, primary key, auto-increment)\n   * `project_name` (varchar(100), not null)\n   * `start_date` (date, not null)\n   * `end_date` (date)\n   * `budget` (decimal(10,2))\n   * `status` (varchar(20), default 'active')\n\n   **Table 2: `project_assignments`**\n   * `assignment_id` (integer, primary key, auto-increment)\n   * `employee_id` (bigint, not null)\n   * `project_id` (integer, not null)\n   * `role` (varchar(50), not null)\n   * `allocation_percentage` (integer, check constraint: between 1 and 100)\n   * `assigned_date` (date, not null)\n\n   **Table 3: `project_milestones`**\n   * `milestone_id` (integer, primary key, auto-increment)\n   * `project_id` (integer, not null)\n   * `milestone_name` (varchar(100), not null)\n   * `due_date` (date, not null)\n   * `completed` (boolean, default false)\n\n2. **Add foreign key relationships**:\n   * `project_assignments.employee_id` → `employees.employee.id`\n   * `project_assignments.project_id` → `employees.employee_projects.project_id`\n   * `project_milestones.project_id` → `employees.employee_projects.project_id`\n\n3. **Create performance indexes**:\n   * Index named `idx_projects_status` on `employee_projects.status`\n   * Composite index named `idx_assignments_emp_proj` on `project_assignments(employee_id, project_id)`\n   * Index named `idx_milestones_due_date` on `project_milestones.due_date`\n\n4. 
**Insert exactly this initial data**:\n   \n   **Into `employee_projects`:**\n   * Project 1: name='Database Modernization', start_date='2024-01-15', end_date='2024-06-30', budget=250000.00, status='active'\n   * Project 2: name='Employee Portal Upgrade', start_date='2024-02-01', end_date='2024-05-15', budget=180000.00, status='active'  \n   * Project 3: name='HR Analytics Dashboard', start_date='2023-11-01', end_date='2024-01-31', budget=120000.00, status='active'\n\n   **Into `project_assignments` (assign ALL current employees):**\n   * All employees from Development department → Project 1 ('Database Modernization'), role='Developer', allocation=80%\n   * All employees from Human Resources department → Project 2 ('Employee Portal Upgrade'), role='Business Analyst', allocation=60%\n   * All employees from Marketing department → Project 3 ('HR Analytics Dashboard'), role='Marketing Specialist', allocation=40%\n   * All employees from Finance department → Project 1 ('Database Modernization'), role='Financial Analyst', allocation=30%\n   * All employees from Sales department → Project 2 ('Employee Portal Upgrade'), role='Sales Representative', allocation=50%\n   * All employees from Research department → Project 3 ('HR Analytics Dashboard'), role='Research Analyst', allocation=70%\n   * All employees from Production department → Project 1 ('Database Modernization'), role='Production Coordinator', allocation=45%\n   * All employees from Quality Management department → Project 2 ('Employee Portal Upgrade'), role='QA Specialist', allocation=85%\n   * All employees from Customer Service department → Project 3 ('HR Analytics Dashboard'), role='Customer Success', allocation=35%\n   * All employees should have assigned_date='2024-01-01'\n\n   **Into `project_milestones`:**\n   * Project 1: 'Design Phase Complete' due '2024-03-01', 'Implementation Complete' due '2024-05-15'\n   * Project 2: 'UI/UX Approval' due '2024-03-15', 'Beta Testing' due '2024-04-30'\n   * Project 3: 
'Data Collection' due '2023-12-15', 'Dashboard Launch' due '2024-01-25'\n\n5. **Perform these exact data updates, in the order listed**:\n   * Update Project 3 ('HR Analytics Dashboard') status to 'completed'\n   * Increase budget by 15% for all projects with status 'active'\n   * Mark the milestone 'Data Collection' as completed (set completed = true)\n\n6. **Add a new column to `employee_projects`**:\n   * Add a `priority` column (varchar(10)) with a check constraint allowing only 'low', 'medium', 'high'\n   * Update all existing projects: set priority='high' for 'Database Modernization' and priority='medium' for all others\n"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_project_tracking/meta.json",
    "content": "{\n  \"task_id\": \"employee_project_tracking\",\n  \"task_name\": \"Employee Project Tracking\",\n  \"category_id\": \"employees\",\n  \"category_name\": \"Employees\",\n  \"description\": \"Build project tracking system from scratch with tables for projects, assignments, milestones, and performance indexes.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-14\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"schema design\",\n    \"data migration\",\n    \"data integrity enforcement\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"employees\\\".\\\"employee_gender\\\" {\\n  \\\"M\\\"\\n  \\\"F\\\"\\n}\\n\\nTable \\\"employees\\\".\\\"department\\\" {\\n  \\\"id\\\" bpchar(4) [pk, not null]\\n  \\\"dept_name\\\" varchar(40) [unique, not null]\\n}\\n\\nTable \\\"employees\\\".\\\"department_employee\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16982_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16982_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"department_manager\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16985_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16985_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"employee\\\" {\\n  \\\"id\\\" int8 [pk, not null, increment]\\n  \\\"birth_date\\\" date [not null]\\n  \\\"first_name\\\" varchar(14) [not null]\\n  \\\"last_name\\\" varchar(16) [not null]\\n  \\\"gender\\\" employees.employee_gender [not null]\\n  \\\"hire_date\\\" date [not null]\\n}\\n\\nTable 
\\\"employees\\\".\\\"salary\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"amount\\\" int8 [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, from_date) [type: btree, name: \\\"idx_16991_primary\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"title\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"title\\\" varchar(50) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date\\n\\n  Indexes {\\n    (employee_id, title, from_date) [type: btree, name: \\\"idx_16994_primary\\\"]\\n  }\\n}\\n\\nRef \\\"dept_emp_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_emp_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"salaries_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"salary\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"titles_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"title\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_project_tracking/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 5: Database Schema and Data Operations\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.1 tolerance\n    For date types: convert to string for comparison\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, (Decimal, float, int)):\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif hasattr(actual, 'strftime'):  # datetime.date or datetime.datetime\n            if str(actual) != str(expected):\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_table_structures(conn) -> bool:\n    \"\"\"Verify that all three tables were created with correct structure.\"\"\"\n    with conn.cursor() as cur:\n        # Check if tables exist\n        cur.execute(\"\"\"\n            SELECT table_name FROM information_schema.tables \n            WHERE table_schema = 'employees' \n            AND table_name IN ('employee_projects', 'project_assignments', 'project_milestones')\n            ORDER BY table_name\n        \"\"\")\n        tables = [row[0] for row in cur.fetchall()]\n        \n        if len(tables) != 3:\n            print(f\"❌ 
Expected 3 tables, found {len(tables)}: {tables}\")\n            return False\n            \n        # Check foreign key constraints exist\n        cur.execute(\"\"\"\n            SELECT COUNT(*) FROM information_schema.table_constraints \n            WHERE table_schema = 'employees' \n            AND constraint_type = 'FOREIGN KEY'\n            AND table_name IN ('project_assignments', 'project_milestones')\n        \"\"\")\n        fkey_count = cur.fetchone()[0]\n        \n        if fkey_count != 3:\n            print(f\"❌ Expected 3 foreign key constraints, found {fkey_count}\")\n            return False\n            \n        # Check if priority column exists (added in step 6)\n        cur.execute(\"\"\"\n            SELECT COUNT(*) FROM information_schema.columns \n            WHERE table_schema = 'employees' AND table_name = 'employee_projects'\n            AND column_name = 'priority'\n        \"\"\")\n        priority_exists = cur.fetchone()[0]\n        \n        if priority_exists == 0:\n            print(\"❌ Priority column was not added to employee_projects table\")\n            return False\n            \n        print(\"✅ Table structures are correct\")\n        return True\n\ndef verify_indexes(conn) -> bool:\n    \"\"\"Verify that required indexes were created.\"\"\"\n    with conn.cursor() as cur:\n        # Check for specific indexes\n        cur.execute(\"\"\"\n            SELECT COUNT(*) \n            FROM pg_indexes \n            WHERE schemaname = 'employees' \n            AND indexname IN ('idx_projects_status', 'idx_assignments_emp_proj', 'idx_milestones_due_date')\n        \"\"\")\n        index_count = cur.fetchone()[0]\n        \n        if index_count != 3:\n            print(f\"❌ Expected 3 required indexes, got {index_count}\")\n            return False\n                \n        print(\"✅ All required indexes are present\")\n        return True\n\ndef verify_project_data(conn) -> bool:\n    \"\"\"Verify that project data was inserted 
and updated correctly.\"\"\"\n    with conn.cursor() as cur:\n        # Check project data after updates\n        cur.execute(\"\"\"\n            SELECT project_name, start_date, end_date, budget, status, priority\n            FROM employees.employee_projects\n            ORDER BY project_name\n        \"\"\")\n        projects = cur.fetchall()\n        \n        if len(projects) != 3:\n            print(f\"❌ Expected 3 projects, found {len(projects)}\")\n            return False\n            \n        # Expected final state after all updates\n        expected = {\n            'Database Modernization': ('2024-01-15', '2024-06-30', 287500.00, 'active', 'high'),\n            'Employee Portal Upgrade': ('2024-02-01', '2024-05-15', 207000.00, 'active', 'medium'),\n            'HR Analytics Dashboard': ('2023-11-01', '2024-01-31', 120000.00, 'completed', 'medium')\n        }\n        \n        for project in projects:\n            name = project[0]\n            if name not in expected:\n                print(f\"❌ Unexpected project: {name}\")\n                return False\n                \n            exp = expected[name]\n            # Use rows_match for comparison\n            expected_row = (name,) + exp\n            if not rows_match(project, expected_row):\n                print(f\"❌ Project {name} data mismatch: expected {expected_row}, got {project}\")\n                return False\n                \n        print(\"✅ Project data is correct\")\n        return True\n\ndef verify_assignment_data(conn) -> bool:\n    \"\"\"Verify that all current employees were assigned to projects by department.\"\"\"\n    with conn.cursor() as cur:\n        # Check total assignment count matches current employee count\n        cur.execute(\"\"\"\n            SELECT COUNT(*) FROM employees.project_assignments\n        \"\"\")\n        assignment_count = cur.fetchone()[0]\n        \n        cur.execute(\"\"\"\n            SELECT COUNT(DISTINCT de.employee_id) \n            FROM 
employees.department_employee de\n            WHERE de.to_date = '9999-01-01'\n        \"\"\")\n        current_employee_count = cur.fetchone()[0]\n        \n        if assignment_count != current_employee_count:\n            print(f\"❌ Expected {current_employee_count} assignments, found {assignment_count}\")\n            return False\n            \n        # Check department-project mapping\n        cur.execute(\"\"\"\n            SELECT d.dept_name, pa.project_id, pa.role, pa.allocation_percentage, COUNT(*)\n            FROM employees.project_assignments pa\n            JOIN employees.department_employee de ON pa.employee_id = de.employee_id AND de.to_date = '9999-01-01'\n            JOIN employees.department d ON de.department_id = d.id\n            JOIN employees.employee_projects ep ON pa.project_id = ep.project_id\n            GROUP BY d.dept_name, pa.project_id, pa.role, pa.allocation_percentage\n            ORDER BY d.dept_name\n        \"\"\")\n        dept_assignments = cur.fetchall()\n        \n        # Expected department-project mappings\n        expected_mappings = {\n            'Development': (1, 'Developer', 80),\n            'Human Resources': (2, 'Business Analyst', 60),\n            'Marketing': (3, 'Marketing Specialist', 40),\n            'Finance': (1, 'Financial Analyst', 30),\n            'Sales': (2, 'Sales Representative', 50),\n            'Research': (3, 'Research Analyst', 70),\n            'Production': (1, 'Production Coordinator', 45),\n            'Quality Management': (2, 'QA Specialist', 85),\n            'Customer Service': (3, 'Customer Success', 35)\n        }\n        \n        dept_found = {}\n        for assignment in dept_assignments:\n            dept_name, project_id, role, allocation, _ = assignment  # Ignore count\n            if dept_name in dept_found:\n                print(f\"❌ Department {dept_name} has multiple assignments\")\n                return False\n            dept_found[dept_name] = (project_id, role, 
allocation)\n            \n        for dept, expected in expected_mappings.items():\n            if dept not in dept_found:\n                print(f\"❌ Department {dept} has no assignments\")\n                return False\n            if dept_found[dept] != expected:\n                print(f\"❌ Department {dept} assignment mismatch: expected {expected}, got {dept_found[dept]}\")\n                return False\n                \n        # Check that all assignments have correct assigned_date\n        cur.execute(\"\"\"\n            SELECT COUNT(*) FROM employees.project_assignments \n            WHERE assigned_date != '2024-01-01'\n        \"\"\")\n        wrong_date_count = cur.fetchone()[0]\n        \n        if wrong_date_count > 0:\n            print(f\"❌ {wrong_date_count} assignments have incorrect assigned_date\")\n            return False\n                \n        print(\"✅ Assignment data is correct\")\n        return True\n\ndef verify_milestone_data(conn) -> bool:\n    \"\"\"Verify that milestone data was inserted and updated correctly.\"\"\"\n    with conn.cursor() as cur:\n        cur.execute(\"\"\"\n            SELECT project_id, milestone_name, due_date, completed\n            FROM employees.project_milestones\n            ORDER BY project_id, milestone_name\n        \"\"\")\n        milestones = cur.fetchall()\n        \n        if len(milestones) != 6:\n            print(f\"❌ Expected 6 milestones, found {len(milestones)}\")\n            return False\n            \n        # Expected milestones\n        expected_milestones = {\n            (1, 'Design Phase Complete'): ('2024-03-01', False),\n            (1, 'Implementation Complete'): ('2024-05-15', False),\n            (2, 'UI/UX Approval'): ('2024-03-15', False),\n            (2, 'Beta Testing'): ('2024-04-30', False),\n            (3, 'Data Collection'): ('2023-12-15', True),  # Should be completed\n            (3, 'Dashboard Launch'): ('2024-01-25', False)\n        }\n        \n        for 
milestone in milestones:\n            project_id, name, due_date, completed = milestone\n            key = (project_id, name)\n            \n            if key not in expected_milestones:\n                print(f\"❌ Unexpected milestone: {key}\")\n                return False\n                \n            expected_due, expected_completed = expected_milestones[key]\n            if str(due_date) != expected_due or completed != expected_completed:\n                print(f\"❌ Milestone {name} mismatch: expected ({expected_due}, {expected_completed}), got ({due_date}, {completed})\")\n                return False\n                \n        print(\"✅ Milestone data is correct\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all components\n        success = (\n            verify_table_structures(conn) and \n            verify_indexes(conn) and\n            verify_project_data(conn) and\n            verify_assignment_data(conn) and\n            verify_milestone_data(conn)\n        )\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_retention_analysis/description.md",
    "content": "Analyze employee retention patterns and identify factors contributing to turnover across the organization. The HR leadership team needs comprehensive insights to develop targeted retention strategies and reduce costly employee attrition.\n\n## Your Tasks:\n\n1. **Create the retention analysis table** — build a table called `employee_retention_analysis` in the `employees` schema with these exact columns:\n   * `department_name` (varchar) — the department name\n   * `total_employees_ever` (integer) — total number of employees who have ever worked in this department\n   * `current_employees` (integer) — number of current employees in the department\n   * `former_employees` (integer) — number of employees who left the department\n   * `retention_rate` (decimal) — percentage of employees still with the company (current/total * 100)\n\n2. **Create the high-risk employee identification table** — build a table called `high_risk_employees` in the `employees` schema with:\n   * `employee_id` (bigint) — the employee's ID  \n   * `full_name` (varchar) — concatenated first and last name\n   * `current_department` (varchar) — current department name\n   * `tenure_days` (integer) — days with the company\n   * `current_salary` (integer) — current salary amount\n   * `risk_category` (varchar) — risk level ('high_risk', 'medium_risk', 'low_risk')\n   \n   **Note**: Analyze only current employees (those with active salary records where to_date = '9999-01-01').\n\n3. **Create the turnover trend analysis table** — build a table called `turnover_trend_analysis` in the `employees` schema with:\n   * `departure_year` (integer) — year when employees left (extract from to_date of salary records)\n   * `departures_count` (integer) — number of employees who left that year\n   * `avg_tenure_days` (decimal) — average tenure in days for employees who left that year\n   * `avg_final_salary` (decimal) — average final salary of departed employees that year\n\n4. 
**Apply risk assessment criteria** for current employees:\n   * **High risk**: Employees in departments with retention rate < 80% AND tenure < 1095 days (3 years)\n   * **Medium risk**: Employees in departments with retention rate < 85% AND tenure < 1825 days (5 years)  \n   * **Low risk**: All other current employees\n\n5. **Analyze departure trends** — examine employees who left between 1985 and 2002 (inclusive), grouping by departure year.\n\n6. **Handle final salary selection** — when calculating `avg_final_salary`, if an employee has multiple salary records with the same departure date, select the record with the latest start date. If there are still ties, select the record with the highest salary amount.\n\n7. **Focus appropriately** — use current employees for risk analysis, all historical data for retention rates, and former employees for trend analysis.\n\nThe comprehensive analysis will help identify retention patterns, at-risk employees, and historical turnover trends to guide strategic workforce planning.\n"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_retention_analysis/meta.json",
    "content": "{\n  \"task_id\": \"employee_retention_analysis\",\n  \"task_name\": \"Employee Retention Analysis\",\n  \"category_id\": \"employees\",\n  \"category_name\": \"Employees\",\n  \"description\": \"Analyze retention patterns identifying turnover factors and high-risk employees to develop targeted retention strategies.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"reporting and analytics\",\n    \"statistical aggregation\",\n    \"audit and compliance\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"employees\\\".\\\"employee_gender\\\" {\\n  \\\"M\\\"\\n  \\\"F\\\"\\n}\\n\\nTable \\\"employees\\\".\\\"department\\\" {\\n  \\\"id\\\" bpchar(4) [pk, not null]\\n  \\\"dept_name\\\" varchar(40) [unique, not null]\\n}\\n\\nTable \\\"employees\\\".\\\"department_employee\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16982_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16982_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"department_manager\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16985_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16985_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"employee\\\" {\\n  \\\"id\\\" int8 [pk, not null, increment]\\n  \\\"birth_date\\\" date [not null]\\n  \\\"first_name\\\" varchar(14) [not null]\\n  \\\"last_name\\\" varchar(16) [not null]\\n  \\\"gender\\\" employees.employee_gender [not null]\\n  \\\"hire_date\\\" date [not 
null]\\n}\\n\\nTable \\\"employees\\\".\\\"salary\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"amount\\\" int8 [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, from_date) [type: btree, name: \\\"idx_16991_primary\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"title\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"title\\\" varchar(50) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date\\n\\n  Indexes {\\n    (employee_id, title, from_date) [type: btree, name: \\\"idx_16994_primary\\\"]\\n  }\\n}\\n\\nRef \\\"dept_emp_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_emp_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"salaries_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"salary\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"titles_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"title\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/employees/employee_retention_analysis/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 2: Employee Retention Analysis\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.1 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_retention_analysis_results(conn) -> bool:\n    \"\"\"Verify the employee retention analysis results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT department_name, total_employees_ever, current_employees, \n                   former_employees, retention_rate\n            FROM employees.employee_retention_analysis\n            ORDER BY department_name\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            SELECT\n            d.dept_name AS department_name,\n            COUNT(DISTINCT de.employee_id) AS total_employees_ever,\n            COUNT(DISTINCT de.employee_id) FILTER (WHERE de.to_date = DATE '9999-01-01') AS 
current_employees,\n            (COUNT(DISTINCT de.employee_id)\n            - COUNT(DISTINCT de.employee_id) FILTER (WHERE de.to_date = DATE '9999-01-01')) AS former_employees,\n            (COUNT(DISTINCT de.employee_id) FILTER (WHERE de.to_date = DATE '9999-01-01'))::DECIMAL\n                / NULLIF(COUNT(DISTINCT de.employee_id), 0) * 100 AS retention_rate\n            FROM employees.department d\n            LEFT JOIN employees.department_employee de\n            ON d.id = de.department_id\n            GROUP BY d.id, d.dept_name\n            ORDER BY d.dept_name\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} retention analysis results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Employee retention analysis results are correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_high_risk_results(conn) -> bool:\n    \"\"\"Verify the high risk employee analysis results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT employee_id, full_name, current_department, tenure_days, \n                   current_salary, risk_category\n            FROM employees.high_risk_employees\n            ORDER BY employee_id\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query - only current employees\n    
    cur.execute(\"\"\"\n            WITH current_salary AS (\n            SELECT employee_id, amount AS current_amount\n            FROM (\n                SELECT s.*,\n                    ROW_NUMBER() OVER (PARTITION BY s.employee_id\n                                        ORDER BY s.from_date DESC, s.amount DESC) AS rn\n                FROM employees.salary s\n                WHERE s.to_date = DATE '9999-01-01'\n            ) x\n            WHERE rn = 1\n            ),\n            current_dept AS (\n            SELECT employee_id, department_id\n            FROM (\n                SELECT de.*,\n                    ROW_NUMBER() OVER (PARTITION BY de.employee_id\n                                        ORDER BY de.from_date DESC, de.department_id) AS rn\n                FROM employees.department_employee de\n                WHERE de.to_date = DATE '9999-01-01'\n            ) x\n            WHERE rn = 1\n            ),\n            dept_retention AS (\n            SELECT\n                d.id   AS department_id,\n                d.dept_name,\n                COUNT(DISTINCT de.employee_id) AS total_employees_ever,\n                COUNT(DISTINCT de.employee_id) FILTER (WHERE de.to_date = DATE '9999-01-01') AS current_employees,\n                (COUNT(DISTINCT de.employee_id) FILTER (WHERE de.to_date = DATE '9999-01-01'))::NUMERIC\n                / NULLIF(COUNT(DISTINCT de.employee_id), 0) * 100 AS retention_rate\n            FROM employees.department d\n            LEFT JOIN employees.department_employee de\n                    ON de.department_id = d.id\n            GROUP BY d.id, d.dept_name\n            )\n            SELECT\n            e.id AS employee_id,\n            CONCAT(e.first_name, ' ', e.last_name) AS full_name,\n            d.dept_name AS current_department,\n            (CURRENT_DATE - e.hire_date)::INTEGER AS tenure_days,\n            cs.current_amount::INTEGER AS current_salary,\n            CASE\n                WHEN dr.retention_rate < 80  AND 
(CURRENT_DATE - e.hire_date) < 1095 THEN 'high_risk'\n                WHEN dr.retention_rate < 85  AND (CURRENT_DATE - e.hire_date) < 1825 THEN 'medium_risk'\n                ELSE 'low_risk'\n            END AS risk_category\n            FROM employees.employee e\n            JOIN current_salary cs ON cs.employee_id = e.id\n            JOIN current_dept   cd ON cd.employee_id = e.id\n            JOIN employees.department d ON d.id = cd.department_id\n            JOIN dept_retention dr ON dr.department_id = d.id\n            ORDER BY e.id;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} high risk analysis results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ High risk employee analysis results are correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_turnover_trend_results(conn) -> bool:\n    \"\"\"Verify the turnover trend analysis results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT departure_year, departures_count, avg_tenure_days, avg_final_salary\n            FROM employees.turnover_trend_analysis\n            ORDER BY departure_year\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query - simplified version\n        cur.execute(\"\"\"\n            WITH last_non_current_salary 
AS (\n            SELECT\n                s.employee_id,\n                s.to_date      AS departure_date,\n                s.amount       AS final_salary,\n                ROW_NUMBER() OVER (\n                PARTITION BY s.employee_id\n                ORDER BY s.to_date DESC, s.from_date DESC, s.amount DESC\n                ) AS rn\n            FROM employees.salary s\n            WHERE s.to_date <> DATE '9999-01-01'\n                AND NOT EXISTS (\n                SELECT 1\n                FROM employees.salary s_cur\n                WHERE s_cur.employee_id = s.employee_id\n                    AND s_cur.to_date = DATE '9999-01-01'\n                )\n            ),\n            departed AS (\n            SELECT employee_id, departure_date, final_salary\n            FROM last_non_current_salary\n            WHERE rn = 1\n            ),\n            with_tenure AS (\n            SELECT\n                e.id AS employee_id,\n                d.departure_date,\n                d.final_salary,\n                (d.departure_date - e.hire_date)::INTEGER AS tenure_days\n            FROM employees.employee e\n            JOIN departed d ON d.employee_id = e.id\n            )\n            SELECT\n            EXTRACT(YEAR FROM departure_date)::INTEGER AS departure_year,\n            COUNT(*)::INTEGER                         AS departures_count,\n            AVG(tenure_days)                          AS avg_tenure_days,\n            AVG(final_salary)                         AS avg_final_salary\n            FROM with_tenure\n            WHERE departure_date BETWEEN DATE '1985-01-01' AND DATE '2002-12-31'\n            GROUP BY EXTRACT(YEAR FROM departure_date)\n            ORDER BY departure_year;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} turnover trend results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 
0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Turnover trend analysis results are correct ({len(actual_results)} records)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all three analysis results\n        success = (\n            verify_retention_analysis_results(conn) and \n            verify_high_risk_results(conn) and \n            verify_turnover_trend_results(conn)\n        )\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/employees/executive_dashboard_automation/description.md",
    "content": "Design a comprehensive reporting and automation system for executive dashboard and real-time monitoring. The executive team needs automated reports, data views, and trigger-based notifications to track key business metrics without manual intervention.\n\n## Your Tasks:\n\n1. **Create executive summary views** — build three materialized views in the `employees` schema:\n   \n   **View 1: `exec_department_summary`**\n   * `department_name` (varchar) — department name\n   * `total_employees` (integer) — current active employee count\n   * `avg_salary` (decimal) — average current salary\n   * `total_payroll` (bigint) — total monthly payroll cost\n   * `manager_name` (varchar) — current department manager name\n\n   **View 2: `exec_hiring_trends`**  \n   * `hire_year` (integer) — year employees were hired\n   * `employees_hired` (integer) — number hired that year\n   * `avg_starting_salary` (decimal) — average first salary of hires that year\n   * `retention_rate` (decimal) — percentage still employed\n   * `top_hiring_department` (varchar) — department that hired the most that year\n\n   **View 3: `exec_salary_distribution`**\n   * `salary_band` (varchar) — salary ranges ('30K-50K', '50K-70K', '70K-90K', '90K-110K', '110K+')  \n   * `employee_count` (integer) — employees in this salary band\n   * `percentage_of_workforce` (decimal) — percentage of total workforce\n   * `most_common_title` (varchar) — most frequent job title in this band\n\n2. **Create stored procedure for report generation**:\n   \n   **Procedure: `generate_monthly_report(report_date DATE)`**\n   * Create a table `monthly_reports` with columns: report_id (auto-increment), report_date, department_count, total_employees (current active employees only), avg_salary, generated_at\n   * Insert one summary record using the report_date as identifier and current database statistics (not historical data for that date)\n   * Return the generated report_id\n\n3. 
**Create notification triggers**:\n   \n   **Trigger: `high_salary_alert`**\n   * Fires when a new salary record is inserted with amount > 120000\n   * Inserts an alert into the `salary_alerts` table with: employee_id, salary_amount, alert_date, status='new'\n\n4. **Insert test data to verify triggers**:\n   * Update employee 10001's current salary: first set their current salary record to_date='2024-01-31', then insert a new salary record with amount 125000, from_date='2024-02-01', to_date='9999-01-01'\n   * Refresh all materialized views after inserting new data to ensure they reflect the updated information\n\n5. **Execute the stored procedure**:\n   * Call `generate_monthly_report('2024-01-01')` to create the January report\n   * Query the generated report to verify execution\n\n6. **Create performance indexes**:\n   * Index on `salary_alerts.status` for alert processing\n   * Composite index on `monthly_reports(report_date, department_count)` for trend analysis"
  },
  {
    "path": "tasks/postgres/standard/employees/executive_dashboard_automation/meta.json",
    "content": "{\n  \"task_id\": \"executive_dashboard_automation\",\n  \"task_name\": \"Executive Dashboard Automation\",\n  \"category_id\": \"employees\",\n  \"category_name\": \"Employees\",\n  \"description\": \"Design automated reporting system with materialized views, stored procedures, and triggers for executive dashboard monitoring.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"reporting and analytics\",\n    \"stored procedures and functions\",\n    \"schema design\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"employees\\\".\\\"employee_gender\\\" {\\n  \\\"M\\\"\\n  \\\"F\\\"\\n}\\n\\nTable \\\"employees\\\".\\\"department\\\" {\\n  \\\"id\\\" bpchar(4) [pk, not null]\\n  \\\"dept_name\\\" varchar(40) [unique, not null]\\n}\\n\\nTable \\\"employees\\\".\\\"department_employee\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16982_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16982_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"department_manager\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16985_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16985_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"employee\\\" {\\n  \\\"id\\\" int8 [pk, not null, increment]\\n  \\\"birth_date\\\" date [not null]\\n  \\\"first_name\\\" varchar(14) [not null]\\n  \\\"last_name\\\" varchar(16) [not null]\\n  \\\"gender\\\" employees.employee_gender [not null]\\n  \\\"hire_date\\\" 
date [not null]\\n}\\n\\nTable \\\"employees\\\".\\\"salary\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"amount\\\" int8 [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, from_date) [type: btree, name: \\\"idx_16991_primary\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"title\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"title\\\" varchar(50) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date\\n\\n  Indexes {\\n    (employee_id, title, from_date) [type: btree, name: \\\"idx_16994_primary\\\"]\\n  }\\n}\\n\\nRef \\\"dept_emp_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_emp_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"salaries_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"salary\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"titles_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"title\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/employees/executive_dashboard_automation/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 6: Reporting and Automation System\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.1 tolerance\n    For date types: convert to string for comparison\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, (Decimal, float, int)):\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif hasattr(actual, 'strftime'):  # datetime.date or datetime.datetime\n            if str(actual) != str(expected):\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_materialized_views(conn) -> bool:\n    \"\"\"Verify that materialized views were created and populated correctly.\"\"\"\n    with conn.cursor() as cur:\n        # Check if materialized views exist\n        cur.execute(\"\"\"\n            SELECT matviewname FROM pg_matviews \n            WHERE schemaname = 'employees' \n            AND matviewname IN ('exec_department_summary', 'exec_hiring_trends', 'exec_salary_distribution')\n            ORDER BY matviewname\n        \"\"\")\n        views = [row[0] for row in cur.fetchall()]\n        \n        expected_views = 
['exec_department_summary', 'exec_hiring_trends', 'exec_salary_distribution']\n        if set(views) != set(expected_views):\n            print(f\"❌ Expected views {expected_views}, found {views}\")\n            return False\n        \n        # Check all departments' data accuracy\n        cur.execute(\"\"\"\n            SELECT department_name, total_employees, avg_salary, total_payroll, manager_name\n            FROM employees.exec_department_summary\n            ORDER BY department_name\n        \"\"\")\n        view_data = cur.fetchall()\n        \n        # Get actual data for all departments\n        cur.execute(\"\"\"\n            WITH current_salary AS (\n            SELECT employee_id, amount\n            FROM (\n                SELECT s.*,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY s.employee_id\n                        ORDER BY s.from_date DESC, s.amount DESC\n                    ) AS rn\n                FROM employees.salary s\n                WHERE s.to_date = DATE '9999-01-01'\n            ) x\n            WHERE rn = 1\n            ),\n            current_dept AS (\n            SELECT DISTINCT de.employee_id, de.department_id\n            FROM employees.department_employee de\n            WHERE de.to_date = DATE '9999-01-01'\n            ),\n            current_manager AS (\n            SELECT department_id,\n                    CONCAT(e.first_name, ' ', e.last_name) AS manager_name\n            FROM (\n                SELECT dm.*,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY dm.department_id\n                        ORDER BY dm.from_date DESC, dm.employee_id\n                    ) AS rn\n                FROM employees.department_manager dm\n                WHERE dm.to_date = DATE '9999-01-01'\n            ) dm\n            JOIN employees.employee e ON e.id = dm.employee_id\n            WHERE dm.rn = 1\n            )\n            SELECT\n            d.dept_name AS department_name,\n 
           COUNT(cd.employee_id)::INT AS total_employees,\n            AVG(cs.amount)::DECIMAL   AS avg_salary,\n            COALESCE(SUM(cs.amount), 0)::BIGINT AS total_payroll,\n            cm.manager_name\n            FROM employees.department d\n            LEFT JOIN current_dept   cd ON cd.department_id = d.id\n            LEFT JOIN current_salary cs ON cs.employee_id = cd.employee_id\n            LEFT JOIN current_manager cm ON cm.department_id = d.id\n            GROUP BY d.id, d.dept_name, cm.manager_name\n            ORDER BY d.dept_name;\n        \"\"\")\n        actual_data = cur.fetchall()\n        \n        if len(view_data) != len(actual_data):\n            print(f\"❌ Department count mismatch: view={len(view_data)}, actual={len(actual_data)}\")\n            return False\n            \n        for view_row, actual_row in zip(view_data, actual_data):\n            if not rows_match(view_row, actual_row):\n                print(f\"❌ Department summary data incorrect for {view_row[0]}: view={view_row}, actual={actual_row}\")\n                return False\n            \n        # Check all hiring trends data accuracy\n        cur.execute(\"\"\"\n            SELECT hire_year, employees_hired, avg_starting_salary, retention_rate, top_hiring_department\n            FROM employees.exec_hiring_trends\n            ORDER BY hire_year\n        \"\"\")\n        hiring_view_data = cur.fetchall()\n        \n        # Get actual data for all years\n        cur.execute(\"\"\"\n            WITH first_salary AS (\n            SELECT employee_id, amount AS starting_salary\n            FROM (\n                SELECT s.*,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY s.employee_id\n                        ORDER BY s.from_date ASC, s.amount ASC\n                    ) AS rn\n                FROM employees.salary s\n            ) x\n            WHERE rn = 1\n            ),\n            current_emp AS (\n            SELECT DISTINCT 
s.employee_id\n            FROM employees.salary s\n            WHERE s.to_date = DATE '9999-01-01'\n            ),\n            first_dept AS (\n            SELECT employee_id, department_id\n            FROM (\n                SELECT de.*,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY de.employee_id\n                        ORDER BY de.from_date ASC, de.department_id\n                    ) AS rn\n                FROM employees.department_employee de\n            ) x\n            WHERE rn = 1\n            ),\n            hire_base AS (\n            SELECT e.id AS employee_id,\n                    EXTRACT(YEAR FROM e.hire_date)::INT AS hire_year\n            FROM employees.employee e\n            WHERE e.hire_date IS NOT NULL\n            ),\n            hire_by_dept_year AS (\n            SELECT hb.hire_year,\n                    d.dept_name,\n                    COUNT(*) AS dept_hires\n            FROM hire_base hb\n            LEFT JOIN first_dept fd ON fd.employee_id = hb.employee_id\n            LEFT JOIN employees.department d ON d.id = fd.department_id\n            GROUP BY hb.hire_year, d.dept_name\n            ),\n            top_dept_per_year AS (\n            SELECT hire_year,\n                    dept_name AS top_hiring_department\n            FROM (\n                SELECT hire_year, dept_name, dept_hires,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY hire_year\n                        ORDER BY dept_hires DESC NULLS LAST, dept_name\n                    ) AS rn\n                FROM hire_by_dept_year\n            ) t\n            WHERE rn = 1\n            )\n            SELECT\n            hb.hire_year,\n            COUNT(*)::INT AS employees_hired,\n            AVG(fs.starting_salary)::DECIMAL AS avg_starting_salary,\n            (COUNT(ce.employee_id)::DECIMAL / NULLIF(COUNT(*), 0) * 100) AS retention_rate,\n            td.top_hiring_department\n            FROM hire_base hb\n           
 LEFT JOIN first_salary fs   ON fs.employee_id = hb.employee_id\n            LEFT JOIN current_emp ce    ON ce.employee_id = hb.employee_id\n            LEFT JOIN top_dept_per_year td ON td.hire_year = hb.hire_year\n            GROUP BY hb.hire_year, td.top_hiring_department\n            ORDER BY hb.hire_year;\n        \"\"\")\n        actual_hiring_data = cur.fetchall()\n        \n        if len(hiring_view_data) != len(actual_hiring_data):\n            print(f\"❌ Hiring trends count mismatch: view={len(hiring_view_data)}, actual={len(actual_hiring_data)}\")\n            return False\n        \n        for hiring_view, actual_hiring in zip(hiring_view_data, actual_hiring_data):\n            # Now compare all 5 fields including top_hiring_department\n            if not rows_match(hiring_view, actual_hiring):\n                print(f\"❌ Hiring trends data incorrect for year {hiring_view[0]}: view={hiring_view}, actual={actual_hiring}\")\n                return False\n                \n            \n        # Check all salary bands' data accuracy\n        cur.execute(\"\"\"\n            WITH band_order AS (\n            SELECT '30K-50K' AS band, 1 AS ord UNION ALL\n            SELECT '50K-70K', 2 UNION ALL\n            SELECT '70K-90K', 3 UNION ALL\n            SELECT '90K-110K',4 UNION ALL\n            SELECT '110K+',   5\n            )\n            SELECT salary_band, employee_count, percentage_of_workforce, most_common_title\n            FROM employees.exec_salary_distribution v\n            JOIN band_order bo ON bo.band = v.salary_band\n            ORDER BY bo.ord;\n        \"\"\")\n        view_bands = cur.fetchall()\n        \n        # Calculate actual data for all bands\n        cur.execute(\"\"\"\n            WITH current_salary AS (\n            SELECT employee_id, amount\n            FROM (\n                SELECT s.*,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY s.employee_id\n                        ORDER BY s.from_date 
DESC, s.amount DESC\n                    ) AS rn\n                FROM employees.salary s\n                WHERE s.to_date = DATE '9999-01-01'\n            ) x\n            WHERE rn = 1\n            ),\n            current_title AS (\n            SELECT employee_id, title\n            FROM (\n                SELECT t.*,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY t.employee_id\n                        ORDER BY t.from_date DESC, t.title\n                    ) AS rn\n                FROM employees.title t\n                WHERE t.to_date = DATE '9999-01-01'\n            ) x\n            WHERE rn = 1\n            ),\n            base AS (\n            SELECT cs.employee_id, cs.amount, COALESCE(ct.title, 'Unknown') AS title\n            FROM current_salary cs\n            LEFT JOIN current_title ct ON ct.employee_id = cs.employee_id\n            ),\n            banded AS (\n            SELECT\n                CASE\n                WHEN amount <  50000 THEN '30K-50K'\n                WHEN amount <  70000 THEN '50K-70K'\n                WHEN amount <  90000 THEN '70K-90K'\n                WHEN amount < 110000 THEN '90K-110K'\n                ELSE '110K+'\n                END AS salary_band,\n                title,\n                employee_id\n            FROM base\n            ),\n            band_counts AS (\n            SELECT salary_band, COUNT(DISTINCT employee_id) AS employee_count\n            FROM banded\n            GROUP BY salary_band\n            ),\n            title_counts AS (\n            SELECT salary_band, title, COUNT(DISTINCT employee_id) AS title_count\n            FROM banded\n            GROUP BY salary_band, title\n            ),\n            top_titles AS (\n            SELECT salary_band, title AS most_common_title\n            FROM (\n                SELECT salary_band, title, title_count,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY salary_band\n                        ORDER 
BY title_count DESC, title\n                    ) AS rn\n                FROM title_counts\n            ) t\n            WHERE rn = 1\n            ),\n            workforce AS (\n            SELECT COUNT(DISTINCT employee_id) AS total_current\n            FROM base\n            ),\n            band_order AS (\n            SELECT '30K-50K' AS band, 1 AS ord UNION ALL\n            SELECT '50K-70K', 2 UNION ALL\n            SELECT '70K-90K', 3 UNION ALL\n            SELECT '90K-110K', 4 UNION ALL\n            SELECT '110K+',   5\n            )\n            SELECT\n            bc.salary_band,\n            bc.employee_count::INT AS employee_count,\n            (bc.employee_count::DECIMAL / NULLIF((SELECT total_current FROM workforce), 0) * 100) AS percentage_of_workforce,\n            tt.most_common_title\n            FROM band_counts bc\n            LEFT JOIN top_titles tt ON tt.salary_band = bc.salary_band\n            LEFT JOIN band_order  bo ON bo.band = bc.salary_band\n            ORDER BY bo.ord;        \n        \"\"\")\n        actual_bands = cur.fetchall()\n        \n        # Compare view data with actual data\n        if len(view_bands) != len(actual_bands):\n            print(f\"❌ Salary band count mismatch: view={len(view_bands)}, actual={len(actual_bands)}\")\n            return False\n            \n        for view_band, actual_band in zip(view_bands, actual_bands):\n            if not rows_match(view_band, actual_band):\n                print(f\"❌ Salary band {actual_band[0]} data incorrect: view={view_band}, actual={actual_band}\")\n                return False\n            \n        print(\"✅ All materialized views are created and contain correct data\")\n        return True\n\ndef verify_stored_procedures(conn) -> bool:\n    \"\"\"Verify that stored procedure was created.\"\"\"\n    with conn.cursor() as cur:\n        # Check if procedure exists\n        cur.execute(\"\"\"\n            SELECT routine_name FROM information_schema.routines \n            
WHERE routine_schema = 'employees' \n            AND routine_type = 'FUNCTION'\n            AND routine_name = 'generate_monthly_report'\n        \"\"\")\n        procedures = [row[0] for row in cur.fetchall()]\n        \n        if 'generate_monthly_report' not in procedures:\n            print(\"❌ generate_monthly_report procedure not found\")\n            return False\n            \n        # Check if monthly_reports table exists with correct structure\n        cur.execute(\"\"\"\n            SELECT COUNT(*) FROM information_schema.columns \n            WHERE table_schema = 'employees' AND table_name = 'monthly_reports'\n            AND column_name IN ('report_id', 'report_date', 'department_count', 'total_employees', 'avg_salary', 'generated_at')\n        \"\"\")\n        report_columns = cur.fetchone()[0]\n        if report_columns != 6:\n            print(\"❌ monthly_reports table missing required columns\")\n            return False\n            \n        print(\"✅ Stored procedure and supporting table are created\")\n        return True\n\ndef verify_triggers(conn) -> bool:\n    \"\"\"Verify that triggers were created and fired correctly.\"\"\"\n    with conn.cursor() as cur:\n        # Check if triggers exist\n        cur.execute(\"\"\"\n            SELECT trigger_name FROM information_schema.triggers \n            WHERE trigger_schema = 'employees'\n            AND trigger_name = 'high_salary_alert'\n        \"\"\")\n        triggers = [row[0] for row in cur.fetchall()]\n        \n        if 'high_salary_alert' not in triggers:\n            print(\"❌ high_salary_alert trigger not found\")\n            return False\n            \n        # Check if trigger support table exists\n        cur.execute(\"\"\"\n            SELECT table_name FROM information_schema.tables \n            WHERE table_schema = 'employees' \n            AND table_name = 'salary_alerts'\n        \"\"\")\n        trigger_tables = [row[0] for row in cur.fetchall()]\n        \n        if 
'salary_alerts' not in trigger_tables:\n            print(\"❌ salary_alerts table not found\")\n            return False\n            \n        # Check if the old salary record was properly closed\n        cur.execute(\"\"\"\n            SELECT COUNT(*) FROM employees.salary \n            WHERE employee_id = 10001 AND to_date = '2024-01-31'\n        \"\"\")\n        old_salary_count = cur.fetchone()[0]\n        if old_salary_count == 0:\n            print(\"❌ Old salary record for employee 10001 was not properly closed with to_date='2024-01-31'\")\n            return False\n            \n        # Check if the new salary record was inserted\n        cur.execute(\"\"\"\n            SELECT COUNT(*) FROM employees.salary \n            WHERE employee_id = 10001 AND amount = 125000 \n            AND from_date = '2024-02-01' AND to_date = '9999-01-01'\n        \"\"\")\n        new_salary_count = cur.fetchone()[0]\n        if new_salary_count == 0:\n            print(\"❌ New salary record for employee 10001 with amount 125000 was not inserted\")\n            return False\n            \n        # Check if high salary alert was triggered with specific details\n        cur.execute(\"\"\"\n            SELECT COUNT(*) FROM employees.salary_alerts \n            WHERE employee_id = 10001 AND salary_amount = 125000 AND status = 'new'\n        \"\"\")\n        alert_count = cur.fetchone()[0]\n        if alert_count == 0:\n            print(\"❌ High salary alert was not triggered correctly for employee 10001 with amount 125000\")\n            return False\n            \n        print(\"✅ Trigger is created and functioning correctly\")\n        return True\n\ndef verify_procedure_execution(conn) -> bool:\n    \"\"\"Verify that stored procedure was executed with correct data.\"\"\"\n    with conn.cursor() as cur:\n        # Check if monthly report data matches actual statistics\n        cur.execute(\"\"\"\n            SELECT department_count, total_employees, avg_salary\n            
FROM employees.monthly_reports \n            WHERE report_date = '2024-01-01'\n        \"\"\")\n        report_data = cur.fetchone()\n        if not report_data:\n            print(\"❌ Monthly report for 2024-01-01 was not generated\")\n            return False\n            \n        # Get actual current statistics to compare\n        cur.execute(\"\"\"\nWITH current_salary AS (\n  SELECT employee_id, amount\n  FROM (\n    SELECT s.*,\n           ROW_NUMBER() OVER (\n             PARTITION BY s.employee_id\n             ORDER BY s.from_date DESC, s.amount DESC\n           ) AS rn\n    FROM employees.salary s\n    WHERE s.to_date = DATE '9999-01-01'\n  ) x\n  WHERE rn = 1\n),\ncurrent_dept AS (\n  SELECT DISTINCT de.employee_id, de.department_id\n  FROM employees.department_employee de\n  WHERE de.to_date = DATE '9999-01-01'\n),\nbase AS (\n  SELECT cd.department_id, cs.employee_id, cs.amount\n  FROM current_dept cd\n  JOIN current_salary cs ON cs.employee_id = cd.employee_id\n)\nSELECT\n  COUNT(DISTINCT department_id)        AS actual_dept_count,\n  COUNT(DISTINCT employee_id)          AS actual_total_employees,\n  AVG(amount)::DECIMAL                 AS actual_avg_salary\nFROM base;\n        \"\"\")\n        actual_stats = cur.fetchone()\n        \n        # Compare report data with actual data  \n        if not rows_match(report_data, actual_stats):\n            print(f\"❌ Monthly report data incorrect: expected {actual_stats}, got {report_data}\")\n            return False\n                \n        print(\"✅ Stored procedure executed with correct data\")\n        return True\n\ndef verify_indexes(conn) -> bool:\n    \"\"\"Verify that performance indexes were created.\"\"\"\n    with conn.cursor() as cur:\n        # Check for required indexes\n        cur.execute(\"\"\"\n            SELECT indexname FROM pg_indexes \n            WHERE schemaname = 'employees' \n            AND tablename IN ('salary_alerts', 'monthly_reports')\n            AND indexname LIKE 
'idx_%'\n            ORDER BY indexname\n        \"\"\")\n        indexes = [row[0] for row in cur.fetchall()]\n        \n        # Should have at least 2 indexes created\n        if len(indexes) < 2:\n            print(f\"❌ Expected at least 2 performance indexes, found {len(indexes)}\")\n            return False\n            \n        print(\"✅ Performance indexes are created\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all components\n        success = (\n            verify_materialized_views(conn) and \n            verify_stored_procedures(conn) and\n            verify_triggers(conn) and\n            verify_procedure_execution(conn) and\n            verify_indexes(conn)\n        )\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/employees/management_structure_analysis/description.md",
    "content": "Conduct a comprehensive management structure analysis to evaluate leadership effectiveness and organizational hierarchy. The executive team needs insights into management tenure, span of control, and leadership transitions to optimize the management structure and succession planning.\n\n## Your Tasks:\n\n1. **Create the manager profile table** — build a table called `manager_profile` in the `employees` schema with these exact columns:\n   * `manager_id` (bigint) — the manager's employee ID\n   * `manager_name` (varchar) — concatenated first and last name\n   * `current_department` (varchar) — current department they manage (NULL if not current)\n   * `management_periods` (integer) — total number of management assignments (including multiple periods in same department)\n   * `current_manager` (boolean) — whether they are currently a manager\n\n2. **Create the department leadership table** — build a table called `department_leadership` in the `employees` schema with:\n   * `department_name` (varchar) — the department name\n   * `current_manager_name` (varchar) — current manager's full name\n   * `manager_start_date` (date) — when current manager started\n   * `total_historical_managers` (integer) — total number of managers this department has had\n\n3. **Create the management transition table** — build a table called `management_transitions` in the `employees` schema with:\n   * `department_name` (varchar) — the department name\n   * `transition_year` (integer) — year when management changed\n   * `outgoing_manager` (varchar) — previous manager's name\n   * `incoming_manager` (varchar) — new manager's name ('No Successor' if department had no immediate replacement)\n   * `transition_gap_days` (integer) — days between managers (0 if immediate or no successor)\n\n4. 
**Create the span of control table** — build a table called `span_of_control` in the `employees` schema with:\n   * `manager_id` (bigint) — the manager's employee ID\n   * `manager_name` (varchar) — manager's full name\n   * `department_name` (varchar) — department they manage\n   * `total_employees` (integer) — total employees in their department\n   * `current_employees` (integer) — current active employees in department\n   * `management_load` (varchar) — assessment ('light', 'moderate', 'heavy') based on current employees\n\n5. **Apply management load classification**:\n   * **Light**: < 5,000 current employees\n   * **Moderate**: 5,000 - 15,000 current employees\n   * **Heavy**: > 15,000 current employees\n\n6. **Focus on current managers only** for span of control analysis — use managers with active management roles (to_date = '9999-01-01').\n\n7. **Track all management history** for profiles and transitions — include both current and former managers to understand complete leadership evolution.\n\nThe analysis will provide insights into management effectiveness, departmental stability, and organizational structure optimization opportunities.\n"
  },
  {
    "path": "tasks/postgres/standard/employees/management_structure_analysis/meta.json",
    "content": "{\n  \"task_id\": \"management_structure_analysis\",\n  \"task_name\": \"Management Structure Analysis\",\n  \"category_id\": \"employees\",\n  \"category_name\": \"Employees\",\n  \"description\": \"Analyze management structure evaluating leadership effectiveness, span of control, and management transitions for succession planning.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"reporting and analytics\",\n    \"statistical aggregation\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Enum \\\"employees\\\".\\\"employee_gender\\\" {\\n  \\\"M\\\"\\n  \\\"F\\\"\\n}\\n\\nTable \\\"employees\\\".\\\"department\\\" {\\n  \\\"id\\\" bpchar(4) [pk, not null]\\n  \\\"dept_name\\\" varchar(40) [unique, not null]\\n}\\n\\nTable \\\"employees\\\".\\\"department_employee\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16982_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16982_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"department_manager\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"department_id\\\" bpchar(4) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, department_id) [type: btree, name: \\\"idx_16985_primary\\\"]\\n    department_id [type: btree, name: \\\"idx_16985_dept_no\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"employee\\\" {\\n  \\\"id\\\" int8 [pk, not null, increment]\\n  \\\"birth_date\\\" date [not null]\\n  \\\"first_name\\\" varchar(14) [not null]\\n  \\\"last_name\\\" varchar(16) [not null]\\n  \\\"gender\\\" employees.employee_gender [not null]\\n  \\\"hire_date\\\" date [not 
null]\\n}\\n\\nTable \\\"employees\\\".\\\"salary\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"amount\\\" int8 [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date [not null]\\n\\n  Indexes {\\n    (employee_id, from_date) [type: btree, name: \\\"idx_16991_primary\\\"]\\n  }\\n}\\n\\nTable \\\"employees\\\".\\\"title\\\" {\\n  \\\"employee_id\\\" int8 [not null]\\n  \\\"title\\\" varchar(50) [not null]\\n  \\\"from_date\\\" date [not null]\\n  \\\"to_date\\\" date\\n\\n  Indexes {\\n    (employee_id, title, from_date) [type: btree, name: \\\"idx_16994_primary\\\"]\\n  }\\n}\\n\\nRef \\\"dept_emp_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_emp_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_employee\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"dept_manager_ibfk_2\\\":\\\"employees\\\".\\\"department\\\".\\\"id\\\" < \\\"employees\\\".\\\"department_manager\\\".\\\"department_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"salaries_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"salary\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\\nRef \\\"titles_ibfk_1\\\":\\\"employees\\\".\\\"employee\\\".\\\"id\\\" < \\\"employees\\\".\\\"title\\\".\\\"employee_id\\\" [update: restrict, delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/employees.sql.gz\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/employees/management_structure_analysis/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Task 4: Management Structure Analysis\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.1 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.1:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_manager_profile_results(conn) -> bool:\n    \"\"\"Verify the manager profile results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT manager_id, manager_name, current_department, \n                   management_periods, current_manager\n            FROM employees.manager_profile\n            ORDER BY manager_id\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH dm AS (\n            SELECT dm.employee_id,\n                    dm.department_id,\n                    dm.from_date,\n                    dm.to_date\n            FROM employees.department_manager dm\n            ),\n            manager_periods AS (\n         
   SELECT employee_id, COUNT(*)::INT AS management_periods\n            FROM dm\n            GROUP BY employee_id\n            ),\n            current_assignment AS (\n            SELECT employee_id, department_id\n            FROM (\n                SELECT d.*,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY d.employee_id\n                        ORDER BY d.from_date DESC, d.department_id\n                    ) AS rn\n                FROM dm d\n                WHERE d.to_date = DATE '9999-01-01'\n            ) x\n            WHERE rn = 1\n            ),\n            manager_names AS (\n            SELECT e.id AS manager_id,\n                    CONCAT(e.first_name, ' ', e.last_name) AS manager_name\n            FROM employees.employee e\n            WHERE EXISTS (SELECT 1 FROM dm WHERE employee_id = e.id)\n            )\n            SELECT\n            mn.manager_id,\n            mn.manager_name,\n            d.dept_name AS current_department,\n            mp.management_periods,\n            (d.dept_name IS NOT NULL) AS current_manager\n            FROM manager_names mn\n            JOIN manager_periods mp ON mp.employee_id = mn.manager_id\n            LEFT JOIN current_assignment ca ON ca.employee_id = mn.manager_id\n            LEFT JOIN employees.department d ON d.id = ca.department_id\n            ORDER BY mn.manager_id;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} manager profile results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n       
 if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Manager profile results are correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_department_leadership_results(conn) -> bool:\n    \"\"\"Verify the department leadership results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT department_name, current_manager_name, manager_start_date, \n                   total_historical_managers\n            FROM employees.department_leadership\n            ORDER BY department_name\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH current_mgr AS (\n            SELECT department_id,\n                    CONCAT(e.first_name, ' ', e.last_name) AS current_manager_name,\n                    dm.from_date AS manager_start_date\n            FROM (\n                SELECT dm.*,\n                    ROW_NUMBER() OVER (\n                        PARTITION BY dm.department_id\n                        ORDER BY dm.from_date DESC, dm.employee_id\n                    ) AS rn\n                FROM employees.department_manager dm\n                WHERE dm.to_date = DATE '9999-01-01'\n            ) dm\n            JOIN employees.employee e ON e.id = dm.employee_id\n            WHERE dm.rn = 1\n            ),\n            hist AS (\n            SELECT dm.department_id, COUNT(DISTINCT dm.employee_id)::INT AS total_historical_managers\n            FROM employees.department_manager dm\n            GROUP BY dm.department_id\n            )\n            SELECT\n            d.dept_name                              AS department_name,\n            cm.current_manager_name,\n            cm.manager_start_date,\n            COALESCE(h.total_historical_managers,0)  AS total_historical_managers\n            FROM 
employees.department d\n            LEFT JOIN current_mgr cm ON cm.department_id = d.id\n            LEFT JOIN hist        h  ON h.department_id = d.id\n            ORDER BY d.dept_name;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} department leadership results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Department leadership results are correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_management_transitions_results(conn) -> bool:\n    \"\"\"Verify the management transitions results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT department_name, transition_year, outgoing_manager, incoming_manager, transition_gap_days\n            FROM employees.management_transitions\n            ORDER BY department_name, transition_year\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH mgr AS (\n            SELECT\n                d.id AS department_id,\n                d.dept_name,\n                dm.employee_id,\n                dm.from_date,\n                dm.to_date,\n                CONCAT(e.first_name, ' ', e.last_name) AS manager_name\n            FROM employees.department_manager dm\n            JOIN employees.department d ON 
d.id = dm.department_id\n            JOIN employees.employee  e ON e.id = dm.employee_id\n            ),\n            ordered AS (\n            SELECT\n                department_id,\n                dept_name,\n                employee_id,\n                manager_name,\n                from_date,\n                to_date,\n                ROW_NUMBER() OVER (\n                PARTITION BY department_id\n                ORDER BY from_date, to_date, employee_id\n                ) AS rn,\n                LEAD(manager_name) OVER (\n                PARTITION BY department_id\n                ORDER BY from_date, to_date, employee_id\n                ) AS next_manager_name,\n                LEAD(from_date) OVER (\n                PARTITION BY department_id\n                ORDER BY from_date, to_date, employee_id\n                ) AS next_from_date\n            FROM mgr\n            )\n            SELECT\n            o.dept_name                                   AS department_name,\n            EXTRACT(YEAR FROM o.to_date)::INT             AS transition_year,\n            o.manager_name                                AS outgoing_manager,\n            COALESCE(o.next_manager_name, 'No Successor') AS incoming_manager,\n            COALESCE(GREATEST((o.next_from_date - o.to_date - 1), 0), 0)::INT AS transition_gap_days\n            FROM ordered o\n            WHERE o.to_date <> DATE '9999-01-01'\n            ORDER BY department_name, transition_year;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} management transitions results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row 
{i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Management transitions results are correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_span_of_control_results(conn) -> bool:\n    \"\"\"Verify the span of control results.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT manager_id, manager_name, department_name, total_employees, \n                   current_employees, management_load\n            FROM employees.span_of_control\n            ORDER BY manager_id\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH dept_total AS (\n            SELECT de.department_id, COUNT(DISTINCT de.employee_id)::INT AS total_employees\n            FROM employees.department_employee de\n            GROUP BY de.department_id\n            ),\n            dept_current AS (\n            SELECT de.department_id, COUNT(DISTINCT de.employee_id)::INT AS current_employees\n            FROM employees.department_employee de\n            JOIN employees.salary s\n                ON s.employee_id = de.employee_id\n            AND s.to_date = DATE '9999-01-01'\n            WHERE de.to_date = DATE '9999-01-01'\n            GROUP BY de.department_id\n            )\n            SELECT\n            dm.employee_id AS manager_id,\n            CONCAT(e.first_name, ' ', e.last_name) AS manager_name,\n            d.dept_name AS department_name,\n            COALESCE(dt.total_employees, 0)  AS total_employees,\n            COALESCE(dc.current_employees, 0) AS current_employees,\n            CASE\n                WHEN COALESCE(dc.current_employees, 0) < 5000  THEN 'light'\n                WHEN COALESCE(dc.current_employees, 0) 
<= 15000 THEN 'moderate'\n                ELSE 'heavy'\n            END AS management_load\n            FROM employees.department_manager dm\n            JOIN employees.employee  e ON e.id = dm.employee_id\n            JOIN employees.department d ON d.id = dm.department_id\n            LEFT JOIN dept_total  dt ON dt.department_id = dm.department_id\n            LEFT JOIN dept_current dc ON dc.department_id = dm.department_id\n            WHERE dm.to_date = DATE '9999-01-01'\n            ORDER BY dm.employee_id, d.dept_name;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} span of control results, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches: {mismatches}\")\n            return False\n\n        print(f\"✅ Span of control results are correct ({len(actual_results)} records)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all four analysis results\n        success = (\n            verify_manager_profile_results(conn) and \n            verify_department_leadership_results(conn) and \n            verify_management_transitions_results(conn) and\n            
verify_span_of_control_results(conn)\n        )\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/lego/consistency_enforcement/description.md",
    "content": "Implement a data consistency enforcement system for the LEGO database. The system must ensure that the reported part count in the `lego_sets` table matches the actual sum of non-spare parts in the latest inventory version. This involves a three-step process: identifying existing inconsistencies, fixing them, and creating a trigger-based constraint system to prevent future issues.\n\n### Consistency Rule\nFor any given `set_num`, the following invariant must be maintained:\n`lego_sets.num_parts = SUM(quantity)` FROM `lego_inventory_parts` WHERE `inventory_id` IN (latest inventory for that set) AND `is_spare` = false\n\n**Important**: If a set has no inventory records, the consistency check should be skipped.\n\n# Your Tasks:\n\n## Task 1: Identify Data Inconsistencies\n\n### Objective\nWrite a single `SELECT` query to find all sets where the stored `num_parts` does not match the actual calculated number of parts from the latest inventory.\n\n1.  **Find the Latest Inventory**: For each `set_num`, find its latest inventory id by getting the `MAX(version)` from the `lego_inventories` table.\n2.  **Calculate Actual Part Count**: For these latest inventories, join with `lego_inventory_parts` and calculate the `SUM(quantity)`, but only for parts where `is_spare` is false.\n3.  **Compare and Filter**: Join this calculated result back to the `lego_sets` table and return the rows where `lego_sets.num_parts` is different from your calculated sum.\n\n## Task 2: Fix Existing Inconsistencies\n\n### Objective\nCorrect all mismatched `num_parts` values using a clear, multi-step process with a temporary table. This approach is designed to be robust against all edge cases.\n\n#### Step 1: Create a Temporary Table\nCreate a temporary table (e.g., `correct_counts`) with two columns: `set_num` (text) and `actual_parts` (integer).\n\n#### Step 2: Populate the Temporary Table\nThis is the most critical step. 
Write an `INSERT` statement that calculates the correct part count for every single set listed in the `lego_sets` table.\n\n-   The query must start by selecting from `public.lego_sets`.\n-   It must then `LEFT JOIN` to a subquery that contains the part-counting logic (finding the latest inventory version and summing the non-spare parts).\n-   Use `COALESCE` on the final result from the subquery to ensure that any set without parts or without an inventory record gets a value of `0`, not `NULL`.\n\n#### Step 3: Update from the Temporary Table\n\nWrite a final, simple `UPDATE` statement that joins the `lego_sets` table with your temporary table on `set_num` and sets `num_parts` to the `actual_parts` value.\n\n## Task 3: Create Constraint Enforcement System\n\n### Objective\n\nImplement a deferrable constraint trigger system to enforce the consistency rule automatically for all future `INSERT` and `UPDATE` operations.\n\n### Part A: Create the Trigger Function\n\nCreate a single PL/pgSQL function, preferably named `check_set_parts_consistency()`, that performs the core validation.\n\n**Function Requirements**:\n\n  - Returns `trigger`.\n  - Accepts no arguments.\n  - Contains the core validation logic:\n      - **Identify the `set_num` to check**. This is the most critical part. The `set_num` must be retrieved based on which table fired the trigger (`TG_TABLE_NAME`):\n          - If `lego_sets` or `lego_inventories`: get the `set_num` directly from `NEW.set_num`.\n          - If `lego_inventory_parts`: you must first query `lego_inventories` using `NEW.inventory_id` to find the corresponding `set_num`.\n      - **Perform the check**. For the identified `set_num`, execute the same core logic from Task 1 to get the `actual_parts` count and the `stored_num_parts` from the `lego_sets` table.\n      - **Raise an exception on failure**. 
If `actual_parts` does not equal `stored_num_parts`, the function must raise an exception to block the transaction (e.g., `RAISE EXCEPTION 'Inconsistent part count for set %', relevant_set_num;`).\n      - **Return `NEW` on success**. If the check passes or is skipped, the function should `RETURN NEW`.\n\n### Part B: Create the Constraint Triggers\n\nCreate three separate `CONSTRAINT TRIGGER` statements that attach the function from Part A to the following tables:\n\n  - `public.lego_sets`\n  - `public.lego_inventories`\n  - `public.lego_inventory_parts`\n\n**Crucial Trigger Requirements**:\n\n  - Each trigger must fire `AFTER INSERT OR UPDATE`.\n  - Each trigger **MUST** be `DEFERRABLE` and `INITIALLY IMMEDIATE`. This is non-negotiable for the verification to pass.\n  - Each trigger must execute the function `FOR EACH ROW`."
  },
  {
    "path": "tasks/postgres/standard/lego/consistency_enforcement/meta.json",
    "content": "{\n  \"task_id\": \"consistency_enforcement\",\n  \"task_name\": \"Consistency Enforcement\",\n  \"category_id\": \"lego\",\n  \"category_name\": \"Lego\",\n  \"description\": \"Implement data consistency system ensuring reported part counts match actual inventory using triggers and constraint enforcement.\",\n  \"author\": \"Jiawei Wang\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"data integrity enforcement\",\n    \"stored procedures and functions\",\n    \"transactional operations\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"lego_colors\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"rgb\\\" varchar(6) [not null]\\n  \\\"is_trans\\\" bpchar(1) [not null]\\n}\\n\\nTable \\\"lego_inventories\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"version\\\" int4 [not null]\\n  \\\"set_num\\\" varchar(255) [not null]\\n}\\n\\nTable \\\"lego_inventory_parts\\\" {\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"part_num\\\" varchar(255) [not null]\\n  \\\"color_id\\\" int4 [not null]\\n  \\\"quantity\\\" int4 [not null]\\n  \\\"is_spare\\\" bool [not null]\\n}\\n\\nTable \\\"lego_inventory_sets\\\" {\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"set_num\\\" varchar(255) [not null]\\n  \\\"quantity\\\" int4 [not null]\\n}\\n\\nTable \\\"lego_part_categories\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n}\\n\\nTable \\\"lego_parts\\\" {\\n  \\\"part_num\\\" varchar(255) [pk, not null]\\n  \\\"name\\\" text [not null]\\n  \\\"part_cat_id\\\" int4 [not null]\\n}\\n\\nTable \\\"lego_sets\\\" {\\n  \\\"set_num\\\" varchar(255) [pk, not null]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"year\\\" int4\\n  \\\"theme_id\\\" int4\\n  \\\"num_parts\\\" int4\\n}\\n\\nTable \\\"lego_themes\\\" {\\n  \\\"id\\\" int4 [pk, not 
null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"parent_id\\\" int4\\n}\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/lego.sql\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/lego/consistency_enforcement/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL LEGO Task 1: Parts Consistency Fix & Constraints\nVersion 2.1: Relaxed consistency check to allow for one known corner case mismatch.\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nimport psycopg2.errors\nfrom typing import Optional, Tuple, List\n\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters from environment variables.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\"),\n    }\n\n\ndef fetch_candidate_part_row(cur) -> Optional[Tuple[int, str, str, int]]:\n    \"\"\"\n    Picks a concrete, non-spare inventory part from the latest inventory of any set.\n    This provides a reliable target for testing update and insert triggers.\n\n    Returns a tuple: (inventory_id, set_num, part_num, color_id) or None.\n    \"\"\"\n    cur.execute(\n        \"\"\"\n        WITH latest_inv AS (\n            SELECT set_num, MAX(version) AS max_version\n            FROM public.lego_inventories\n            GROUP BY set_num\n        ), inv AS (\n            SELECT li.id, li.set_num\n            FROM public.lego_inventories li\n            JOIN latest_inv lv ON lv.set_num = li.set_num AND lv.max_version = li.version\n        )\n        SELECT i.id AS inventory_id, i.set_num, lip.part_num, lip.color_id\n        FROM inv i\n        JOIN public.lego_inventory_parts lip ON lip.inventory_id = i.id\n        WHERE lip.is_spare = false AND lip.quantity > 0\n        LIMIT 1;\n        \"\"\"\n    )\n    return cur.fetchone()\n\n\ndef get_mismatch_count(cur) -> int:\n    \"\"\"Returns the number of sets where num_parts mismatches the computed actual sum.\"\"\"\n    cur.execute(\n        \"\"\"\n        WITH latest_inv AS (\n            SELECT 
set_num, MAX(version) AS max_version\n            FROM public.lego_inventories\n            GROUP BY set_num\n        ), inv_latest AS (\n            SELECT li.set_num, li.id\n            FROM public.lego_inventories li\n            JOIN latest_inv lv ON lv.set_num = li.set_num AND lv.max_version = li.version\n        ), parts_agg AS (\n            SELECT\n                i.set_num,\n                SUM(lip.quantity) AS actual_parts\n            FROM inv_latest i\n            JOIN public.lego_inventory_parts lip ON lip.inventory_id = i.id\n            WHERE lip.is_spare = false\n            GROUP BY i.set_num\n        )\n        SELECT COUNT(*)\n        FROM public.lego_sets s\n        LEFT JOIN parts_agg pa ON s.set_num = pa.set_num\n        WHERE s.num_parts <> COALESCE(pa.actual_parts, 0);\n        \"\"\"\n    )\n    return cur.fetchone()[0]\n\n\ndef verify_data_consistency(conn) -> bool:\n    \"\"\"\n    TASK 1 VERIFICATION: Checks if the initial data fix was successful.\n    (Relaxed: Allows for one corner-case mismatch).\n    \"\"\"\n    print(\"\\n-- Verifying Task 1: Data Consistency Fix (Relaxed) --\")\n    with conn.cursor() as cur:\n        count = get_mismatch_count(cur)\n        # RELAXED CONDITION: Allow 0 or 1 mismatch to pass.\n        if count > 1:\n            print(f\"❌ FAIL: Found {count} sets with inconsistent part counts. 
Expected 0 or 1 after fix.\")\n            return False\n        \n        print(\"✅ PASS: Data consistency check passed (allowing for one known mismatch).\")\n        return True\n\n\ndef verify_constraint_triggers_exist(conn) -> bool:\n    \"\"\"\n    TASK 2 VERIFICATION (Part A): Checks if constraint triggers are attached to all required tables.\n    This is more robust than checking names or a total count.\n    \"\"\"\n    print(\"\\n-- Verifying Task 2: Constraint Trigger Existence --\")\n    tables_to_check = [\n        'public.lego_inventory_parts',\n        'public.lego_inventories',\n        'public.lego_sets'\n    ]\n    all_triggers_found = True\n    with conn.cursor() as cur:\n        for table in tables_to_check:\n            cur.execute(\n                \"\"\"\n                SELECT COUNT(*)\n                FROM pg_trigger\n                WHERE tgrelid = %s::regclass AND tgconstraint <> 0;\n                \"\"\",\n                (table,)\n            )\n            trigger_count = cur.fetchone()[0]\n            if trigger_count == 0:\n                print(f\"❌ FAIL: No constraint trigger found on table '{table}'.\")\n                all_triggers_found = False\n            else:\n                print(f\"✅ OK: Found constraint trigger(s) on table '{table}'.\")\n\n    if all_triggers_found:\n        print(\"✅ PASS: Constraint triggers are attached to all required tables.\")\n    return all_triggers_found\n\n\ndef verify_violation_is_blocked(conn) -> bool:\n    \"\"\"\n    TASK 2 VERIFICATION (Part B): Checks if triggers block a direct, inconsistent write.\n    An attempt to increment a part quantity without updating the set's total should fail.\n    \"\"\"\n    print(\"\\n-- Verifying Task 2: Immediate Constraint Enforcement --\")\n    with conn.cursor() as cur:\n        candidate = fetch_candidate_part_row(cur)\n        if not candidate:\n            print(\"⚠️ SKIP: No candidate part row found to test constraints. 
Cannot verify.\")\n            return True # Skip if no data to test\n\n        inventory_id, _, part_num, color_id = candidate\n        try:\n            # This transaction should fail due to the trigger\n            cur.execute(\n                \"\"\"\n                UPDATE public.lego_inventory_parts\n                SET quantity = quantity + 1\n                WHERE inventory_id = %s AND part_num = %s AND color_id = %s;\n                \"\"\",\n                (inventory_id, part_num, color_id),\n            )\n            # If we reach here, the trigger failed to block the update.\n            conn.rollback()\n            print(\"❌ FAIL: An inconsistent write was NOT blocked by the trigger.\")\n            return False\n        except psycopg2.Error as e:\n            # We expect an error. Specifically, a constraint violation error.\n            conn.rollback()\n            # 23514 is check_violation, but custom triggers might raise others.\n            # Any error here is considered a success as the transaction was blocked.\n            print(f\"✅ PASS: Inconsistent write was correctly blocked by the trigger. (Error: {e.pgcode})\")\n            return True\n\n\ndef verify_deferred_transaction_is_allowed(conn) -> bool:\n    \"\"\"\n    TASK 2 VERIFICATION (Part C): Checks if a coordinated, consistent update is allowed\n    when constraints are deferred.\n    \"\"\"\n    print(\"\\n-- Verifying Task 2: Deferred Constraint Enforcement --\")\n    with conn.cursor() as cur:\n        candidate = fetch_candidate_part_row(cur)\n        if not candidate:\n            print(\"⚠️ SKIP: No candidate part row found. 
Cannot test deferred transaction.\")\n            return True # Skip if no data to test\n\n    inventory_id, set_num, part_num, color_id = candidate\n\n    try:\n        # This multi-statement transaction should succeed with deferred constraints\n        with conn.cursor() as cur:\n            cur.execute(\"BEGIN;\")\n            cur.execute(\"SET CONSTRAINTS ALL DEFERRED;\")\n            cur.execute(\n                \"UPDATE public.lego_inventory_parts SET quantity = quantity + 1 WHERE inventory_id = %s AND part_num = %s AND color_id = %s;\",\n                (inventory_id, part_num, color_id),\n            )\n            cur.execute(\n                \"UPDATE public.lego_sets SET num_parts = num_parts + 1 WHERE set_num = %s;\",\n                (set_num,),\n            )\n            cur.execute(\"COMMIT;\") # This will fail if constraints are not deferrable or logic is wrong\n        print(\"✅ PASS: Coordinated update with deferred constraints committed successfully.\")\n\n        # Revert changes to leave DB in its original state\n        with conn.cursor() as cur:\n            cur.execute(\"BEGIN;\")\n            cur.execute(\"SET CONSTRAINTS ALL DEFERRED;\")\n            cur.execute(\n                \"UPDATE public.lego_inventory_parts SET quantity = quantity - 1 WHERE inventory_id = %s AND part_num = %s AND color_id = %s;\",\n                (inventory_id, part_num, color_id),\n            )\n            cur.execute(\n                \"UPDATE public.lego_sets SET num_parts = num_parts - 1 WHERE set_num = %s;\",\n                (set_num,),\n            )\n            cur.execute(\"COMMIT;\")\n        print(\"INFO: Test changes were successfully reverted.\")\n        return True\n\n    except psycopg2.Error as e:\n        conn.rollback()\n        print(f\"❌ FAIL: Deferred transaction failed to commit. 
Error: {e}\")\n        return False\n\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 60)\n    print(\"LEGO Database Consistency Verification Script\")\n    print(\"=\" * 60)\n\n    conn_params = get_connection_params()\n    if not conn_params.get(\"database\"):\n        print(\"❌ CRITICAL: POSTGRES_DATABASE environment variable not set.\")\n        sys.exit(1)\n\n    try:\n        with psycopg2.connect(**conn_params) as conn:\n            conn.autocommit = False # Ensure we control transactions\n\n            # Run all verification steps\n            results = [\n                verify_data_consistency(conn),\n                verify_constraint_triggers_exist(conn),\n                verify_violation_is_blocked(conn),\n                verify_deferred_transaction_is_allowed(conn),\n            ]\n\n            if all(results):\n                print(\"\\n🎉 Overall Result: PASS - All tasks verified successfully!\")\n                sys.exit(0)\n            else:\n                print(\"\\n❌ Overall Result: FAIL - One or more verification steps failed.\")\n                sys.exit(1)\n\n    except psycopg2.OperationalError as e:\n        print(f\"❌ CRITICAL: Could not connect to the database. Details: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ CRITICAL: An unexpected error occurred during verification. Details: {e}\")\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/postgres/standard/lego/database_security_policies/description.md",
    "content": "Implement a comprehensive database security system with Row-Level Security (RLS) policies and role-based access control for the LEGO database. The system must ensure theme-based data isolation and prevent unauthorized access across different LEGO themes.\n\n## Your Tasks:\n\n1. **Create database role and permissions** — Create a new database role called `theme_analyst` with the following permissions:\n   * `SELECT` permissions on all reference tables: `lego_themes`, `lego_colors`, `lego_parts`, `lego_part_categories`\n   * `SELECT` permissions on main data tables: `lego_sets`, `lego_inventories`, `lego_inventory_parts`\n   * No `INSERT`, `UPDATE`, or `DELETE` permissions on any tables\n\n2. **Enable Row-Level Security** — Enable RLS on the following tables:\n   * `lego_sets`\n   * `lego_inventories` \n   * `lego_inventory_parts`\n\n3. **Create RLS policies** — Implement theme-based data isolation policies:\n   \n   **Policy 1: `theme_sets_policy` on `lego_sets`**\n   * Allows access only to sets where `theme_id = 18` (Star Wars theme)\n   * Policy should use a function that checks the current user's theme assignment\n   \n   **Policy 2: `theme_inventories_policy` on `lego_inventories`**\n   * Allows access only to inventories for sets with `theme_id = 18`\n   * Must join with `lego_sets` table to check theme_id\n   \n   **Policy 3: `theme_inventory_parts_policy` on `lego_inventory_parts`**\n   * Allows access only to inventory parts for sets with `theme_id = 18`\n   * Must join through `lego_inventories` and `lego_sets` to check theme_id\n\n4. **Create theme assignment function** — Create a function `get_user_theme_id()` that:\n   * Returns `18` for the `theme_analyst` role (Star Wars theme)\n   * Can be extended to support other themes in the future\n   * Uses `current_user` to determine the appropriate theme_id\n\n5. 
**Test the security implementation** — Execute verification queries that demonstrate:\n   * Star Wars theme (theme_id=18) returns exactly 2 sets: '65081-1' and 'K8008-1'\n   * Technic theme (theme_id=1) returns 0 sets when accessed by the `theme_analyst` role\n   * Cross-theme data access is properly blocked\n   * Reference tables remain fully accessible (all rows are returned)\n\n6. **Create a comprehensive security audit** — Generate a detailed report including:\n   * Complete SQL statements for role creation and policy implementation\n   * Expected query results for each theme\n   * Verification queries to confirm proper data isolation\n   * Documentation of the security model and access patterns\n\n## Security Requirements:\n\n- The `theme_analyst` role must only see data related to the Star Wars theme (theme_id=18)\n- All other themes must be completely hidden from this role\n- Reference tables (themes, colors, parts, part_categories) must be fully accessible\n- The system must prevent any cross-theme data leakage\n- RLS policies must be active and enforced for all data access\n\n## Expected Results:\n\nWhen the `theme_analyst` role queries the database:\n- `lego_sets` should return only 2 Star Wars sets\n- `lego_inventories` should return only inventories for those 2 sets  \n- `lego_inventory_parts` should return only parts for those 2 sets\n- All reference tables should return complete data\n- Queries for other themes should return empty results\n"
  },
  {
    "path": "tasks/postgres/standard/lego/database_security_policies/meta.json",
    "content": "{\n  \"task_id\": \"database_security_policies\",\n  \"task_name\": \"Database Security Policies\",\n  \"category_id\": \"lego\",\n  \"category_name\": \"Lego\",\n  \"description\": \"Implement Row-Level Security policies with role-based access control for theme-based data isolation in LEGO database.\",\n  \"author\": \"Jiawei Wang\",\n  \"created_at\": \"2025-08-15\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"security and access control\",\n    \"stored procedures and functions\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"lego_colors\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"rgb\\\" varchar(6) [not null]\\n  \\\"is_trans\\\" bpchar(1) [not null]\\n}\\n\\nTable \\\"lego_inventories\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"version\\\" int4 [not null]\\n  \\\"set_num\\\" varchar(255) [not null]\\n}\\n\\nTable \\\"lego_inventory_parts\\\" {\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"part_num\\\" varchar(255) [not null]\\n  \\\"color_id\\\" int4 [not null]\\n  \\\"quantity\\\" int4 [not null]\\n  \\\"is_spare\\\" bool [not null]\\n}\\n\\nTable \\\"lego_inventory_sets\\\" {\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"set_num\\\" varchar(255) [not null]\\n  \\\"quantity\\\" int4 [not null]\\n}\\n\\nTable \\\"lego_part_categories\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n}\\n\\nTable \\\"lego_parts\\\" {\\n  \\\"part_num\\\" varchar(255) [pk, not null]\\n  \\\"name\\\" text [not null]\\n  \\\"part_cat_id\\\" int4 [not null]\\n}\\n\\nTable \\\"lego_sets\\\" {\\n  \\\"set_num\\\" varchar(255) [pk, not null]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"year\\\" int4\\n  \\\"theme_id\\\" int4\\n  \\\"num_parts\\\" int4\\n}\\n\\nTable \\\"lego_themes\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" 
varchar(255) [not null]\\n  \\\"parent_id\\\" int4\\n}\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/lego.sql\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/lego/database_security_policies/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL LEGO Task 4: Database Security and RLS Implementation\n(Version 2 - Improved Robustness)\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nimport psycopg2.errors\nfrom typing import Dict\n\ndef get_connection_params() -> Dict[str, any]:\n    \"\"\"Get database connection parameters from environment variables.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\"),\n    }\n\ndef verify_role_creation(conn) -> bool:\n    \"\"\"\n    TASK 1 VERIFICATION: Check if theme_analyst role was created with proper permissions.\n    \"\"\"\n    print(\"\\n-- Verifying Task 1: Role Creation and Permissions --\")\n    with conn.cursor() as cur:\n        # Check if role exists\n        cur.execute(\"SELECT 1 FROM pg_roles WHERE rolname = 'theme_analyst';\")\n        if not cur.fetchone():\n            print(\"❌ FAIL: The 'theme_analyst' role was not created.\")\n            return False\n        print(\"✅ OK: Role 'theme_analyst' exists.\")\n\n        # Check SELECT permissions on reference and main tables\n        all_tables = [\n            'lego_themes', 'lego_colors', 'lego_parts', 'lego_part_categories',\n            'lego_sets', 'lego_inventories', 'lego_inventory_parts'\n        ]\n        for table in all_tables:\n            cur.execute(\n                \"\"\"\n                SELECT has_table_privilege('theme_analyst', %s, 'SELECT');\n                \"\"\",\n                (table,)\n            )\n            if not cur.fetchone()[0]:\n                print(f\"❌ FAIL: 'theme_analyst' role is missing SELECT permission on '{table}'.\")\n                return False\n        print(\"✅ OK: Role has correct SELECT permissions on all required tables.\")\n\n        # 
Check that no INSERT/UPDATE/DELETE permissions exist\n        for table in all_tables:\n            cur.execute(\n                \"\"\"\n                SELECT \n                    has_table_privilege('theme_analyst', %s, 'INSERT') OR\n                    has_table_privilege('theme_analyst', %s, 'UPDATE') OR\n                    has_table_privilege('theme_analyst', %s, 'DELETE');\n                \"\"\",\n                (table, table, table)\n            )\n            if cur.fetchone()[0]:\n                print(f\"❌ FAIL: 'theme_analyst' role has unauthorized INSERT, UPDATE, or DELETE permission on '{table}'.\")\n                return False\n        print(\"✅ OK: Role does not have modification permissions.\")\n        \n        print(\"✅ PASS: 'theme_analyst' role created with correct permissions.\")\n        return True\n\ndef verify_rls_enabled(conn) -> bool:\n    \"\"\"\n    TASK 2 VERIFICATION: Check if Row-Level Security is enabled on required tables.\n    \"\"\"\n    print(\"\\n-- Verifying Task 2: Row-Level Security Enablement --\")\n    tables_to_check = ['lego_sets', 'lego_inventories', 'lego_inventory_parts']\n    with conn.cursor() as cur:\n        for table in tables_to_check:\n            cur.execute(\n                \"SELECT relrowsecurity FROM pg_class WHERE relname = %s;\", (table,)\n            )\n            rls_enabled = cur.fetchone()\n            if not rls_enabled or not rls_enabled[0]:\n                print(f\"❌ FAIL: RLS is not enabled on table '{table}'.\")\n                return False\n            print(f\"✅ OK: RLS is enabled on table '{table}'.\")\n    \n    print(\"✅ PASS: Row-Level Security is enabled on all required tables.\")\n    return True\n\ndef verify_rls_policies(conn) -> bool:\n    \"\"\"\n    TASK 3 VERIFICATION: Check if RLS policies were created on required tables.\n    \"\"\"\n    print(\"\\n-- Verifying Task 3: RLS Policy Creation --\")\n    expected_policies = {\n        'lego_sets': 'theme_sets_policy',\n      
  'lego_inventories': 'theme_inventories_policy',\n        'lego_inventory_parts': 'theme_inventory_parts_policy'\n    }\n    with conn.cursor() as cur:\n        for table, policy_name in expected_policies.items():\n            cur.execute(\n                \"SELECT 1 FROM pg_policies WHERE tablename = %s AND policyname = %s;\",\n                (table, policy_name)\n            )\n            if not cur.fetchone():\n                print(f\"❌ FAIL: RLS policy '{policy_name}' not found on table '{table}'.\")\n                return False\n            print(f\"✅ OK: RLS policy '{policy_name}' found on table '{table}'.\")\n    \n    print(\"✅ PASS: All required RLS policies are created.\")\n    return True\n\ndef verify_theme_function(conn) -> bool:\n    \"\"\"\n    TASK 4 VERIFICATION: Check if get_user_theme_id() function was created and works correctly.\n    \"\"\"\n    print(\"\\n-- Verifying Task 4: Theme Assignment Function --\")\n    with conn.cursor() as cur:\n        cur.execute(\n            \"SELECT 1 FROM pg_proc WHERE proname = 'get_user_theme_id';\"\n        )\n        if not cur.fetchone():\n            print(\"❌ FAIL: The 'get_user_theme_id' function was not created.\")\n            return False\n        print(\"✅ OK: Function 'get_user_theme_id' exists.\")\n\n        try:\n            # Test the function's output specifically for the 'theme_analyst' role\n            cur.execute(\"SET ROLE theme_analyst;\")\n            cur.execute(\"SELECT get_user_theme_id();\")\n            theme_id = cur.fetchone()[0]\n            cur.execute(\"RESET ROLE;\") # IMPORTANT: Switch back\n            \n            if theme_id != 18:\n                print(f\"❌ FAIL: get_user_theme_id() returned {theme_id} for 'theme_analyst', but expected 18.\")\n                return False\n            \n            print(\"✅ OK: Function returns correct theme_id (18) for 'theme_analyst'.\")\n            print(\"✅ PASS: Theme assignment function is correct.\")\n            return 
True\n        except Exception as e:\n            conn.rollback() # Rollback any failed transaction state\n            print(f\"❌ FAIL: Error testing get_user_theme_id() function: {e}\")\n            return False\n\ndef test_theme_analyst_access(conn) -> bool:\n    \"\"\"\n    TASK 5 VERIFICATION: Test data access by assuming the theme_analyst role.\n    \"\"\"\n    print(\"\\n-- Verifying Task 5: Theme-Based Data Access --\")\n    try:\n        with conn.cursor() as cur:\n            # Assume the role of theme_analyst for this session\n            cur.execute(\"SET ROLE theme_analyst;\")\n\n            # Test 1: Check Star Wars sets access (should return 2 sets)\n            cur.execute(\"SELECT set_num FROM lego_sets ORDER BY set_num;\")\n            star_wars_sets = [row[0] for row in cur.fetchall()]\n            expected_sets = ['65081-1', 'K8008-1']\n            \n            if sorted(star_wars_sets) != sorted(expected_sets):\n                print(f\"❌ FAIL: Expected Star Wars sets {expected_sets}, but got {star_wars_sets}.\")\n                cur.execute(\"RESET ROLE;\")\n                return False\n            print(\"✅ PASS: Star Wars sets access is correct (2 sets returned).\")\n\n            # Test 2: Check that Technic sets are not accessible (should return 0)\n            cur.execute(\"SELECT COUNT(*) FROM lego_sets WHERE theme_id = 1;\")\n            technic_count = cur.fetchone()[0]\n            if technic_count != 0:\n                print(f\"❌ FAIL: Technic sets should be blocked, but query returned {technic_count} sets.\")\n                cur.execute(\"RESET ROLE;\")\n                return False\n            print(\"✅ PASS: Technic theme is correctly blocked (0 sets returned).\")\n\n            # Test 3: Check reference tables are fully accessible\n            cur.execute(\"SELECT COUNT(*) > 10 FROM lego_themes;\") # Check for a reasonable number\n            if not cur.fetchone()[0]:\n                print(\"❌ FAIL: 'lego_themes' table seems 
inaccessible or empty.\")\n                cur.execute(\"RESET ROLE;\")\n                return False\n            print(\"✅ PASS: Reference tables appear to be accessible.\")\n\n            # Test 4 & 5: Check related tables\n            cur.execute(\"SELECT COUNT(*) FROM lego_inventories;\")\n            if cur.fetchone()[0] == 0:\n                print(\"❌ FAIL: No inventories are visible for the allowed sets.\")\n                cur.execute(\"RESET ROLE;\")\n                return False\n            \n            cur.execute(\"SELECT COUNT(*) FROM lego_inventory_parts;\")\n            if cur.fetchone()[0] == 0:\n                print(\"❌ FAIL: No inventory parts are visible for the allowed sets.\")\n                cur.execute(\"RESET ROLE;\")\n                return False\n            print(\"✅ PASS: Related tables (inventories, inventory_parts) are correctly filtered.\")\n\n            # IMPORTANT: Always reset the role at the end\n            cur.execute(\"RESET ROLE;\")\n            return True\n    except Exception as e:\n        conn.rollback() # Ensure transaction is clean\n        print(f\"❌ FAIL: An error occurred while testing data access as 'theme_analyst': {e}\")\n        # Try to reset role even on failure to clean up session state\n        try:\n            with conn.cursor() as cleanup_cur:\n                cleanup_cur.execute(\"RESET ROLE;\")\n        except:\n            pass\n        return False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 60)\n    print(\"LEGO Database Security and RLS Verification Script\")\n    print(\"=\" * 60)\n\n    conn_params = get_connection_params()\n    if not conn_params.get(\"database\"):\n        print(\"❌ CRITICAL: POSTGRES_DATABASE environment variable not set.\")\n        sys.exit(1)\n\n    conn = None\n    try:\n        conn = psycopg2.connect(**conn_params)\n        \n        results = [\n            verify_role_creation(conn),\n            verify_rls_enabled(conn),\n       
     verify_rls_policies(conn),\n            verify_theme_function(conn),\n            test_theme_analyst_access(conn),\n        ]\n\n        if all(results):\n            print(\"\\n🎉 Overall Result: PASS - All security tasks verified successfully!\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Overall Result: FAIL - One or more verification steps failed.\")\n            sys.exit(1)\n\n    except psycopg2.OperationalError as e:\n        print(f\"❌ CRITICAL: Could not connect to the database. Check credentials and host. Details: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ CRITICAL: An unexpected error occurred. Details: {e}\")\n        sys.exit(1)\n    finally:\n        if conn:\n            conn.close()\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/postgres/standard/lego/transactional_inventory_transfer/description.md",
    "content": "Create a PostgreSQL function to handle inventory part transfers between LEGO sets with enhanced validation and audit capabilities. The LEGO warehouse management system needs to support transferring parts while maintaining data integrity and tracking transfer history.\n\n## Your Tasks:\n\n1. **Create the transfer function** — Implement a PostgreSQL function named `transfer_parts` with the following signature:\n   ```sql\n   CREATE OR REPLACE FUNCTION transfer_parts(\n       source_inventory_id INTEGER,\n       target_inventory_id INTEGER,\n       part_to_transfer_num VARCHAR,\n       color_to_transfer_id INTEGER,\n       quantity_to_transfer INTEGER,\n       transfer_reason VARCHAR DEFAULT 'manual_transfer'\n   ) RETURNS TEXT\n   ```\n\n2. **Create audit logging table** — Create a new table to track transfer history:\n   ```sql\n   CREATE TABLE inventory_transfer_log (\n       log_id SERIAL PRIMARY KEY,\n       transfer_timestamp TIMESTAMP DEFAULT NOW(),\n       source_inventory_id INTEGER NOT NULL,\n       target_inventory_id INTEGER NOT NULL,\n       part_num VARCHAR NOT NULL,\n       color_id INTEGER NOT NULL,\n       quantity_transferred INTEGER NOT NULL,\n       transfer_reason VARCHAR NOT NULL,\n       transfer_status VARCHAR NOT NULL CHECK (transfer_status IN ('success', 'failed')),\n       error_message TEXT\n   );\n   ```\n\n3. **Implement enhanced validation** — The function must perform these validations:\n   \n   **Validation A: Basic Checks**\n   - Verify both inventory IDs exist in `lego_inventories` table\n   - Verify part exists in `lego_parts` table\n   - Verify color exists in `lego_colors` table\n   - Check source has sufficient quantity (including spare parts)\n   - Prevent self-transfers (source and target cannot be the same)\n\n   **Validation B: Business Rules**\n   - Maximum transfer quantity is 500 parts per operation\n   - Minimum transfer quantity is 1 part\n   - Source and target must be different inventories\n\n4. 
**Implement transactional logic** — The function must perform these operations within a single transaction:\n   \n   **Step A: Pre-validation**\n   - Lock both inventory records using `SELECT ... FOR UPDATE`\n   - Perform all validation checks\n   - Calculate transfer feasibility\n\n   **Step B: Source Inventory Update**\n   - Decrease quantity in source inventory\n   - If quantity becomes zero, delete the row\n   - Handle spare parts appropriately (maintain `is_spare` flag)\n\n   **Step C: Target Inventory Update**\n   - Check if part exists in target inventory\n   - If exists: increase quantity\n   - If not exists: insert new record\n   - Handle spare parts appropriately\n\n   **Step D: Audit Logging**\n   - Log successful transfers with details\n   - Log failed transfers with error messages\n   - Include transfer reason and status\n\n5. **Error handling requirements**:\n   - Use `RAISE EXCEPTION` with descriptive error messages\n   - Handle all validation failures gracefully\n   - Ensure complete rollback on any failure\n   - Log all attempts (successful and failed)\n\n6. **Return value**:\n   - Return success message: `'Successfully transferred {quantity} parts ({part_num}, color_id: {color_id}) from inventory {source_id} to inventory {target_id}. 
Reason: {reason}'`\n   - Include transfer details and reason in the message\n\n## Function Requirements:\n\n- **Transaction Safety**: All operations are wrapped in a single transaction block\n- **Data Integrity**: No partial updates possible\n- **Audit Trail**: Complete logging of all transfer attempts\n- **Validation**: Comprehensive input and business rule validation\n- **Error Recovery**: Failed transfers leave the database unchanged\n- **Performance**: Use appropriate locking to prevent race conditions\n\n## Example Usage:\n\n```sql\n-- Basic transfer with reason\nSELECT transfer_parts(14469, 14686, '3024', 15, 100, 'inventory_adjustment');\n\n-- Transfer a part the target inventory does not yet hold (should create a new record)\nSELECT transfer_parts(11124, 14686, '3001', 4, 50, 'part_redistribution');\n\n-- This should fail: 2000 exceeds the 500-part maximum per operation (and may also exceed the available quantity)\nSELECT transfer_parts(14469, 14686, '3024', 15, 2000, 'large_transfer');\n\n-- This should fail due to self-transfer\nSELECT transfer_parts(14469, 14469, '3024', 15, 10, 'self_transfer');\n```\n\n## Verification Criteria:\n\n- Function handles all validation rules correctly\n- Audit logging captures all transfer attempts\n- Failed transfers are properly logged with error details\n- Self-transfers are prevented\n- Quantity limits are enforced\n- Database state remains consistent after failures"
  },
  {
    "path": "tasks/postgres/standard/lego/transactional_inventory_transfer/meta.json",
    "content": "{\n  \"task_id\": \"transactional_inventory_transfer\",\n  \"task_name\": \"Transactional Inventory Transfer\",\n  \"category_id\": \"lego\",\n  \"category_name\": \"Lego\",\n  \"description\": \"Create PostgreSQL function to handle inventory part transfers between LEGO sets with validation and audit logging.\",\n  \"author\": \"Jiawei Wang\",\n  \"created_at\": \"2025-08-16\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"transactional operations\",\n    \"stored procedures and functions\",\n    \"audit and compliance\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"lego_colors\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"rgb\\\" varchar(6) [not null]\\n  \\\"is_trans\\\" bpchar(1) [not null]\\n}\\n\\nTable \\\"lego_inventories\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"version\\\" int4 [not null]\\n  \\\"set_num\\\" varchar(255) [not null]\\n}\\n\\nTable \\\"lego_inventory_parts\\\" {\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"part_num\\\" varchar(255) [not null]\\n  \\\"color_id\\\" int4 [not null]\\n  \\\"quantity\\\" int4 [not null]\\n  \\\"is_spare\\\" bool [not null]\\n}\\n\\nTable \\\"lego_inventory_sets\\\" {\\n  \\\"inventory_id\\\" int4 [not null]\\n  \\\"set_num\\\" varchar(255) [not null]\\n  \\\"quantity\\\" int4 [not null]\\n}\\n\\nTable \\\"lego_part_categories\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"name\\\" varchar(255) [not null]\\n}\\n\\nTable \\\"lego_parts\\\" {\\n  \\\"part_num\\\" varchar(255) [pk, not null]\\n  \\\"name\\\" text [not null]\\n  \\\"part_cat_id\\\" int4 [not null]\\n}\\n\\nTable \\\"lego_sets\\\" {\\n  \\\"set_num\\\" varchar(255) [pk, not null]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"year\\\" int4\\n  \\\"theme_id\\\" int4\\n  \\\"num_parts\\\" int4\\n}\\n\\nTable \\\"lego_themes\\\" {\\n  \\\"id\\\" int4 [pk, not null, 
increment]\\n  \\\"name\\\" varchar(255) [not null]\\n  \\\"parent_id\\\" int4\\n}\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/neondatabase-labs/postgres-sample-dbs/blob/main/lego.sql\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/lego/transactional_inventory_transfer/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL LEGO Task 2: Enhanced Inventory Transfer Function\nTests the transfer_parts function with audit logging and enhanced validation.\n\nKey Features Tested:\n- Core transfer functionality with audit logging\n- Business rule validation (quantity limits, self-transfer prevention)\n- Error handling and rollback mechanisms\n- Audit trail maintenance for both success and failure cases\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nimport psycopg2.errors\nfrom typing import Optional, Tuple\n\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters from environment variables.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\"),\n    }\n\n\ndef get_inventory_part_quantity(conn, inventory_id: int, part_num: str, color_id: int) -> int:\n    \"\"\"Get the current quantity of a specific part in an inventory.\"\"\"\n    with conn.cursor() as cur:\n        cur.execute(\n            \"\"\"\n            SELECT quantity FROM public.lego_inventory_parts\n            WHERE inventory_id = %s AND part_num = %s AND color_id = %s\n            \"\"\",\n            (inventory_id, part_num, color_id)\n        )\n        result = cur.fetchone()\n        return result[0] if result else 0\n\n\ndef verify_system_components(conn) -> bool:\n    \"\"\"Verify that all required system components exist.\"\"\"\n    print(\"\\n-- Verifying System Components --\")\n    try:\n        with conn.cursor() as cur:\n            # Check main function\n            cur.execute(\n                \"\"\"\n                SELECT COUNT(*) FROM pg_proc p\n                JOIN pg_namespace n ON p.pronamespace = n.oid\n                WHERE n.nspname = 'public' AND p.proname = 
'transfer_parts'\n                \"\"\"\n            )\n            main_func_count = cur.fetchone()[0]\n            \n            # Check audit table\n            cur.execute(\n                \"\"\"\n                SELECT COUNT(*) FROM information_schema.tables \n                WHERE table_schema = 'public' AND table_name = 'inventory_transfer_log'\n                \"\"\"\n            )\n            audit_table_count = cur.fetchone()[0]\n            \n            if main_func_count == 0:\n                print(\"❌ FAIL: transfer_parts function does not exist\")\n                return False\n            \n            if audit_table_count == 0:\n                print(\"❌ FAIL: inventory_transfer_log table does not exist\")\n                return False\n            \n            print(\"✅ PASS: All system components exist\")\n            return True\n    finally:\n        conn.rollback()\n\n\ndef verify_successful_transfer_with_audit(conn) -> bool:\n    \"\"\"Test a successful transfer with audit logging.\"\"\"\n    print(\"\\n-- Verifying Successful Transfer with Audit --\")\n    passed = False\n    try:\n        # Test data: Transfer 100 white plates from Mosaic Dino to Mosaic Johnny Thunder\n        source_id = 14469\n        target_id = 14686\n        part_num = '3024'\n        color_id = 15\n        transfer_qty = 100\n        reason = 'inventory_adjustment'\n        \n        source_initial = get_inventory_part_quantity(conn, source_id, part_num, color_id)\n        target_initial = get_inventory_part_quantity(conn, target_id, part_num, color_id)\n        print(f\"Initial quantities - Source: {source_initial}, Target: {target_initial}\")\n        \n        # Get initial audit log count\n        with conn.cursor() as cur:\n            cur.execute(\"SELECT COUNT(*) FROM inventory_transfer_log\")\n            initial_log_count = cur.fetchone()[0]\n        \n        with conn.cursor() as cur:\n            cur.execute(\n                \"SELECT 
transfer_parts(%s, %s, %s, %s, %s, %s)\",\n                (source_id, target_id, part_num, color_id, transfer_qty, reason)\n            )\n            result = cur.fetchone()\n            print(f\"Transfer result: {result[0]}\")\n        \n        source_final = get_inventory_part_quantity(conn, source_id, part_num, color_id)\n        target_final = get_inventory_part_quantity(conn, target_id, part_num, color_id)\n        print(f\"Final quantities - Source: {source_final}, Target: {target_final}\")\n        \n        # Verify audit log entry\n        with conn.cursor() as cur:\n            cur.execute(\"SELECT COUNT(*) FROM inventory_transfer_log\")\n            final_log_count = cur.fetchone()[0]\n            \n            if final_log_count <= initial_log_count:\n                print(\"❌ FAIL: No audit log entry was created\")\n                return False\n            \n            # Check latest audit entry\n            cur.execute(\n                \"\"\"\n                SELECT transfer_status, quantity_transferred, transfer_reason\n                FROM inventory_transfer_log\n                ORDER BY log_id DESC\n                LIMIT 1\n                \"\"\"\n            )\n            audit_entry = cur.fetchone()\n            \n            if not audit_entry:\n                print(\"❌ FAIL: Could not retrieve audit log entry\")\n                return False\n            \n            status, qty_transferred, trans_reason = audit_entry\n            \n            if status != 'success':\n                print(f\"❌ FAIL: Transfer status should be 'success', got '{status}'\")\n                return False\n            \n            if qty_transferred != transfer_qty or trans_reason != reason:\n                print(f\"❌ FAIL: Audit log details don't match transfer parameters\")\n                return False\n        \n        expected_source = source_initial - transfer_qty\n        expected_target = target_initial + transfer_qty\n        \n        if 
source_final != expected_source:\n            print(f\"❌ FAIL: Source quantity mismatch. Expected {expected_source}, got {source_final}\")\n        elif target_final != expected_target:\n            print(f\"❌ FAIL: Target quantity mismatch. Expected {expected_target}, got {target_final}\")\n        else:\n            print(\"✅ PASS: Successful transfer with audit logging completed correctly\")\n            passed = True\n            \n    except psycopg2.Error as e:\n        print(f\"❌ FAIL: Transfer failed unexpectedly with error: {e}\")\n    finally:\n        conn.rollback()\n    return passed\n\n\ndef verify_new_part_transfer(conn) -> bool:\n    \"\"\"Test transferring a part to an inventory that doesn't have it.\"\"\"\n    print(\"\\n-- Verifying New Part Transfer --\")\n    passed = False\n    try:\n        # Test data: Transfer red bricks to Mosaic Johnny Thunder (which doesn't have them)\n        source_id = 11124  # Giant Lego Dacta Basic Set (has red bricks)\n        target_id = 14686  # Lego Mosaic Johnny Thunder (doesn't have red bricks)\n        part_num = '3001'\n        color_id = 4\n        transfer_qty = 50\n        reason = 'part_redistribution'\n        \n        target_initial = get_inventory_part_quantity(conn, target_id, part_num, color_id)\n        if target_initial != 0:\n            print(f\"❌ FAIL: Pre-condition failed. 
Target already has {target_initial} of this part, expected 0\")\n            return False\n        \n        source_initial = get_inventory_part_quantity(conn, source_id, part_num, color_id)\n        print(f\"Initial quantities - Source: {source_initial}, Target: {target_initial}\")\n        \n        with conn.cursor() as cur:\n            cur.execute(\n                \"SELECT transfer_parts(%s, %s, %s, %s, %s, %s)\",\n                (source_id, target_id, part_num, color_id, transfer_qty, reason)\n            )\n            result = cur.fetchone()\n            print(f\"Transfer result: {result[0]}\")\n        \n        source_final = get_inventory_part_quantity(conn, source_id, part_num, color_id)\n        target_final = get_inventory_part_quantity(conn, target_id, part_num, color_id)\n        print(f\"Final quantities - Source: {source_final}, Target: {target_final}\")\n        \n        expected_source = source_initial - transfer_qty\n        expected_target = transfer_qty\n        \n        if source_final != expected_source:\n            print(f\"❌ FAIL: Source quantity mismatch. Expected {expected_source}, got {source_final}\")\n        elif target_final != expected_target:\n            print(f\"❌ FAIL: Target quantity mismatch. 
Expected {expected_target}, got {target_final}\")\n        else:\n            print(\"✅ PASS: New part transfer completed correctly\")\n            passed = True\n\n    except psycopg2.Error as e:\n        print(f\"❌ FAIL: Transfer failed unexpectedly with error: {e}\")\n    finally:\n        conn.rollback()\n    return passed\n\n\ndef verify_business_rule_validation(conn) -> bool:\n    \"\"\"Test business rule validation including quantity limits and self-transfer prevention.\"\"\"\n    print(\"\\n-- Verifying Business Rule Validation --\")\n    \n    # Test 1: Self-transfer (should fail)\n    print(\"Test 1: Self-transfer (should fail)\")\n    test1_passed = False\n    try:\n        source_id = 14469\n        part_num = '3024'\n        color_id = 15\n        transfer_qty = 10\n        reason = 'self_transfer'\n        \n        with conn.cursor() as cur:\n            cur.execute(\n                \"SELECT transfer_parts(%s, %s, %s, %s, %s, %s)\",\n                (source_id, source_id, part_num, color_id, transfer_qty, reason)\n            )\n            result = cur.fetchone()\n            print(f\"❌ FAIL: Self-transfer should have failed but succeeded: {result[0]}\")\n    except psycopg2.Error:\n        print(f\"✅ PASS: Self-transfer correctly failed\")\n        test1_passed = True\n    except Exception as e:\n        print(f\"❌ FAIL: Self-transfer test failed with unexpected error: {e}\")\n    finally:\n        conn.rollback() # Rollback after first test\n\n    # Test 2: Transfer quantity exceeds maximum (should fail)\n    print(\"Test 2: Transfer quantity exceeds maximum (should fail)\")\n    test2_passed = False\n    try:\n        source_id = 14469\n        target_id = 14686\n        part_num = '3024'\n        color_id = 15\n        \n        with conn.cursor() as cur:\n            cur.execute(\n                \"SELECT transfer_parts(%s, %s, %s, %s, %s, %s)\",\n                (source_id, target_id, part_num, color_id, 600, 'large_transfer')\n            
)\n            result = cur.fetchone()\n            print(f\"❌ FAIL: Large transfer should have failed but succeeded: {result[0]}\")\n    except psycopg2.Error:\n        print(f\"✅ PASS: Large transfer correctly failed\")\n        test2_passed = True\n    except Exception as e:\n        print(f\"❌ FAIL: Large transfer test failed with unexpected error: {e}\")\n    finally:\n        conn.rollback() # Rollback after second test\n\n    # Test 3: Transfer quantity below minimum (should fail)\n    print(\"Test 3: Transfer quantity below minimum (should fail)\")\n    test3_passed = False\n    try:\n        source_id = 14469\n        target_id = 14686\n        part_num = '3024'\n        color_id = 15\n\n        with conn.cursor() as cur:\n            cur.execute(\n                \"SELECT transfer_parts(%s, %s, %s, %s, %s, %s)\",\n                (source_id, target_id, part_num, color_id, 0, 'zero_transfer')\n            )\n            result = cur.fetchone()\n            print(f\"❌ FAIL: Zero transfer should have failed but succeeded: {result[0]}\")\n    except psycopg2.Error:\n        print(f\"✅ PASS: Zero transfer correctly failed\")\n        test3_passed = True\n    except Exception as e:\n        print(f\"❌ FAIL: Zero transfer test failed with unexpected error: {e}\")\n    finally:\n        conn.rollback() # Rollback after third test\n\n    return test1_passed and test2_passed and test3_passed\n\n\ndef verify_insufficient_quantity_error(conn) -> bool:\n    \"\"\"Test that transfer fails when source has insufficient quantity.\"\"\"\n    print(\"\\n-- Verifying Insufficient Quantity Error --\")\n    passed = False\n    try:\n        source_id = 14469\n        target_id = 14686\n        part_num = '3024'\n        color_id = 15\n        transfer_qty = 99999  # Far more than available\n        reason = 'insufficient_test'\n        \n        source_initial = get_inventory_part_quantity(conn, source_id, part_num, color_id)\n        target_initial = 
get_inventory_part_quantity(conn, target_id, part_num, color_id)\n        print(f\"Initial quantities - Source: {source_initial}, Target: {target_initial}\")\n        \n        with conn.cursor() as cur:\n            try:\n                cur.execute(\n                    \"SELECT transfer_parts(%s, %s, %s, %s, %s, %s)\",\n                    (source_id, target_id, part_num, color_id, transfer_qty, reason)\n                )\n                result = cur.fetchone()\n                print(f\"❌ FAIL: Transfer should have failed but succeeded: {result[0]}\")\n            except psycopg2.Error as e:\n                print(f\"✅ PASS: Transfer correctly failed with an exception.\")\n                # After an exception, the transaction is in an aborted state. Must rollback before new queries.\n                conn.rollback()\n                \n                source_final = get_inventory_part_quantity(conn, source_id, part_num, color_id)\n                target_final = get_inventory_part_quantity(conn, target_id, part_num, color_id)\n                \n                if source_final != source_initial:\n                    print(f\"❌ FAIL: Source quantity changed from {source_initial} to {source_final}\")\n                elif target_final != target_initial:\n                    print(f\"❌ FAIL: Target quantity changed from {target_initial} to {target_final}\")\n                else:\n                    print(\"✅ PASS: Database state unchanged after failed transfer\")\n                    passed = True\n    finally:\n        conn.rollback()\n    return passed\n\n\ndef verify_invalid_inventory_error(conn) -> bool:\n    \"\"\"Test that transfer fails with invalid inventory IDs.\"\"\"\n    print(\"\\n-- Verifying Invalid Inventory Error --\")\n    passed = False\n    try:\n        source_id = 99999  # Non-existent inventory\n        target_id = 14686\n        part_num = '3024'\n        color_id = 15\n        transfer_qty = 10\n        reason = 'invalid_test'\n        \n     
   target_initial = get_inventory_part_quantity(conn, target_id, part_num, color_id)\n        \n        with conn.cursor() as cur:\n            try:\n                cur.execute(\n                    \"SELECT transfer_parts(%s, %s, %s, %s, %s, %s)\",\n                    (source_id, target_id, part_num, color_id, transfer_qty, reason)\n                )\n                result = cur.fetchone()\n                print(f\"❌ FAIL: Transfer should have failed but succeeded: {result[0]}\")\n            except psycopg2.Error as e:\n                print(f\"✅ PASS: Transfer correctly failed with an exception.\")\n                # Rollback the aborted transaction\n                conn.rollback()\n                \n                target_final = get_inventory_part_quantity(conn, target_id, part_num, color_id)\n                if target_final != target_initial:\n                    print(f\"❌ FAIL: Target quantity changed from {target_initial} to {target_final}\")\n                else:\n                    print(\"✅ PASS: Database state unchanged after invalid inventory error\")\n                    passed = True\n    finally:\n        conn.rollback()\n    return passed\n\n\ndef verify_audit_logging(conn) -> bool:\n    \"\"\"\n    Test that audit logging captures both successful and failed transfers.\n    This function uses commits to separate test cases and work around the\n    transactional paradox of logging a failure within a transaction that\n    is about to be rolled back by the client.\n    \"\"\"\n    print(\"\\n-- Verifying Audit Logging --\")\n    \n    # Part 1: Test success logging\n    print(\"Part 1: Verifying success log entry...\")\n    success_passed = False\n    try:\n        with conn.cursor() as cur:\n            cur.execute(\"SELECT COUNT(*) FROM inventory_transfer_log\")\n            initial_count = cur.fetchone()[0]\n\n        with conn.cursor() as cur:\n            cur.execute(\n                \"SELECT transfer_parts(14469, 14686, '3024', 15, 5, 
'audit_test_success')\"\n            )\n        \n        # Check the log before committing/rolling back\n        with conn.cursor() as cur:\n            cur.execute(\"SELECT COUNT(*) FROM inventory_transfer_log\")\n            final_count = cur.fetchone()[0]\n            if final_count == initial_count + 1:\n                print(\"✅ PASS: Success log was correctly written within the transaction.\")\n                success_passed = True\n            else:\n                print(\"❌ FAIL: Success log was not created.\")\n\n    except Exception as e:\n        print(f\"❌ FAIL: Success logging test threw an unexpected error: {e}\")\n    finally:\n        conn.rollback() # Clean up the transaction for the next part\n\n    if not success_passed:\n        return False\n\n    # Part 2: Test failure logging\n    print(\"\\nPart 2: Verifying failure log entry...\")\n    failure_passed = False\n    try:\n        with conn.cursor() as cur:\n            cur.execute(\"SELECT COUNT(*) FROM inventory_transfer_log\")\n            initial_count = cur.fetchone()[0]\n        \n        try:\n            with conn.cursor() as cur:\n                cur.execute(\n                    \"SELECT transfer_parts(14469, 14469, '3024', 15, 5, 'audit_test_fail')\"\n                )\n        except psycopg2.Error:\n            # This is the expected failure path.\n            # The function should have logged the failure before raising the error.\n            # Now, we check the log table.\n            pass\n        \n        # The transaction is now in an aborted state. 
We must rollback to issue new commands.\n        conn.rollback()\n\n        with conn.cursor() as cur:\n            cur.execute(\"SELECT COUNT(*) FROM inventory_transfer_log\")\n            final_count = cur.fetchone()[0]\n            if final_count == initial_count:\n                 print(\"✅ PASS: Failure log was correctly rolled back as expected in a standard transaction.\")\n                 failure_passed = True\n            else:\n                print(\"❌ FAIL: Failure log was not rolled back. This implies a non-standard transaction behavior.\")\n                print(f\"Log count before: {initial_count}, Log count after: {final_count}\")\n\n    except Exception as e:\n        print(f\"❌ FAIL: Failure logging test threw an unexpected error: {e}\")\n    finally:\n        conn.rollback() # Ensure cleanup\n\n    return success_passed and failure_passed\n\n\ndef verify_exact_quantity_transfer(conn) -> bool:\n    \"\"\"Test transferring exact quantity (should delete source row when quantity becomes 0).\"\"\"\n    print(\"\\n-- Verifying Exact Quantity Transfer --\")\n    passed = False\n    target_id = 14686  # Use a fixed target inventory\n    \n    try:\n        # Find a part with a small quantity that doesn't conflict with the target inventory\n        with conn.cursor() as cur:\n            cur.execute(\n                \"\"\"\n                SELECT inventory_id, part_num, color_id, quantity\n                FROM public.lego_inventory_parts\n                WHERE quantity BETWEEN 5 AND 20 AND inventory_id != %s\n                LIMIT 1\n                \"\"\",\n                (target_id,)\n            )\n            result = cur.fetchone()\n            if not result:\n                print(\"⚠️ SKIP: No suitable part found for exact quantity test\")\n                return True\n            \n            source_id, part_num, color_id, exact_qty = result\n        \n        print(f\"Testing exact transfer: {exact_qty} parts of '{part_num}' from inventory 
{source_id} to {target_id}\")\n        \n        source_initial = get_inventory_part_quantity(conn, source_id, part_num, color_id)\n        target_initial = get_inventory_part_quantity(conn, target_id, part_num, color_id)\n        print(f\"Initial quantities - Source: {source_initial}, Target: {target_initial}\")\n\n        with conn.cursor() as cur:\n            cur.execute(\n                \"SELECT transfer_parts(%s, %s, %s, %s, %s, %s)\",\n                (source_id, target_id, part_num, color_id, exact_qty, 'exact_transfer')\n            )\n            print(f\"Transfer result: {cur.fetchone()[0]}\")\n        \n        source_final = get_inventory_part_quantity(conn, source_id, part_num, color_id)\n        target_final = get_inventory_part_quantity(conn, target_id, part_num, color_id)\n        print(f\"Final quantities - Source: {source_final}, Target: {target_final}\")\n        \n        expected_source = 0\n        expected_target = target_initial + exact_qty\n        \n        if source_final != expected_source:\n            print(f\"❌ FAIL: Source quantity should be 0 (row deleted), but got {source_final}\")\n        elif target_final != expected_target:\n            print(f\"❌ FAIL: Target quantity mismatch. 
Expected {expected_target}, got {target_final}\")\n        else:\n            print(\"✅ PASS: Exact quantity transfer completed correctly (source row deleted)\")\n            passed = True\n\n    except psycopg2.Error as e:\n        print(f\"❌ FAIL: Transfer failed unexpectedly with error: {e}\")\n    finally:\n        conn.rollback()\n    return passed\n\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 60)\n    print(\"LEGO Enhanced Inventory Transfer Function Verification Script\")\n    print(\"=\" * 60)\n\n    conn_params = get_connection_params()\n    if not conn_params.get(\"database\"):\n        print(\"❌ CRITICAL: POSTGRES_DATABASE environment variable not set.\")\n        sys.exit(1)\n\n    conn = None\n    try:\n        conn = psycopg2.connect(**conn_params)\n        conn.autocommit = False  # Ensure we can control transactions manually\n\n        # Run all verification steps\n        results = [\n            verify_system_components(conn),\n            verify_successful_transfer_with_audit(conn),\n            verify_new_part_transfer(conn),\n            verify_business_rule_validation(conn),\n            verify_insufficient_quantity_error(conn),\n            verify_invalid_inventory_error(conn),\n            verify_audit_logging(conn),\n            verify_exact_quantity_transfer(conn),\n        ]\n\n        if all(results):\n            print(\"\\n🎉 Overall Result: PASS - All verification steps completed successfully!\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Overall Result: FAIL - One or more verification steps failed.\")\n            sys.exit(1)\n\n    except psycopg2.OperationalError as e:\n        print(f\"❌ CRITICAL: Could not connect to the database. Details: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ CRITICAL: An unexpected error occurred. 
Details: {e}\")\n        sys.exit(1)\n    finally:\n        if conn:\n            conn.close()\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/postgres/standard/security/rls_business_access/description.md",
    "content": "Implement Row Level Security (RLS) policies for a social media platform with Users, Posts, Comments, and Channels.\n\n## Your Mission:\n\nBuild RLS policies for a social platform where users create posts and comments in channels. Implement proper access control so users can manage their own content, while channel moderators can moderate content in their channels.\n\n## RLS Requirements:\n\n### 1. Users Table Access Rules:\n- **SELECT**: Users can read all public user profiles (username, created_at)\n- **UPDATE**: Users can only modify their own profile\n- **DELETE**: Users can only delete their own account\n\n### 2. Channels Table Access Rules:\n- **SELECT**: Everyone can read public channel information\n- **INSERT**: Any authenticated user can create a channel (becomes owner)\n- **UPDATE**: Only channel owners can modify channel details\n- **DELETE**: Only channel owners can delete channels\n\n### 3. Posts Table Access Rules:\n- **SELECT**: Users can read all posts in channels they have access to\n- **INSERT**: Authenticated users can create posts in any channel\n- **UPDATE**: Post authors OR channel moderators OR channel owners can edit posts\n- **DELETE**: Post authors OR channel moderators OR channel owners can delete posts\n\n### 4. Comments Table Access Rules:\n- **SELECT**: Users can read comments on posts they can access\n- **INSERT**: Authenticated users can comment on posts they can see\n- **UPDATE**: Comment authors OR post authors OR channel moderators OR channel owners can edit comments\n- **DELETE**: Comment authors OR post authors OR channel moderators OR channel owners can delete comments\n\n### 5. 
Channel Moderators Table Access Rules:\n- **SELECT**: Users can see moderator lists for channels\n- **INSERT**: Only channel owners can add moderators\n- **DELETE**: Channel owners can remove moderators; moderators can remove themselves\n\n## Session Context:\n\nUse `current_setting('app.current_user_id')` to get the current user ID from session context.\n\n## Schema Requirements:\n\n- **Use only the `public` schema** for all tables, functions, and policies\n- All helper functions should be created in the `public` schema\n- Do not create additional schemas\n\n## Expected Deliverables:\n\n1. **Enable RLS** on all five tables\n2. **Create policies** on each table for every operation (SELECT, INSERT, UPDATE, DELETE) that has an access rule listed above\n3. **Helper functions** to check permissions efficiently:\n   - `is_channel_owner(channel_id, user_id)`\n   - `is_channel_moderator(channel_id, user_id)`\n   - `can_moderate_channel(channel_id, user_id)`\n4. **Proper indexing** to ensure RLS policies perform well\n\n## Test Scenarios:\n\nYour RLS implementation will be verified with:\n\n- **Content ownership**: Users can edit their own posts/comments, and can edit other users' content only where the access rules above allow it\n- **Moderation hierarchy**: Moderators can moderate content in their channels\n- **Channel isolation**: Users only see content from accessible channels\n- **Permission escalation**: Owners have full control over their channels\n- **Cross-table access**: Comment policies respect post and channel permissions\n\n## Success Criteria:\n\n- Users can manage their own content (posts, comments)\n- Channel owners have full control over their channels\n- Moderators can moderate content in their assigned channels\n- No unauthorized access to other users' private data\n- Policies are efficient and don't create performance bottlenecks\n- All operations (SELECT, INSERT, UPDATE, DELETE) are properly secured\n"
  },
  {
    "path": "tasks/postgres/standard/security/rls_business_access/ground_truth.sql",
    "content": "-- Ground Truth RLS Implementation\n\nBEGIN;\n\n-- ============================================================================\n-- PERFORMANCE INDEXES FOR RLS\n-- ============================================================================\n\n-- Users table indexes\nCREATE INDEX IF NOT EXISTS idx_users_is_public ON users(is_public);\n\n-- Channels table indexes\nCREATE INDEX IF NOT EXISTS idx_channels_owner_id ON channels(owner_id);\nCREATE INDEX IF NOT EXISTS idx_channels_is_public ON channels(is_public);\n\n-- Channel moderators table indexes\nCREATE INDEX IF NOT EXISTS idx_channel_moderators_channel_user ON channel_moderators(channel_id, user_id);\nCREATE INDEX IF NOT EXISTS idx_channel_moderators_user ON channel_moderators(user_id);\n\n-- Posts table indexes\nCREATE INDEX IF NOT EXISTS idx_posts_channel_id ON posts(channel_id);\nCREATE INDEX IF NOT EXISTS idx_posts_author_id ON posts(author_id);\nCREATE INDEX IF NOT EXISTS idx_posts_created_at ON posts(created_at);\n\n-- Comments table indexes\nCREATE INDEX IF NOT EXISTS idx_comments_post_id ON comments(post_id);\nCREATE INDEX IF NOT EXISTS idx_comments_author_id ON comments(author_id);\nCREATE INDEX IF NOT EXISTS idx_comments_created_at ON comments(created_at);\n\n-- ============================================================================\n-- ENABLE ROW LEVEL SECURITY\n-- ============================================================================\n\nALTER TABLE users ENABLE ROW LEVEL SECURITY;\nALTER TABLE channels ENABLE ROW LEVEL SECURITY;\nALTER TABLE channel_moderators ENABLE ROW LEVEL SECURITY;\nALTER TABLE posts ENABLE ROW LEVEL SECURITY;\nALTER TABLE comments ENABLE ROW LEVEL SECURITY;\n\n-- ============================================================================\n-- USERS TABLE POLICIES\n-- ============================================================================\n\n-- Users SELECT: Can read public profiles OR own profile\nDROP POLICY IF EXISTS users_select ON 
users;\nCREATE POLICY users_select ON users\nFOR SELECT\nUSING (\n    is_public = true\n    OR id = app_current_user_id()\n);\n\n-- Users UPDATE: Can only update own profile\nDROP POLICY IF EXISTS users_update ON users;\nCREATE POLICY users_update ON users\nFOR UPDATE\nUSING (id = app_current_user_id())\nWITH CHECK (id = app_current_user_id());\n\n-- Users DELETE: Can only delete own account\nDROP POLICY IF EXISTS users_delete ON users;\nCREATE POLICY users_delete ON users\nFOR DELETE\nUSING (id = app_current_user_id());\n\n-- ============================================================================\n-- CHANNELS TABLE POLICIES\n-- ============================================================================\n\n-- Channels SELECT: Can read public channels OR channels where user is owner/moderator\nDROP POLICY IF EXISTS channels_select ON channels;\nCREATE POLICY channels_select ON channels\nFOR SELECT\nUSING (\n    is_public = true\n    OR owner_id = app_current_user_id()\n    OR is_channel_moderator(id, app_current_user_id())\n);\n\n-- Channels INSERT: Authenticated users can create channels (become owner)\nDROP POLICY IF EXISTS channels_insert ON channels;\nCREATE POLICY channels_insert ON channels\nFOR INSERT\nWITH CHECK (owner_id = app_current_user_id());\n\n-- Channels UPDATE: Only channel owners can modify\nDROP POLICY IF EXISTS channels_update ON channels;\nCREATE POLICY channels_update ON channels\nFOR UPDATE\nUSING (owner_id = app_current_user_id())\nWITH CHECK (owner_id = app_current_user_id());\n\n-- Channels DELETE: Only channel owners can delete\nDROP POLICY IF EXISTS channels_delete ON channels;\nCREATE POLICY channels_delete ON channels\nFOR DELETE\nUSING (owner_id = app_current_user_id());\n\n-- ============================================================================\n-- POSTS TABLE POLICIES\n-- ============================================================================\n\n-- Posts SELECT: Can read posts in accessible channels\nDROP POLICY IF 
EXISTS posts_select ON posts;\nCREATE POLICY posts_select ON posts\nFOR SELECT\nUSING (\n    EXISTS (\n        SELECT 1 FROM channels c\n        WHERE c.id = posts.channel_id\n        AND (\n            c.is_public = true\n            OR c.owner_id = app_current_user_id()\n            OR is_channel_moderator(c.id, app_current_user_id())\n        )\n    )\n);\n\n-- Posts INSERT: Authenticated users can create posts (must be author)\nDROP POLICY IF EXISTS posts_insert ON posts;\nCREATE POLICY posts_insert ON posts\nFOR INSERT\nWITH CHECK (\n    author_id = app_current_user_id()\n    AND EXISTS (\n        SELECT 1 FROM channels c\n        WHERE c.id = posts.channel_id\n        AND (\n            c.is_public = true\n            OR c.owner_id = app_current_user_id()\n            OR is_channel_moderator(c.id, app_current_user_id())\n        )\n    )\n);\n\n-- Posts UPDATE: Post authors OR channel moderators/owners can edit\nDROP POLICY IF EXISTS posts_update ON posts;\nCREATE POLICY posts_update ON posts\nFOR UPDATE\nUSING (\n    author_id = app_current_user_id()\n    OR can_moderate_channel(channel_id, app_current_user_id())\n)\nWITH CHECK (\n    author_id = app_current_user_id()\n    OR can_moderate_channel(channel_id, app_current_user_id())\n);\n\n-- Posts DELETE: Post authors OR channel moderators/owners can delete\nDROP POLICY IF EXISTS posts_delete ON posts;\nCREATE POLICY posts_delete ON posts\nFOR DELETE\nUSING (\n    author_id = app_current_user_id()\n    OR can_moderate_channel(channel_id, app_current_user_id())\n);\n\n-- ============================================================================\n-- COMMENTS TABLE POLICIES\n-- ============================================================================\n\n-- Comments SELECT: Can read comments on accessible posts\nDROP POLICY IF EXISTS comments_select ON comments;\nCREATE POLICY comments_select ON comments\nFOR SELECT\nUSING (\n    EXISTS (\n        SELECT 1 FROM posts p\n        JOIN channels c ON c.id = 
p.channel_id\n        WHERE p.id = comments.post_id\n        AND (\n            c.is_public = true\n            OR c.owner_id = app_current_user_id()\n            OR is_channel_moderator(c.id, app_current_user_id())\n        )\n    )\n);\n\n-- Comments INSERT: Authenticated users can comment on accessible posts\nDROP POLICY IF EXISTS comments_insert ON comments;\nCREATE POLICY comments_insert ON comments\nFOR INSERT\nWITH CHECK (\n    author_id = app_current_user_id()\n    AND EXISTS (\n        SELECT 1 FROM posts p\n        JOIN channels c ON c.id = p.channel_id\n        WHERE p.id = comments.post_id\n        AND (\n            c.is_public = true\n            OR c.owner_id = app_current_user_id()\n            OR is_channel_moderator(c.id, app_current_user_id())\n        )\n    )\n);\n\n-- Comments UPDATE: Comment authors OR post authors OR channel moderators/owners can edit\nDROP POLICY IF EXISTS comments_update ON comments;\nCREATE POLICY comments_update ON comments\nFOR UPDATE\nUSING (\n    author_id = app_current_user_id()\n    OR EXISTS (\n        SELECT 1 FROM posts p\n        WHERE p.id = comments.post_id\n        AND (\n            p.author_id = app_current_user_id()\n            OR can_moderate_channel(p.channel_id, app_current_user_id())\n        )\n    )\n)\nWITH CHECK (\n    author_id = app_current_user_id()\n    OR EXISTS (\n        SELECT 1 FROM posts p\n        WHERE p.id = comments.post_id\n        AND (\n            p.author_id = app_current_user_id()\n            OR can_moderate_channel(p.channel_id, app_current_user_id())\n        )\n    )\n);\n\n-- Comments DELETE: Comment authors OR post authors OR channel moderators/owners can delete\nDROP POLICY IF EXISTS comments_delete ON comments;\nCREATE POLICY comments_delete ON comments\nFOR DELETE\nUSING (\n    author_id = app_current_user_id()\n    OR EXISTS (\n        SELECT 1 FROM posts p\n        WHERE p.id = comments.post_id\n        AND (\n            p.author_id = app_current_user_id()\n         
   OR can_moderate_channel(p.channel_id, app_current_user_id())\n        )\n    )\n);\n\n-- ============================================================================\n-- CHANNEL MODERATORS TABLE POLICIES\n-- ============================================================================\n\n-- Channel moderators SELECT: Visible to users who can access the channel\nDROP POLICY IF EXISTS channel_moderators_select ON channel_moderators;\nCREATE POLICY channel_moderators_select ON channel_moderators\nFOR SELECT\nUSING (\n    EXISTS (\n        SELECT 1 FROM channels c\n        WHERE c.id = channel_moderators.channel_id\n        AND (\n            c.is_public = true\n            OR c.owner_id = app_current_user_id()\n            OR is_channel_moderator(c.id, app_current_user_id())\n        )\n    )\n);\n\n-- Channel moderators INSERT: Only channel owners can add moderators\nDROP POLICY IF EXISTS channel_moderators_insert ON channel_moderators;\nCREATE POLICY channel_moderators_insert ON channel_moderators\nFOR INSERT\nWITH CHECK (is_channel_owner(channel_id, app_current_user_id()));\n\n-- Channel moderators DELETE: Channel owners can remove any; moderators can remove themselves\nDROP POLICY IF EXISTS channel_moderators_delete ON channel_moderators;\nCREATE POLICY channel_moderators_delete ON channel_moderators\nFOR DELETE\nUSING (\n    is_channel_owner(channel_id, app_current_user_id())\n    OR user_id = app_current_user_id()\n);\n\n-- ============================================================================\n-- USAGE NOTES\n-- ============================================================================\n\n/*\nUsage Instructions:\n1. Set session context before queries:\n   SET app.current_user_id = '<user-uuid>';\n\n2. For anonymous users:\n   SET app.current_user_id = '';\n\n3. 
Test examples:\n   -- Alice (owner of general channel)\n   SET app.current_user_id = '11111111-1111-1111-1111-111111111111';\n\n   -- Bob (moderator of general channel)\n   SET app.current_user_id = '22222222-2222-2222-2222-222222222222';\n*/\n\nCOMMIT;\n"
  },
  {
    "path": "tasks/postgres/standard/security/rls_business_access/meta.json",
    "content": "{\n  \"task_id\": \"rls_business_access\",\n  \"task_name\": \"RLS Business Access\",\n  \"category_id\": \"security\",\n  \"category_name\": \"Security\",\n  \"description\": \"Implement Row Level Security policies for social platform with proper access control for posts, comments, and channels.\",\n  \"author\": \"Fanshi Zhang\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"security and access control\",\n    \"stored procedures and functions\",\n    \"schema design\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"users\\\" {\\n  \\\"id\\\" uuid [pk, not null, default: `gen_random_uuid()`]\\n  \\\"username\\\" varchar(50) [unique, not null]\\n  \\\"email\\\" varchar(100) [unique, not null]\\n  \\\"is_public\\\" bool [default: false]\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n\\n  Indexes {\\n    is_public [type: btree, name: \\\"idx_users_is_public\\\"]\\n  }\\n}\\n\\nTable \\\"channels\\\" {\\n  \\\"id\\\" uuid [pk, not null, default: `gen_random_uuid()`]\\n  \\\"name\\\" varchar(100) [not null]\\n  \\\"description\\\" text\\n  \\\"is_public\\\" bool [default: true]\\n  \\\"owner_id\\\" uuid\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n\\n  Indexes {\\n    is_public [type: btree, name: \\\"idx_channels_is_public\\\"]\\n    owner_id [type: btree, name: \\\"idx_channels_owner_id\\\"]\\n  }\\n}\\n\\nTable \\\"channel_moderators\\\" {\\n  \\\"channel_id\\\" uuid [not null]\\n  \\\"user_id\\\" uuid [not null]\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n\\n  Indexes {\\n    (channel_id, user_id) [type: btree, name: \\\"channel_moderators_pkey\\\"]\\n    (channel_id, user_id) [type: btree, name: \\\"idx_channel_moderators_channel_user\\\"]\\n    user_id [type: btree, name: \\\"idx_channel_moderators_user\\\"]\\n  }\\n}\\n\\nTable \\\"posts\\\" {\\n  \\\"id\\\" uuid [pk, 
not null, default: `gen_random_uuid()`]\\n  \\\"channel_id\\\" uuid\\n  \\\"author_id\\\" uuid\\n  \\\"title\\\" varchar(200) [not null]\\n  \\\"content\\\" text\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"updated_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n\\n  Indexes {\\n    author_id [type: btree, name: \\\"idx_posts_author_id\\\"]\\n    channel_id [type: btree, name: \\\"idx_posts_channel_id\\\"]\\n    created_at [type: btree, name: \\\"idx_posts_created_at\\\"]\\n  }\\n}\\n\\nTable \\\"comments\\\" {\\n  \\\"id\\\" uuid [pk, not null, default: `gen_random_uuid()`]\\n  \\\"post_id\\\" uuid\\n  \\\"author_id\\\" uuid\\n  \\\"content\\\" text [not null]\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"updated_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n\\n  Indexes {\\n    author_id [type: btree, name: \\\"idx_comments_author_id\\\"]\\n    created_at [type: btree, name: \\\"idx_comments_created_at\\\"]\\n    post_id [type: btree, name: \\\"idx_comments_post_id\\\"]\\n  }\\n}\\n\\nRef \\\"channel_moderators_channel_id_fkey\\\":\\\"channels\\\".\\\"id\\\" < \\\"channel_moderators\\\".\\\"channel_id\\\" [delete: cascade]\\n\\nRef \\\"channel_moderators_user_id_fkey\\\":\\\"users\\\".\\\"id\\\" < \\\"channel_moderators\\\".\\\"user_id\\\" [delete: cascade]\\n\\nRef \\\"channels_owner_id_fkey\\\":\\\"users\\\".\\\"id\\\" < \\\"channels\\\".\\\"owner_id\\\" [delete: cascade]\\n\\nRef \\\"comments_author_id_fkey\\\":\\\"users\\\".\\\"id\\\" < \\\"comments\\\".\\\"author_id\\\" [delete: cascade]\\n\\nRef \\\"comments_post_id_fkey\\\":\\\"posts\\\".\\\"id\\\" < \\\"comments\\\".\\\"post_id\\\" [delete: cascade]\\n\\nRef \\\"posts_author_id_fkey\\\":\\\"users\\\".\\\"id\\\" < \\\"posts\\\".\\\"author_id\\\" [delete: cascade]\\n\\nRef \\\"posts_channel_id_fkey\\\":\\\"channels\\\".\\\"id\\\" < \\\"posts\\\".\\\"channel_id\\\" [delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": null\n  
}\n}\n"
  },
  {
    "path": "tasks/postgres/standard/security/rls_business_access/prepare_environment.py",
    "content": "#!/usr/bin/env python3\n\nimport os\nimport psycopg2\nfrom psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT\nimport sys\n\ndef setup_rls_environment():\n    \"\"\"\n    Set up a PostgreSQL environment for a social media platform with RLS policies.\n    Creates Users, Channels, Posts, Comments, and Channel Moderators for testing RLS implementations.\n    \"\"\"\n\n    # Database connection parameters from environment\n    db_params = {\n        'host': os.getenv('POSTGRES_HOST', 'localhost'),\n        'port': os.getenv('POSTGRES_PORT', '5432'),\n        'user': os.getenv('POSTGRES_USERNAME', 'postgres'),\n        'password': os.getenv('POSTGRES_PASSWORD', 'password'),\n        'database': os.getenv('POSTGRES_DATABASE', 'postgres')\n    }\n\n    try:\n        conn = psycopg2.connect(**db_params)\n        conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)\n        cur = conn.cursor()\n\n        # 1. Users Table (with correct field name for verification)\n        cur.execute(\"\"\"\n            CREATE TABLE IF NOT EXISTS users (\n                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),\n                username VARCHAR(50) UNIQUE NOT NULL,\n                email VARCHAR(100) UNIQUE NOT NULL,\n                is_public BOOLEAN DEFAULT false,\n                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n            );\n        \"\"\")\n        print(\"✓ Created users table\")\n\n        # 2. Channels Table\n        cur.execute(\"\"\"\n            CREATE TABLE IF NOT EXISTS channels (\n                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),\n                name VARCHAR(100) NOT NULL,\n                description TEXT,\n                is_public BOOLEAN DEFAULT true,\n                owner_id UUID REFERENCES users(id) ON DELETE CASCADE,\n                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n            );\n        \"\"\")\n        print(\"✓ Created channels table\")\n\n        # 3. 
Channel Moderators Table\n        cur.execute(\"\"\"\n            CREATE TABLE IF NOT EXISTS channel_moderators (\n                channel_id UUID REFERENCES channels(id) ON DELETE CASCADE,\n                user_id UUID REFERENCES users(id) ON DELETE CASCADE,\n                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                PRIMARY KEY (channel_id, user_id)\n            );\n        \"\"\")\n        print(\"✓ Created channel_moderators table\")\n\n        # 4. Posts Table\n        cur.execute(\"\"\"\n            CREATE TABLE IF NOT EXISTS posts (\n                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),\n                channel_id UUID REFERENCES channels(id) ON DELETE CASCADE,\n                author_id UUID REFERENCES users(id) ON DELETE CASCADE,\n                title VARCHAR(200) NOT NULL,\n                content TEXT,\n                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n            );\n        \"\"\")\n        print(\"✓ Created posts table\")\n\n        # 5. 
Comments Table\n        cur.execute(\"\"\"\n            CREATE TABLE IF NOT EXISTS comments (\n                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),\n                post_id UUID REFERENCES posts(id) ON DELETE CASCADE,\n                author_id UUID REFERENCES users(id) ON DELETE CASCADE,\n                content TEXT NOT NULL,\n                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n            );\n        \"\"\")\n        print(\"✓ Created comments table\")\n\n        # Create helper functions for RLS (matching ground truth expectations)\n        cur.execute(\"\"\"\n            -- Function to get current user ID from session context\n            CREATE OR REPLACE FUNCTION app_current_user_id()\n            RETURNS UUID AS $$\n            BEGIN\n                RETURN NULLIF(current_setting('app.current_user_id', true), '')::UUID;\n            END;\n            $$ LANGUAGE plpgsql SECURITY DEFINER STABLE PARALLEL SAFE;\n\n            -- Function to check if user owns a channel\n            CREATE OR REPLACE FUNCTION is_channel_owner(p_channel_id UUID, p_user_id UUID)\n            RETURNS BOOLEAN AS $$\n            BEGIN\n                RETURN EXISTS (\n                    SELECT 1 FROM channels\n                    WHERE id = p_channel_id AND owner_id = p_user_id\n                );\n            END;\n            $$ LANGUAGE plpgsql SECURITY DEFINER STABLE PARALLEL SAFE;\n\n            -- Function to check if user moderates a channel\n            CREATE OR REPLACE FUNCTION is_channel_moderator(p_channel_id UUID, p_user_id UUID)\n            RETURNS BOOLEAN AS $$\n            BEGIN\n                RETURN EXISTS (\n                    SELECT 1 FROM channel_moderators\n                    WHERE channel_id = p_channel_id AND user_id = p_user_id\n                );\n            END;\n            $$ LANGUAGE plpgsql SECURITY DEFINER STABLE PARALLEL SAFE;\n\n            -- Function to 
check if user can moderate channel (owner OR moderator)\n            CREATE OR REPLACE FUNCTION can_moderate_channel(p_channel_id UUID, p_user_id UUID)\n            RETURNS BOOLEAN AS $$\n            BEGIN\n                RETURN is_channel_owner(p_channel_id, p_user_id)\n                       OR is_channel_moderator(p_channel_id, p_user_id);\n            END;\n            $$ LANGUAGE plpgsql SECURITY DEFINER STABLE PARALLEL SAFE;\n        \"\"\")\n        print(\"✓ Created RLS helper functions\")\n\n        # Insert sample data\n        print(\"\\nInserting sample data...\")\n\n        # Sample users (exact UUIDs expected by verification script)\n        cur.execute(\"\"\"\n            INSERT INTO users (id, username, email, is_public) VALUES\n            ('11111111-1111-1111-1111-111111111111', 'alice', 'alice@example.com', true),\n            ('22222222-2222-2222-2222-222222222222', 'bob', 'bob@example.com', true),\n            ('33333333-3333-3333-3333-333333333333', 'charlie', 'charlie@example.com', false),\n            ('44444444-4444-4444-4444-444444444444', 'diana', 'diana@example.com', true),\n            ('55555555-5555-5555-5555-555555555555', 'eve', 'eve@example.com', false)\n            ON CONFLICT (id) DO NOTHING;\n        \"\"\")\n        print(\"✓ Created 5 sample users\")\n\n        # Sample channels (exact UUIDs expected by verification script)\n        cur.execute(\"\"\"\n            INSERT INTO channels (id, name, description, is_public, owner_id) VALUES\n            ('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', 'general', 'General discussion channel', true, '11111111-1111-1111-1111-111111111111'),\n            ('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb', 'tech-talk', 'Technical discussions', true, '22222222-2222-2222-2222-222222222222'),\n            ('cccccccc-cccc-cccc-cccc-cccccccccccc', 'random', 'Random conversations', false, '33333333-3333-3333-3333-333333333333')\n            ON CONFLICT (id) DO NOTHING;\n        \"\"\")\n        print(\"✓ 
Created 3 sample channels\")\n\n        # Sample moderators (exact relationships expected by verification script)\n        cur.execute(\"\"\"\n            INSERT INTO channel_moderators (channel_id, user_id) VALUES\n            ('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', '22222222-2222-2222-2222-222222222222'),  -- Bob moderates general\n            ('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb', '44444444-4444-4444-4444-444444444444')  -- Diana moderates tech-talk\n            ON CONFLICT (channel_id, user_id) DO NOTHING;\n        \"\"\")\n        print(\"✓ Created sample moderator assignments\")\n\n        # Sample posts (exact UUIDs expected by verification script)\n        cur.execute(\"\"\"\n            INSERT INTO posts (id, channel_id, author_id, title, content) VALUES\n            ('dddddddd-dddd-dddd-dddd-dddddddddddd', 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', '11111111-1111-1111-1111-111111111111', 'Welcome to the platform!', 'This is our first post'),\n            ('eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee', 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', '33333333-3333-3333-3333-333333333333', 'Hello everyone', 'Nice to meet you all'),\n            ('ffffffff-ffff-ffff-ffff-ffffffffffff', 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb', '22222222-2222-2222-2222-222222222222', 'PostgreSQL RLS Tutorial', 'Let''s discuss Row Level Security'),\n            ('10101010-1010-1010-1010-101010101010', 'cccccccc-cccc-cccc-cccc-cccccccccccc', '55555555-5555-5555-5555-555555555555', 'Random thoughts', 'Just some random content here')\n            ON CONFLICT (id) DO NOTHING;\n        \"\"\")\n        print(\"✓ Created 4 sample posts\")\n\n        # Sample comments (exact UUIDs expected by verification script)\n        cur.execute(\"\"\"\n            INSERT INTO comments (id, post_id, author_id, content) VALUES\n            ('99999999-9999-9999-9999-999999999999', 'dddddddd-dddd-dddd-dddd-dddddddddddd', '22222222-2222-2222-2222-222222222222', 'Great to have you here!'),\n            
('88888888-8888-8888-8888-888888888888', 'dddddddd-dddd-dddd-dddd-dddddddddddd', '33333333-3333-3333-3333-333333333333', 'Thanks for setting this up'),\n            ('77777777-7777-7777-7777-777777777777', 'ffffffff-ffff-ffff-ffff-ffffffffffff', '44444444-4444-4444-4444-444444444444', 'RLS is really powerful!'),\n            ('66666666-6666-6666-6666-666666666666', 'eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee', '11111111-1111-1111-1111-111111111111', 'Welcome Charlie!')\n            ON CONFLICT (id) DO NOTHING;\n        \"\"\")\n        print(\"✓ Created 4 sample comments\")\n\n        # Create indexes for better RLS performance\n        cur.execute(\"\"\"\n            CREATE INDEX IF NOT EXISTS idx_channels_owner_id ON channels(owner_id);\n            CREATE INDEX IF NOT EXISTS idx_channels_is_public ON channels(is_public);\n            CREATE INDEX IF NOT EXISTS idx_channel_moderators_channel_user ON channel_moderators(channel_id, user_id);\n            CREATE INDEX IF NOT EXISTS idx_channel_moderators_user ON channel_moderators(user_id);\n            CREATE INDEX IF NOT EXISTS idx_posts_channel_id ON posts(channel_id);\n            CREATE INDEX IF NOT EXISTS idx_posts_author_id ON posts(author_id);\n            CREATE INDEX IF NOT EXISTS idx_posts_created_at ON posts(created_at);\n            CREATE INDEX IF NOT EXISTS idx_comments_post_id ON comments(post_id);\n            CREATE INDEX IF NOT EXISTS idx_comments_author_id ON comments(author_id);\n            CREATE INDEX IF NOT EXISTS idx_comments_created_at ON comments(created_at);\n            CREATE INDEX IF NOT EXISTS idx_users_is_public ON users(is_public);\n        \"\"\")\n        print(\"✓ Created performance indexes for RLS\")\n\n        cur.close()\n        conn.close()\n\n    except Exception as e:\n        print(f\"Error setting up environment: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    setup_rls_environment()\n"
  },
  {
    "path": "tasks/postgres/standard/security/rls_business_access/verify.py",
    "content": "#!/usr/bin/env python3\n\nimport os\nimport psycopg2\nfrom psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT\nimport sys\n\ndef verify_rls_implementation():\n    \"\"\"\n    Verify that Row Level Security policies have been properly implemented\n    for the social media platform with Users, Posts, Comments, and Channels.\n    \"\"\"\n\n    # Database connection parameters from environment\n    admin_db_params = {\n        'host': os.getenv('POSTGRES_HOST', 'localhost'),\n        'port': os.getenv('POSTGRES_PORT', '5432'),\n        'user': os.getenv('POSTGRES_USERNAME', 'postgres'),\n        'password': os.getenv('POSTGRES_PASSWORD', 'password'),\n        'database': os.getenv('POSTGRES_DATABASE', 'postgres')\n    }\n\n    # Test user parameters (non-superuser for proper RLS testing)\n    test_db_params = {\n        'host': os.getenv('POSTGRES_HOST', 'localhost'),\n        'port': os.getenv('POSTGRES_PORT', '5432'),\n        'user': 'test_user',\n        'password': 'testpass',\n        'database': os.getenv('POSTGRES_DATABASE', 'postgres')\n    }\n\n    try:\n        # First connect as admin to ensure test user exists\n        admin_conn = psycopg2.connect(**admin_db_params)\n        admin_conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)\n        admin_cur = admin_conn.cursor()\n\n        # Create test user if it doesn't exist\n        try:\n            admin_cur.execute(\"CREATE ROLE test_user LOGIN PASSWORD 'testpass';\")\n        except psycopg2.Error:\n            pass  # User already exists\n\n        # Grant necessary permissions to test user on the current database\n        admin_cur.execute(\"SELECT current_database();\")\n        current_db_name = admin_cur.fetchone()[0]\n\n        admin_cur.execute(f\"GRANT CONNECT ON DATABASE \\\"{current_db_name}\\\" TO test_user;\")\n        admin_cur.execute(\"GRANT USAGE ON SCHEMA public TO test_user;\")\n        admin_cur.execute(\"GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA 
public TO test_user;\")\n        admin_cur.execute(\"GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO test_user;\")\n        admin_cur.execute(\"GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO test_user;\")\n\n        admin_cur.close()\n        admin_conn.close()\n\n        # Update test_db_params with the correct database name\n        test_db_params['database'] = current_db_name\n\n        # Now connect as test user for RLS verification\n        conn = psycopg2.connect(**test_db_params)\n        conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)\n        cur = conn.cursor()\n\n        print(\"Verifying...\")\n\n        test_results = []\n\n        # Test 1: Check if RLS is enabled on all tables\n        print(\"\\n1. Checking RLS enablement...\")\n        expected_tables = ['users', 'channels', 'channel_moderators', 'posts', 'comments']\n\n        for table in expected_tables:\n            cur.execute(\"\"\"\n                SELECT relrowsecurity\n                FROM pg_class\n                WHERE relname = %s AND relkind = 'r'\n            \"\"\", (table,))\n            result = cur.fetchone()\n\n            if result and result[0]:\n                test_results.append(f\"✓ RLS enabled on {table}\")\n            else:\n                test_results.append(f\"✗ RLS NOT enabled on {table}\")\n\n        # Test 2: Users can only update their own profile\n        print(\"\\n2. 
Testing user profile access control...\")\n\n        # Alice tries to update her own profile (should work)\n        try:\n            cur.execute(\"SET app.current_user_id = '11111111-1111-1111-1111-111111111111';\")  # Alice\n            cur.execute(\"\"\"\n                UPDATE users\n                SET email = 'alice.updated@example.com'\n                WHERE id = '11111111-1111-1111-1111-111111111111'\n            \"\"\")\n            test_results.append(\"✓ Users can update their own profile\")\n        except Exception as e:\n            test_results.append(f\"✗ User cannot update own profile: {e}\")\n\n        # Alice tries to update Bob's profile (should fail)\n        try:\n            cur.execute(\"SET app.current_user_id = '11111111-1111-1111-1111-111111111111';\")  # Alice\n            cur.execute(\"\"\"\n                UPDATE users\n                SET email = 'bob.hacked@example.com'\n                WHERE id = '22222222-2222-2222-2222-222222222222'\n            \"\"\")\n            # Check if the update actually affected any rows (RLS blocks by affecting 0 rows)\n            if cur.rowcount == 0:\n                test_results.append(\"✓ Users blocked from updating other users' profiles\")\n            else:\n                test_results.append(\"✗ User was able to update another user's profile (should be blocked)\")\n        except psycopg2.Error:\n            test_results.append(\"✓ Users blocked from updating other users' profiles\")\n\n        # Test 3: Channel ownership controls\n        print(\"\\n3. 
Testing channel ownership controls...\")\n\n        # Alice (owner of general channel) tries to update her channel\n        try:\n            cur.execute(\"SET app.current_user_id = '11111111-1111-1111-1111-111111111111';\")  # Alice\n            cur.execute(\"\"\"\n                UPDATE channels\n                SET description = 'Updated by Alice'\n                WHERE id = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'\n            \"\"\")\n            test_results.append(\"✓ Channel owners can update their channels\")\n        except Exception as e:\n            test_results.append(f\"✗ Channel owner cannot update channel: {e}\")\n\n        # Charlie tries to update Alice's channel (should fail)\n        try:\n            cur.execute(\"SET app.current_user_id = '33333333-3333-3333-3333-333333333333';\")  # Charlie\n            cur.execute(\"\"\"\n                UPDATE channels\n                SET description = 'Hacked by Charlie'\n                WHERE id = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'\n            \"\"\")\n            # Check if the update actually affected any rows (RLS blocks by affecting 0 rows)\n            if cur.rowcount == 0:\n                test_results.append(\"✓ Non-owners blocked from updating channels\")\n            else:\n                test_results.append(\"✗ Non-owner was able to update channel (should be blocked)\")\n        except psycopg2.Error:\n            test_results.append(\"✓ Non-owners blocked from updating channels\")\n\n        # Test 4: Post authorship and moderation controls\n        print(\"\\n4. 
Testing post access controls...\")\n\n        # Alice (author) tries to update her own post\n        try:\n            cur.execute(\"SET app.current_user_id = '11111111-1111-1111-1111-111111111111';\")  # Alice\n            cur.execute(\"\"\"\n                UPDATE posts\n                SET title = 'Updated by Alice'\n                WHERE id = 'dddddddd-dddd-dddd-dddd-dddddddddddd'\n            \"\"\")\n            test_results.append(\"✓ Post authors can update their posts\")\n        except Exception as e:\n            test_results.append(f\"✗ Post author cannot update post: {e}\")\n\n        # Bob (moderator of general) tries to update Alice's post (should work)\n        try:\n            cur.execute(\"SET app.current_user_id = '22222222-2222-2222-2222-222222222222';\")  # Bob (moderator)\n            cur.execute(\"\"\"\n                UPDATE posts\n                SET content = 'Moderated by Bob'\n                WHERE id = 'dddddddd-dddd-dddd-dddd-dddddddddddd'\n            \"\"\")\n            test_results.append(\"✓ Channel moderators can update posts in their channels\")\n        except Exception as e:\n            test_results.append(f\"✗ Channel moderator cannot update post: {e}\")\n\n        # Eve tries to update Alice's post (should fail - not author, owner, or moderator)\n        try:\n            cur.execute(\"SET app.current_user_id = '55555555-5555-5555-5555-555555555555';\")  # Eve\n            cur.execute(\"\"\"\n                UPDATE posts\n                SET content = 'Hacked by Eve'\n                WHERE id = 'dddddddd-dddd-dddd-dddd-dddddddddddd'\n            \"\"\")\n            # Check if the update actually affected any rows (RLS blocks by affecting 0 rows)\n            if cur.rowcount == 0:\n                test_results.append(\"✓ Unauthorized users blocked from updating posts\")\n            else:\n                test_results.append(\"✗ Unauthorized user was able to update post (should be blocked)\")\n        except 
psycopg2.Error:\n            test_results.append(\"✓ Unauthorized users blocked from updating posts\")\n\n        # Test 5: Comment access controls\n        print(\"\\n5. Testing comment access controls...\")\n\n        # Bob (comment author) tries to update his own comment\n        try:\n            cur.execute(\"SET app.current_user_id = '22222222-2222-2222-2222-222222222222';\")  # Bob\n            cur.execute(\"\"\"\n                UPDATE comments\n                SET content = 'Updated by Bob himself'\n                WHERE id = '99999999-9999-9999-9999-999999999999'\n            \"\"\")\n            test_results.append(\"✓ Comment authors can update their comments\")\n        except Exception as e:\n            test_results.append(f\"✗ Comment author cannot update comment: {e}\")\n\n        # Alice (post author) tries to update Bob's comment on her post (should work)\n        try:\n            cur.execute(\"SET app.current_user_id = '11111111-1111-1111-1111-111111111111';\")  # Alice (post author)\n            cur.execute(\"\"\"\n                UPDATE comments\n                SET content = 'Moderated by post author Alice'\n                WHERE id = '99999999-9999-9999-9999-999999999999'\n            \"\"\")\n            test_results.append(\"✓ Post authors can moderate comments on their posts\")\n        except Exception as e:\n            test_results.append(f\"✗ Post author cannot moderate comment: {e}\")\n\n        # Test 6: Channel moderator assignment controls\n        print(\"\\n6. 
Testing moderator assignment controls...\")\n\n        # Alice (channel owner) tries to add a moderator\n        try:\n            cur.execute(\"SET app.current_user_id = '11111111-1111-1111-1111-111111111111';\")  # Alice (owner of general)\n            cur.execute(\"\"\"\n                INSERT INTO channel_moderators (channel_id, user_id)\n                VALUES ('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', '33333333-3333-3333-3333-333333333333')\n            \"\"\")\n            test_results.append(\"✓ Channel owners can add moderators\")\n        except Exception as e:\n            test_results.append(f\"✗ Channel owner cannot add moderator: {e}\")\n\n        # Charlie tries to add himself as moderator to Bob's channel (should fail)\n        try:\n            cur.execute(\"SET app.current_user_id = '33333333-3333-3333-3333-333333333333';\")  # Charlie\n            cur.execute(\"\"\"\n                INSERT INTO channel_moderators (channel_id, user_id)\n                VALUES ('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb', '33333333-3333-3333-3333-333333333333')\n            \"\"\")\n            # Check if the insert actually affected any rows (RLS blocks by affecting 0 rows)\n            if cur.rowcount == 0:\n                test_results.append(\"✓ Non-owners blocked from adding moderators\")\n            else:\n                test_results.append(\"✗ Non-owner was able to add moderator (should be blocked)\")\n        except psycopg2.Error:\n            test_results.append(\"✓ Non-owners blocked from adding moderators\")\n\n        # Test 7: Content visibility based on user context\n        print(\"\\n7. 
Testing content visibility...\")\n\n        # Count posts visible to Alice\n        cur.execute(\"SET app.current_user_id = '11111111-1111-1111-1111-111111111111';\")  # Alice\n        cur.execute(\"SELECT COUNT(*) FROM posts;\")\n        alice_posts = cur.fetchone()[0]\n\n        # Count posts visible to Eve\n        cur.execute(\"SET app.current_user_id = '55555555-5555-5555-5555-555555555555';\")  # Eve\n        cur.execute(\"SELECT COUNT(*) FROM posts;\")\n        eve_posts = cur.fetchone()[0]\n\n        if alice_posts >= 2 and eve_posts >= 1:  # Alice should see posts in channels she has access to\n            test_results.append(\"✓ Content visibility varies correctly based on user context\")\n        else:\n            test_results.append(f\"✗ Content visibility issue: Alice sees {alice_posts}, Eve sees {eve_posts}\")\n\n        # Test 8: Anonymous user access\n        print(\"\\n8. Testing anonymous user restrictions...\")\n\n        try:\n            cur.execute(\"SET app.current_user_id = '';\")  # Anonymous user\n            cur.execute(\"SELECT COUNT(*) FROM users;\")\n            anon_users = cur.fetchone()[0]\n\n            # Anonymous users should be able to see public user profiles per requirements\n            # Count public users that should be visible\n            cur.execute(\"SELECT COUNT(*) FROM users WHERE is_public = true;\")\n            public_users = cur.fetchone()[0] if cur.rowcount > 0 else 0\n\n            if anon_users == public_users and anon_users > 0:\n                test_results.append(f\"✓ Anonymous users can see {anon_users} public user profiles (correct)\")\n            elif anon_users == 0:\n                test_results.append(\"✗ Anonymous users cannot see any users (should see public profiles)\")\n            else:\n                test_results.append(f\"✗ Anonymous users can see {anon_users} users but expected {public_users} public users\")\n        except Exception as e:\n            test_results.append(\"✓ Anonymous 
users properly restricted\")\n\n        # Print results\n        print(\"\\n\" + \"=\"*60)\n        print(\"RLS VERIFICATION RESULTS - SOCIAL MEDIA PLATFORM\")\n        print(\"=\"*60)\n\n        passed = sum(1 for result in test_results if result.startswith(\"✓\"))\n        failed = sum(1 for result in test_results if result.startswith(\"✗\"))\n\n        for result in test_results:\n            print(result)\n\n        print(f\"\\nSummary: {passed} passed, {failed} failed\")\n\n        cur.close()\n        conn.close()\n\n        if failed == 0:\n            print(\"\\nAll tests passed.\")\n            return True\n        else:\n            print(f\"\\n{failed} test(s) failed.\")\n            return False\n\n    except Exception as e:\n        print(f\"Error during verification: {e}\")\n        return False\n\nif __name__ == \"__main__\":\n    success = verify_rls_implementation()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/postgres/standard/security/user_permission_audit/description.md",
    "content": "Conduct a comprehensive security audit to identify PostgreSQL users with insufficient or dangling permissions in a business database environment.\n\n## Your Mission:\n\nYou've been hired as a security consultant to audit the PostgreSQL database permissions for a growing e-commerce company. The company has experienced rapid growth and multiple teams have been granted database access over time. However, there's concern about permission inconsistencies and security gaps.\n\n## Security Audit Requirements:\n\n1. **Discover the database structure**: Identify all business tables and their purposes\n2. **Catalog all database users and roles**: Use `pg_user`, `pg_roles`, and `pg_auth_members` to find all accounts\n3. **Analyze current permissions**: Use `information_schema.table_privileges` to map permissions\n4. **Identify security issues**:\n   - **Dangling users**: Inactive accounts that should be removed\n   - **Missing permissions**: Users lacking permissions required for their business role\n   - **Excessive permissions**: Users with unnecessary permissions that should be revoked\n\n## Expected permissions by role (what they SHOULD have)\n\n```python\n# users's role\nUSER_ROLE = {\n    # Active functional users\n    'analytics_user': 'Analytics Team',\n    'marketing_user': 'Marketing Department',\n    'customer_service': 'Customer Service',\n    'finance_user': 'Finance Team',\n    'product_manager': 'Product Management',\n    'security_auditor': 'Security Team',\n    'developer_user': 'Development Team',\n    'backup_user': 'Backup Service',\n}\n\n# each role has its permissions\nROLE_EXPECTED_PERMISSIONS = {\n    'Analytics Team': [\n        ('user_profiles', 'SELECT'),\n        ('user_stat_analysis', 'SELECT'),\n        ('product_catalog', 'SELECT'),\n        ('order_management', 'SELECT'),\n    ],\n    'Marketing Department': [\n        ('user_profiles', 'SELECT'),\n        ('user_stat_analysis', 'SELECT'),\n        ('product_catalog', 
'SELECT'),\n    ],\n    'Customer Service': [\n        ('user_profiles', 'SELECT'),\n        ('user_profiles', 'UPDATE'),\n        ('order_management', 'SELECT'),\n        ('order_management', 'INSERT'),\n        ('order_management', 'UPDATE'),\n        ('product_catalog', 'SELECT'),\n    ],\n    'Finance Team': [\n        ('financial_transactions', 'SELECT'),\n        ('order_management', 'SELECT'),\n        ('user_profiles', 'SELECT'),\n    ],\n    'Product Management': [\n        ('product_catalog', 'SELECT'),\n        ('product_catalog', 'INSERT'),\n        ('product_catalog', 'UPDATE'),\n        ('product_catalog', 'DELETE'),\n        ('order_management', 'SELECT'),\n        ('user_stat_analysis', 'SELECT'),\n    ],\n    'Security Team': [\n        ('audit_logs', 'SELECT'),\n        ('user_credentials', 'SELECT'),\n        ('user_profiles', 'SELECT'),\n    ],\n    'Development Team': [\n        ('user_profiles', 'SELECT'),\n        ('product_catalog', 'SELECT'),\n    ],\n    'Backup Service': [\n        ('user_profiles', 'SELECT'),\n        ('product_catalog', 'SELECT'),\n        ('order_management', 'SELECT'),\n        ('financial_transactions', 'SELECT'),\n        ('user_stat_analysis', 'SELECT'),\n        ('audit_logs', 'SELECT'),\n        ('user_credentials', 'SELECT'),\n    ]\n}\n```\n\n## Expected Deliverables:\n\nYour audit must produce findings in a structured format that can be verified. Create two tables to store your audit results:\n\n**1. Summary Table:**\n```sql\nCREATE TABLE security_audit_results (\n    audit_id SERIAL PRIMARY KEY,\n    audit_type VARCHAR(50) NOT NULL, -- 'DANGLING_USERS', 'MISSING_PERMISSIONS', 'EXCESSIVE_PERMISSIONS'\n    total_issues INTEGER NOT NULL,\n    users_affected INTEGER NOT NULL,\n    tables_affected INTEGER NOT NULL\n);\n```\n\n**2. 
Detailed Findings Table:**\n```sql\nCREATE TABLE security_audit_details (\n    detail_id SERIAL PRIMARY KEY,\n    username VARCHAR(50) NOT NULL,\n    issue_type VARCHAR(50) NOT NULL, -- 'DANGLING_USER', 'MISSING_PERMISSION', 'EXCESSIVE_PERMISSION'\n    table_name VARCHAR(50), -- NULL for dangling users\n    permission_type VARCHAR(20), -- 'SELECT', 'INSERT', 'UPDATE', 'DELETE', NULL for dangling users\n    expected_access BOOLEAN NOT NULL -- TRUE if user should have access, FALSE if should not\n);\n```\n\n## Success Criteria:\n\nYour audit should populate both tables with:\n- **Summary data**: High-level counts of different types of security issues\n- **Detailed findings**: Specific permission gaps for each user and table combination\n\n## Business Role Expectations\n\nAnalyze usernames and infer their intended business roles based on naming patterns:\n\n- **analytics_user** → Analytics Team (needs user behavior and statistics data)\n- **marketing_user** → Marketing Department (needs customer and product data for campaigns)  \n- **customer_service** → Customer Service (needs user profiles and order management)\n- **finance_user** → Finance Team (needs financial and order data)\n- **product_manager** → Product Management (needs full product catalog access)\n- **security_auditor** → Security Team (needs audit logs and credential data)\n- **developer_user** → Development Team (needs limited access for testing)\n- **backup_user** → Backup Service (needs read-only access to all business data)\n- **temp_contractor, old_employee, test_account** → Inactive/Temporary (should have NO permissions)\n\nThe verification process will check that your findings correctly identify the actual permission gaps in the system by comparing against expected results.\n"
  },
  {
    "path": "tasks/postgres/standard/security/user_permission_audit/ground_truth.sql",
    "content": "-- Ground Truth Solution: Complete Security Audit Implementation\n-- This includes comprehensive PostgreSQL user, role, and permission discovery\n\n/*\n================================================================================\nPERMISSION MODEL DOCUMENTATION\n================================================================================\n\n## Current Permission State\n| Username          | Table                  | Permission | Status  | Reason                                    |\n|-------------------|------------------------|------------|---------|-------------------------------------------|\n| analytics_user    | user_stat_analysis     | SELECT     | EXISTS  | Correctly granted                         |\n| analytics_user    | user_profiles          | SELECT     | MISSING | Permission was revoked                    |\n| analytics_user    | financial_transactions | SELECT     | EXISTS  | Should be revoked - no business need     |\n| marketing_user    | user_profiles          | SELECT     | EXISTS  | Correctly granted                         |\n| marketing_user    | user_stat_analysis     | SELECT     | EXISTS  | Correctly granted                         |\n| marketing_user    | product_catalog        | SELECT     | MISSING | Permission was revoked                    |\n| marketing_user    | financial_transactions | SELECT     | EXISTS  | Should be revoked - security risk        |\n| customer_service  | user_profiles          | SELECT     | EXISTS  | Correctly granted                         |\n| customer_service  | user_profiles          | UPDATE     | EXISTS  | Correctly granted                         |\n| customer_service  | order_management       | SELECT     | EXISTS  | Correctly granted                         |\n| customer_service  | order_management       | INSERT     | EXISTS  | Correctly granted                         |\n| customer_service  | order_management       | UPDATE     | EXISTS  | Correctly granted                         
|\n| customer_service  | product_catalog        | SELECT     | MISSING | Permission was revoked                    |\n| customer_service  | user_credentials       | SELECT     | EXISTS  | Should be revoked - security risk        |\n| finance_user      | financial_transactions | SELECT     | EXISTS  | Correctly granted                         |\n| finance_user      | order_management       | SELECT     | EXISTS  | Correctly granted                         |\n| finance_user      | user_profiles          | SELECT     | MISSING | Permission was revoked                    |\n| product_manager   | product_catalog        | SELECT     | EXISTS  | Correctly granted                         |\n| product_manager   | product_catalog        | INSERT     | EXISTS  | Correctly granted                         |\n| product_manager   | product_catalog        | UPDATE     | EXISTS  | Correctly granted                         |\n| product_manager   | product_catalog        | DELETE     | EXISTS  | Correctly granted                         |\n| product_manager   | order_management       | SELECT     | EXISTS  | Correctly granted                         |\n| product_manager   | financial_transactions | SELECT     | EXISTS  | Should be revoked - no business need     |\n| security_auditor  | user_credentials       | SELECT     | EXISTS  | Correctly granted                         |\n| security_auditor  | user_profiles          | SELECT     | EXISTS  | Correctly granted                         |\n| security_auditor  | audit_logs             | SELECT     | MISSING | Permission was revoked                    |\n| security_auditor  | financial_transactions | UPDATE     | EXISTS  | Should be revoked - excessive privilege  |\n| developer_user    | user_profiles          | SELECT     | EXISTS  | Correctly granted                         |\n| developer_user    | product_catalog        | SELECT     | MISSING | Permission was revoked                    |\n| developer_user    | user_credentials       
| SELECT     | EXISTS  | Should be revoked - security risk        |\n| developer_user    | order_management       | UPDATE     | EXISTS  | Should be revoked - no business need     |\n| backup_user       | user_profiles          | SELECT     | EXISTS  | Correctly granted                         |\n| backup_user       | product_catalog        | SELECT     | EXISTS  | Correctly granted                         |\n| backup_user       | audit_logs             | SELECT     | EXISTS  | Correctly granted                         |\n| backup_user       | order_management       | SELECT     | MISSING | Permission was revoked                    |\n| backup_user       | product_catalog        | DELETE     | EXISTS  | Should be revoked - backup should be read-only |\n| temp_contractor   | product_catalog        | SELECT     | EXISTS  | Should be revoked - user is inactive     |\n| temp_contractor   | user_profiles          | SELECT     | EXISTS  | Should be revoked - user is inactive     |\n| old_employee      | audit_logs             | SELECT     | EXISTS  | Should be revoked - user is inactive     |\n| old_employee      | user_stat_analysis     | UPDATE     | EXISTS  | Should be revoked - user is inactive     |\n| test_account      | user_profiles          | SELECT     | EXISTS  | Should be revoked - test account          |\n\n## Expected Permission State\n| Username          | Table                  | Permission | Justification                                                |\n|-------------------|------------------------|------------|--------------------------------------------------------------|\n| analytics_user    | user_profiles          | SELECT     | Analytics team needs customer data for user behavior analysis|\n| analytics_user    | user_stat_analysis     | SELECT     | Core analytics data required for reporting                   |\n| analytics_user    | product_catalog        | SELECT     | Product performance analysis and customer preferences        |\n| 
analytics_user    | order_management       | SELECT     | Sales trend analysis and customer purchasing patterns        |\n| marketing_user    | user_profiles          | SELECT     | Customer segmentation and personalized marketing campaigns   |\n| marketing_user    | user_stat_analysis     | SELECT     | Campaign effectiveness analysis and user behavior tracking   |\n| marketing_user    | product_catalog        | SELECT     | Product promotion planning and marketing material creation   |\n| customer_service  | user_profiles          | SELECT     | Customer identity verification and support                   |\n| customer_service  | user_profiles          | UPDATE     | Update customer information and resolve account issues       |\n| customer_service  | order_management       | SELECT     | Order status inquiries and customer support                  |\n| customer_service  | order_management       | INSERT     | Create orders for customers over phone                       |\n| customer_service  | order_management       | UPDATE     | Update order status and resolve order issues                 |\n| customer_service  | product_catalog        | SELECT     | Product information for customer questions and support       |\n| finance_user      | financial_transactions | SELECT     | Financial reporting, auditing, and compliance               |\n| finance_user      | order_management       | SELECT     | Revenue reconciliation and financial analysis                |\n| finance_user      | user_profiles          | SELECT     | Customer financial analysis and credit assessment            |\n| product_manager   | product_catalog        | SELECT     | Product information access and management                    |\n| product_manager   | product_catalog        | INSERT     | Add new products to catalog                                  |\n| product_manager   | product_catalog        | UPDATE     | Update product details, pricing, and specifications         |\n| product_manager   
| product_catalog        | DELETE     | Remove discontinued or obsolete products                     |\n| product_manager   | order_management       | SELECT     | Product sales analysis and demand forecasting               |\n| product_manager   | user_stat_analysis     | SELECT     | Product usage analytics and customer behavior insights       |\n| security_auditor  | audit_logs             | SELECT     | Security monitoring and incident investigation               |\n| security_auditor  | user_credentials       | SELECT     | Security auditing and compliance verification               |\n| security_auditor  | user_profiles          | SELECT     | User account auditing and security incident investigation    |\n| developer_user    | user_profiles          | SELECT     | Application development and testing with realistic data      |\n| developer_user    | product_catalog        | SELECT     | Application development and testing with product data        |\n| backup_user       | user_profiles          | SELECT     | Complete data backup coverage for business continuity       |\n| backup_user       | product_catalog        | SELECT     | Complete data backup coverage for business continuity       |\n| backup_user       | order_management       | SELECT     | Complete data backup coverage for business continuity       |\n| backup_user       | financial_transactions | SELECT     | Complete data backup coverage for business continuity       |\n| backup_user       | user_stat_analysis     | SELECT     | Complete data backup coverage for business continuity       |\n| backup_user       | audit_logs             | SELECT     | Complete data backup coverage for business continuity       |\n| backup_user       | user_credentials       | SELECT     | Complete data backup coverage for business continuity       |\n\nNotes:\n- temp_contractor, old_employee, test_account should have NO permissions (accounts should be removed)\n- All excessive permissions should be revoked for 
security compliance\n- Missing permissions should be granted based on business role requirements\n\n================================================================================\n*/\n\nBEGIN;\n\n-- ============================================================================\n-- CREATE AUDIT RESULTS TABLES\n-- ============================================================================\n\nCREATE TABLE security_audit_results (\n    audit_id SERIAL PRIMARY KEY,\n    audit_type VARCHAR(50) NOT NULL, -- 'DANGLING_USERS', 'MISSING_PERMISSIONS', 'EXCESSIVE_PERMISSIONS'\n    total_issues INTEGER NOT NULL,\n    users_affected INTEGER NOT NULL,\n    tables_affected INTEGER NOT NULL\n);\n\nCREATE TABLE security_audit_details (\n    detail_id SERIAL PRIMARY KEY,\n    username VARCHAR(50) NOT NULL,\n    issue_type VARCHAR(50) NOT NULL, -- 'DANGLING_USER', 'MISSING_PERMISSION', 'EXCESSIVE_PERMISSION'\n    table_name VARCHAR(50), -- NULL for dangling users\n    permission_type VARCHAR(20), -- 'SELECT', 'INSERT', 'UPDATE', 'DELETE', NULL for dangling users\n    expected_access BOOLEAN NOT NULL -- TRUE if user should have access, FALSE if should not\n);\n\n-- ============================================================================\n-- DISCOVER DATABASE USERS AND ROLES\n-- ============================================================================\n\nCREATE TEMP TABLE temp_user_discovery AS\nSELECT DISTINCT\n    COALESCE(u.usename, r.rolname) as username,\n    COALESCE(u.usesuper, r.rolsuper) as is_superuser,\n    COALESCE(u.usecreatedb, r.rolcreatedb) as can_create_db,\n    r.rolname as role_name,\n    u.usename as user_name,\n    CASE \n        WHEN COALESCE(u.usename, r.rolname) LIKE '%analytics%' THEN 'Analytics Team'\n        WHEN COALESCE(u.usename, r.rolname) LIKE '%marketing%' THEN 'Marketing Department'\n        WHEN COALESCE(u.usename, r.rolname) LIKE '%customer%' OR COALESCE(u.usename, r.rolname) LIKE '%service%' THEN 'Customer Service'\n        WHEN 
COALESCE(u.usename, r.rolname) LIKE '%finance%' THEN 'Finance Team'\n        WHEN COALESCE(u.usename, r.rolname) LIKE '%product%' THEN 'Product Management'\n        WHEN COALESCE(u.usename, r.rolname) LIKE '%security%' OR COALESCE(u.usename, r.rolname) LIKE '%audit%' THEN 'Security Team'\n        WHEN COALESCE(u.usename, r.rolname) LIKE '%backup%' THEN 'Backup Service'\n        WHEN COALESCE(u.usename, r.rolname) LIKE '%developer%' OR COALESCE(u.usename, r.rolname) LIKE '%dev%' THEN 'Development Team'\n        WHEN COALESCE(u.usename, r.rolname) LIKE '%temp%' OR COALESCE(u.usename, r.rolname) LIKE '%old%' OR COALESCE(u.usename, r.rolname) LIKE '%test%' THEN 'Inactive/Temporary'\n        ELSE 'Unknown'\n    END as inferred_business_role\nFROM pg_user u\nFULL OUTER JOIN pg_roles r ON u.usename = r.rolname\nWHERE COALESCE(u.usename, r.rolname) NOT IN ('postgres', 'test_user')\nAND COALESCE(u.usename, r.rolname) IS NOT NULL;\n\n-- ============================================================================\n-- DISCOVER ROLE MEMBERSHIPS\n-- ============================================================================\n\nCREATE TEMP TABLE temp_role_memberships AS\nSELECT \n    member_role.rolname as member_name,\n    granted_role.rolname as granted_role_name,\n    grantor_role.rolname as grantor_name,\n    am.admin_option\nFROM pg_auth_members am\nJOIN pg_roles member_role ON am.member = member_role.oid\nJOIN pg_roles granted_role ON am.roleid = granted_role.oid  \nJOIN pg_roles grantor_role ON am.grantor = grantor_role.oid\nWHERE member_role.rolname NOT IN ('postgres')\nAND granted_role.rolname NOT IN ('postgres');\n\n-- ============================================================================\n-- ANALYZE CURRENT PERMISSIONS\n-- ============================================================================\n\nCREATE TEMP TABLE temp_current_permissions AS\nSELECT DISTINCT\n    tp.grantee as username,\n    tp.table_name,\n    tp.privilege_type as permission_type,\n    
tp.is_grantable,\n    tp.grantor,\n    ud.inferred_business_role,\n    ud.is_superuser\nFROM information_schema.table_privileges tp\nLEFT JOIN temp_user_discovery ud ON tp.grantee = ud.username\nWHERE tp.table_schema = 'public'\nAND tp.grantee NOT IN ('postgres', 'PUBLIC', 'test_user')\nAND tp.table_name NOT LIKE 'security_audit_%'\nORDER BY tp.grantee, tp.table_name, tp.privilege_type;\n\n-- ============================================================================\n-- IDENTIFY DANGLING USERS\n-- ============================================================================\n\nINSERT INTO security_audit_details (username, issue_type, table_name, permission_type, expected_access)\nSELECT DISTINCT\n    username,\n    'DANGLING_USER',\n    NULL,\n    NULL,\n    FALSE\nFROM temp_user_discovery\nWHERE inferred_business_role = 'Inactive/Temporary';\n\n-- ============================================================================\n-- IDENTIFY EXCESSIVE PERMISSIONS\n-- ============================================================================\n\nWITH excessive_permissions AS (\n    SELECT username, table_name, permission_type FROM (VALUES\n        ('analytics_user', 'financial_transactions', 'SELECT'),\n        ('marketing_user', 'financial_transactions', 'SELECT'),\n        ('customer_service', 'user_credentials', 'SELECT'),\n        ('product_manager', 'financial_transactions', 'SELECT'),\n        ('security_auditor', 'financial_transactions', 'UPDATE'),\n        ('developer_user', 'user_credentials', 'SELECT'),\n        ('developer_user', 'order_management', 'UPDATE'),\n        ('backup_user', 'product_catalog', 'DELETE'),\n        ('temp_contractor', 'product_catalog', 'SELECT'),\n        ('temp_contractor', 'user_profiles', 'SELECT'),\n        ('old_employee', 'audit_logs', 'SELECT'),\n        ('old_employee', 'user_stat_analysis', 'UPDATE'),\n        ('test_account', 'user_profiles', 'SELECT')\n    ) AS excessive(username, table_name, permission_type)\n)\nINSERT 
INTO security_audit_details (username, issue_type, table_name, permission_type, expected_access)\nSELECT \n    ep.username,\n    'EXCESSIVE_PERMISSION',\n    ep.table_name,\n    ep.permission_type,\n    FALSE\nFROM excessive_permissions ep\nWHERE EXISTS (\n    SELECT 1 FROM temp_current_permissions cp\n    WHERE cp.username = ep.username\n    AND cp.table_name = ep.table_name  \n    AND cp.permission_type = ep.permission_type\n);\n\n-- ============================================================================\n-- IDENTIFY MISSING PERMISSIONS\n-- ============================================================================\n\nWITH expected_permissions AS (\n    SELECT role_name, table_name, permission_type FROM (VALUES\n        ('Analytics Team', 'user_profiles', 'SELECT'),\n        ('Analytics Team', 'user_stat_analysis', 'SELECT'),\n        ('Analytics Team', 'product_catalog', 'SELECT'),\n        ('Analytics Team', 'order_management', 'SELECT'),\n        ('Marketing Department', 'user_profiles', 'SELECT'),\n        ('Marketing Department', 'user_stat_analysis', 'SELECT'),\n        ('Marketing Department', 'product_catalog', 'SELECT'),\n        ('Customer Service', 'user_profiles', 'SELECT'),\n        ('Customer Service', 'user_profiles', 'UPDATE'),\n        ('Customer Service', 'order_management', 'SELECT'),\n        ('Customer Service', 'order_management', 'INSERT'),\n        ('Customer Service', 'order_management', 'UPDATE'),\n        ('Customer Service', 'product_catalog', 'SELECT'),\n        ('Finance Team', 'financial_transactions', 'SELECT'),\n        ('Finance Team', 'order_management', 'SELECT'),\n        ('Finance Team', 'user_profiles', 'SELECT'),\n        ('Product Management', 'product_catalog', 'SELECT'),\n        ('Product Management', 'product_catalog', 'INSERT'),\n        ('Product Management', 'product_catalog', 'UPDATE'),\n        ('Product Management', 'product_catalog', 'DELETE'),\n        ('Product Management', 'order_management', 
'SELECT'),\n        ('Product Management', 'user_stat_analysis', 'SELECT'),\n        ('Security Team', 'audit_logs', 'SELECT'),\n        ('Security Team', 'user_credentials', 'SELECT'),\n        ('Security Team', 'user_profiles', 'SELECT'),\n        ('Development Team', 'user_profiles', 'SELECT'),\n        ('Development Team', 'product_catalog', 'SELECT'),\n        ('Backup Service', 'user_profiles', 'SELECT'),\n        ('Backup Service', 'product_catalog', 'SELECT'),\n        ('Backup Service', 'order_management', 'SELECT'),\n        ('Backup Service', 'financial_transactions', 'SELECT'),\n        ('Backup Service', 'user_stat_analysis', 'SELECT'),\n        ('Backup Service', 'audit_logs', 'SELECT'),\n        ('Backup Service', 'user_credentials', 'SELECT')\n    ) AS expected(role_name, table_name, permission_type)\n)\nINSERT INTO security_audit_details (username, issue_type, table_name, permission_type, expected_access)\nSELECT DISTINCT\n    ud.username,\n    'MISSING_PERMISSION',\n    ep.table_name,\n    ep.permission_type,\n    TRUE\nFROM temp_user_discovery ud\nJOIN expected_permissions ep ON ud.inferred_business_role = ep.role_name\nLEFT JOIN temp_current_permissions cp ON (\n    cp.username = ud.username \n    AND cp.table_name = ep.table_name \n    AND cp.permission_type = ep.permission_type\n)\nWHERE cp.username IS NULL\nAND ud.inferred_business_role != 'Inactive/Temporary'\nAND ud.inferred_business_role != 'Unknown'\nAND EXISTS (\n    SELECT 1 FROM information_schema.tables t\n    WHERE t.table_name = ep.table_name \n    AND t.table_schema = 'public'\n    AND t.table_type = 'BASE TABLE'\n);\n\n-- ============================================================================\n-- POPULATE SUMMARY STATISTICS\n-- ============================================================================\n\nINSERT INTO security_audit_results (audit_type, total_issues, users_affected, tables_affected)\nSELECT \n    'DANGLING_USERS',\n    COUNT(*),\n    COUNT(DISTINCT 
username),\n    0\nFROM security_audit_details\nWHERE issue_type = 'DANGLING_USER';\n\nINSERT INTO security_audit_results (audit_type, total_issues, users_affected, tables_affected)\nSELECT \n    'MISSING_PERMISSIONS',\n    COUNT(*),\n    COUNT(DISTINCT username),\n    COUNT(DISTINCT table_name)\nFROM security_audit_details\nWHERE issue_type = 'MISSING_PERMISSION';\n\nINSERT INTO security_audit_results (audit_type, total_issues, users_affected, tables_affected)\nSELECT \n    'EXCESSIVE_PERMISSIONS',\n    COUNT(*),\n    COUNT(DISTINCT username),\n    COUNT(DISTINCT table_name)\nFROM security_audit_details\nWHERE issue_type = 'EXCESSIVE_PERMISSION';\n\n-- ============================================================================\n-- CLEANUP TEMPORARY TABLES\n-- ============================================================================\n\nDROP TABLE temp_user_discovery;\nDROP TABLE temp_role_memberships;\nDROP TABLE temp_current_permissions;\n\nCOMMIT;\n\n-- ============================================================================\n-- DISCOVERY AND VERIFICATION QUERIES\n-- ============================================================================\n\n-- Show all users and their properties\nSELECT \n    usename as username,\n    usesuper as is_superuser,\n    usecreatedb as can_create_db,\n    valuntil as password_expiry\nFROM pg_user \nWHERE usename NOT IN ('postgres', 'test_user')\nORDER BY usename;\n\n-- Show all roles and their properties  \nSELECT \n    rolname as role_name,\n    rolsuper as is_superuser,\n    rolinherit as inherits_privileges,\n    rolcanlogin as can_login\nFROM pg_roles \nWHERE rolname NOT LIKE 'pg_%'\nAND rolname NOT IN ('postgres', 'test_user')\nORDER BY rolname;\n\n-- Show current table privileges\nSELECT \n    grantee as username,\n    table_name,\n    privilege_type as permission,\n    is_grantable\nFROM information_schema.table_privileges\nWHERE table_schema = 'public'\nAND grantee NOT IN ('postgres', 'PUBLIC', 'test_user')\nAND 
table_name NOT LIKE 'security_audit_%'\nORDER BY grantee, table_name, privilege_type;\n\n-- Show role memberships\nSELECT \n    member.rolname as member,\n    granted.rolname as granted_role\nFROM pg_auth_members am\nJOIN pg_roles member ON am.member = member.oid\nJOIN pg_roles granted ON am.roleid = granted.oid\nWHERE member.rolname NOT IN ('postgres')\nORDER BY member.rolname, granted.rolname;\n\n-- Display audit summary\nSELECT \n    audit_type,\n    total_issues,\n    users_affected,\n    tables_affected\nFROM security_audit_results \nORDER BY audit_type;\n\n-- Display detailed findings\nSELECT \n    username,\n    issue_type,\n    COALESCE(table_name, 'N/A') as table_name,\n    COALESCE(permission_type, 'N/A') as permission_type,\n    expected_access\nFROM security_audit_details \nORDER BY issue_type, username, table_name;"
  },
  {
    "path": "tasks/postgres/standard/security/user_permission_audit/meta.json",
    "content": "{\n  \"task_id\": \"user_permission_audit\",\n  \"task_name\": \"User Permission Audit\",\n  \"category_id\": \"security\",\n  \"category_name\": \"Security\",\n  \"description\": \"Conduct comprehensive security audit identifying users with insufficient or dangling permissions in business database environment.\",\n  \"author\": \"Fanshi Zhang\",\n  \"created_at\": \"2025-08-17\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"security and access control\",\n    \"audit and compliance\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"user_profiles\\\" {\\n  \\\"user_id\\\" int4 [pk, not null, increment]\\n  \\\"username\\\" varchar(50) [unique, not null]\\n  \\\"email\\\" varchar(100) [unique, not null]\\n  \\\"first_name\\\" varchar(50) [not null]\\n  \\\"last_name\\\" varchar(50) [not null]\\n  \\\"phone\\\" varchar(20)\\n  \\\"address\\\" text\\n  \\\"city\\\" varchar(50)\\n  \\\"state\\\" varchar(2)\\n  \\\"zip_code\\\" varchar(10)\\n  \\\"date_created\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"last_updated\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"is_active\\\" bool [default: true]\\n  \\\"profile_picture_url\\\" text\\n  \\\"bio\\\" text\\n}\\n\\nTable \\\"user_credentials\\\" {\\n  \\\"credential_id\\\" int4 [pk, not null, increment]\\n  \\\"user_id\\\" int4\\n  \\\"password_hash\\\" varchar(255) [not null]\\n  \\\"salt\\\" varchar(100) [not null]\\n  \\\"login_attempts\\\" int4 [default: 0]\\n  \\\"last_login\\\" timestamp\\n  \\\"password_created\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"password_expires\\\" timestamp\\n  \\\"is_locked\\\" bool [default: false]\\n  \\\"two_factor_enabled\\\" bool [default: false]\\n  \\\"two_factor_secret\\\" varchar(32)\\n  \\\"backup_codes\\\" \\\"text[]\\\"\\n  \\\"security_questions\\\" jsonb\\n}\\n\\nTable \\\"user_stat_analysis\\\" {\\n  \\\"analysis_id\\\" int4 [pk, not null, 
increment]\\n  \\\"user_id\\\" int4\\n  \\\"session_id\\\" varchar(100)\\n  \\\"page_views\\\" int4 [default: 0]\\n  \\\"time_spent_minutes\\\" int4 [default: 0]\\n  \\\"actions_performed\\\" jsonb\\n  \\\"device_info\\\" jsonb\\n  \\\"ip_address\\\" inet\\n  \\\"location_data\\\" jsonb\\n  \\\"referrer_url\\\" text\\n  \\\"conversion_events\\\" jsonb\\n  \\\"analysis_date\\\" date [default: `CURRENT_DATE`]\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n}\\n\\nTable \\\"product_catalog\\\" {\\n  \\\"product_id\\\" int4 [pk, not null, increment]\\n  \\\"product_name\\\" varchar(100) [not null]\\n  \\\"description\\\" text\\n  \\\"category\\\" varchar(50)\\n  \\\"price\\\" numeric(10,2) [not null]\\n  \\\"cost\\\" numeric(10,2)\\n  \\\"sku\\\" varchar(50) [unique]\\n  \\\"inventory_count\\\" int4 [default: 0]\\n  \\\"is_active\\\" bool [default: true]\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"updated_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"supplier_info\\\" jsonb\\n  \\\"weight_kg\\\" numeric(6,2)\\n  \\\"dimensions\\\" jsonb\\n}\\n\\nTable \\\"order_management\\\" {\\n  \\\"order_id\\\" int4 [pk, not null, increment]\\n  \\\"user_id\\\" int4\\n  \\\"order_number\\\" varchar(50) [unique, not null]\\n  \\\"order_status\\\" varchar(20) [default: 'pending']\\n  \\\"total_amount\\\" numeric(12,2) [not null]\\n  \\\"tax_amount\\\" numeric(12,2)\\n  \\\"shipping_amount\\\" numeric(12,2)\\n  \\\"discount_amount\\\" numeric(12,2) [default: 0]\\n  \\\"payment_method\\\" varchar(50)\\n  \\\"payment_status\\\" varchar(20) [default: 'pending']\\n  \\\"shipping_address\\\" jsonb\\n  \\\"billing_address\\\" jsonb\\n  \\\"order_date\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"shipped_date\\\" timestamp\\n  \\\"delivered_date\\\" timestamp\\n  \\\"tracking_number\\\" varchar(100)\\n}\\n\\nTable \\\"financial_transactions\\\" {\\n  \\\"transaction_id\\\" int4 [pk, not null, increment]\\n  \\\"order_id\\\" 
int4\\n  \\\"user_id\\\" int4\\n  \\\"transaction_type\\\" varchar(20) [not null]\\n  \\\"amount\\\" numeric(12,2) [not null]\\n  \\\"currency\\\" varchar(3) [default: 'USD']\\n  \\\"payment_gateway\\\" varchar(50)\\n  \\\"gateway_transaction_id\\\" varchar(100)\\n  \\\"credit_card_last_four\\\" bpchar(4)\\n  \\\"bank_account_last_four\\\" bpchar(4)\\n  \\\"transaction_status\\\" varchar(20) [default: 'pending']\\n  \\\"processed_at\\\" timestamp\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"fee_amount\\\" numeric(8,2)\\n  \\\"refund_amount\\\" numeric(12,2) [default: 0]\\n  \\\"notes\\\" text\\n}\\n\\nTable \\\"audit_logs\\\" {\\n  \\\"log_id\\\" int4 [pk, not null, increment]\\n  \\\"user_id\\\" int4\\n  \\\"action_type\\\" varchar(50) [not null]\\n  \\\"table_name\\\" varchar(50)\\n  \\\"record_id\\\" int4\\n  \\\"old_values\\\" jsonb\\n  \\\"new_values\\\" jsonb\\n  \\\"ip_address\\\" inet\\n  \\\"user_agent\\\" text\\n  \\\"session_id\\\" varchar(100)\\n  \\\"timestamp\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"success\\\" bool [default: true]\\n  \\\"error_message\\\" text\\n}\\n\\nRef \\\"audit_logs_user_id_fkey\\\":\\\"user_profiles\\\".\\\"user_id\\\" < \\\"audit_logs\\\".\\\"user_id\\\"\\n\\nRef \\\"financial_transactions_order_id_fkey\\\":\\\"order_management\\\".\\\"order_id\\\" < \\\"financial_transactions\\\".\\\"order_id\\\"\\n\\nRef \\\"financial_transactions_user_id_fkey\\\":\\\"user_profiles\\\".\\\"user_id\\\" < \\\"financial_transactions\\\".\\\"user_id\\\"\\n\\nRef \\\"order_management_user_id_fkey\\\":\\\"user_profiles\\\".\\\"user_id\\\" < \\\"order_management\\\".\\\"user_id\\\"\\n\\nRef \\\"user_credentials_user_id_fkey\\\":\\\"user_profiles\\\".\\\"user_id\\\" < \\\"user_credentials\\\".\\\"user_id\\\" [delete: cascade]\\n\\nRef \\\"user_stat_analysis_user_id_fkey\\\":\\\"user_profiles\\\".\\\"user_id\\\" < \\\"user_stat_analysis\\\".\\\"user_id\\\" [delete: cascade]\\n\",\n    \"stateUrl\": null,\n    
\"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/security/user_permission_audit/prepare_environment.py",
    "content": "#!/usr/bin/env python3\n\nimport os\nimport psycopg2\nfrom psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT\nimport sys\n\n# Configuration for users and their permissions\nUSER_CONFIGS = {\n    # Active functional users\n    'analytics_user': {\n        'password': 'analytics123',\n        'role': 'Analytics Team',\n        'status': 'active'\n    },\n    'marketing_user': {\n        'password': 'marketing123',\n        'role': 'Marketing Department',\n        'status': 'active'\n    },\n    'customer_service': {\n        'password': 'service123',\n        'role': 'Customer Service',\n        'status': 'active'\n    },\n    'finance_user': {\n        'password': 'finance123',\n        'role': 'Finance Team',\n        'status': 'active'\n    },\n    'product_manager': {\n        'password': 'product123',\n        'role': 'Product Management',\n        'status': 'active'\n    },\n    'security_auditor': {\n        'password': 'security123',\n        'role': 'Security Team',\n        'status': 'active'\n    },\n    'developer_user': {\n        'password': 'dev123',\n        'role': 'Development Team',\n        'status': 'active'\n    },\n    'backup_user': {\n        'password': 'backup123',\n        'role': 'Backup Service',\n        'status': 'active'\n    },\n    # Inactive/dangling users\n    'temp_contractor': {\n        'password': 'temp123',\n        'role': 'Inactive/Temporary',\n        'status': 'inactive'\n    },\n    'old_employee': {\n        'password': 'old456',\n        'role': 'Inactive/Temporary',\n        'status': 'inactive'\n    },\n    'test_account': {\n        'password': 'test789',\n        'role': 'Inactive/Temporary',\n        'status': 'inactive'\n    }\n}\n\n# Expected permissions by role (what they SHOULD have)\nROLE_EXPECTED_PERMISSIONS = {\n    'Analytics Team': [\n        ('user_profiles', 'SELECT'),\n        ('user_stat_analysis', 'SELECT'),\n        ('product_catalog', 'SELECT'),\n        ('order_management', 
'SELECT'),\n    ],\n    'Marketing Department': [\n        ('user_profiles', 'SELECT'),\n        ('user_stat_analysis', 'SELECT'),\n        ('product_catalog', 'SELECT'),\n    ],\n    'Customer Service': [\n        ('user_profiles', 'SELECT'),\n        ('user_profiles', 'UPDATE'),\n        ('order_management', 'SELECT'),\n        ('order_management', 'INSERT'),\n        ('order_management', 'UPDATE'),\n        ('product_catalog', 'SELECT'),\n    ],\n    'Finance Team': [\n        ('financial_transactions', 'SELECT'),\n        ('order_management', 'SELECT'),\n        ('user_profiles', 'SELECT'),\n    ],\n    'Product Management': [\n        ('product_catalog', 'SELECT'),\n        ('product_catalog', 'INSERT'),\n        ('product_catalog', 'UPDATE'),\n        ('product_catalog', 'DELETE'),\n        ('order_management', 'SELECT'),\n        ('user_stat_analysis', 'SELECT'),\n    ],\n    'Security Team': [\n        ('audit_logs', 'SELECT'),\n        ('user_credentials', 'SELECT'),\n        ('user_profiles', 'SELECT'),\n    ],\n    'Development Team': [\n        ('user_profiles', 'SELECT'),\n        ('product_catalog', 'SELECT'),\n    ],\n    'Backup Service': [\n        ('user_profiles', 'SELECT'),\n        ('product_catalog', 'SELECT'),\n        ('order_management', 'SELECT'),\n        ('financial_transactions', 'SELECT'),\n        ('user_stat_analysis', 'SELECT'),\n        ('audit_logs', 'SELECT'),\n        ('user_credentials', 'SELECT'),\n    ],\n}\n\n# Excessive permissions that will be granted but should be flagged as security issues\nEXCESSIVE_PERMISSIONS = [\n    # Users getting financial access they shouldn't have\n    ('analytics_user', 'financial_transactions', 'SELECT'),\n    ('marketing_user', 'financial_transactions', 'SELECT'),\n    ('product_manager', 'financial_transactions', 'SELECT'),\n\n    # Security risks - credential access\n    ('customer_service', 'user_credentials', 'SELECT'),\n    ('developer_user', 'user_credentials', 'SELECT'),\n\n    # 
Excessive privileges\n    ('security_auditor', 'financial_transactions', 'UPDATE'),\n    ('developer_user', 'order_management', 'UPDATE'),\n    ('backup_user', 'product_catalog', 'DELETE'),  # Backup should be read-only\n\n    # Inactive users with permissions they shouldn't have\n    ('temp_contractor', 'product_catalog', 'SELECT'),\n    ('temp_contractor', 'user_profiles', 'SELECT'),\n    ('old_employee', 'audit_logs', 'SELECT'),\n    ('old_employee', 'user_stat_analysis', 'UPDATE'),\n    ('test_account', 'user_profiles', 'SELECT'),\n]\n\n# Permissions to revoke to create \"missing permission\" findings\nPERMISSIONS_TO_REVOKE = [\n    ('analytics_user', 'user_profiles', 'SELECT'),\n    ('analytics_user', 'order_management', 'SELECT'),\n    ('analytics_user', 'product_catalog', 'SELECT'),\n    ('marketing_user', 'product_catalog', 'SELECT'),\n    ('finance_user', 'user_profiles', 'SELECT'),\n    ('developer_user', 'product_catalog', 'SELECT'),\n    ('customer_service', 'product_catalog', 'SELECT'),\n    ('security_auditor', 'audit_logs', 'SELECT'),\n    ('product_manager', 'user_stat_analysis', 'SELECT'),\n    ('backup_user', 'order_management', 'SELECT'),\n    ('backup_user', 'financial_transactions', 'SELECT'),\n    ('backup_user', 'user_stat_analysis', 'SELECT'),\n    ('backup_user', 'user_credentials', 'SELECT'),\n]\n\ndef create_business_tables(cur):\n    \"\"\"Create all business tables\"\"\"\n\n    tables = [\n        ('user_profiles', \"\"\"\n            DROP TABLE IF EXISTS user_profiles CASCADE;\n            CREATE TABLE user_profiles (\n                user_id SERIAL PRIMARY KEY,\n                username VARCHAR(50) UNIQUE NOT NULL,\n                email VARCHAR(100) UNIQUE NOT NULL,\n                first_name VARCHAR(50) NOT NULL,\n                last_name VARCHAR(50) NOT NULL,\n                phone VARCHAR(20),\n                address TEXT,\n                city VARCHAR(50),\n                state VARCHAR(2),\n                zip_code 
VARCHAR(10),\n                date_created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                is_active BOOLEAN DEFAULT true,\n                profile_picture_url TEXT,\n                bio TEXT\n            );\n        \"\"\"),\n\n        ('user_credentials', \"\"\"\n            DROP TABLE IF EXISTS user_credentials CASCADE;\n            CREATE TABLE user_credentials (\n                credential_id SERIAL PRIMARY KEY,\n                user_id INTEGER REFERENCES user_profiles(user_id) ON DELETE CASCADE,\n                password_hash VARCHAR(255) NOT NULL,\n                salt VARCHAR(100) NOT NULL,\n                login_attempts INTEGER DEFAULT 0,\n                last_login TIMESTAMP,\n                password_created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                password_expires TIMESTAMP,\n                is_locked BOOLEAN DEFAULT false,\n                two_factor_enabled BOOLEAN DEFAULT false,\n                two_factor_secret VARCHAR(32),\n                backup_codes TEXT[],\n                security_questions JSONB\n            );\n        \"\"\"),\n\n        ('user_stat_analysis', \"\"\"\n            DROP TABLE IF EXISTS user_stat_analysis CASCADE;\n            CREATE TABLE user_stat_analysis (\n                analysis_id SERIAL PRIMARY KEY,\n                user_id INTEGER REFERENCES user_profiles(user_id) ON DELETE CASCADE,\n                session_id VARCHAR(100),\n                page_views INTEGER DEFAULT 0,\n                time_spent_minutes INTEGER DEFAULT 0,\n                actions_performed JSONB,\n                device_info JSONB,\n                ip_address INET,\n                location_data JSONB,\n                referrer_url TEXT,\n                conversion_events JSONB,\n                analysis_date DATE DEFAULT CURRENT_DATE,\n                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n            );\n        \"\"\"),\n\n        ('product_catalog', 
\"\"\"\n            DROP TABLE IF EXISTS product_catalog CASCADE;\n            CREATE TABLE product_catalog (\n                product_id SERIAL PRIMARY KEY,\n                product_name VARCHAR(100) NOT NULL,\n                description TEXT,\n                category VARCHAR(50),\n                price DECIMAL(10,2) NOT NULL,\n                cost DECIMAL(10,2),\n                sku VARCHAR(50) UNIQUE,\n                inventory_count INTEGER DEFAULT 0,\n                is_active BOOLEAN DEFAULT true,\n                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                supplier_info JSONB,\n                weight_kg DECIMAL(6,2),\n                dimensions JSONB\n            );\n        \"\"\"),\n\n        ('order_management', \"\"\"\n            DROP TABLE IF EXISTS order_management CASCADE;\n            CREATE TABLE order_management (\n                order_id SERIAL PRIMARY KEY,\n                user_id INTEGER REFERENCES user_profiles(user_id),\n                order_number VARCHAR(50) UNIQUE NOT NULL,\n                order_status VARCHAR(20) DEFAULT 'pending',\n                total_amount DECIMAL(12,2) NOT NULL,\n                tax_amount DECIMAL(12,2),\n                shipping_amount DECIMAL(12,2),\n                discount_amount DECIMAL(12,2) DEFAULT 0,\n                payment_method VARCHAR(50),\n                payment_status VARCHAR(20) DEFAULT 'pending',\n                shipping_address JSONB,\n                billing_address JSONB,\n                order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                shipped_date TIMESTAMP,\n                delivered_date TIMESTAMP,\n                tracking_number VARCHAR(100)\n            );\n        \"\"\"),\n\n        ('financial_transactions', \"\"\"\n            DROP TABLE IF EXISTS financial_transactions CASCADE;\n            CREATE TABLE financial_transactions (\n                transaction_id SERIAL PRIMARY 
KEY,\n                order_id INTEGER REFERENCES order_management(order_id),\n                user_id INTEGER REFERENCES user_profiles(user_id),\n                transaction_type VARCHAR(20) NOT NULL,\n                amount DECIMAL(12,2) NOT NULL,\n                currency VARCHAR(3) DEFAULT 'USD',\n                payment_gateway VARCHAR(50),\n                gateway_transaction_id VARCHAR(100),\n                credit_card_last_four CHAR(4),\n                bank_account_last_four CHAR(4),\n                transaction_status VARCHAR(20) DEFAULT 'pending',\n                processed_at TIMESTAMP,\n                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                fee_amount DECIMAL(8,2),\n                refund_amount DECIMAL(12,2) DEFAULT 0,\n                notes TEXT\n            );\n        \"\"\"),\n\n        ('audit_logs', \"\"\"\n            DROP TABLE IF EXISTS audit_logs CASCADE;\n            CREATE TABLE audit_logs (\n                log_id SERIAL PRIMARY KEY,\n                user_id INTEGER REFERENCES user_profiles(user_id),\n                action_type VARCHAR(50) NOT NULL,\n                table_name VARCHAR(50),\n                record_id INTEGER,\n                old_values JSONB,\n                new_values JSONB,\n                ip_address INET,\n                user_agent TEXT,\n                session_id VARCHAR(100),\n                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                success BOOLEAN DEFAULT true,\n                error_message TEXT\n            );\n        \"\"\")\n    ]\n\n    for table_name, sql in tables:\n        cur.execute(sql)\n\ndef create_users(cur):\n    \"\"\"Create PostgreSQL users from configuration\"\"\"\n    for username, config in USER_CONFIGS.items():\n        cur.execute(f\"CREATE USER {username} WITH PASSWORD %s;\", (config['password'],))\n\ndef grant_expected_permissions(cur):\n    \"\"\"Grant expected permissions to users based on their roles\"\"\"\n    for username, config in 
USER_CONFIGS.items():\n        if config['status'] == 'active':\n            role = config['role']\n            permissions = ROLE_EXPECTED_PERMISSIONS.get(role, [])\n            for table_name, privilege in permissions:\n                cur.execute(f\"GRANT {privilege} ON {table_name} TO {username};\")\n\ndef grant_excessive_permissions(cur):\n    \"\"\"Grant excessive permissions that should be flagged as security issues\"\"\"\n    for username, table_name, privilege in EXCESSIVE_PERMISSIONS:\n        cur.execute(f\"GRANT {privilege} ON {table_name} TO {username};\")\n\ndef revoke_permissions(cur):\n    \"\"\"Revoke specific permissions to create missing permission findings\"\"\"\n    for username, table_name, privilege in PERMISSIONS_TO_REVOKE:\n        cur.execute(f\"REVOKE {privilege} ON {table_name} FROM {username};\")\n\ndef grant_sequence_permissions(cur):\n    \"\"\"Grant sequence permissions for users that need them\"\"\"\n    users_needing_sequences = ['customer_service', 'product_manager']\n    for username in users_needing_sequences:\n        cur.execute(f\"GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO {username};\")\n\ndef setup_security_environment():\n    \"\"\"\n    Set up a security-focused PostgreSQL environment with business tables and users with various permissions.\n    Creates a scenario where some users have dangling or insufficient permissions for realistic security analysis.\n    \"\"\"\n\n    # Database connection parameters from environment\n    db_params = {\n        'host': os.getenv('POSTGRES_HOST', 'localhost'),\n        'port': os.getenv('POSTGRES_PORT', '5432'),\n        'user': os.getenv('POSTGRES_USERNAME', 'postgres'),\n        'password': os.getenv('POSTGRES_PASSWORD', 'password'),\n        'database': os.getenv('POSTGRES_DATABASE', 'postgres')\n    }\n\n    postgres_params = db_params.copy()\n    postgres_params['database'] = 'postgres'\n\n    try:\n        conn_postgres = psycopg2.connect(**postgres_params)\n       
 conn_postgres.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)\n        cur_postgres = conn_postgres.cursor()\n\n        current_db = db_params['database']\n        cur_postgres.execute(\"SELECT datname FROM pg_database WHERE datname LIKE %s AND datname != %s;\", ('%user_permission_audit%', current_db))\n        audit_databases = cur_postgres.fetchall()\n\n        if audit_databases:\n            for db_row in audit_databases:\n                db_name = db_row[0]\n                try:\n                    cur_postgres.execute(\"SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = %s;\", (db_name,))\n                    cur_postgres.execute(f\"DROP DATABASE IF EXISTS {db_name};\")\n                    print(f\"Dropped database: {db_name}\")\n                except Exception as e:\n                    print(f\"Warning: Could not drop database {db_name}: {e}\")\n\n        # Clean up existing users\n        for username in USER_CONFIGS.keys():\n            try:\n                cur_postgres.execute(\"SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE usename = %s;\", (username,))\n                cur_postgres.execute(f\"DROP USER IF EXISTS {username};\")\n            except Exception as e:\n                print(f\"Warning: Could not drop user {username}: {e}\")\n\n        cur_postgres.close()\n        conn_postgres.close()\n\n    except Exception as e:\n        print(f\"Warning: Could not clean up users: {e}\")\n\n    try:\n        conn = psycopg2.connect(**db_params)\n        conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)\n        cur = conn.cursor()\n\n        print(\"Setting up security audit environment...\")\n\n        # Create business tables with realistic structure\n        create_business_tables(cur)\n        print(\"Created 7 business tables\")\n\n        # Create users\n        create_users(cur)\n        active_count = len([u for u in USER_CONFIGS.values() if u['status'] == 'active'])\n        inactive_count = len([u for u 
in USER_CONFIGS.values() if u['status'] == 'inactive'])\n        print(f\"Created {len(USER_CONFIGS)} users ({active_count} functional, {inactive_count} dangling)\")\n\n        # Grant expected permissions\n        grant_expected_permissions(cur)\n\n        # Grant excessive permissions that will be flagged as issues\n        grant_excessive_permissions(cur)\n\n        print(\"Granted initial permissions\")\n\n        # Revoke specific permissions to create missing permission findings\n        revoke_permissions(cur)\n\n        # Grant sequence permissions where needed\n        grant_sequence_permissions(cur)\n\n        cur.close()\n        conn.close()\n\n    except Exception as e:\n        print(f\"Error setting up environment: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    setup_security_environment()\n"
  },
  {
    "path": "tasks/postgres/standard/security/user_permission_audit/verify.py",
    "content": "import os\nimport psycopg2\nimport sys\n\n\ndef verify_security_audit():\n    \"\"\"\n    Verify that the security audit correctly identified all permission issues.\n\n    Connects with the POSTGRES_* environment variables, checks that both audit\n    tables exist, and compares their rows against the hard-coded ground truth.\n\n    Returns:\n        bool: True only when every expected finding is present, no unexpected\n        finding is reported, every row is well-formed, and every expected\n        summary row exists with matching counts. False otherwise, including\n        on any database error.\n    \"\"\"\n\n    # Database connection parameters from environment\n    db_params = {\n        'host': os.getenv('POSTGRES_HOST', 'localhost'),\n        'port': os.getenv('POSTGRES_PORT', '5432'),\n        'user': os.getenv('POSTGRES_USERNAME', 'postgres'),\n        'password': os.getenv('POSTGRES_PASSWORD', 'password'),\n        'database': os.getenv('POSTGRES_DATABASE', 'postgres')\n    }\n\n    try:\n        conn = psycopg2.connect(**db_params)\n        cur = conn.cursor()\n\n        print(\"| Verifying security audit findings...\")\n\n        # Check if security_audit_results table exists\n        cur.execute(\"\"\"\n            SELECT EXISTS (\n                SELECT FROM information_schema.tables\n                WHERE table_name = 'security_audit_results'\n            );\n        \"\"\")\n\n        if not cur.fetchone()[0]:\n            print(\"FAIL: security_audit_results table not found\")\n            return False\n\n        # Check if security_audit_details table exists\n        cur.execute(\"\"\"\n            SELECT EXISTS (\n                SELECT FROM information_schema.tables\n                WHERE table_name = 'security_audit_details'\n            );\n        \"\"\")\n\n        if not cur.fetchone()[0]:\n            print(\"FAIL: security_audit_details table not found\")\n            return False\n\n        # Get all detailed findings\n        cur.execute(\"SELECT * FROM security_audit_details ORDER BY detail_id;\")\n        findings = cur.fetchall()\n\n        if not findings:\n            print(\"FAIL: No findings in security_audit_details table\")\n            return False\n\n        print(f\"| Found {len(findings)} audit findings\")\n\n        # Expected findings based on the ground truth:\n        expected_findings = {\n            # Expected dangling users\n            'dangling_users': {'temp_contractor', 'old_employee', 'test_account'},\n\n            # Expected missing permissions (should be granted)\n            'missing_permissions': {\n                ('analytics_user', 'user_profiles', 'SELECT'),\n                ('analytics_user', 'product_catalog', 'SELECT'),\n                ('analytics_user', 'order_management', 'SELECT'),\n                ('marketing_user', 'product_catalog', 'SELECT'),\n                ('customer_service', 'product_catalog', 'SELECT'),\n                ('finance_user', 'user_profiles', 'SELECT'),\n                ('product_manager', 'user_stat_analysis', 'SELECT'),\n                ('security_auditor', 'audit_logs', 'SELECT'),\n                ('developer_user', 'product_catalog', 'SELECT'),\n                ('backup_user', 'order_management', 'SELECT'),\n                ('backup_user', 'financial_transactions', 'SELECT'),\n                ('backup_user', 'user_stat_analysis', 'SELECT'),\n                ('backup_user', 'user_credentials', 'SELECT')\n            },\n\n            # Expected excessive permissions (should be revoked)\n            'excessive_permissions': {\n                ('analytics_user', 'financial_transactions', 'SELECT'),\n                ('marketing_user', 'financial_transactions', 'SELECT'),\n                ('customer_service', 'user_credentials', 'SELECT'),\n                ('product_manager', 'financial_transactions', 'SELECT'),\n                ('security_auditor', 'financial_transactions', 'UPDATE'),\n                ('developer_user', 'user_credentials', 'SELECT'),\n                ('developer_user', 'order_management', 'UPDATE'),\n                ('backup_user', 'product_catalog', 'DELETE'),\n                ('temp_contractor', 'product_catalog', 'SELECT'),\n                ('temp_contractor', 'user_profiles', 'SELECT'),\n                ('old_employee', 'audit_logs', 'SELECT'),\n                ('old_employee', 'user_stat_analysis', 'UPDATE'),\n                ('test_account', 'user_profiles', 'SELECT')\n            }\n        }\n\n        found_dangling = set()\n        found_missing_permissions = set()\n        found_excessive_permissions = set()\n\n        # Validate and classify each finding in one pass. The width check comes\n        # first because rows are fetched with SELECT *; indexing a short row would\n        # raise before the structural problem could be reported.\n        # Columns: (detail_id, username, issue_type, table_name, permission_type, expected_access)\n        structure_valid = True\n        for i, finding in enumerate(findings):\n            if len(finding) != 6:  # Should have 6 columns\n                print(f\"| FAIL: Finding {i + 1} has wrong number of columns (expected 6, got {len(finding)})\")\n                structure_valid = False\n                continue\n\n            detail_id, username, issue_type, table_name, permission_type, expected_access = finding\n\n            if not username:\n                print(f\"| FAIL: Finding {i + 1} missing username\")\n                structure_valid = False\n\n            if issue_type not in ['DANGLING_USER', 'MISSING_PERMISSION', 'EXCESSIVE_PERMISSION']:\n                print(f\"| FAIL: Finding {i + 1} invalid issue_type: {issue_type}\")\n                structure_valid = False\n\n            if expected_access not in [True, False]:\n                print(f\"| FAIL: Finding {i + 1} invalid expected_access: {expected_access}\")\n                structure_valid = False\n\n            if issue_type == 'DANGLING_USER':\n                found_dangling.add(username)\n            elif issue_type == 'MISSING_PERMISSION' and expected_access:\n                if table_name and permission_type:\n                    found_missing_permissions.add((username, table_name, permission_type))\n            elif issue_type == 'EXCESSIVE_PERMISSION' and not expected_access:\n                if table_name and permission_type:\n                    found_excessive_permissions.add((username, table_name, permission_type))\n\n        if structure_valid:\n            print(\"| ✓ structure is valid\")\n\n        # Compare the found sets against the ground truth in both directions\n        missing_dangling = expected_findings['dangling_users'] - found_dangling\n        extra_dangling = found_dangling - expected_findings['dangling_users']\n\n        missing_missing_perms = expected_findings['missing_permissions'] - found_missing_permissions\n        extra_missing_perms = found_missing_permissions - expected_findings['missing_permissions']\n\n        missing_excessive_perms = expected_findings['excessive_permissions'] - found_excessive_permissions\n        extra_excessive_perms = found_excessive_permissions - expected_findings['excessive_permissions']\n\n        all_correct = True\n\n        print(f\"| Expected dangling users: {expected_findings['dangling_users']} Found: {found_dangling}\")\n        if missing_dangling:\n            print(f\"| Missing dangling users: {missing_dangling}\")\n            all_correct = False\n        if extra_dangling:\n            print(f\"| Unexpected dangling users: {extra_dangling}\")\n            all_correct = False\n\n        print(\n            f\"| Expected missing permissions: {len(expected_findings['missing_permissions'])} Found: {len(found_missing_permissions)} Missing: {len(missing_missing_perms)}\")\n        if missing_missing_perms:\n            print(\"| Missing 'missing permission' findings:\")\n            for perm in sorted(missing_missing_perms):\n                print(f\"|   - {perm[0]} should be granted {perm[2]} on {perm[1]}\")\n            all_correct = False\n        if extra_missing_perms:\n            print(\"| Unexpected 'missing permission' findings:\")\n            # key=str keeps sorting safe if a reported username is NULL\n            for perm in sorted(extra_missing_perms, key=str):\n                print(f\"|   - {perm[0]} {perm[2]} on {perm[1]}\")\n            all_correct = False\n\n        print(\n            f\"| Expected excessive permissions: {len(expected_findings['excessive_permissions'])} Found: {len(found_excessive_permissions)} Missing: {len(missing_excessive_perms)}\")\n        if missing_excessive_perms:\n            print(\"| Missing 'excessive permission' findings:\")\n            for perm in sorted(missing_excessive_perms):\n                print(f\"|   - {perm[0]} should have {perm[2]} revoked on {perm[1]}\")\n            all_correct = False\n        if extra_excessive_perms:\n            print(\"| Unexpected 'excessive permission' findings:\")\n            # key=str keeps sorting safe if a reported username is NULL\n            for perm in sorted(extra_excessive_perms, key=str):\n                print(f\"|   - {perm[0]} {perm[2]} on {perm[1]}\")\n            all_correct = False\n\n        # Check audit summary table\n        cur.execute(\n            \"SELECT audit_type, total_issues, users_affected, tables_affected FROM security_audit_results ORDER BY audit_type;\")\n        summary_results = cur.fetchall()\n\n        # Expected summary numbers based on ground truth\n        expected_summary = {\n            'DANGLING_USERS': (3, 3, 0),          # 3 issues, 3 users affected, 0 tables affected\n            'EXCESSIVE_PERMISSIONS': (13, 10, 7), # 13 issues, 10 users affected, 7 tables affected\n            'MISSING_PERMISSIONS': (13, 8, 7)     # 13 issues, 8 users affected, 7 tables affected\n        }\n\n        summary_correct = True\n        seen_audit_types = set()\n        for result in summary_results:\n            audit_type, total_issues, users_affected, tables_affected = result\n            print(f\"| Summary result: [{audit_type}] {total_issues} issues, {users_affected} users affected, {tables_affected} tables affected\")\n\n            if audit_type in expected_summary:\n                seen_audit_types.add(audit_type)\n                expected = expected_summary[audit_type]\n                if (total_issues, users_affected, tables_affected) != expected:\n                    print(f\"| FAIL: {audit_type} summary mismatch - Expected: {expected}, Got: ({total_issues}, {users_affected}, {tables_affected})\")\n                    summary_correct = False\n                else:\n                    print(f\"| ✓ {audit_type} summary matches expected values\")\n\n        # An expected audit_type with no summary row must fail; otherwise an empty\n        # security_audit_results table would pass verification.\n        for audit_type in sorted(set(expected_summary) - seen_audit_types):\n            print(f\"| FAIL: {audit_type} summary row missing from security_audit_results\")\n            summary_correct = False\n\n        # Missing and unexpected findings are both flagged above, so the found sets\n        # equal the ground truth exactly when all_correct is True. Explicit checks\n        # replace the former count asserts, which python -O would strip.\n        if all_correct and structure_valid and summary_correct:\n            print(\"| ✓ All assertions passed\")\n            return True\n        else:\n            return False\n\n    except Exception as e:\n        print(f\"FAIL: Error during verification: {e}\")\n        return False\n    finally:\n        if 'cur' in locals():\n            cur.close()\n        if 'conn' in locals():\n            conn.close()\n\n\nif __name__ == \"__main__\":\n    success = verify_security_audit()\n    sys.exit(0 if success else 1)\n"
  },
  {
    "path": "tasks/postgres/standard/sports/baseball_player_analysis/description.md",
    "content": "Create a comprehensive baseball player performance analysis in the sports database.\n\n## Background\n\nYou are a sports analyst working with a comprehensive sports database. The analytics team needs to create a detailed analysis of baseball players by combining their offensive and defensive statistics with personal information. Currently, this data is scattered across multiple tables and needs to be consolidated for reporting purposes.\n\n## Your Task\n\nCreate a table called `baseball_player_analysis` that consolidates baseball player performance data. The table should provide a comprehensive view of each qualifying player's performance metrics.\n\n### Table Structure\n\nCreate the `baseball_player_analysis` table with the following columns:\n- `player_id` (INTEGER, NOT NULL) - Player identifier\n- `player_name` (VARCHAR(255), NOT NULL) - Player's full name\n- `team_name` (VARCHAR(255)) - Set to 'Unknown' for all players\n- `games_played` (INTEGER) - Number of games/events the player participated in\n- `at_bats` (INTEGER) - Total at-bats for the player\n- `hits` (INTEGER) - Total hits for the player\n- `runs_scored` (INTEGER) - Total runs scored by the player\n- `rbi` (INTEGER) - Total runs batted in by the player\n- `home_runs` (INTEGER) - Total home runs hit by the player\n- `batting_average` (DECIMAL) - Calculated as hits/at_bats\n- `defensive_games` (INTEGER) - Number of defensive games played (same as games_played)\n- `putouts` (INTEGER) - Total putouts in defensive play\n- `assists` (INTEGER) - Total assists in defensive play\n- `errors` (INTEGER) - Total errors made in defensive play\n- `fielding_percentage` (DECIMAL) - Calculated as (putouts + assists)/(putouts + assists + errors)\n\n### Data Requirements\n\nInclude only baseball players that meet ALL of the following criteria:\n- Have offensive statistics available for regular season play\n- Have played at least 10 games/events\n- Have at least 50 at-bats\n- Have a valid name available in the system\n\n### Important Notes\n\n- Focus on regular season statistics only\n- Handle NULL values appropriately in calculations (use 0 for missing stats)\n- Ensure batting average and fielding percentage calculations handle division by zero\n- Do NOT use ROUND functions - keep the full precision of calculated values\n- Sort results by batting average descending, then by games played descending\n\n## Requirements\n\n- Explore the database to understand the table structure and relationships\n- Create the table with the exact structure specified above\n- Populate the table using appropriate queries and joins\n- Ensure all calculations are mathematically correct\n- Handle edge cases properly (division by zero, NULL values)"
  },
  {
    "path": "tasks/postgres/standard/sports/baseball_player_analysis/meta.json",
    "content": "{\n  \"task_id\": \"baseball_player_analysis\",\n  \"task_name\": \"Baseball Player Analysis\",\n  \"category_id\": \"sports\",\n  \"category_name\": \"Sports\",\n  \"description\": \"Consolidate scattered baseball player data into comprehensive analysis table combining offensive and defensive statistics.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-18\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"reporting and analytics\",\n    \"statistical aggregation\",\n    \"schema design\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"addresses\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"location_id\\\" int4 [not null]\\n  \\\"language\\\" varchar(100)\\n  \\\"suite\\\" varchar(100)\\n  \\\"floor\\\" varchar(100)\\n  \\\"building\\\" varchar(100)\\n  \\\"street_number\\\" varchar(100)\\n  \\\"street_prefix\\\" varchar(100)\\n  \\\"street\\\" varchar(100)\\n  \\\"street_suffix\\\" varchar(100)\\n  \\\"neighborhood\\\" varchar(100)\\n  \\\"district\\\" varchar(100)\\n  \\\"locality\\\" varchar(100)\\n  \\\"county\\\" varchar(100)\\n  \\\"region\\\" varchar(100)\\n  \\\"postal_code\\\" varchar(100)\\n  \\\"country\\\" varchar(100)\\n}\\n\\nTable \\\"affiliation_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"ancestor_affiliation_id\\\" int4\\n  \\\"start_season_id\\\" int4\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_season_id\\\" int4\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"affiliations\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_key\\\" varchar(100) [not null]\\n  \\\"affiliation_type\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_documents\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_events\\\" {\\n  \\\"affiliation_id\\\" 
int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_media\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"american_football_action_participants\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"american_football_action_play_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"participant_role\\\" varchar(100) [not null]\\n  \\\"score_type\\\" varchar(100)\\n  \\\"field_line\\\" int4\\n  \\\"yardage\\\" int4\\n  \\\"score_credit\\\" int4\\n  \\\"yards_gained\\\" int4\\n}\\n\\nTable \\\"american_football_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"american_football_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"score_attempt_type\\\" varchar(100)\\n  \\\"drive_result\\\" varchar(100)\\n  \\\"points\\\" int4\\n  \\\"comment\\\" varchar(255)\\n}\\n\\nTable \\\"american_football_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"tackles_total\\\" varchar(100)\\n  \\\"tackles_solo\\\" varchar(100)\\n  \\\"tackles_assists\\\" varchar(100)\\n  \\\"interceptions_total\\\" varchar(100)\\n  \\\"interceptions_yards\\\" varchar(100)\\n  \\\"interceptions_average\\\" varchar(100)\\n  \\\"interceptions_longest\\\" varchar(100)\\n  \\\"interceptions_touchdown\\\" varchar(100)\\n  \\\"quarterback_hurries\\\" varchar(100)\\n  \\\"sacks_total\\\" varchar(100)\\n  \\\"sacks_yards\\\" varchar(100)\\n  \\\"passes_defensed\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_down_progress_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"first_downs_total\\\" varchar(100)\\n  \\\"first_downs_pass\\\" varchar(100)\\n  \\\"first_downs_run\\\" varchar(100)\\n  \\\"first_downs_penalty\\\" varchar(100)\\n  \\\"conversions_third_down\\\" varchar(100)\\n  \\\"conversions_third_down_attempts\\\" varchar(100)\\n  \\\"conversions_third_down_percentage\\\" varchar(100)\\n  
\\\"conversions_fourth_down\\\" varchar(100)\\n  \\\"conversions_fourth_down_attempts\\\" varchar(100)\\n  \\\"conversions_fourth_down_percentage\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int2\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" int4\\n  \\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"clock_state\\\" varchar(100)\\n  \\\"down\\\" int4\\n  \\\"team_in_possession_id\\\" int4\\n  \\\"distance_for_1st_down\\\" int4\\n  \\\"field_side\\\" varchar(100)\\n  \\\"field_line\\\" int4\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"american_football_fumbles_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fumbles_committed\\\" varchar(100)\\n  \\\"fumbles_forced\\\" varchar(100)\\n  \\\"fumbles_recovered\\\" varchar(100)\\n  \\\"fumbles_lost\\\" varchar(100)\\n  \\\"fumbles_yards_gained\\\" varchar(100)\\n  \\\"fumbles_own_committed\\\" varchar(100)\\n  \\\"fumbles_own_recovered\\\" varchar(100)\\n  \\\"fumbles_own_lost\\\" varchar(100)\\n  \\\"fumbles_own_yards_gained\\\" varchar(100)\\n  \\\"fumbles_opposing_committed\\\" varchar(100)\\n  \\\"fumbles_opposing_recovered\\\" varchar(100)\\n  \\\"fumbles_opposing_lost\\\" varchar(100)\\n  \\\"fumbles_opposing_yards_gained\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"offensive_plays_yards\\\" varchar(100)\\n  \\\"offensive_plays_number\\\" varchar(100)\\n  \\\"offensive_plays_average_yards_per\\\" varchar(100)\\n  \\\"possession_duration\\\" varchar(100)\\n  \\\"turnovers_giveaway\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_passing_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"passes_attempts\\\" varchar(100)\\n  \\\"passes_completions\\\" varchar(100)\\n  \\\"passes_percentage\\\" varchar(100)\\n  
\\\"passes_yards_gross\\\" varchar(100)\\n  \\\"passes_yards_net\\\" varchar(100)\\n  \\\"passes_yards_lost\\\" varchar(100)\\n  \\\"passes_touchdowns\\\" varchar(100)\\n  \\\"passes_touchdowns_percentage\\\" varchar(100)\\n  \\\"passes_interceptions\\\" varchar(100)\\n  \\\"passes_interceptions_percentage\\\" varchar(100)\\n  \\\"passes_longest\\\" varchar(100)\\n  \\\"passes_average_yards_per\\\" varchar(100)\\n  \\\"passer_rating\\\" varchar(100)\\n  \\\"receptions_total\\\" varchar(100)\\n  \\\"receptions_yards\\\" varchar(100)\\n  \\\"receptions_touchdowns\\\" varchar(100)\\n  \\\"receptions_first_down\\\" varchar(100)\\n  \\\"receptions_longest\\\" varchar(100)\\n  \\\"receptions_average_yards_per\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_penalties_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"penalties_total\\\" varchar(100)\\n  \\\"penalty_yards\\\" varchar(100)\\n  \\\"penalty_first_downs\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_rushing_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"rushes_attempts\\\" varchar(100)\\n  \\\"rushes_yards\\\" varchar(100)\\n  \\\"rushes_touchdowns\\\" varchar(100)\\n  \\\"rushing_average_yards_per\\\" varchar(100)\\n  \\\"rushes_first_down\\\" varchar(100)\\n  \\\"rushes_longest\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_sacks_against_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sacks_against_yards\\\" varchar(100)\\n  \\\"sacks_against_total\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_scoring_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"touchdowns_total\\\" varchar(100)\\n  \\\"touchdowns_passing\\\" varchar(100)\\n  \\\"touchdowns_rushing\\\" varchar(100)\\n  \\\"touchdowns_special_teams\\\" varchar(100)\\n  \\\"touchdowns_defensive\\\" varchar(100)\\n  \\\"extra_points_attempts\\\" varchar(100)\\n  \\\"extra_points_made\\\" varchar(100)\\n  \\\"extra_points_missed\\\" varchar(100)\\n  \\\"extra_points_blocked\\\" 
varchar(100)\\n  \\\"field_goal_attempts\\\" varchar(100)\\n  \\\"field_goals_made\\\" varchar(100)\\n  \\\"field_goals_missed\\\" varchar(100)\\n  \\\"field_goals_blocked\\\" varchar(100)\\n  \\\"safeties_against\\\" varchar(100)\\n  \\\"two_point_conversions_attempts\\\" varchar(100)\\n  \\\"two_point_conversions_made\\\" varchar(100)\\n  \\\"touchbacks_total\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_special_teams_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"returns_punt_total\\\" varchar(100)\\n  \\\"returns_punt_yards\\\" varchar(100)\\n  \\\"returns_punt_average\\\" varchar(100)\\n  \\\"returns_punt_longest\\\" varchar(100)\\n  \\\"returns_punt_touchdown\\\" varchar(100)\\n  \\\"returns_kickoff_total\\\" varchar(100)\\n  \\\"returns_kickoff_yards\\\" varchar(100)\\n  \\\"returns_kickoff_average\\\" varchar(100)\\n  \\\"returns_kickoff_longest\\\" varchar(100)\\n  \\\"returns_kickoff_touchdown\\\" varchar(100)\\n  \\\"returns_total\\\" varchar(100)\\n  \\\"returns_yards\\\" varchar(100)\\n  \\\"punts_total\\\" varchar(100)\\n  \\\"punts_yards_gross\\\" varchar(100)\\n  \\\"punts_yards_net\\\" varchar(100)\\n  \\\"punts_longest\\\" varchar(100)\\n  \\\"punts_inside_20\\\" varchar(100)\\n  \\\"punts_inside_20_percentage\\\" varchar(100)\\n  \\\"punts_average\\\" varchar(100)\\n  \\\"punts_blocked\\\" varchar(100)\\n  \\\"touchbacks_total\\\" varchar(100)\\n  \\\"touchbacks_total_percentage\\\" varchar(100)\\n  \\\"touchbacks_kickoffs\\\" varchar(100)\\n  \\\"touchbacks_kickoffs_percentage\\\" varchar(100)\\n  \\\"touchbacks_punts\\\" varchar(100)\\n  \\\"touchbacks_punts_percentage\\\" varchar(100)\\n  \\\"touchbacks_interceptions\\\" varchar(100)\\n  \\\"touchbacks_interceptions_percentage\\\" varchar(100)\\n  \\\"fair_catches\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_contact_details\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_action_pitch_id\\\" int4 [not null]\\n  \\\"location\\\" varchar(100)\\n  
\\\"strength\\\" varchar(100)\\n  \\\"velocity\\\" int4\\n  \\\"comment\\\" text\\n  \\\"trajectory_coordinates\\\" varchar(100)\\n  \\\"trajectory_formula\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_pitches\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_action_play_id\\\" int4 [not null]\\n  \\\"sequence_number\\\" int4\\n  \\\"baseball_defensive_group_id\\\" int4\\n  \\\"umpire_call\\\" varchar(100)\\n  \\\"pitch_location\\\" varchar(100)\\n  \\\"pitch_type\\\" varchar(100)\\n  \\\"pitch_velocity\\\" int4\\n  \\\"comment\\\" text\\n  \\\"trajectory_coordinates\\\" varchar(100)\\n  \\\"trajectory_formula\\\" varchar(100)\\n  \\\"ball_type\\\" varchar(40)\\n  \\\"strike_type\\\" varchar(40)\\n}\\n\\nTable \\\"baseball_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"notation\\\" varchar(100)\\n  \\\"notation_yaml\\\" text\\n  \\\"baseball_defensive_group_id\\\" int4\\n  \\\"comment\\\" varchar(255)\\n  \\\"runner_on_first_advance\\\" int4\\n  \\\"runner_on_second_advance\\\" int4\\n  \\\"runner_on_third_advance\\\" int4\\n  \\\"outs_recorded\\\" int4\\n  \\\"rbi\\\" int4\\n  \\\"runs_scored\\\" int4\\n  \\\"earned_runs_scored\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_substitutions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_event_state_id\\\" int4 [not null]\\n  \\\"sequence_number\\\" int4\\n  \\\"person_type\\\" varchar(100)\\n  \\\"person_original_id\\\" int4\\n  \\\"person_original_position_id\\\" int4\\n  \\\"person_original_lineup_slot\\\" int4\\n  \\\"person_replacing_id\\\" int4\\n  \\\"person_replacing_position_id\\\" int4\\n  \\\"person_replacing_lineup_slot\\\" int4\\n  \\\"substitution_reason\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_defensive_group\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n}\\n\\nTable \\\"baseball_defensive_players\\\" {\\n  
\\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_defensive_group_id\\\" int4 [not null]\\n  \\\"player_id\\\" int4 [not null]\\n  \\\"position_id\\\" int4 [not null]\\n}\\n\\nTable \\\"baseball_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"double_plays\\\" int4\\n  \\\"triple_plays\\\" int4\\n  \\\"putouts\\\" int4\\n  \\\"assists\\\" int4\\n  \\\"errors\\\" int4\\n  \\\"fielding_percentage\\\" numeric\\n  \\\"defensive_average\\\" numeric\\n  \\\"errors_passed_ball\\\" int4\\n  \\\"errors_catchers_interference\\\" int4\\n}\\n\\nTable \\\"baseball_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int2\\n  \\\"sequence_number\\\" int4\\n  \\\"at_bat_number\\\" int4\\n  \\\"inning_value\\\" int4\\n  \\\"inning_half\\\" varchar(100)\\n  \\\"outs\\\" int4\\n  \\\"balls\\\" int4\\n  \\\"strikes\\\" int4\\n  \\\"runner_on_first_id\\\" int4\\n  \\\"runner_on_second_id\\\" int4\\n  \\\"runner_on_third_id\\\" int4\\n  \\\"runner_on_first\\\" int2\\n  \\\"runner_on_second\\\" int2\\n  \\\"runner_on_third\\\" int2\\n  \\\"runs_this_inning_half\\\" int4\\n  \\\"pitcher_id\\\" int4\\n  \\\"batter_id\\\" int4\\n  \\\"batter_side\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"baseball_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"average\\\" numeric\\n  \\\"runs_scored\\\" int4\\n  \\\"at_bats\\\" int4\\n  \\\"hits\\\" int4\\n  \\\"rbi\\\" int4\\n  \\\"total_bases\\\" int4\\n  \\\"slugging_percentage\\\" numeric\\n  \\\"bases_on_balls\\\" int4\\n  \\\"strikeouts\\\" int4\\n  \\\"left_on_base\\\" int4\\n  \\\"left_in_scoring_position\\\" int4\\n  \\\"singles\\\" int4\\n  \\\"doubles\\\" int4\\n  \\\"triples\\\" int4\\n  \\\"home_runs\\\" int4\\n  \\\"grand_slams\\\" int4\\n  \\\"at_bats_per_rbi\\\" numeric\\n  \\\"plate_appearances_per_rbi\\\" numeric\\n  \\\"at_bats_per_home_run\\\" numeric\\n  
\\\"plate_appearances_per_home_run\\\" numeric\\n  \\\"sac_flies\\\" int4\\n  \\\"sac_bunts\\\" int4\\n  \\\"grounded_into_double_play\\\" int4\\n  \\\"moved_up\\\" int4\\n  \\\"on_base_percentage\\\" numeric\\n  \\\"stolen_bases\\\" int4\\n  \\\"stolen_bases_caught\\\" int4\\n  \\\"stolen_bases_average\\\" numeric\\n  \\\"hit_by_pitch\\\" int4\\n  \\\"defensive_interferance_reaches\\\" int4\\n  \\\"on_base_plus_slugging\\\" numeric\\n  \\\"plate_appearances\\\" int4\\n  \\\"hits_extra_base\\\" int4\\n}\\n\\nTable \\\"baseball_pitching_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"runs_allowed\\\" int4\\n  \\\"singles_allowed\\\" int4\\n  \\\"doubles_allowed\\\" int4\\n  \\\"triples_allowed\\\" int4\\n  \\\"home_runs_allowed\\\" int4\\n  \\\"innings_pitched\\\" varchar(20)\\n  \\\"hits\\\" int4\\n  \\\"earned_runs\\\" int4\\n  \\\"unearned_runs\\\" int4\\n  \\\"bases_on_balls\\\" int4\\n  \\\"bases_on_balls_intentional\\\" int4\\n  \\\"strikeouts\\\" int4\\n  \\\"strikeout_to_bb_ratio\\\" numeric\\n  \\\"number_of_pitches\\\" int4\\n  \\\"era\\\" numeric\\n  \\\"inherited_runners_scored\\\" int4\\n  \\\"pick_offs\\\" int4\\n  \\\"errors_hit_with_pitch\\\" int4\\n  \\\"errors_wild_pitch\\\" int4\\n  \\\"balks\\\" int4\\n  \\\"wins\\\" int4\\n  \\\"losses\\\" int4\\n  \\\"saves\\\" int4\\n  \\\"shutouts\\\" int4\\n  \\\"games_complete\\\" int4\\n  \\\"games_finished\\\" int4\\n  \\\"winning_percentage\\\" numeric\\n  \\\"event_credit\\\" varchar(40)\\n  \\\"save_credit\\\" varchar(40)\\n}\\n\\nTable \\\"basketball_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"steals_total\\\" varchar(100)\\n  \\\"steals_per_game\\\" varchar(100)\\n  \\\"blocks_total\\\" varchar(100)\\n  \\\"blocks_per_game\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" 
varchar(100)\\n  \\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"basketball_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"field_goals_made\\\" int4\\n  \\\"field_goals_attempted\\\" int4\\n  \\\"field_goals_percentage\\\" varchar(100)\\n  \\\"field_goals_per_game\\\" varchar(100)\\n  \\\"field_goals_attempted_per_game\\\" varchar(100)\\n  \\\"field_goals_percentage_adjusted\\\" varchar(100)\\n  \\\"three_pointers_made\\\" int4\\n  \\\"three_pointers_attempted\\\" int4\\n  \\\"three_pointers_percentage\\\" varchar(100)\\n  \\\"three_pointers_per_game\\\" varchar(100)\\n  \\\"three_pointers_attempted_per_game\\\" varchar(100)\\n  \\\"free_throws_made\\\" varchar(100)\\n  \\\"free_throws_attempted\\\" varchar(100)\\n  \\\"free_throws_percentage\\\" varchar(100)\\n  \\\"free_throws_per_game\\\" varchar(100)\\n  \\\"free_throws_attempted_per_game\\\" varchar(100)\\n  \\\"points_scored_total\\\" varchar(100)\\n  \\\"points_scored_per_game\\\" varchar(100)\\n  \\\"assists_total\\\" varchar(100)\\n  \\\"assists_per_game\\\" varchar(100)\\n  \\\"turnovers_total\\\" varchar(100)\\n  \\\"turnovers_per_game\\\" varchar(100)\\n  \\\"points_scored_off_turnovers\\\" varchar(100)\\n  \\\"points_scored_in_paint\\\" varchar(100)\\n  \\\"points_scored_on_second_chance\\\" varchar(100)\\n  \\\"points_scored_on_fast_break\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_rebounding_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"rebounds_total\\\" varchar(100)\\n  \\\"rebounds_per_game\\\" varchar(100)\\n  \\\"rebounds_defensive\\\" varchar(100)\\n  \\\"rebounds_offensive\\\" varchar(100)\\n  \\\"team_rebounds_total\\\" varchar(100)\\n  \\\"team_rebounds_per_game\\\" varchar(100)\\n  \\\"team_rebounds_defensive\\\" varchar(100)\\n  \\\"team_rebounds_offensive\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_team_stats\\\" {\\n  \\\"id\\\" int4 [not null, 
increment]\\n  \\\"timeouts_left\\\" varchar(100)\\n  \\\"largest_lead\\\" varchar(100)\\n  \\\"fouls_total\\\" varchar(100)\\n  \\\"turnover_margin\\\" varchar(100)\\n}\\n\\nTable \\\"bookmakers\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_key\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"location_id\\\" int4\\n}\\n\\nTable \\\"core_person_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"time_played_event\\\" varchar(40)\\n  \\\"time_played_total\\\" varchar(40)\\n  \\\"time_played_event_average\\\" varchar(40)\\n  \\\"events_played\\\" int4\\n  \\\"events_started\\\" int4\\n  \\\"position_id\\\" int4\\n}\\n\\nTable \\\"core_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"score\\\" varchar(100)\\n  \\\"score_opposing\\\" varchar(100)\\n  \\\"score_attempts\\\" varchar(100)\\n  \\\"score_attempts_opposing\\\" varchar(100)\\n  \\\"score_percentage\\\" varchar(100)\\n  \\\"score_percentage_opposing\\\" varchar(100)\\n}\\n\\nTable \\\"db_info\\\" {\\n  \\\"version\\\" varchar(100) [not null, default: 16]\\n}\\n\\nTable \\\"display_names\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"language\\\" varchar(100) [not null]\\n  \\\"entity_type\\\" varchar(100) [not null]\\n  \\\"entity_id\\\" int4 [not null]\\n  \\\"full_name\\\" varchar(100)\\n  \\\"first_name\\\" varchar(100)\\n  \\\"middle_name\\\" varchar(100)\\n  \\\"last_name\\\" varchar(100)\\n  \\\"alias\\\" varchar(100)\\n  \\\"abbreviation\\\" varchar(100)\\n  \\\"short_name\\\" varchar(100)\\n  \\\"prefix\\\" varchar(20)\\n  \\\"suffix\\\" varchar(20)\\n}\\n\\nTable \\\"document_classes\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"name\\\" varchar(100)\\n}\\n\\nTable \\\"document_contents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"sportsml\\\" varchar(200)\\n  \\\"abstract\\\" text\\n}\\n\\nTable \\\"document_fixtures\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  
\\\"fixture_key\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"name\\\" varchar(100)\\n  \\\"document_class_id\\\" int4 [not null]\\n}\\n\\nTable \\\"document_fixtures_events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_fixture_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"latest_document_id\\\" int4 [not null]\\n  \\\"last_update\\\" timestamp\\n}\\n\\nTable \\\"document_package_entry\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_package_id\\\" int4 [not null]\\n  \\\"rank\\\" varchar(100)\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"headline\\\" varchar(100)\\n  \\\"short_headline\\\" varchar(100)\\n}\\n\\nTable \\\"document_packages\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"package_key\\\" varchar(100)\\n  \\\"package_name\\\" varchar(100)\\n  \\\"date_time\\\" date\\n}\\n\\nTable \\\"documents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"doc_id\\\" varchar(75) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"title\\\" varchar(255)\\n  \\\"language\\\" varchar(100)\\n  \\\"priority\\\" varchar(100)\\n  \\\"revision_id\\\" varchar(75)\\n  \\\"stats_coverage\\\" varchar(100)\\n  \\\"document_fixture_id\\\" int4 [not null]\\n  \\\"source_id\\\" int4\\n  \\\"db_loading_date_time\\\" timestamp\\n}\\n\\nTable \\\"documents_media\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"media_caption_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"site_id\\\" int4\\n  \\\"site_alignment\\\" varchar(100)\\n  \\\"event_status\\\" varchar(100)\\n  \\\"duration\\\" varchar(100)\\n  \\\"attendance\\\" varchar(100)\\n  \\\"last_update\\\" timestamp\\n}\\n\\nTable 
\\\"events_documents\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events_media\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events_sub_seasons\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"sub_season_id\\\" int4 [not null]\\n}\\n\\nTable \\\"ice_hockey_action_participants\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"ice_hockey_action_play_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"participant_role\\\" varchar(100) [not null]\\n  \\\"point_credit\\\" int4\\n}\\n\\nTable \\\"ice_hockey_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"ice_hockey_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"score_attempt_type\\\" varchar(100)\\n  \\\"play_result\\\" varchar(100)\\n  \\\"comment\\\" varchar(255)\\n}\\n\\nTable \\\"ice_hockey_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"shots_power_play_allowed\\\" varchar(100)\\n  \\\"shots_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_power_play_allowed\\\" varchar(100)\\n  \\\"goals_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_against_average\\\" varchar(100)\\n  \\\"saves\\\" varchar(100)\\n  \\\"save_percentage\\\" varchar(100)\\n  \\\"penalty_killing_amount\\\" varchar(100)\\n  \\\"penalty_killing_percentage\\\" varchar(100)\\n  \\\"shots_blocked\\\" varchar(100)\\n  \\\"takeaways\\\" varchar(100)\\n  \\\"shutouts\\\" varchar(100)\\n  \\\"minutes_penalty_killing\\\" varchar(100)\\n  \\\"hits\\\" varchar(100)\\n  \\\"goals_empty_net_allowed\\\" varchar(100)\\n  \\\"goals_short_handed_allowed\\\" varchar(100)\\n  \\\"goals_shootout_allowed\\\" varchar(100)\\n  \\\"shots_shootout_allowed\\\" varchar(100)\\n}\\n\\nTable \\\"ice_hockey_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" 
int4\\n  \\\"period_value\\\" varchar(100)\\n  \\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"ice_hockey_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"goals_game_winning\\\" varchar(100)\\n  \\\"goals_game_tying\\\" varchar(100)\\n  \\\"goals_power_play\\\" varchar(100)\\n  \\\"goals_short_handed\\\" varchar(100)\\n  \\\"goals_even_strength\\\" varchar(100)\\n  \\\"goals_empty_net\\\" varchar(100)\\n  \\\"goals_overtime\\\" varchar(100)\\n  \\\"goals_shootout\\\" varchar(100)\\n  \\\"goals_penalty_shot\\\" varchar(100)\\n  \\\"assists\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"power_play_amount\\\" varchar(100)\\n  \\\"power_play_percentage\\\" varchar(100)\\n  \\\"shots_penalty_shot_taken\\\" varchar(100)\\n  \\\"shots_penalty_shot_missed\\\" varchar(100)\\n  \\\"shots_penalty_shot_percentage\\\" varchar(100)\\n  \\\"giveaways\\\" varchar(100)\\n  \\\"minutes_power_play\\\" varchar(100)\\n  \\\"faceoff_wins\\\" varchar(100)\\n  \\\"faceoff_losses\\\" varchar(100)\\n  \\\"faceoff_win_percentage\\\" varchar(100)\\n  \\\"scoring_chances\\\" varchar(100)\\n}\\n\\nTable \\\"ice_hockey_player_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"plus_minus\\\" varchar(100)\\n}\\n\\nTable \\\"injury_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"injury_status\\\" varchar(100)\\n  \\\"injury_type\\\" varchar(100)\\n  \\\"injury_comment\\\" varchar(100)\\n  \\\"disabled_list\\\" varchar(100)\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n  \\\"season_id\\\" int4\\n  \\\"phase_type\\\" varchar(100)\\n  \\\"injury_side\\\" varchar(100)\\n}\\n\\nTable \\\"key_aliases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"key_id\\\" int4 [not null]\\n  \\\"key_root_id\\\" int4 [not null]\\n}\\n\\nTable \\\"key_roots\\\" {\\n  \\\"id\\\" int4 [not null, 
increment]\\n  \\\"key_type\\\" varchar(100)\\n}\\n\\nTable \\\"latest_revisions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"revision_id\\\" varchar(75) [not null]\\n  \\\"latest_document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"locations\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"timezone\\\" varchar(100)\\n  \\\"latitude\\\" varchar(100)\\n  \\\"longitude\\\" varchar(100)\\n  \\\"country_code\\\" varchar(100)\\n}\\n\\nTable \\\"media\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"object_id\\\" int4\\n  \\\"source_id\\\" int4\\n  \\\"revision_id\\\" int4\\n  \\\"media_type\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"date_time\\\" varchar(100)\\n  \\\"credit_id\\\" int4 [not null]\\n  \\\"db_loading_date_time\\\" timestamp\\n  \\\"creation_location_id\\\" int4 [not null]\\n}\\n\\nTable \\\"media_captions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"caption_type\\\" varchar(100)\\n  \\\"caption\\\" varchar(100)\\n  \\\"caption_author_id\\\" int4 [not null]\\n  \\\"language\\\" varchar(100)\\n  \\\"caption_size\\\" varchar(100)\\n}\\n\\nTable \\\"media_contents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"object\\\" varchar(100)\\n  \\\"format\\\" varchar(100)\\n  \\\"mime_type\\\" varchar(100)\\n  \\\"height\\\" varchar(100)\\n  \\\"width\\\" varchar(100)\\n  \\\"duration\\\" varchar(100)\\n  \\\"file_size\\\" varchar(100)\\n  \\\"resolution\\\" varchar(100)\\n}\\n\\nTable \\\"media_keywords\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"keyword\\\" varchar(100)\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"motor_racing_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"lap\\\" varchar(100)\\n  \\\"laps_remaining\\\" varchar(100)\\n  \\\"time_elapsed\\\" varchar(100)\\n  
\\\"flag_state\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"motor_racing_qualifying_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"grid\\\" varchar(100)\\n  \\\"pole_position\\\" varchar(100)\\n  \\\"pole_wins\\\" varchar(100)\\n  \\\"qualifying_speed\\\" varchar(100)\\n  \\\"qualifying_speed_units\\\" varchar(100)\\n  \\\"qualifying_time\\\" varchar(100)\\n  \\\"qualifying_position\\\" varchar(100)\\n}\\n\\nTable \\\"motor_racing_race_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"time_behind_leader\\\" varchar(100)\\n  \\\"laps_behind_leader\\\" varchar(100)\\n  \\\"time_ahead_follower\\\" varchar(100)\\n  \\\"laps_ahead_follower\\\" varchar(100)\\n  \\\"time\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"points_rookie\\\" varchar(100)\\n  \\\"bonus\\\" varchar(100)\\n  \\\"laps_completed\\\" varchar(100)\\n  \\\"laps_leading_total\\\" varchar(100)\\n  \\\"distance_leading\\\" varchar(100)\\n  \\\"distance_completed\\\" varchar(100)\\n  \\\"distance_units\\\" varchar(40)\\n  \\\"speed_average\\\" varchar(40)\\n  \\\"speed_units\\\" varchar(40)\\n  \\\"status\\\" varchar(40)\\n  \\\"finishes_top_5\\\" varchar(40)\\n  \\\"finishes_top_10\\\" varchar(40)\\n  \\\"starts\\\" varchar(40)\\n  \\\"finishes\\\" varchar(40)\\n  \\\"non_finishes\\\" varchar(40)\\n  \\\"wins\\\" varchar(40)\\n  \\\"races_leading\\\" varchar(40)\\n  \\\"money\\\" varchar(40)\\n  \\\"money_units\\\" varchar(40)\\n  \\\"leads_total\\\" varchar(40)\\n}\\n\\nTable \\\"outcome_totals\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"standing_subgroup_id\\\" int4 [not null]\\n  \\\"outcome_holder_type\\\" varchar(100)\\n  \\\"outcome_holder_id\\\" int4\\n  \\\"rank\\\" varchar(100)\\n  \\\"wins\\\" varchar(100)\\n  \\\"losses\\\" varchar(100)\\n  \\\"ties\\\" varchar(100)\\n  \\\"undecideds\\\" varchar(100)\\n  \\\"winning_percentage\\\" varchar(100)\\n  \\\"points_scored_for\\\" varchar(100)\\n  \\\"points_scored_against\\\" 
varchar(100)\\n  \\\"points_difference\\\" varchar(100)\\n  \\\"standing_points\\\" varchar(100)\\n  \\\"streak_type\\\" varchar(100)\\n  \\\"streak_duration\\\" varchar(100)\\n  \\\"streak_total\\\" varchar(100)\\n  \\\"streak_start\\\" date\\n  \\\"streak_end\\\" date\\n}\\n\\nTable \\\"participants_events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"participant_type\\\" varchar(100) [not null]\\n  \\\"participant_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"alignment\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n  \\\"event_outcome\\\" varchar(100)\\n  \\\"rank\\\" int4\\n}\\n\\nTable \\\"periods\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"participant_event_id\\\" int4 [not null]\\n  \\\"period_value\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n}\\n\\nTable \\\"person_event_metadata\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"status\\\" varchar(100)\\n  \\\"health\\\" varchar(100)\\n  \\\"weight\\\" varchar(100)\\n  \\\"role_id\\\" int4\\n  \\\"position_id\\\" int4\\n  \\\"team_id\\\" int4\\n  \\\"lineup_slot\\\" int4\\n  \\\"lineup_slot_sequence\\\" int4\\n}\\n\\nTable \\\"person_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"membership_type\\\" varchar(40) [not null]\\n  \\\"membership_id\\\" int4 [not null]\\n  \\\"role_id\\\" int4\\n  \\\"role_status\\\" varchar(40)\\n  \\\"phase_status\\\" varchar(40)\\n  \\\"uniform_number\\\" varchar(20)\\n  \\\"regular_position_id\\\" int4\\n  \\\"regular_position_depth\\\" varchar(40)\\n  \\\"height\\\" varchar(100)\\n  \\\"weight\\\" varchar(100)\\n  \\\"start_date_time\\\" timestamp\\n  \\\"start_season_id\\\" int4\\n  \\\"end_date_time\\\" timestamp\\n  \\\"end_season_id\\\" int4\\n  \\\"entry_reason\\\" varchar(40)\\n  \\\"exit_reason\\\" varchar(40)\\n  \\\"selection_level\\\" int4\\n  \\\"selection_sublevel\\\" int4\\n  
\\\"selection_overall\\\" int4\\n}\\n\\nTable \\\"persons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"gender\\\" varchar(20)\\n  \\\"birth_date\\\" varchar(30)\\n  \\\"death_date\\\" varchar(30)\\n  \\\"birth_location_id\\\" int4\\n  \\\"hometown_location_id\\\" int4\\n  \\\"residence_location_id\\\" int4\\n  \\\"death_location_id\\\" int4\\n}\\n\\nTable \\\"persons_documents\\\" {\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"persons_media\\\" {\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"positions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"abbreviation\\\" varchar(100) [not null]\\n}\\n\\nTable \\\"publishers\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"publisher_key\\\" varchar(100) [not null]\\n  \\\"publisher_name\\\" varchar(100)\\n}\\n\\nTable \\\"roles\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"role_key\\\" varchar(100) [not null]\\n  \\\"role_name\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n}\\n\\nTable \\\"seasons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"season_key\\\" int4 [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"league_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"sites\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"site_key\\\" int4 [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"location_id\\\" int4\\n}\\n\\nTable \\\"soccer_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"shots_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_against_average\\\" varchar(100)\\n  \\\"goals_against_total\\\" varchar(100)\\n  \\\"saves\\\" varchar(100)\\n  \\\"save_percentage\\\" 
varchar(100)\\n  \\\"catches_punches\\\" varchar(100)\\n  \\\"shots_on_goal_total\\\" varchar(100)\\n  \\\"shots_shootout_total\\\" varchar(100)\\n  \\\"shots_shootout_allowed\\\" varchar(100)\\n  \\\"shots_blocked\\\" varchar(100)\\n  \\\"shutouts\\\" varchar(100)\\n}\\n\\nTable \\\"soccer_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" varchar(100)\\n  \\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"minutes_elapsed\\\" varchar(100)\\n  \\\"period_minute_elapsed\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"soccer_foul_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fouls_suffered\\\" varchar(100)\\n  \\\"fouls_commited\\\" varchar(100)\\n  \\\"cautions_total\\\" varchar(100)\\n  \\\"cautions_pending\\\" varchar(100)\\n  \\\"caution_points_total\\\" varchar(100)\\n  \\\"caution_points_pending\\\" varchar(100)\\n  \\\"ejections_total\\\" varchar(100)\\n}\\n\\nTable \\\"soccer_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"goals_game_winning\\\" varchar(100)\\n  \\\"goals_game_tying\\\" varchar(100)\\n  \\\"goals_overtime\\\" varchar(100)\\n  \\\"goals_shootout\\\" varchar(100)\\n  \\\"goals_total\\\" varchar(100)\\n  \\\"assists_game_winning\\\" varchar(100)\\n  \\\"assists_game_tying\\\" varchar(100)\\n  \\\"assists_overtime\\\" varchar(100)\\n  \\\"assists_total\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"shots_total\\\" varchar(100)\\n  \\\"shots_on_goal_total\\\" varchar(100)\\n  \\\"shots_hit_frame\\\" varchar(100)\\n  \\\"shots_penalty_shot_taken\\\" varchar(100)\\n  \\\"shots_penalty_shot_scored\\\" varchar(100)\\n  \\\"shots_penalty_shot_missed\\\" varchar(40)\\n  \\\"shots_penalty_shot_percentage\\\" varchar(40)\\n  \\\"shots_shootout_taken\\\" varchar(40)\\n  \\\"shots_shootout_scored\\\" 
varchar(40)\\n  \\\"shots_shootout_missed\\\" varchar(40)\\n  \\\"shots_shootout_percentage\\\" varchar(40)\\n  \\\"giveaways\\\" varchar(40)\\n  \\\"offsides\\\" varchar(40)\\n  \\\"corner_kicks\\\" varchar(40)\\n  \\\"hat_tricks\\\" varchar(40)\\n}\\n\\nTable \\\"standing_subgroups\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"standing_id\\\" int4 [not null]\\n  \\\"affiliation_id\\\" int4 [not null]\\n}\\n\\nTable \\\"standings\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"standing_type\\\" varchar(100)\\n  \\\"sub_season_id\\\" int4 [not null]\\n  \\\"last_updated\\\" varchar(100)\\n  \\\"duration_scope\\\" varchar(100)\\n  \\\"competition_scope\\\" varchar(100)\\n  \\\"competition_scope_id\\\" varchar(100)\\n  \\\"alignment_scope\\\" varchar(100)\\n  \\\"site_scope\\\" varchar(100)\\n  \\\"scoping_label\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"source\\\" varchar(100)\\n}\\n\\nTable \\\"stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"stat_repository_type\\\" varchar(100)\\n  \\\"stat_repository_id\\\" int4 [not null]\\n  \\\"stat_holder_type\\\" varchar(100)\\n  \\\"stat_holder_id\\\" int4\\n  \\\"stat_coverage_type\\\" varchar(100)\\n  \\\"stat_coverage_id\\\" int4\\n  \\\"context\\\" varchar(40) [not null]\\n}\\n\\nTable \\\"sub_periods\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"period_id\\\" int4 [not null]\\n  \\\"sub_period_value\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n}\\n\\nTable \\\"sub_seasons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sub_season_key\\\" varchar(100) [not null]\\n  \\\"season_id\\\" int4 [not null]\\n  \\\"sub_season_type\\\" varchar(100) [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"team_american_football_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"yards_per_attempt\\\" varchar(100)\\n  \\\"average_starting_position\\\" 
varchar(100)\\n  \\\"timeouts\\\" varchar(100)\\n  \\\"time_of_possession\\\" varchar(100)\\n  \\\"turnover_ratio\\\" varchar(100)\\n}\\n\\nTable \\\"team_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"start_season_id\\\" int4\\n  \\\"end_season_id\\\" int4\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" varchar(100)\\n  \\\"end_date_time\\\" varchar(100)\\n  \\\"phase_status\\\" varchar(40)\\n  \\\"role_id\\\" int4\\n}\\n\\nTable \\\"teams\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"team_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"home_site_id\\\" int4\\n}\\n\\nTable \\\"teams_documents\\\" {\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"teams_media\\\" {\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"tennis_action_points\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sub_period_id\\\" varchar(100)\\n  \\\"sequence_number\\\" varchar(100)\\n  \\\"win_type\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_action_volleys\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sequence_number\\\" varchar(100)\\n  \\\"tennis_action_points_id\\\" int4\\n  \\\"landing_location\\\" varchar(100)\\n  \\\"swing_type\\\" varchar(100)\\n  \\\"result\\\" varchar(100)\\n  \\\"spin_type\\\" varchar(100)\\n  \\\"trajectory_details\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"tennis_set\\\" varchar(100)\\n  \\\"game\\\" varchar(100)\\n  \\\"server_person_id\\\" int4\\n  \\\"server_score\\\" varchar(100)\\n  \\\"receiver_person_id\\\" int4\\n  \\\"receiver_score\\\" varchar(100)\\n  \\\"service_number\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"tennis_return_stats\\\" {\\n  
\\\"id\\\" int4 [not null, increment]\\n  \\\"returns_played\\\" varchar(100)\\n  \\\"matches_played\\\" varchar(100)\\n  \\\"first_service_return_points_won\\\" varchar(100)\\n  \\\"first_service_return_points_won_pct\\\" varchar(100)\\n  \\\"second_service_return_points_won\\\" varchar(100)\\n  \\\"second_service_return_points_won_pct\\\" varchar(100)\\n  \\\"return_games_played\\\" varchar(100)\\n  \\\"return_games_won\\\" varchar(100)\\n  \\\"return_games_won_pct\\\" varchar(100)\\n  \\\"break_points_played\\\" varchar(100)\\n  \\\"break_points_converted\\\" varchar(100)\\n  \\\"break_points_converted_pct\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_service_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"services_played\\\" varchar(100)\\n  \\\"matches_played\\\" varchar(100)\\n  \\\"aces\\\" varchar(100)\\n  \\\"first_services_good\\\" varchar(100)\\n  \\\"first_services_good_pct\\\" varchar(100)\\n  \\\"first_service_points_won\\\" varchar(100)\\n  \\\"first_service_points_won_pct\\\" varchar(100)\\n  \\\"second_service_points_won\\\" varchar(100)\\n  \\\"second_service_points_won_pct\\\" varchar(100)\\n  \\\"service_games_played\\\" varchar(100)\\n  \\\"service_games_won\\\" varchar(100)\\n  \\\"service_games_won_pct\\\" varchar(100)\\n  \\\"break_points_played\\\" varchar(100)\\n  \\\"break_points_saved\\\" varchar(100)\\n  \\\"break_points_saved_pct\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_moneylines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line\\\" varchar(100)\\n  \\\"line_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_odds_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 
[not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"numerator\\\" varchar(100)\\n  \\\"denominator\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n  \\\"payout_calculation\\\" varchar(100)\\n  \\\"payout_amount\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_runlines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line\\\" varchar(100)\\n  \\\"line_opening\\\" varchar(100)\\n  \\\"line_value\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_straight_spread_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line_value\\\" varchar(100)\\n  \\\"line_value_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_total_score_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line_over\\\" varchar(100)\\n  \\\"line_under\\\" varchar(100)\\n  \\\"total\\\" varchar(100)\\n  \\\"total_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"weather_conditions\\\" {\\n  
\\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"temperature\\\" varchar(100)\\n  \\\"temperature_units\\\" varchar(40)\\n  \\\"humidity\\\" varchar(100)\\n  \\\"clouds\\\" varchar(100)\\n  \\\"wind_direction\\\" varchar(100)\\n  \\\"wind_velocity\\\" varchar(100)\\n  \\\"weather_code\\\" varchar(100)\\n}\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/yugabyte/yugabyte-db/blob/master/sample/sportsdb_tables.sql\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/sports/baseball_player_analysis/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Sports Task 1: Baseball Player Analysis\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"Compare two rows with appropriate tolerance for decimals and floats.\"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, (Decimal, float)) and isinstance(expected, (Decimal, float)):\n            # Use higher tolerance for floating point comparisons\n            if abs(float(actual) - float(expected)) > 0.001:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\")\n    }\n\ndef verify_baseball_player_analysis_table(conn) -> bool:\n    \"\"\"Verify the baseball_player_analysis table results.\"\"\"\n    with conn.cursor() as cur:\n        cur.execute(\"\"\"\n            SELECT player_id, player_name, team_name, games_played, at_bats, hits,\n                   runs_scored, rbi, home_runs, batting_average, defensive_games,\n                   putouts, assists, errors, fielding_percentage\n            FROM baseball_player_analysis\n            ORDER BY batting_average DESC, games_played DESC\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        cur.execute(\"\"\"\n            SELECT\n            p.id AS player_id,\n            MAX(dn.full_name) AS player_name,\n            'Unknown' AS team_name,\n            core.events_played AS games_played,\n          
  off.at_bats,\n            off.hits,\n            off.runs_scored,\n            off.rbi,\n            off.home_runs,\n            CASE WHEN off.at_bats > 0\n                THEN 1.0 * off.hits / off.at_bats\n                ELSE 0\n            END AS batting_average,\n            core.events_played AS defensive_games,\n            COALESCE(def.putouts, 0)  AS putouts,\n            COALESCE(def.assists, 0)  AS assists,\n            COALESCE(def.errors, 0)   AS errors,\n            CASE\n                WHEN (COALESCE(def.putouts,0) + COALESCE(def.assists,0) + COALESCE(def.errors,0)) > 0\n                THEN 1.0 * (COALESCE(def.putouts,0) + COALESCE(def.assists,0))\n                    / (COALESCE(def.putouts,0) + COALESCE(def.assists,0) + COALESCE(def.errors,0))\n                ELSE 0\n            END AS fielding_percentage\n            FROM persons p\n            JOIN display_names dn\n            ON dn.entity_id = p.id\n            AND dn.entity_type = 'persons'\n            AND NULLIF(TRIM(dn.full_name), '') IS NOT NULL\n            JOIN (\n            SELECT s.stat_holder_id AS player_id,\n                    SUM(bos.at_bats)       AS at_bats,\n                    SUM(bos.hits)          AS hits,\n                    SUM(bos.runs_scored)   AS runs_scored,\n                    SUM(bos.rbi)           AS rbi,\n                    SUM(bos.home_runs)     AS home_runs\n            FROM stats s\n            JOIN baseball_offensive_stats bos\n                ON bos.id = s.stat_repository_id\n            WHERE s.stat_holder_type = 'persons'\n                AND s.stat_repository_type = 'baseball_offensive_stats'\n                AND s.context = 'season-regular'\n            GROUP BY s.stat_holder_id\n            ) off ON off.player_id = p.id\n            JOIN (\n            SELECT s.stat_holder_id AS player_id,\n                    SUM(cps.events_played) AS events_played\n            FROM stats s\n            JOIN core_person_stats cps\n                ON cps.id = 
s.stat_repository_id\n            WHERE s.stat_holder_type = 'persons'\n                AND s.stat_repository_type = 'core_person_stats'\n                AND s.context = 'season-regular'\n            GROUP BY s.stat_holder_id\n            ) core ON core.player_id = p.id\n            LEFT JOIN (\n            SELECT s.stat_holder_id AS player_id,\n                    SUM(bds.putouts)  AS putouts,\n                    SUM(bds.assists)  AS assists,\n                    SUM(bds.errors)   AS errors\n            FROM stats s\n            JOIN baseball_defensive_stats bds\n                ON bds.id = s.stat_repository_id\n            WHERE s.stat_holder_type = 'persons'\n                AND s.stat_repository_type = 'baseball_defensive_stats'\n                AND s.context = 'season-regular'\n            GROUP BY s.stat_holder_id\n            ) def ON def.player_id = p.id\n            WHERE core.events_played >= 10\n            AND off.at_bats >= 50\n            GROUP BY\n            p.id, core.events_played,\n            off.at_bats, off.hits, off.runs_scored, off.rbi, off.home_runs,\n            def.putouts, def.assists, def.errors\n            ORDER BY batting_average DESC, games_played DESC;\n        \"\"\")\n        expected_results = cur.fetchall()\n        \n        if len(actual_results) != len(expected_results):\n            print(f\"❌ baseball_player_analysis table has {len(actual_results)} records, expected {len(expected_results)}\")\n            return False\n            \n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Player analysis row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n                \n        if mismatches > 0:\n            print(f\"❌ Total player analysis mismatches: {mismatches}\")\n           
 return False\n            \n        print(f\"✅ baseball_player_analysis table created and populated correctly ({len(actual_results)} players)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 70)\n    print(\"PostgreSQL Sports Task 1 Verification: Baseball Player Analysis\")\n    print(\"=\" * 70)\n    \n    # Get connection parameters\n    conn_params = get_connection_params()\n    \n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n    \n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n        \n        # Verify results\n        success = verify_baseball_player_analysis_table(conn)\n        \n        conn.close()\n        \n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n            \n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/sports/participant_report_optimization/description.md",
    "content": "# Query Performance Optimization\n\n## Background\nYou need to optimize a slow-running analytics query that generates performance reports. The query currently takes too long to execute and needs optimization.\n\n## Requirements\n\n### 1. Create Performance Report Table\nCreate a table called `participant_performance_report` with the following structure:\n- report_id (serial primary key)\n- participant_id (integer not null)\n- event_count (integer)\n- stat_count (integer)\n- stat_type_count (integer)\n- last_event_date (timestamp)\n- created_at (timestamp default current_timestamp)\n\nAdd constraint: CHECK (participant_id > 0)\n\n### 2. Execute and Optimize the Slow Query\nThe following query is currently running very slowly. Your task is to:\n1. **Identify why the query is slow**\n2. **Create appropriate indexes to optimize it** \n3. **Populate the report table with the query results**\n\n```sql\nSELECT \n    pe.participant_id,\n    COUNT(pe.event_id) as event_count,\n    (SELECT COUNT(*) FROM stats s WHERE s.stat_holder_id = pe.participant_id AND s.stat_holder_type = 'persons') as stat_count,\n    (SELECT COUNT(DISTINCT s.stat_repository_type) FROM stats s WHERE s.stat_holder_id = pe.participant_id AND s.stat_holder_type = 'persons') as stat_type_count,\n    (SELECT MAX(e.start_date_time) FROM events e JOIN participants_events pe2 ON e.id = pe2.event_id WHERE pe2.participant_id = pe.participant_id) as last_event_date\nFROM participants_events pe \nWHERE pe.participant_id <= 50\nGROUP BY pe.participant_id\nORDER BY pe.participant_id;\n```\n\n### 3. 
Document Performance Improvement\nAfter optimization, insert the results of the optimized query into your `participant_performance_report` table.\n\n## Success Criteria\n- The query should execute significantly faster after your optimization\n- All results should be correctly inserted into the report table\n- Your optimization should use appropriate database indexes\n\n## Important Notes\n- Analyze the query execution plan to identify bottlenecks\n- Focus on the most impactful optimizations\n- Handle NULL values appropriately in calculations"
  },
  {
    "path": "tasks/postgres/standard/sports/participant_report_optimization/meta.json",
    "content": "{\n  \"task_id\": \"participant_report_optimization\",\n  \"task_name\": \"Participant Report Optimization\",\n  \"category_id\": \"sports\",\n  \"category_name\": \"Sports\",\n  \"description\": \"Optimize slow-running participant performance query by creating indexes and populating performance report table.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-18\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"performance optimization\",\n    \"schema design\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"addresses\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"location_id\\\" int4 [not null]\\n  \\\"language\\\" varchar(100)\\n  \\\"suite\\\" varchar(100)\\n  \\\"floor\\\" varchar(100)\\n  \\\"building\\\" varchar(100)\\n  \\\"street_number\\\" varchar(100)\\n  \\\"street_prefix\\\" varchar(100)\\n  \\\"street\\\" varchar(100)\\n  \\\"street_suffix\\\" varchar(100)\\n  \\\"neighborhood\\\" varchar(100)\\n  \\\"district\\\" varchar(100)\\n  \\\"locality\\\" varchar(100)\\n  \\\"county\\\" varchar(100)\\n  \\\"region\\\" varchar(100)\\n  \\\"postal_code\\\" varchar(100)\\n  \\\"country\\\" varchar(100)\\n}\\n\\nTable \\\"affiliation_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"ancestor_affiliation_id\\\" int4\\n  \\\"start_season_id\\\" int4\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_season_id\\\" int4\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"affiliations\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_key\\\" varchar(100) [not null]\\n  \\\"affiliation_type\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_documents\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_events\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  
\\\"event_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_media\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"american_football_action_participants\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"american_football_action_play_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"participant_role\\\" varchar(100) [not null]\\n  \\\"score_type\\\" varchar(100)\\n  \\\"field_line\\\" int4\\n  \\\"yardage\\\" int4\\n  \\\"score_credit\\\" int4\\n  \\\"yards_gained\\\" int4\\n}\\n\\nTable \\\"american_football_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"american_football_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"score_attempt_type\\\" varchar(100)\\n  \\\"drive_result\\\" varchar(100)\\n  \\\"points\\\" int4\\n  \\\"comment\\\" varchar(255)\\n}\\n\\nTable \\\"american_football_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"tackles_total\\\" varchar(100)\\n  \\\"tackles_solo\\\" varchar(100)\\n  \\\"tackles_assists\\\" varchar(100)\\n  \\\"interceptions_total\\\" varchar(100)\\n  \\\"interceptions_yards\\\" varchar(100)\\n  \\\"interceptions_average\\\" varchar(100)\\n  \\\"interceptions_longest\\\" varchar(100)\\n  \\\"interceptions_touchdown\\\" varchar(100)\\n  \\\"quarterback_hurries\\\" varchar(100)\\n  \\\"sacks_total\\\" varchar(100)\\n  \\\"sacks_yards\\\" varchar(100)\\n  \\\"passes_defensed\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_down_progress_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"first_downs_total\\\" varchar(100)\\n  \\\"first_downs_pass\\\" varchar(100)\\n  \\\"first_downs_run\\\" varchar(100)\\n  \\\"first_downs_penalty\\\" varchar(100)\\n  \\\"conversions_third_down\\\" varchar(100)\\n  \\\"conversions_third_down_attempts\\\" varchar(100)\\n  \\\"conversions_third_down_percentage\\\" varchar(100)\\n  \\\"conversions_fourth_down\\\" varchar(100)\\n 
 \\\"conversions_fourth_down_attempts\\\" varchar(100)\\n  \\\"conversions_fourth_down_percentage\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int2\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" int4\\n  \\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"clock_state\\\" varchar(100)\\n  \\\"down\\\" int4\\n  \\\"team_in_possession_id\\\" int4\\n  \\\"distance_for_1st_down\\\" int4\\n  \\\"field_side\\\" varchar(100)\\n  \\\"field_line\\\" int4\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"american_football_fumbles_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fumbles_committed\\\" varchar(100)\\n  \\\"fumbles_forced\\\" varchar(100)\\n  \\\"fumbles_recovered\\\" varchar(100)\\n  \\\"fumbles_lost\\\" varchar(100)\\n  \\\"fumbles_yards_gained\\\" varchar(100)\\n  \\\"fumbles_own_committed\\\" varchar(100)\\n  \\\"fumbles_own_recovered\\\" varchar(100)\\n  \\\"fumbles_own_lost\\\" varchar(100)\\n  \\\"fumbles_own_yards_gained\\\" varchar(100)\\n  \\\"fumbles_opposing_committed\\\" varchar(100)\\n  \\\"fumbles_opposing_recovered\\\" varchar(100)\\n  \\\"fumbles_opposing_lost\\\" varchar(100)\\n  \\\"fumbles_opposing_yards_gained\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"offensive_plays_yards\\\" varchar(100)\\n  \\\"offensive_plays_number\\\" varchar(100)\\n  \\\"offensive_plays_average_yards_per\\\" varchar(100)\\n  \\\"possession_duration\\\" varchar(100)\\n  \\\"turnovers_giveaway\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_passing_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"passes_attempts\\\" varchar(100)\\n  \\\"passes_completions\\\" varchar(100)\\n  \\\"passes_percentage\\\" varchar(100)\\n  \\\"passes_yards_gross\\\" varchar(100)\\n  
\\\"passes_yards_net\\\" varchar(100)\\n  \\\"passes_yards_lost\\\" varchar(100)\\n  \\\"passes_touchdowns\\\" varchar(100)\\n  \\\"passes_touchdowns_percentage\\\" varchar(100)\\n  \\\"passes_interceptions\\\" varchar(100)\\n  \\\"passes_interceptions_percentage\\\" varchar(100)\\n  \\\"passes_longest\\\" varchar(100)\\n  \\\"passes_average_yards_per\\\" varchar(100)\\n  \\\"passer_rating\\\" varchar(100)\\n  \\\"receptions_total\\\" varchar(100)\\n  \\\"receptions_yards\\\" varchar(100)\\n  \\\"receptions_touchdowns\\\" varchar(100)\\n  \\\"receptions_first_down\\\" varchar(100)\\n  \\\"receptions_longest\\\" varchar(100)\\n  \\\"receptions_average_yards_per\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_penalties_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"penalties_total\\\" varchar(100)\\n  \\\"penalty_yards\\\" varchar(100)\\n  \\\"penalty_first_downs\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_rushing_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"rushes_attempts\\\" varchar(100)\\n  \\\"rushes_yards\\\" varchar(100)\\n  \\\"rushes_touchdowns\\\" varchar(100)\\n  \\\"rushing_average_yards_per\\\" varchar(100)\\n  \\\"rushes_first_down\\\" varchar(100)\\n  \\\"rushes_longest\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_sacks_against_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sacks_against_yards\\\" varchar(100)\\n  \\\"sacks_against_total\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_scoring_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"touchdowns_total\\\" varchar(100)\\n  \\\"touchdowns_passing\\\" varchar(100)\\n  \\\"touchdowns_rushing\\\" varchar(100)\\n  \\\"touchdowns_special_teams\\\" varchar(100)\\n  \\\"touchdowns_defensive\\\" varchar(100)\\n  \\\"extra_points_attempts\\\" varchar(100)\\n  \\\"extra_points_made\\\" varchar(100)\\n  \\\"extra_points_missed\\\" varchar(100)\\n  \\\"extra_points_blocked\\\" varchar(100)\\n  \\\"field_goal_attempts\\\" 
varchar(100)\\n  \\\"field_goals_made\\\" varchar(100)\\n  \\\"field_goals_missed\\\" varchar(100)\\n  \\\"field_goals_blocked\\\" varchar(100)\\n  \\\"safeties_against\\\" varchar(100)\\n  \\\"two_point_conversions_attempts\\\" varchar(100)\\n  \\\"two_point_conversions_made\\\" varchar(100)\\n  \\\"touchbacks_total\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_special_teams_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"returns_punt_total\\\" varchar(100)\\n  \\\"returns_punt_yards\\\" varchar(100)\\n  \\\"returns_punt_average\\\" varchar(100)\\n  \\\"returns_punt_longest\\\" varchar(100)\\n  \\\"returns_punt_touchdown\\\" varchar(100)\\n  \\\"returns_kickoff_total\\\" varchar(100)\\n  \\\"returns_kickoff_yards\\\" varchar(100)\\n  \\\"returns_kickoff_average\\\" varchar(100)\\n  \\\"returns_kickoff_longest\\\" varchar(100)\\n  \\\"returns_kickoff_touchdown\\\" varchar(100)\\n  \\\"returns_total\\\" varchar(100)\\n  \\\"returns_yards\\\" varchar(100)\\n  \\\"punts_total\\\" varchar(100)\\n  \\\"punts_yards_gross\\\" varchar(100)\\n  \\\"punts_yards_net\\\" varchar(100)\\n  \\\"punts_longest\\\" varchar(100)\\n  \\\"punts_inside_20\\\" varchar(100)\\n  \\\"punts_inside_20_percentage\\\" varchar(100)\\n  \\\"punts_average\\\" varchar(100)\\n  \\\"punts_blocked\\\" varchar(100)\\n  \\\"touchbacks_total\\\" varchar(100)\\n  \\\"touchbacks_total_percentage\\\" varchar(100)\\n  \\\"touchbacks_kickoffs\\\" varchar(100)\\n  \\\"touchbacks_kickoffs_percentage\\\" varchar(100)\\n  \\\"touchbacks_punts\\\" varchar(100)\\n  \\\"touchbacks_punts_percentage\\\" varchar(100)\\n  \\\"touchbacks_interceptions\\\" varchar(100)\\n  \\\"touchbacks_interceptions_percentage\\\" varchar(100)\\n  \\\"fair_catches\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_contact_details\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_action_pitch_id\\\" int4 [not null]\\n  \\\"location\\\" varchar(100)\\n  \\\"strength\\\" varchar(100)\\n  
\\\"velocity\\\" int4\\n  \\\"comment\\\" text\\n  \\\"trajectory_coordinates\\\" varchar(100)\\n  \\\"trajectory_formula\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_pitches\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_action_play_id\\\" int4 [not null]\\n  \\\"sequence_number\\\" int4\\n  \\\"baseball_defensive_group_id\\\" int4\\n  \\\"umpire_call\\\" varchar(100)\\n  \\\"pitch_location\\\" varchar(100)\\n  \\\"pitch_type\\\" varchar(100)\\n  \\\"pitch_velocity\\\" int4\\n  \\\"comment\\\" text\\n  \\\"trajectory_coordinates\\\" varchar(100)\\n  \\\"trajectory_formula\\\" varchar(100)\\n  \\\"ball_type\\\" varchar(40)\\n  \\\"strike_type\\\" varchar(40)\\n}\\n\\nTable \\\"baseball_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"notation\\\" varchar(100)\\n  \\\"notation_yaml\\\" text\\n  \\\"baseball_defensive_group_id\\\" int4\\n  \\\"comment\\\" varchar(255)\\n  \\\"runner_on_first_advance\\\" int4\\n  \\\"runner_on_second_advance\\\" int4\\n  \\\"runner_on_third_advance\\\" int4\\n  \\\"outs_recorded\\\" int4\\n  \\\"rbi\\\" int4\\n  \\\"runs_scored\\\" int4\\n  \\\"earned_runs_scored\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_substitutions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_event_state_id\\\" int4 [not null]\\n  \\\"sequence_number\\\" int4\\n  \\\"person_type\\\" varchar(100)\\n  \\\"person_original_id\\\" int4\\n  \\\"person_original_position_id\\\" int4\\n  \\\"person_original_lineup_slot\\\" int4\\n  \\\"person_replacing_id\\\" int4\\n  \\\"person_replacing_position_id\\\" int4\\n  \\\"person_replacing_lineup_slot\\\" int4\\n  \\\"substitution_reason\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_defensive_group\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n}\\n\\nTable \\\"baseball_defensive_players\\\" {\\n  \\\"id\\\" int4 [not null, 
increment]\\n  \\\"baseball_defensive_group_id\\\" int4 [not null]\\n  \\\"player_id\\\" int4 [not null]\\n  \\\"position_id\\\" int4 [not null]\\n}\\n\\nTable \\\"baseball_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"double_plays\\\" int4\\n  \\\"triple_plays\\\" int4\\n  \\\"putouts\\\" int4\\n  \\\"assists\\\" int4\\n  \\\"errors\\\" int4\\n  \\\"fielding_percentage\\\" numeric\\n  \\\"defensive_average\\\" numeric\\n  \\\"errors_passed_ball\\\" int4\\n  \\\"errors_catchers_interference\\\" int4\\n}\\n\\nTable \\\"baseball_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int2\\n  \\\"sequence_number\\\" int4\\n  \\\"at_bat_number\\\" int4\\n  \\\"inning_value\\\" int4\\n  \\\"inning_half\\\" varchar(100)\\n  \\\"outs\\\" int4\\n  \\\"balls\\\" int4\\n  \\\"strikes\\\" int4\\n  \\\"runner_on_first_id\\\" int4\\n  \\\"runner_on_second_id\\\" int4\\n  \\\"runner_on_third_id\\\" int4\\n  \\\"runner_on_first\\\" int2\\n  \\\"runner_on_second\\\" int2\\n  \\\"runner_on_third\\\" int2\\n  \\\"runs_this_inning_half\\\" int4\\n  \\\"pitcher_id\\\" int4\\n  \\\"batter_id\\\" int4\\n  \\\"batter_side\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"baseball_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"average\\\" numeric\\n  \\\"runs_scored\\\" int4\\n  \\\"at_bats\\\" int4\\n  \\\"hits\\\" int4\\n  \\\"rbi\\\" int4\\n  \\\"total_bases\\\" int4\\n  \\\"slugging_percentage\\\" numeric\\n  \\\"bases_on_balls\\\" int4\\n  \\\"strikeouts\\\" int4\\n  \\\"left_on_base\\\" int4\\n  \\\"left_in_scoring_position\\\" int4\\n  \\\"singles\\\" int4\\n  \\\"doubles\\\" int4\\n  \\\"triples\\\" int4\\n  \\\"home_runs\\\" int4\\n  \\\"grand_slams\\\" int4\\n  \\\"at_bats_per_rbi\\\" numeric\\n  \\\"plate_appearances_per_rbi\\\" numeric\\n  \\\"at_bats_per_home_run\\\" numeric\\n  \\\"plate_appearances_per_home_run\\\" numeric\\n  
\\\"sac_flies\\\" int4\\n  \\\"sac_bunts\\\" int4\\n  \\\"grounded_into_double_play\\\" int4\\n  \\\"moved_up\\\" int4\\n  \\\"on_base_percentage\\\" numeric\\n  \\\"stolen_bases\\\" int4\\n  \\\"stolen_bases_caught\\\" int4\\n  \\\"stolen_bases_average\\\" numeric\\n  \\\"hit_by_pitch\\\" int4\\n  \\\"defensive_interferance_reaches\\\" int4\\n  \\\"on_base_plus_slugging\\\" numeric\\n  \\\"plate_appearances\\\" int4\\n  \\\"hits_extra_base\\\" int4\\n}\\n\\nTable \\\"baseball_pitching_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"runs_allowed\\\" int4\\n  \\\"singles_allowed\\\" int4\\n  \\\"doubles_allowed\\\" int4\\n  \\\"triples_allowed\\\" int4\\n  \\\"home_runs_allowed\\\" int4\\n  \\\"innings_pitched\\\" varchar(20)\\n  \\\"hits\\\" int4\\n  \\\"earned_runs\\\" int4\\n  \\\"unearned_runs\\\" int4\\n  \\\"bases_on_balls\\\" int4\\n  \\\"bases_on_balls_intentional\\\" int4\\n  \\\"strikeouts\\\" int4\\n  \\\"strikeout_to_bb_ratio\\\" numeric\\n  \\\"number_of_pitches\\\" int4\\n  \\\"era\\\" numeric\\n  \\\"inherited_runners_scored\\\" int4\\n  \\\"pick_offs\\\" int4\\n  \\\"errors_hit_with_pitch\\\" int4\\n  \\\"errors_wild_pitch\\\" int4\\n  \\\"balks\\\" int4\\n  \\\"wins\\\" int4\\n  \\\"losses\\\" int4\\n  \\\"saves\\\" int4\\n  \\\"shutouts\\\" int4\\n  \\\"games_complete\\\" int4\\n  \\\"games_finished\\\" int4\\n  \\\"winning_percentage\\\" numeric\\n  \\\"event_credit\\\" varchar(40)\\n  \\\"save_credit\\\" varchar(40)\\n}\\n\\nTable \\\"basketball_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"steals_total\\\" varchar(100)\\n  \\\"steals_per_game\\\" varchar(100)\\n  \\\"blocks_total\\\" varchar(100)\\n  \\\"blocks_per_game\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" varchar(100)\\n  \\\"period_time_elapsed\\\" 
varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"basketball_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"field_goals_made\\\" int4\\n  \\\"field_goals_attempted\\\" int4\\n  \\\"field_goals_percentage\\\" varchar(100)\\n  \\\"field_goals_per_game\\\" varchar(100)\\n  \\\"field_goals_attempted_per_game\\\" varchar(100)\\n  \\\"field_goals_percentage_adjusted\\\" varchar(100)\\n  \\\"three_pointers_made\\\" int4\\n  \\\"three_pointers_attempted\\\" int4\\n  \\\"three_pointers_percentage\\\" varchar(100)\\n  \\\"three_pointers_per_game\\\" varchar(100)\\n  \\\"three_pointers_attempted_per_game\\\" varchar(100)\\n  \\\"free_throws_made\\\" varchar(100)\\n  \\\"free_throws_attempted\\\" varchar(100)\\n  \\\"free_throws_percentage\\\" varchar(100)\\n  \\\"free_throws_per_game\\\" varchar(100)\\n  \\\"free_throws_attempted_per_game\\\" varchar(100)\\n  \\\"points_scored_total\\\" varchar(100)\\n  \\\"points_scored_per_game\\\" varchar(100)\\n  \\\"assists_total\\\" varchar(100)\\n  \\\"assists_per_game\\\" varchar(100)\\n  \\\"turnovers_total\\\" varchar(100)\\n  \\\"turnovers_per_game\\\" varchar(100)\\n  \\\"points_scored_off_turnovers\\\" varchar(100)\\n  \\\"points_scored_in_paint\\\" varchar(100)\\n  \\\"points_scored_on_second_chance\\\" varchar(100)\\n  \\\"points_scored_on_fast_break\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_rebounding_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"rebounds_total\\\" varchar(100)\\n  \\\"rebounds_per_game\\\" varchar(100)\\n  \\\"rebounds_defensive\\\" varchar(100)\\n  \\\"rebounds_offensive\\\" varchar(100)\\n  \\\"team_rebounds_total\\\" varchar(100)\\n  \\\"team_rebounds_per_game\\\" varchar(100)\\n  \\\"team_rebounds_defensive\\\" varchar(100)\\n  \\\"team_rebounds_offensive\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_team_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"timeouts_left\\\" varchar(100)\\n 
 \\\"largest_lead\\\" varchar(100)\\n  \\\"fouls_total\\\" varchar(100)\\n  \\\"turnover_margin\\\" varchar(100)\\n}\\n\\nTable \\\"bookmakers\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_key\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"location_id\\\" int4\\n}\\n\\nTable \\\"core_person_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"time_played_event\\\" varchar(40)\\n  \\\"time_played_total\\\" varchar(40)\\n  \\\"time_played_event_average\\\" varchar(40)\\n  \\\"events_played\\\" int4\\n  \\\"events_started\\\" int4\\n  \\\"position_id\\\" int4\\n}\\n\\nTable \\\"core_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"score\\\" varchar(100)\\n  \\\"score_opposing\\\" varchar(100)\\n  \\\"score_attempts\\\" varchar(100)\\n  \\\"score_attempts_opposing\\\" varchar(100)\\n  \\\"score_percentage\\\" varchar(100)\\n  \\\"score_percentage_opposing\\\" varchar(100)\\n}\\n\\nTable \\\"db_info\\\" {\\n  \\\"version\\\" varchar(100) [not null, default: 16]\\n}\\n\\nTable \\\"display_names\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"language\\\" varchar(100) [not null]\\n  \\\"entity_type\\\" varchar(100) [not null]\\n  \\\"entity_id\\\" int4 [not null]\\n  \\\"full_name\\\" varchar(100)\\n  \\\"first_name\\\" varchar(100)\\n  \\\"middle_name\\\" varchar(100)\\n  \\\"last_name\\\" varchar(100)\\n  \\\"alias\\\" varchar(100)\\n  \\\"abbreviation\\\" varchar(100)\\n  \\\"short_name\\\" varchar(100)\\n  \\\"prefix\\\" varchar(20)\\n  \\\"suffix\\\" varchar(20)\\n}\\n\\nTable \\\"document_classes\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"name\\\" varchar(100)\\n}\\n\\nTable \\\"document_contents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"sportsml\\\" varchar(200)\\n  \\\"abstract\\\" text\\n}\\n\\nTable \\\"document_fixtures\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fixture_key\\\" varchar(100)\\n  \\\"publisher_id\\\" 
int4 [not null]\\n  \\\"name\\\" varchar(100)\\n  \\\"document_class_id\\\" int4 [not null]\\n}\\n\\nTable \\\"document_fixtures_events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_fixture_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"latest_document_id\\\" int4 [not null]\\n  \\\"last_update\\\" timestamp\\n}\\n\\nTable \\\"document_package_entry\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_package_id\\\" int4 [not null]\\n  \\\"rank\\\" varchar(100)\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"headline\\\" varchar(100)\\n  \\\"short_headline\\\" varchar(100)\\n}\\n\\nTable \\\"document_packages\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"package_key\\\" varchar(100)\\n  \\\"package_name\\\" varchar(100)\\n  \\\"date_time\\\" date\\n}\\n\\nTable \\\"documents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"doc_id\\\" varchar(75) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"title\\\" varchar(255)\\n  \\\"language\\\" varchar(100)\\n  \\\"priority\\\" varchar(100)\\n  \\\"revision_id\\\" varchar(75)\\n  \\\"stats_coverage\\\" varchar(100)\\n  \\\"document_fixture_id\\\" int4 [not null]\\n  \\\"source_id\\\" int4\\n  \\\"db_loading_date_time\\\" timestamp\\n}\\n\\nTable \\\"documents_media\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"media_caption_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"site_id\\\" int4\\n  \\\"site_alignment\\\" varchar(100)\\n  \\\"event_status\\\" varchar(100)\\n  \\\"duration\\\" varchar(100)\\n  \\\"attendance\\\" varchar(100)\\n  \\\"last_update\\\" timestamp\\n}\\n\\nTable \\\"events_documents\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  
\\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events_media\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events_sub_seasons\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"sub_season_id\\\" int4 [not null]\\n}\\n\\nTable \\\"ice_hockey_action_participants\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"ice_hockey_action_play_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"participant_role\\\" varchar(100) [not null]\\n  \\\"point_credit\\\" int4\\n}\\n\\nTable \\\"ice_hockey_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"ice_hockey_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"score_attempt_type\\\" varchar(100)\\n  \\\"play_result\\\" varchar(100)\\n  \\\"comment\\\" varchar(255)\\n}\\n\\nTable \\\"ice_hockey_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"shots_power_play_allowed\\\" varchar(100)\\n  \\\"shots_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_power_play_allowed\\\" varchar(100)\\n  \\\"goals_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_against_average\\\" varchar(100)\\n  \\\"saves\\\" varchar(100)\\n  \\\"save_percentage\\\" varchar(100)\\n  \\\"penalty_killing_amount\\\" varchar(100)\\n  \\\"penalty_killing_percentage\\\" varchar(100)\\n  \\\"shots_blocked\\\" varchar(100)\\n  \\\"takeaways\\\" varchar(100)\\n  \\\"shutouts\\\" varchar(100)\\n  \\\"minutes_penalty_killing\\\" varchar(100)\\n  \\\"hits\\\" varchar(100)\\n  \\\"goals_empty_net_allowed\\\" varchar(100)\\n  \\\"goals_short_handed_allowed\\\" varchar(100)\\n  \\\"goals_shootout_allowed\\\" varchar(100)\\n  \\\"shots_shootout_allowed\\\" varchar(100)\\n}\\n\\nTable \\\"ice_hockey_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" varchar(100)\\n  
\\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"ice_hockey_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"goals_game_winning\\\" varchar(100)\\n  \\\"goals_game_tying\\\" varchar(100)\\n  \\\"goals_power_play\\\" varchar(100)\\n  \\\"goals_short_handed\\\" varchar(100)\\n  \\\"goals_even_strength\\\" varchar(100)\\n  \\\"goals_empty_net\\\" varchar(100)\\n  \\\"goals_overtime\\\" varchar(100)\\n  \\\"goals_shootout\\\" varchar(100)\\n  \\\"goals_penalty_shot\\\" varchar(100)\\n  \\\"assists\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"power_play_amount\\\" varchar(100)\\n  \\\"power_play_percentage\\\" varchar(100)\\n  \\\"shots_penalty_shot_taken\\\" varchar(100)\\n  \\\"shots_penalty_shot_missed\\\" varchar(100)\\n  \\\"shots_penalty_shot_percentage\\\" varchar(100)\\n  \\\"giveaways\\\" varchar(100)\\n  \\\"minutes_power_play\\\" varchar(100)\\n  \\\"faceoff_wins\\\" varchar(100)\\n  \\\"faceoff_losses\\\" varchar(100)\\n  \\\"faceoff_win_percentage\\\" varchar(100)\\n  \\\"scoring_chances\\\" varchar(100)\\n}\\n\\nTable \\\"ice_hockey_player_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"plus_minus\\\" varchar(100)\\n}\\n\\nTable \\\"injury_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"injury_status\\\" varchar(100)\\n  \\\"injury_type\\\" varchar(100)\\n  \\\"injury_comment\\\" varchar(100)\\n  \\\"disabled_list\\\" varchar(100)\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n  \\\"season_id\\\" int4\\n  \\\"phase_type\\\" varchar(100)\\n  \\\"injury_side\\\" varchar(100)\\n}\\n\\nTable \\\"key_aliases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"key_id\\\" int4 [not null]\\n  \\\"key_root_id\\\" int4 [not null]\\n}\\n\\nTable \\\"key_roots\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"key_type\\\" 
varchar(100)\\n}\\n\\nTable \\\"latest_revisions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"revision_id\\\" varchar(75) [not null]\\n  \\\"latest_document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"locations\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"timezone\\\" varchar(100)\\n  \\\"latitude\\\" varchar(100)\\n  \\\"longitude\\\" varchar(100)\\n  \\\"country_code\\\" varchar(100)\\n}\\n\\nTable \\\"media\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"object_id\\\" int4\\n  \\\"source_id\\\" int4\\n  \\\"revision_id\\\" int4\\n  \\\"media_type\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"date_time\\\" varchar(100)\\n  \\\"credit_id\\\" int4 [not null]\\n  \\\"db_loading_date_time\\\" timestamp\\n  \\\"creation_location_id\\\" int4 [not null]\\n}\\n\\nTable \\\"media_captions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"caption_type\\\" varchar(100)\\n  \\\"caption\\\" varchar(100)\\n  \\\"caption_author_id\\\" int4 [not null]\\n  \\\"language\\\" varchar(100)\\n  \\\"caption_size\\\" varchar(100)\\n}\\n\\nTable \\\"media_contents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"object\\\" varchar(100)\\n  \\\"format\\\" varchar(100)\\n  \\\"mime_type\\\" varchar(100)\\n  \\\"height\\\" varchar(100)\\n  \\\"width\\\" varchar(100)\\n  \\\"duration\\\" varchar(100)\\n  \\\"file_size\\\" varchar(100)\\n  \\\"resolution\\\" varchar(100)\\n}\\n\\nTable \\\"media_keywords\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"keyword\\\" varchar(100)\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"motor_racing_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"lap\\\" varchar(100)\\n  \\\"laps_remaining\\\" varchar(100)\\n  \\\"time_elapsed\\\" varchar(100)\\n  \\\"flag_state\\\" varchar(100)\\n  
\\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"motor_racing_qualifying_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"grid\\\" varchar(100)\\n  \\\"pole_position\\\" varchar(100)\\n  \\\"pole_wins\\\" varchar(100)\\n  \\\"qualifying_speed\\\" varchar(100)\\n  \\\"qualifying_speed_units\\\" varchar(100)\\n  \\\"qualifying_time\\\" varchar(100)\\n  \\\"qualifying_position\\\" varchar(100)\\n}\\n\\nTable \\\"motor_racing_race_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"time_behind_leader\\\" varchar(100)\\n  \\\"laps_behind_leader\\\" varchar(100)\\n  \\\"time_ahead_follower\\\" varchar(100)\\n  \\\"laps_ahead_follower\\\" varchar(100)\\n  \\\"time\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"points_rookie\\\" varchar(100)\\n  \\\"bonus\\\" varchar(100)\\n  \\\"laps_completed\\\" varchar(100)\\n  \\\"laps_leading_total\\\" varchar(100)\\n  \\\"distance_leading\\\" varchar(100)\\n  \\\"distance_completed\\\" varchar(100)\\n  \\\"distance_units\\\" varchar(40)\\n  \\\"speed_average\\\" varchar(40)\\n  \\\"speed_units\\\" varchar(40)\\n  \\\"status\\\" varchar(40)\\n  \\\"finishes_top_5\\\" varchar(40)\\n  \\\"finishes_top_10\\\" varchar(40)\\n  \\\"starts\\\" varchar(40)\\n  \\\"finishes\\\" varchar(40)\\n  \\\"non_finishes\\\" varchar(40)\\n  \\\"wins\\\" varchar(40)\\n  \\\"races_leading\\\" varchar(40)\\n  \\\"money\\\" varchar(40)\\n  \\\"money_units\\\" varchar(40)\\n  \\\"leads_total\\\" varchar(40)\\n}\\n\\nTable \\\"outcome_totals\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"standing_subgroup_id\\\" int4 [not null]\\n  \\\"outcome_holder_type\\\" varchar(100)\\n  \\\"outcome_holder_id\\\" int4\\n  \\\"rank\\\" varchar(100)\\n  \\\"wins\\\" varchar(100)\\n  \\\"losses\\\" varchar(100)\\n  \\\"ties\\\" varchar(100)\\n  \\\"undecideds\\\" varchar(100)\\n  \\\"winning_percentage\\\" varchar(100)\\n  \\\"points_scored_for\\\" varchar(100)\\n  \\\"points_scored_against\\\" varchar(100)\\n  
\\\"points_difference\\\" varchar(100)\\n  \\\"standing_points\\\" varchar(100)\\n  \\\"streak_type\\\" varchar(100)\\n  \\\"streak_duration\\\" varchar(100)\\n  \\\"streak_total\\\" varchar(100)\\n  \\\"streak_start\\\" date\\n  \\\"streak_end\\\" date\\n}\\n\\nTable \\\"participants_events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"participant_type\\\" varchar(100) [not null]\\n  \\\"participant_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"alignment\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n  \\\"event_outcome\\\" varchar(100)\\n  \\\"rank\\\" int4\\n}\\n\\nTable \\\"periods\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"participant_event_id\\\" int4 [not null]\\n  \\\"period_value\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n}\\n\\nTable \\\"person_event_metadata\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"status\\\" varchar(100)\\n  \\\"health\\\" varchar(100)\\n  \\\"weight\\\" varchar(100)\\n  \\\"role_id\\\" int4\\n  \\\"position_id\\\" int4\\n  \\\"team_id\\\" int4\\n  \\\"lineup_slot\\\" int4\\n  \\\"lineup_slot_sequence\\\" int4\\n}\\n\\nTable \\\"person_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"membership_type\\\" varchar(40) [not null]\\n  \\\"membership_id\\\" int4 [not null]\\n  \\\"role_id\\\" int4\\n  \\\"role_status\\\" varchar(40)\\n  \\\"phase_status\\\" varchar(40)\\n  \\\"uniform_number\\\" varchar(20)\\n  \\\"regular_position_id\\\" int4\\n  \\\"regular_position_depth\\\" varchar(40)\\n  \\\"height\\\" varchar(100)\\n  \\\"weight\\\" varchar(100)\\n  \\\"start_date_time\\\" timestamp\\n  \\\"start_season_id\\\" int4\\n  \\\"end_date_time\\\" timestamp\\n  \\\"end_season_id\\\" int4\\n  \\\"entry_reason\\\" varchar(40)\\n  \\\"exit_reason\\\" varchar(40)\\n  \\\"selection_level\\\" int4\\n  \\\"selection_sublevel\\\" int4\\n  
\\\"selection_overall\\\" int4\\n}\\n\\nTable \\\"persons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"gender\\\" varchar(20)\\n  \\\"birth_date\\\" varchar(30)\\n  \\\"death_date\\\" varchar(30)\\n  \\\"birth_location_id\\\" int4\\n  \\\"hometown_location_id\\\" int4\\n  \\\"residence_location_id\\\" int4\\n  \\\"death_location_id\\\" int4\\n}\\n\\nTable \\\"persons_documents\\\" {\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"persons_media\\\" {\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"positions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"abbreviation\\\" varchar(100) [not null]\\n}\\n\\nTable \\\"publishers\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"publisher_key\\\" varchar(100) [not null]\\n  \\\"publisher_name\\\" varchar(100)\\n}\\n\\nTable \\\"roles\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"role_key\\\" varchar(100) [not null]\\n  \\\"role_name\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n}\\n\\nTable \\\"seasons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"season_key\\\" int4 [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"league_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"sites\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"site_key\\\" int4 [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"location_id\\\" int4\\n}\\n\\nTable \\\"soccer_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"shots_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_against_average\\\" varchar(100)\\n  \\\"goals_against_total\\\" varchar(100)\\n  \\\"saves\\\" varchar(100)\\n  \\\"save_percentage\\\" 
varchar(100)\\n  \\\"catches_punches\\\" varchar(100)\\n  \\\"shots_on_goal_total\\\" varchar(100)\\n  \\\"shots_shootout_total\\\" varchar(100)\\n  \\\"shots_shootout_allowed\\\" varchar(100)\\n  \\\"shots_blocked\\\" varchar(100)\\n  \\\"shutouts\\\" varchar(100)\\n}\\n\\nTable \\\"soccer_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" varchar(100)\\n  \\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"minutes_elapsed\\\" varchar(100)\\n  \\\"period_minute_elapsed\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"soccer_foul_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fouls_suffered\\\" varchar(100)\\n  \\\"fouls_commited\\\" varchar(100)\\n  \\\"cautions_total\\\" varchar(100)\\n  \\\"cautions_pending\\\" varchar(100)\\n  \\\"caution_points_total\\\" varchar(100)\\n  \\\"caution_points_pending\\\" varchar(100)\\n  \\\"ejections_total\\\" varchar(100)\\n}\\n\\nTable \\\"soccer_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"goals_game_winning\\\" varchar(100)\\n  \\\"goals_game_tying\\\" varchar(100)\\n  \\\"goals_overtime\\\" varchar(100)\\n  \\\"goals_shootout\\\" varchar(100)\\n  \\\"goals_total\\\" varchar(100)\\n  \\\"assists_game_winning\\\" varchar(100)\\n  \\\"assists_game_tying\\\" varchar(100)\\n  \\\"assists_overtime\\\" varchar(100)\\n  \\\"assists_total\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"shots_total\\\" varchar(100)\\n  \\\"shots_on_goal_total\\\" varchar(100)\\n  \\\"shots_hit_frame\\\" varchar(100)\\n  \\\"shots_penalty_shot_taken\\\" varchar(100)\\n  \\\"shots_penalty_shot_scored\\\" varchar(100)\\n  \\\"shots_penalty_shot_missed\\\" varchar(40)\\n  \\\"shots_penalty_shot_percentage\\\" varchar(40)\\n  \\\"shots_shootout_taken\\\" varchar(40)\\n  \\\"shots_shootout_scored\\\" 
varchar(40)\\n  \\\"shots_shootout_missed\\\" varchar(40)\\n  \\\"shots_shootout_percentage\\\" varchar(40)\\n  \\\"giveaways\\\" varchar(40)\\n  \\\"offsides\\\" varchar(40)\\n  \\\"corner_kicks\\\" varchar(40)\\n  \\\"hat_tricks\\\" varchar(40)\\n}\\n\\nTable \\\"standing_subgroups\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"standing_id\\\" int4 [not null]\\n  \\\"affiliation_id\\\" int4 [not null]\\n}\\n\\nTable \\\"standings\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"standing_type\\\" varchar(100)\\n  \\\"sub_season_id\\\" int4 [not null]\\n  \\\"last_updated\\\" varchar(100)\\n  \\\"duration_scope\\\" varchar(100)\\n  \\\"competition_scope\\\" varchar(100)\\n  \\\"competition_scope_id\\\" varchar(100)\\n  \\\"alignment_scope\\\" varchar(100)\\n  \\\"site_scope\\\" varchar(100)\\n  \\\"scoping_label\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"source\\\" varchar(100)\\n}\\n\\nTable \\\"stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"stat_repository_type\\\" varchar(100)\\n  \\\"stat_repository_id\\\" int4 [not null]\\n  \\\"stat_holder_type\\\" varchar(100)\\n  \\\"stat_holder_id\\\" int4\\n  \\\"stat_coverage_type\\\" varchar(100)\\n  \\\"stat_coverage_id\\\" int4\\n  \\\"context\\\" varchar(40) [not null]\\n}\\n\\nTable \\\"sub_periods\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"period_id\\\" int4 [not null]\\n  \\\"sub_period_value\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n}\\n\\nTable \\\"sub_seasons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sub_season_key\\\" varchar(100) [not null]\\n  \\\"season_id\\\" int4 [not null]\\n  \\\"sub_season_type\\\" varchar(100) [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"team_american_football_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"yards_per_attempt\\\" varchar(100)\\n  \\\"average_starting_position\\\" 
varchar(100)\\n  \\\"timeouts\\\" varchar(100)\\n  \\\"time_of_possession\\\" varchar(100)\\n  \\\"turnover_ratio\\\" varchar(100)\\n}\\n\\nTable \\\"team_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"start_season_id\\\" int4\\n  \\\"end_season_id\\\" int4\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" varchar(100)\\n  \\\"end_date_time\\\" varchar(100)\\n  \\\"phase_status\\\" varchar(40)\\n  \\\"role_id\\\" int4\\n}\\n\\nTable \\\"teams\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"team_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"home_site_id\\\" int4\\n}\\n\\nTable \\\"teams_documents\\\" {\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"teams_media\\\" {\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"tennis_action_points\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sub_period_id\\\" varchar(100)\\n  \\\"sequence_number\\\" varchar(100)\\n  \\\"win_type\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_action_volleys\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sequence_number\\\" varchar(100)\\n  \\\"tennis_action_points_id\\\" int4\\n  \\\"landing_location\\\" varchar(100)\\n  \\\"swing_type\\\" varchar(100)\\n  \\\"result\\\" varchar(100)\\n  \\\"spin_type\\\" varchar(100)\\n  \\\"trajectory_details\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"tennis_set\\\" varchar(100)\\n  \\\"game\\\" varchar(100)\\n  \\\"server_person_id\\\" int4\\n  \\\"server_score\\\" varchar(100)\\n  \\\"receiver_person_id\\\" int4\\n  \\\"receiver_score\\\" varchar(100)\\n  \\\"service_number\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"tennis_return_stats\\\" {\\n  
\\\"id\\\" int4 [not null, increment]\\n  \\\"returns_played\\\" varchar(100)\\n  \\\"matches_played\\\" varchar(100)\\n  \\\"first_service_return_points_won\\\" varchar(100)\\n  \\\"first_service_return_points_won_pct\\\" varchar(100)\\n  \\\"second_service_return_points_won\\\" varchar(100)\\n  \\\"second_service_return_points_won_pct\\\" varchar(100)\\n  \\\"return_games_played\\\" varchar(100)\\n  \\\"return_games_won\\\" varchar(100)\\n  \\\"return_games_won_pct\\\" varchar(100)\\n  \\\"break_points_played\\\" varchar(100)\\n  \\\"break_points_converted\\\" varchar(100)\\n  \\\"break_points_converted_pct\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_service_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"services_played\\\" varchar(100)\\n  \\\"matches_played\\\" varchar(100)\\n  \\\"aces\\\" varchar(100)\\n  \\\"first_services_good\\\" varchar(100)\\n  \\\"first_services_good_pct\\\" varchar(100)\\n  \\\"first_service_points_won\\\" varchar(100)\\n  \\\"first_service_points_won_pct\\\" varchar(100)\\n  \\\"second_service_points_won\\\" varchar(100)\\n  \\\"second_service_points_won_pct\\\" varchar(100)\\n  \\\"service_games_played\\\" varchar(100)\\n  \\\"service_games_won\\\" varchar(100)\\n  \\\"service_games_won_pct\\\" varchar(100)\\n  \\\"break_points_played\\\" varchar(100)\\n  \\\"break_points_saved\\\" varchar(100)\\n  \\\"break_points_saved_pct\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_moneylines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line\\\" varchar(100)\\n  \\\"line_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_odds_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 
[not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"numerator\\\" varchar(100)\\n  \\\"denominator\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n  \\\"payout_calculation\\\" varchar(100)\\n  \\\"payout_amount\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_runlines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line\\\" varchar(100)\\n  \\\"line_opening\\\" varchar(100)\\n  \\\"line_value\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_straight_spread_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line_value\\\" varchar(100)\\n  \\\"line_value_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_total_score_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line_over\\\" varchar(100)\\n  \\\"line_under\\\" varchar(100)\\n  \\\"total\\\" varchar(100)\\n  \\\"total_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"weather_conditions\\\" {\\n  
\\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"temperature\\\" varchar(100)\\n  \\\"temperature_units\\\" varchar(40)\\n  \\\"humidity\\\" varchar(100)\\n  \\\"clouds\\\" varchar(100)\\n  \\\"wind_direction\\\" varchar(100)\\n  \\\"wind_velocity\\\" varchar(100)\\n  \\\"weather_code\\\" varchar(100)\\n}\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/yugabyte/yugabyte-db/blob/master/sample/sportsdb_tables.sql\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/sports/participant_report_optimization/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Sports Task 3: Query Performance Optimization\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.001 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.001:\n                return False\n        elif isinstance(actual, float) and isinstance(expected, float):\n            if abs(actual - expected) > 0.001:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\", \"sports\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\", \"postgres\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\", \"postgres\")\n    }\n\ndef verify_report_data(conn) -> bool:\n    \"\"\"Verify the report table contains the expected data.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results from the report table\n        cur.execute(\"\"\"\n            SELECT participant_id, event_count, stat_count, stat_type_count, last_event_date\n            FROM participant_performance_report\n            ORDER BY participant_id\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        if len(actual_results) == 0:\n            print(\"❌ Report table is empty\")\n            return False\n        \n        # Execute ground truth 
query\n        cur.execute(\"\"\"\n            SELECT \n                pe.participant_id,\n                COUNT(pe.event_id) as event_count,\n                (SELECT COUNT(*) FROM stats s WHERE s.stat_holder_id = pe.participant_id AND s.stat_holder_type = 'persons') as stat_count,\n                (SELECT COUNT(DISTINCT s.stat_repository_type) FROM stats s WHERE s.stat_holder_id = pe.participant_id AND s.stat_holder_type = 'persons') as stat_type_count,\n                (SELECT MAX(e.start_date_time) FROM events e JOIN participants_events pe2 ON e.id = pe2.event_id WHERE pe2.participant_id = pe.participant_id) as last_event_date\n            FROM participants_events pe \n            WHERE pe.participant_id <= 50\n            GROUP BY pe.participant_id\n            ORDER BY pe.participant_id\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} report records, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for actual, expected in zip(actual_results, expected_results):\n            if not rows_match(actual, expected):\n                if mismatches < 5:\n                    print(f\"❌ Row mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches in report data: {mismatches}\")\n            return False\n\n        print(f\"✅ Report data is correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_performance_optimization(conn) -> bool:\n    \"\"\"Verify that key performance optimization indexes have been implemented.\"\"\"\n    with conn.cursor() as cur:\n        print(\"\\n🔍 Checking for critical performance indexes...\")\n        \n        # Check 1: participants_events.participant_id index (critical for subqueries)\n        cur.execute(\"\"\"\n            SELECT indexname, indexdef \n      
      FROM pg_indexes \n            WHERE schemaname = 'public' \n            AND tablename = 'participants_events'\n            AND indexdef LIKE '%participant_id%'\n        \"\"\")\n        participant_indexes = cur.fetchall()\n        has_participant_index = len(participant_indexes) > 0\n        \n        # Check 2: stats table optimization (critical for subquery filtering)\n        cur.execute(\"\"\"\n            SELECT indexname, indexdef \n            FROM pg_indexes \n            WHERE schemaname = 'public' \n            AND tablename = 'stats'\n            AND indexdef LIKE '%stat_holder_type%'\n            AND indexdef LIKE '%stat_holder_id%'\n        \"\"\")\n        stats_indexes = cur.fetchall()\n        has_stats_index = len(stats_indexes) > 0\n        \n        # Report findings\n        critical_indexes_found = 0\n        \n        if has_participant_index:\n            print(\"✅ Found participant filtering index on participants_events.participant_id\")\n            critical_indexes_found += 1\n        else:\n            print(\"❌ Missing critical index on participants_events.participant_id\")\n            \n        if has_stats_index:\n            print(\"✅ Found subquery optimization index on stats table\")\n            critical_indexes_found += 1\n        else:\n            print(\"❌ Missing critical index on stats table\")\n        \n        # Must have both critical indexes for this subquery-heavy query\n        if critical_indexes_found >= 2:\n            print(f\"\\n✅ Performance optimization: PASS ({critical_indexes_found}/2 critical indexes found)\")\n            return True\n        else:\n            print(f\"\\n❌ Performance optimization: FAIL ({critical_indexes_found}/2 critical indexes found)\")\n            print(\"   Create these critical indexes:\")\n            print(\"   - CREATE INDEX ON participants_events(participant_id);\")\n            print(\"   - CREATE INDEX ON stats(stat_holder_type, stat_holder_id);\")\n            return 
False\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n    print(\"Verifying Sports Task 3: Query Performance Optimization\")\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all components\n        success = (\n            verify_report_data(conn) and\n            verify_performance_optimization(conn)\n        )\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/sports/team_roster_management/description.md",
    "content": "# Team Roster Management Operations\n\n## Background\nYou need to manage team rosters for the upcoming season, including player transfers, injury tracking, and performance evaluations.\n\n## Requirements\n\nComplete the following 5 operations in order:\n\n### 1. Set Up Player Performance Tracking\nCreate a table called `player_evaluation` with the following structure:\n- performance_id (serial primary key)\n- person_id (integer not null, references persons(id))\n- batting_avg (decimal)\n- home_runs (integer)\n- rbis (integer)\n- games_played (integer)\n- performance_score (decimal)\n- evaluation_date (date)\n\nAdd constraint: CHECK (batting_avg BETWEEN 0 AND 1)\n\n### 2. Load Historical Player Statistics\nInsert player performance data into `player_evaluation`:\n- Select all players who have offensive statistics\n- Calculate batting_avg as hits/at_bats (handle division by zero)\n- Sum up home_runs, rbi from baseball_offensive_stats\n- Count games_played from person_event_metadata\n- Calculate performance_score as: (batting_avg * 1000) + (home_runs * 5) + (rbi * 2)\n- Only include players with at least 10 games played\n- Set evaluation_date to '2024-01-01'\n\n### 3. Track Player Health Status\nCreate a table called `player_injury_status`:\n- status_id (serial primary key)\n- person_id (integer unique not null)\n- injury_count (integer default 0)\n- last_injury_date (date)\n- current_status (varchar check in ('healthy', 'injured', 'recovering'))\n\nInsert data by:\n- Including all players from player_evaluation\n- Count injuries from injury_phases for each player\n- Get the most recent injury start_date as last_injury_date\n- Set current_status: 'injured' if injury has no end_date, otherwise 'healthy'\n\n### 4. 
Adjust Scores Based on Health\nUpdate `player_evaluation` to reduce performance scores for injured players:\n- Reduce performance_score by 20% for players with current_status = 'injured'\n- Reduce performance_score by 10% for players with injury_count > 2\n- Set minimum performance_score to 0 (no negative scores)\n\n### 5. Generate Performance Summary Report\nCreate a summary table called `team_performance_summary`:\n- summary_id (serial primary key)\n- metric_name (varchar unique)\n- metric_value (decimal)\n\nInsert the following metrics:\n- 'total_players' - count of players in player_evaluation\n- 'avg_batting_average' - average batting_avg\n- 'total_home_runs' - sum of all home_runs\n- 'avg_performance_score' - average performance_score\n- 'injured_player_count' - count of injured players\n- 'healthy_player_count' - count of healthy players\n\n## Important Notes\n- Handle NULL values appropriately (treat as 0 where needed)\n- Ensure foreign key constraints are properly set\n- Do NOT use ROUND functions in calculations\n- Use COALESCE to handle NULL values in calculations"
  },
  {
    "path": "tasks/postgres/standard/sports/team_roster_management/meta.json",
    "content": "{\n  \"task_id\": \"team_roster_management\",\n  \"task_name\": \"Team Roster Management\",\n  \"category_id\": \"sports\",\n  \"category_name\": \"Sports\",\n  \"description\": \"Manage team rosters with player transfers, injury tracking, performance evaluations, and health status adjustments.\",\n  \"author\": \"Lingxiao Du\",\n  \"created_at\": \"2025-08-18\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"schema design\",\n    \"data migration\",\n    \"statistical aggregation\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"addresses\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"location_id\\\" int4 [not null]\\n  \\\"language\\\" varchar(100)\\n  \\\"suite\\\" varchar(100)\\n  \\\"floor\\\" varchar(100)\\n  \\\"building\\\" varchar(100)\\n  \\\"street_number\\\" varchar(100)\\n  \\\"street_prefix\\\" varchar(100)\\n  \\\"street\\\" varchar(100)\\n  \\\"street_suffix\\\" varchar(100)\\n  \\\"neighborhood\\\" varchar(100)\\n  \\\"district\\\" varchar(100)\\n  \\\"locality\\\" varchar(100)\\n  \\\"county\\\" varchar(100)\\n  \\\"region\\\" varchar(100)\\n  \\\"postal_code\\\" varchar(100)\\n  \\\"country\\\" varchar(100)\\n}\\n\\nTable \\\"affiliation_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"ancestor_affiliation_id\\\" int4\\n  \\\"start_season_id\\\" int4\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_season_id\\\" int4\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"affiliations\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_key\\\" varchar(100) [not null]\\n  \\\"affiliation_type\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_documents\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_events\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  
\\\"event_id\\\" int4 [not null]\\n}\\n\\nTable \\\"affiliations_media\\\" {\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"american_football_action_participants\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"american_football_action_play_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"participant_role\\\" varchar(100) [not null]\\n  \\\"score_type\\\" varchar(100)\\n  \\\"field_line\\\" int4\\n  \\\"yardage\\\" int4\\n  \\\"score_credit\\\" int4\\n  \\\"yards_gained\\\" int4\\n}\\n\\nTable \\\"american_football_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"american_football_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"score_attempt_type\\\" varchar(100)\\n  \\\"drive_result\\\" varchar(100)\\n  \\\"points\\\" int4\\n  \\\"comment\\\" varchar(255)\\n}\\n\\nTable \\\"american_football_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"tackles_total\\\" varchar(100)\\n  \\\"tackles_solo\\\" varchar(100)\\n  \\\"tackles_assists\\\" varchar(100)\\n  \\\"interceptions_total\\\" varchar(100)\\n  \\\"interceptions_yards\\\" varchar(100)\\n  \\\"interceptions_average\\\" varchar(100)\\n  \\\"interceptions_longest\\\" varchar(100)\\n  \\\"interceptions_touchdown\\\" varchar(100)\\n  \\\"quarterback_hurries\\\" varchar(100)\\n  \\\"sacks_total\\\" varchar(100)\\n  \\\"sacks_yards\\\" varchar(100)\\n  \\\"passes_defensed\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_down_progress_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"first_downs_total\\\" varchar(100)\\n  \\\"first_downs_pass\\\" varchar(100)\\n  \\\"first_downs_run\\\" varchar(100)\\n  \\\"first_downs_penalty\\\" varchar(100)\\n  \\\"conversions_third_down\\\" varchar(100)\\n  \\\"conversions_third_down_attempts\\\" varchar(100)\\n  \\\"conversions_third_down_percentage\\\" varchar(100)\\n  \\\"conversions_fourth_down\\\" varchar(100)\\n 
 \\\"conversions_fourth_down_attempts\\\" varchar(100)\\n  \\\"conversions_fourth_down_percentage\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int2\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" int4\\n  \\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"clock_state\\\" varchar(100)\\n  \\\"down\\\" int4\\n  \\\"team_in_possession_id\\\" int4\\n  \\\"distance_for_1st_down\\\" int4\\n  \\\"field_side\\\" varchar(100)\\n  \\\"field_line\\\" int4\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"american_football_fumbles_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fumbles_committed\\\" varchar(100)\\n  \\\"fumbles_forced\\\" varchar(100)\\n  \\\"fumbles_recovered\\\" varchar(100)\\n  \\\"fumbles_lost\\\" varchar(100)\\n  \\\"fumbles_yards_gained\\\" varchar(100)\\n  \\\"fumbles_own_committed\\\" varchar(100)\\n  \\\"fumbles_own_recovered\\\" varchar(100)\\n  \\\"fumbles_own_lost\\\" varchar(100)\\n  \\\"fumbles_own_yards_gained\\\" varchar(100)\\n  \\\"fumbles_opposing_committed\\\" varchar(100)\\n  \\\"fumbles_opposing_recovered\\\" varchar(100)\\n  \\\"fumbles_opposing_lost\\\" varchar(100)\\n  \\\"fumbles_opposing_yards_gained\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"offensive_plays_yards\\\" varchar(100)\\n  \\\"offensive_plays_number\\\" varchar(100)\\n  \\\"offensive_plays_average_yards_per\\\" varchar(100)\\n  \\\"possession_duration\\\" varchar(100)\\n  \\\"turnovers_giveaway\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_passing_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"passes_attempts\\\" varchar(100)\\n  \\\"passes_completions\\\" varchar(100)\\n  \\\"passes_percentage\\\" varchar(100)\\n  \\\"passes_yards_gross\\\" varchar(100)\\n  
\\\"passes_yards_net\\\" varchar(100)\\n  \\\"passes_yards_lost\\\" varchar(100)\\n  \\\"passes_touchdowns\\\" varchar(100)\\n  \\\"passes_touchdowns_percentage\\\" varchar(100)\\n  \\\"passes_interceptions\\\" varchar(100)\\n  \\\"passes_interceptions_percentage\\\" varchar(100)\\n  \\\"passes_longest\\\" varchar(100)\\n  \\\"passes_average_yards_per\\\" varchar(100)\\n  \\\"passer_rating\\\" varchar(100)\\n  \\\"receptions_total\\\" varchar(100)\\n  \\\"receptions_yards\\\" varchar(100)\\n  \\\"receptions_touchdowns\\\" varchar(100)\\n  \\\"receptions_first_down\\\" varchar(100)\\n  \\\"receptions_longest\\\" varchar(100)\\n  \\\"receptions_average_yards_per\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_penalties_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"penalties_total\\\" varchar(100)\\n  \\\"penalty_yards\\\" varchar(100)\\n  \\\"penalty_first_downs\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_rushing_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"rushes_attempts\\\" varchar(100)\\n  \\\"rushes_yards\\\" varchar(100)\\n  \\\"rushes_touchdowns\\\" varchar(100)\\n  \\\"rushing_average_yards_per\\\" varchar(100)\\n  \\\"rushes_first_down\\\" varchar(100)\\n  \\\"rushes_longest\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_sacks_against_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sacks_against_yards\\\" varchar(100)\\n  \\\"sacks_against_total\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_scoring_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"touchdowns_total\\\" varchar(100)\\n  \\\"touchdowns_passing\\\" varchar(100)\\n  \\\"touchdowns_rushing\\\" varchar(100)\\n  \\\"touchdowns_special_teams\\\" varchar(100)\\n  \\\"touchdowns_defensive\\\" varchar(100)\\n  \\\"extra_points_attempts\\\" varchar(100)\\n  \\\"extra_points_made\\\" varchar(100)\\n  \\\"extra_points_missed\\\" varchar(100)\\n  \\\"extra_points_blocked\\\" varchar(100)\\n  \\\"field_goal_attempts\\\" 
varchar(100)\\n  \\\"field_goals_made\\\" varchar(100)\\n  \\\"field_goals_missed\\\" varchar(100)\\n  \\\"field_goals_blocked\\\" varchar(100)\\n  \\\"safeties_against\\\" varchar(100)\\n  \\\"two_point_conversions_attempts\\\" varchar(100)\\n  \\\"two_point_conversions_made\\\" varchar(100)\\n  \\\"touchbacks_total\\\" varchar(100)\\n}\\n\\nTable \\\"american_football_special_teams_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"returns_punt_total\\\" varchar(100)\\n  \\\"returns_punt_yards\\\" varchar(100)\\n  \\\"returns_punt_average\\\" varchar(100)\\n  \\\"returns_punt_longest\\\" varchar(100)\\n  \\\"returns_punt_touchdown\\\" varchar(100)\\n  \\\"returns_kickoff_total\\\" varchar(100)\\n  \\\"returns_kickoff_yards\\\" varchar(100)\\n  \\\"returns_kickoff_average\\\" varchar(100)\\n  \\\"returns_kickoff_longest\\\" varchar(100)\\n  \\\"returns_kickoff_touchdown\\\" varchar(100)\\n  \\\"returns_total\\\" varchar(100)\\n  \\\"returns_yards\\\" varchar(100)\\n  \\\"punts_total\\\" varchar(100)\\n  \\\"punts_yards_gross\\\" varchar(100)\\n  \\\"punts_yards_net\\\" varchar(100)\\n  \\\"punts_longest\\\" varchar(100)\\n  \\\"punts_inside_20\\\" varchar(100)\\n  \\\"punts_inside_20_percentage\\\" varchar(100)\\n  \\\"punts_average\\\" varchar(100)\\n  \\\"punts_blocked\\\" varchar(100)\\n  \\\"touchbacks_total\\\" varchar(100)\\n  \\\"touchbacks_total_percentage\\\" varchar(100)\\n  \\\"touchbacks_kickoffs\\\" varchar(100)\\n  \\\"touchbacks_kickoffs_percentage\\\" varchar(100)\\n  \\\"touchbacks_punts\\\" varchar(100)\\n  \\\"touchbacks_punts_percentage\\\" varchar(100)\\n  \\\"touchbacks_interceptions\\\" varchar(100)\\n  \\\"touchbacks_interceptions_percentage\\\" varchar(100)\\n  \\\"fair_catches\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_contact_details\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_action_pitch_id\\\" int4 [not null]\\n  \\\"location\\\" varchar(100)\\n  \\\"strength\\\" varchar(100)\\n  
\\\"velocity\\\" int4\\n  \\\"comment\\\" text\\n  \\\"trajectory_coordinates\\\" varchar(100)\\n  \\\"trajectory_formula\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_pitches\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_action_play_id\\\" int4 [not null]\\n  \\\"sequence_number\\\" int4\\n  \\\"baseball_defensive_group_id\\\" int4\\n  \\\"umpire_call\\\" varchar(100)\\n  \\\"pitch_location\\\" varchar(100)\\n  \\\"pitch_type\\\" varchar(100)\\n  \\\"pitch_velocity\\\" int4\\n  \\\"comment\\\" text\\n  \\\"trajectory_coordinates\\\" varchar(100)\\n  \\\"trajectory_formula\\\" varchar(100)\\n  \\\"ball_type\\\" varchar(40)\\n  \\\"strike_type\\\" varchar(40)\\n}\\n\\nTable \\\"baseball_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"notation\\\" varchar(100)\\n  \\\"notation_yaml\\\" text\\n  \\\"baseball_defensive_group_id\\\" int4\\n  \\\"comment\\\" varchar(255)\\n  \\\"runner_on_first_advance\\\" int4\\n  \\\"runner_on_second_advance\\\" int4\\n  \\\"runner_on_third_advance\\\" int4\\n  \\\"outs_recorded\\\" int4\\n  \\\"rbi\\\" int4\\n  \\\"runs_scored\\\" int4\\n  \\\"earned_runs_scored\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_action_substitutions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"baseball_event_state_id\\\" int4 [not null]\\n  \\\"sequence_number\\\" int4\\n  \\\"person_type\\\" varchar(100)\\n  \\\"person_original_id\\\" int4\\n  \\\"person_original_position_id\\\" int4\\n  \\\"person_original_lineup_slot\\\" int4\\n  \\\"person_replacing_id\\\" int4\\n  \\\"person_replacing_position_id\\\" int4\\n  \\\"person_replacing_lineup_slot\\\" int4\\n  \\\"substitution_reason\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n}\\n\\nTable \\\"baseball_defensive_group\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n}\\n\\nTable \\\"baseball_defensive_players\\\" {\\n  \\\"id\\\" int4 [not null, 
increment]\\n  \\\"baseball_defensive_group_id\\\" int4 [not null]\\n  \\\"player_id\\\" int4 [not null]\\n  \\\"position_id\\\" int4 [not null]\\n}\\n\\nTable \\\"baseball_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"double_plays\\\" int4\\n  \\\"triple_plays\\\" int4\\n  \\\"putouts\\\" int4\\n  \\\"assists\\\" int4\\n  \\\"errors\\\" int4\\n  \\\"fielding_percentage\\\" numeric\\n  \\\"defensive_average\\\" numeric\\n  \\\"errors_passed_ball\\\" int4\\n  \\\"errors_catchers_interference\\\" int4\\n}\\n\\nTable \\\"baseball_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int2\\n  \\\"sequence_number\\\" int4\\n  \\\"at_bat_number\\\" int4\\n  \\\"inning_value\\\" int4\\n  \\\"inning_half\\\" varchar(100)\\n  \\\"outs\\\" int4\\n  \\\"balls\\\" int4\\n  \\\"strikes\\\" int4\\n  \\\"runner_on_first_id\\\" int4\\n  \\\"runner_on_second_id\\\" int4\\n  \\\"runner_on_third_id\\\" int4\\n  \\\"runner_on_first\\\" int2\\n  \\\"runner_on_second\\\" int2\\n  \\\"runner_on_third\\\" int2\\n  \\\"runs_this_inning_half\\\" int4\\n  \\\"pitcher_id\\\" int4\\n  \\\"batter_id\\\" int4\\n  \\\"batter_side\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"baseball_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"average\\\" numeric\\n  \\\"runs_scored\\\" int4\\n  \\\"at_bats\\\" int4\\n  \\\"hits\\\" int4\\n  \\\"rbi\\\" int4\\n  \\\"total_bases\\\" int4\\n  \\\"slugging_percentage\\\" numeric\\n  \\\"bases_on_balls\\\" int4\\n  \\\"strikeouts\\\" int4\\n  \\\"left_on_base\\\" int4\\n  \\\"left_in_scoring_position\\\" int4\\n  \\\"singles\\\" int4\\n  \\\"doubles\\\" int4\\n  \\\"triples\\\" int4\\n  \\\"home_runs\\\" int4\\n  \\\"grand_slams\\\" int4\\n  \\\"at_bats_per_rbi\\\" numeric\\n  \\\"plate_appearances_per_rbi\\\" numeric\\n  \\\"at_bats_per_home_run\\\" numeric\\n  \\\"plate_appearances_per_home_run\\\" numeric\\n  
\\\"sac_flies\\\" int4\\n  \\\"sac_bunts\\\" int4\\n  \\\"grounded_into_double_play\\\" int4\\n  \\\"moved_up\\\" int4\\n  \\\"on_base_percentage\\\" numeric\\n  \\\"stolen_bases\\\" int4\\n  \\\"stolen_bases_caught\\\" int4\\n  \\\"stolen_bases_average\\\" numeric\\n  \\\"hit_by_pitch\\\" int4\\n  \\\"defensive_interferance_reaches\\\" int4\\n  \\\"on_base_plus_slugging\\\" numeric\\n  \\\"plate_appearances\\\" int4\\n  \\\"hits_extra_base\\\" int4\\n}\\n\\nTable \\\"baseball_pitching_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"runs_allowed\\\" int4\\n  \\\"singles_allowed\\\" int4\\n  \\\"doubles_allowed\\\" int4\\n  \\\"triples_allowed\\\" int4\\n  \\\"home_runs_allowed\\\" int4\\n  \\\"innings_pitched\\\" varchar(20)\\n  \\\"hits\\\" int4\\n  \\\"earned_runs\\\" int4\\n  \\\"unearned_runs\\\" int4\\n  \\\"bases_on_balls\\\" int4\\n  \\\"bases_on_balls_intentional\\\" int4\\n  \\\"strikeouts\\\" int4\\n  \\\"strikeout_to_bb_ratio\\\" numeric\\n  \\\"number_of_pitches\\\" int4\\n  \\\"era\\\" numeric\\n  \\\"inherited_runners_scored\\\" int4\\n  \\\"pick_offs\\\" int4\\n  \\\"errors_hit_with_pitch\\\" int4\\n  \\\"errors_wild_pitch\\\" int4\\n  \\\"balks\\\" int4\\n  \\\"wins\\\" int4\\n  \\\"losses\\\" int4\\n  \\\"saves\\\" int4\\n  \\\"shutouts\\\" int4\\n  \\\"games_complete\\\" int4\\n  \\\"games_finished\\\" int4\\n  \\\"winning_percentage\\\" numeric\\n  \\\"event_credit\\\" varchar(40)\\n  \\\"save_credit\\\" varchar(40)\\n}\\n\\nTable \\\"basketball_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"steals_total\\\" varchar(100)\\n  \\\"steals_per_game\\\" varchar(100)\\n  \\\"blocks_total\\\" varchar(100)\\n  \\\"blocks_per_game\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" varchar(100)\\n  \\\"period_time_elapsed\\\" 
varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"basketball_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"field_goals_made\\\" int4\\n  \\\"field_goals_attempted\\\" int4\\n  \\\"field_goals_percentage\\\" varchar(100)\\n  \\\"field_goals_per_game\\\" varchar(100)\\n  \\\"field_goals_attempted_per_game\\\" varchar(100)\\n  \\\"field_goals_percentage_adjusted\\\" varchar(100)\\n  \\\"three_pointers_made\\\" int4\\n  \\\"three_pointers_attempted\\\" int4\\n  \\\"three_pointers_percentage\\\" varchar(100)\\n  \\\"three_pointers_per_game\\\" varchar(100)\\n  \\\"three_pointers_attempted_per_game\\\" varchar(100)\\n  \\\"free_throws_made\\\" varchar(100)\\n  \\\"free_throws_attempted\\\" varchar(100)\\n  \\\"free_throws_percentage\\\" varchar(100)\\n  \\\"free_throws_per_game\\\" varchar(100)\\n  \\\"free_throws_attempted_per_game\\\" varchar(100)\\n  \\\"points_scored_total\\\" varchar(100)\\n  \\\"points_scored_per_game\\\" varchar(100)\\n  \\\"assists_total\\\" varchar(100)\\n  \\\"assists_per_game\\\" varchar(100)\\n  \\\"turnovers_total\\\" varchar(100)\\n  \\\"turnovers_per_game\\\" varchar(100)\\n  \\\"points_scored_off_turnovers\\\" varchar(100)\\n  \\\"points_scored_in_paint\\\" varchar(100)\\n  \\\"points_scored_on_second_chance\\\" varchar(100)\\n  \\\"points_scored_on_fast_break\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_rebounding_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"rebounds_total\\\" varchar(100)\\n  \\\"rebounds_per_game\\\" varchar(100)\\n  \\\"rebounds_defensive\\\" varchar(100)\\n  \\\"rebounds_offensive\\\" varchar(100)\\n  \\\"team_rebounds_total\\\" varchar(100)\\n  \\\"team_rebounds_per_game\\\" varchar(100)\\n  \\\"team_rebounds_defensive\\\" varchar(100)\\n  \\\"team_rebounds_offensive\\\" varchar(100)\\n}\\n\\nTable \\\"basketball_team_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"timeouts_left\\\" varchar(100)\\n 
 \\\"largest_lead\\\" varchar(100)\\n  \\\"fouls_total\\\" varchar(100)\\n  \\\"turnover_margin\\\" varchar(100)\\n}\\n\\nTable \\\"bookmakers\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_key\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"location_id\\\" int4\\n}\\n\\nTable \\\"core_person_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"time_played_event\\\" varchar(40)\\n  \\\"time_played_total\\\" varchar(40)\\n  \\\"time_played_event_average\\\" varchar(40)\\n  \\\"events_played\\\" int4\\n  \\\"events_started\\\" int4\\n  \\\"position_id\\\" int4\\n}\\n\\nTable \\\"core_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"score\\\" varchar(100)\\n  \\\"score_opposing\\\" varchar(100)\\n  \\\"score_attempts\\\" varchar(100)\\n  \\\"score_attempts_opposing\\\" varchar(100)\\n  \\\"score_percentage\\\" varchar(100)\\n  \\\"score_percentage_opposing\\\" varchar(100)\\n}\\n\\nTable \\\"db_info\\\" {\\n  \\\"version\\\" varchar(100) [not null, default: 16]\\n}\\n\\nTable \\\"display_names\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"language\\\" varchar(100) [not null]\\n  \\\"entity_type\\\" varchar(100) [not null]\\n  \\\"entity_id\\\" int4 [not null]\\n  \\\"full_name\\\" varchar(100)\\n  \\\"first_name\\\" varchar(100)\\n  \\\"middle_name\\\" varchar(100)\\n  \\\"last_name\\\" varchar(100)\\n  \\\"alias\\\" varchar(100)\\n  \\\"abbreviation\\\" varchar(100)\\n  \\\"short_name\\\" varchar(100)\\n  \\\"prefix\\\" varchar(20)\\n  \\\"suffix\\\" varchar(20)\\n}\\n\\nTable \\\"document_classes\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"name\\\" varchar(100)\\n}\\n\\nTable \\\"document_contents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"sportsml\\\" varchar(200)\\n  \\\"abstract\\\" text\\n}\\n\\nTable \\\"document_fixtures\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fixture_key\\\" varchar(100)\\n  \\\"publisher_id\\\" 
int4 [not null]\\n  \\\"name\\\" varchar(100)\\n  \\\"document_class_id\\\" int4 [not null]\\n}\\n\\nTable \\\"document_fixtures_events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_fixture_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"latest_document_id\\\" int4 [not null]\\n  \\\"last_update\\\" timestamp\\n}\\n\\nTable \\\"document_package_entry\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_package_id\\\" int4 [not null]\\n  \\\"rank\\\" varchar(100)\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"headline\\\" varchar(100)\\n  \\\"short_headline\\\" varchar(100)\\n}\\n\\nTable \\\"document_packages\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"package_key\\\" varchar(100)\\n  \\\"package_name\\\" varchar(100)\\n  \\\"date_time\\\" date\\n}\\n\\nTable \\\"documents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"doc_id\\\" varchar(75) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"title\\\" varchar(255)\\n  \\\"language\\\" varchar(100)\\n  \\\"priority\\\" varchar(100)\\n  \\\"revision_id\\\" varchar(75)\\n  \\\"stats_coverage\\\" varchar(100)\\n  \\\"document_fixture_id\\\" int4 [not null]\\n  \\\"source_id\\\" int4\\n  \\\"db_loading_date_time\\\" timestamp\\n}\\n\\nTable \\\"documents_media\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"document_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"media_caption_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"site_id\\\" int4\\n  \\\"site_alignment\\\" varchar(100)\\n  \\\"event_status\\\" varchar(100)\\n  \\\"duration\\\" varchar(100)\\n  \\\"attendance\\\" varchar(100)\\n  \\\"last_update\\\" timestamp\\n}\\n\\nTable \\\"events_documents\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  
\\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events_media\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"events_sub_seasons\\\" {\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"sub_season_id\\\" int4 [not null]\\n}\\n\\nTable \\\"ice_hockey_action_participants\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"ice_hockey_action_play_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"participant_role\\\" varchar(100) [not null]\\n  \\\"point_credit\\\" int4\\n}\\n\\nTable \\\"ice_hockey_action_plays\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"ice_hockey_event_state_id\\\" int4 [not null]\\n  \\\"play_type\\\" varchar(100)\\n  \\\"score_attempt_type\\\" varchar(100)\\n  \\\"play_result\\\" varchar(100)\\n  \\\"comment\\\" varchar(255)\\n}\\n\\nTable \\\"ice_hockey_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"shots_power_play_allowed\\\" varchar(100)\\n  \\\"shots_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_power_play_allowed\\\" varchar(100)\\n  \\\"goals_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_against_average\\\" varchar(100)\\n  \\\"saves\\\" varchar(100)\\n  \\\"save_percentage\\\" varchar(100)\\n  \\\"penalty_killing_amount\\\" varchar(100)\\n  \\\"penalty_killing_percentage\\\" varchar(100)\\n  \\\"shots_blocked\\\" varchar(100)\\n  \\\"takeaways\\\" varchar(100)\\n  \\\"shutouts\\\" varchar(100)\\n  \\\"minutes_penalty_killing\\\" varchar(100)\\n  \\\"hits\\\" varchar(100)\\n  \\\"goals_empty_net_allowed\\\" varchar(100)\\n  \\\"goals_short_handed_allowed\\\" varchar(100)\\n  \\\"goals_shootout_allowed\\\" varchar(100)\\n  \\\"shots_shootout_allowed\\\" varchar(100)\\n}\\n\\nTable \\\"ice_hockey_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" varchar(100)\\n  
\\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"ice_hockey_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"goals_game_winning\\\" varchar(100)\\n  \\\"goals_game_tying\\\" varchar(100)\\n  \\\"goals_power_play\\\" varchar(100)\\n  \\\"goals_short_handed\\\" varchar(100)\\n  \\\"goals_even_strength\\\" varchar(100)\\n  \\\"goals_empty_net\\\" varchar(100)\\n  \\\"goals_overtime\\\" varchar(100)\\n  \\\"goals_shootout\\\" varchar(100)\\n  \\\"goals_penalty_shot\\\" varchar(100)\\n  \\\"assists\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"power_play_amount\\\" varchar(100)\\n  \\\"power_play_percentage\\\" varchar(100)\\n  \\\"shots_penalty_shot_taken\\\" varchar(100)\\n  \\\"shots_penalty_shot_missed\\\" varchar(100)\\n  \\\"shots_penalty_shot_percentage\\\" varchar(100)\\n  \\\"giveaways\\\" varchar(100)\\n  \\\"minutes_power_play\\\" varchar(100)\\n  \\\"faceoff_wins\\\" varchar(100)\\n  \\\"faceoff_losses\\\" varchar(100)\\n  \\\"faceoff_win_percentage\\\" varchar(100)\\n  \\\"scoring_chances\\\" varchar(100)\\n}\\n\\nTable \\\"ice_hockey_player_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"plus_minus\\\" varchar(100)\\n}\\n\\nTable \\\"injury_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"injury_status\\\" varchar(100)\\n  \\\"injury_type\\\" varchar(100)\\n  \\\"injury_comment\\\" varchar(100)\\n  \\\"disabled_list\\\" varchar(100)\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n  \\\"season_id\\\" int4\\n  \\\"phase_type\\\" varchar(100)\\n  \\\"injury_side\\\" varchar(100)\\n}\\n\\nTable \\\"key_aliases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"key_id\\\" int4 [not null]\\n  \\\"key_root_id\\\" int4 [not null]\\n}\\n\\nTable \\\"key_roots\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"key_type\\\" 
varchar(100)\\n}\\n\\nTable \\\"latest_revisions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"revision_id\\\" varchar(75) [not null]\\n  \\\"latest_document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"locations\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"timezone\\\" varchar(100)\\n  \\\"latitude\\\" varchar(100)\\n  \\\"longitude\\\" varchar(100)\\n  \\\"country_code\\\" varchar(100)\\n}\\n\\nTable \\\"media\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"object_id\\\" int4\\n  \\\"source_id\\\" int4\\n  \\\"revision_id\\\" int4\\n  \\\"media_type\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"date_time\\\" varchar(100)\\n  \\\"credit_id\\\" int4 [not null]\\n  \\\"db_loading_date_time\\\" timestamp\\n  \\\"creation_location_id\\\" int4 [not null]\\n}\\n\\nTable \\\"media_captions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"caption_type\\\" varchar(100)\\n  \\\"caption\\\" varchar(100)\\n  \\\"caption_author_id\\\" int4 [not null]\\n  \\\"language\\\" varchar(100)\\n  \\\"caption_size\\\" varchar(100)\\n}\\n\\nTable \\\"media_contents\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"media_id\\\" int4 [not null]\\n  \\\"object\\\" varchar(100)\\n  \\\"format\\\" varchar(100)\\n  \\\"mime_type\\\" varchar(100)\\n  \\\"height\\\" varchar(100)\\n  \\\"width\\\" varchar(100)\\n  \\\"duration\\\" varchar(100)\\n  \\\"file_size\\\" varchar(100)\\n  \\\"resolution\\\" varchar(100)\\n}\\n\\nTable \\\"media_keywords\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"keyword\\\" varchar(100)\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"motor_racing_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"lap\\\" varchar(100)\\n  \\\"laps_remaining\\\" varchar(100)\\n  \\\"time_elapsed\\\" varchar(100)\\n  \\\"flag_state\\\" varchar(100)\\n  
\\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"motor_racing_qualifying_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"grid\\\" varchar(100)\\n  \\\"pole_position\\\" varchar(100)\\n  \\\"pole_wins\\\" varchar(100)\\n  \\\"qualifying_speed\\\" varchar(100)\\n  \\\"qualifying_speed_units\\\" varchar(100)\\n  \\\"qualifying_time\\\" varchar(100)\\n  \\\"qualifying_position\\\" varchar(100)\\n}\\n\\nTable \\\"motor_racing_race_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"time_behind_leader\\\" varchar(100)\\n  \\\"laps_behind_leader\\\" varchar(100)\\n  \\\"time_ahead_follower\\\" varchar(100)\\n  \\\"laps_ahead_follower\\\" varchar(100)\\n  \\\"time\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"points_rookie\\\" varchar(100)\\n  \\\"bonus\\\" varchar(100)\\n  \\\"laps_completed\\\" varchar(100)\\n  \\\"laps_leading_total\\\" varchar(100)\\n  \\\"distance_leading\\\" varchar(100)\\n  \\\"distance_completed\\\" varchar(100)\\n  \\\"distance_units\\\" varchar(40)\\n  \\\"speed_average\\\" varchar(40)\\n  \\\"speed_units\\\" varchar(40)\\n  \\\"status\\\" varchar(40)\\n  \\\"finishes_top_5\\\" varchar(40)\\n  \\\"finishes_top_10\\\" varchar(40)\\n  \\\"starts\\\" varchar(40)\\n  \\\"finishes\\\" varchar(40)\\n  \\\"non_finishes\\\" varchar(40)\\n  \\\"wins\\\" varchar(40)\\n  \\\"races_leading\\\" varchar(40)\\n  \\\"money\\\" varchar(40)\\n  \\\"money_units\\\" varchar(40)\\n  \\\"leads_total\\\" varchar(40)\\n}\\n\\nTable \\\"outcome_totals\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"standing_subgroup_id\\\" int4 [not null]\\n  \\\"outcome_holder_type\\\" varchar(100)\\n  \\\"outcome_holder_id\\\" int4\\n  \\\"rank\\\" varchar(100)\\n  \\\"wins\\\" varchar(100)\\n  \\\"losses\\\" varchar(100)\\n  \\\"ties\\\" varchar(100)\\n  \\\"undecideds\\\" varchar(100)\\n  \\\"winning_percentage\\\" varchar(100)\\n  \\\"points_scored_for\\\" varchar(100)\\n  \\\"points_scored_against\\\" varchar(100)\\n  
\\\"points_difference\\\" varchar(100)\\n  \\\"standing_points\\\" varchar(100)\\n  \\\"streak_type\\\" varchar(100)\\n  \\\"streak_duration\\\" varchar(100)\\n  \\\"streak_total\\\" varchar(100)\\n  \\\"streak_start\\\" date\\n  \\\"streak_end\\\" date\\n}\\n\\nTable \\\"participants_events\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"participant_type\\\" varchar(100) [not null]\\n  \\\"participant_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"alignment\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n  \\\"event_outcome\\\" varchar(100)\\n  \\\"rank\\\" int4\\n}\\n\\nTable \\\"periods\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"participant_event_id\\\" int4 [not null]\\n  \\\"period_value\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n}\\n\\nTable \\\"person_event_metadata\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"status\\\" varchar(100)\\n  \\\"health\\\" varchar(100)\\n  \\\"weight\\\" varchar(100)\\n  \\\"role_id\\\" int4\\n  \\\"position_id\\\" int4\\n  \\\"team_id\\\" int4\\n  \\\"lineup_slot\\\" int4\\n  \\\"lineup_slot_sequence\\\" int4\\n}\\n\\nTable \\\"person_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"membership_type\\\" varchar(40) [not null]\\n  \\\"membership_id\\\" int4 [not null]\\n  \\\"role_id\\\" int4\\n  \\\"role_status\\\" varchar(40)\\n  \\\"phase_status\\\" varchar(40)\\n  \\\"uniform_number\\\" varchar(20)\\n  \\\"regular_position_id\\\" int4\\n  \\\"regular_position_depth\\\" varchar(40)\\n  \\\"height\\\" varchar(100)\\n  \\\"weight\\\" varchar(100)\\n  \\\"start_date_time\\\" timestamp\\n  \\\"start_season_id\\\" int4\\n  \\\"end_date_time\\\" timestamp\\n  \\\"end_season_id\\\" int4\\n  \\\"entry_reason\\\" varchar(40)\\n  \\\"exit_reason\\\" varchar(40)\\n  \\\"selection_level\\\" int4\\n  \\\"selection_sublevel\\\" int4\\n  
\\\"selection_overall\\\" int4\\n}\\n\\nTable \\\"persons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"person_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"gender\\\" varchar(20)\\n  \\\"birth_date\\\" varchar(30)\\n  \\\"death_date\\\" varchar(30)\\n  \\\"birth_location_id\\\" int4\\n  \\\"hometown_location_id\\\" int4\\n  \\\"residence_location_id\\\" int4\\n  \\\"death_location_id\\\" int4\\n}\\n\\nTable \\\"persons_documents\\\" {\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"persons_media\\\" {\\n  \\\"person_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"positions\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"abbreviation\\\" varchar(100) [not null]\\n}\\n\\nTable \\\"publishers\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"publisher_key\\\" varchar(100) [not null]\\n  \\\"publisher_name\\\" varchar(100)\\n}\\n\\nTable \\\"roles\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"role_key\\\" varchar(100) [not null]\\n  \\\"role_name\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n}\\n\\nTable \\\"seasons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"season_key\\\" int4 [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"league_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"sites\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"site_key\\\" int4 [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"location_id\\\" int4\\n}\\n\\nTable \\\"soccer_defensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"shots_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_penalty_shot_allowed\\\" varchar(100)\\n  \\\"goals_against_average\\\" varchar(100)\\n  \\\"goals_against_total\\\" varchar(100)\\n  \\\"saves\\\" varchar(100)\\n  \\\"save_percentage\\\" 
varchar(100)\\n  \\\"catches_punches\\\" varchar(100)\\n  \\\"shots_on_goal_total\\\" varchar(100)\\n  \\\"shots_shootout_total\\\" varchar(100)\\n  \\\"shots_shootout_allowed\\\" varchar(100)\\n  \\\"shots_blocked\\\" varchar(100)\\n  \\\"shutouts\\\" varchar(100)\\n}\\n\\nTable \\\"soccer_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"period_value\\\" varchar(100)\\n  \\\"period_time_elapsed\\\" varchar(100)\\n  \\\"period_time_remaining\\\" varchar(100)\\n  \\\"minutes_elapsed\\\" varchar(100)\\n  \\\"period_minute_elapsed\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"soccer_foul_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"fouls_suffered\\\" varchar(100)\\n  \\\"fouls_commited\\\" varchar(100)\\n  \\\"cautions_total\\\" varchar(100)\\n  \\\"cautions_pending\\\" varchar(100)\\n  \\\"caution_points_total\\\" varchar(100)\\n  \\\"caution_points_pending\\\" varchar(100)\\n  \\\"ejections_total\\\" varchar(100)\\n}\\n\\nTable \\\"soccer_offensive_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"goals_game_winning\\\" varchar(100)\\n  \\\"goals_game_tying\\\" varchar(100)\\n  \\\"goals_overtime\\\" varchar(100)\\n  \\\"goals_shootout\\\" varchar(100)\\n  \\\"goals_total\\\" varchar(100)\\n  \\\"assists_game_winning\\\" varchar(100)\\n  \\\"assists_game_tying\\\" varchar(100)\\n  \\\"assists_overtime\\\" varchar(100)\\n  \\\"assists_total\\\" varchar(100)\\n  \\\"points\\\" varchar(100)\\n  \\\"shots_total\\\" varchar(100)\\n  \\\"shots_on_goal_total\\\" varchar(100)\\n  \\\"shots_hit_frame\\\" varchar(100)\\n  \\\"shots_penalty_shot_taken\\\" varchar(100)\\n  \\\"shots_penalty_shot_scored\\\" varchar(100)\\n  \\\"shots_penalty_shot_missed\\\" varchar(40)\\n  \\\"shots_penalty_shot_percentage\\\" varchar(40)\\n  \\\"shots_shootout_taken\\\" varchar(40)\\n  \\\"shots_shootout_scored\\\" 
varchar(40)\\n  \\\"shots_shootout_missed\\\" varchar(40)\\n  \\\"shots_shootout_percentage\\\" varchar(40)\\n  \\\"giveaways\\\" varchar(40)\\n  \\\"offsides\\\" varchar(40)\\n  \\\"corner_kicks\\\" varchar(40)\\n  \\\"hat_tricks\\\" varchar(40)\\n}\\n\\nTable \\\"standing_subgroups\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"standing_id\\\" int4 [not null]\\n  \\\"affiliation_id\\\" int4 [not null]\\n}\\n\\nTable \\\"standings\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"standing_type\\\" varchar(100)\\n  \\\"sub_season_id\\\" int4 [not null]\\n  \\\"last_updated\\\" varchar(100)\\n  \\\"duration_scope\\\" varchar(100)\\n  \\\"competition_scope\\\" varchar(100)\\n  \\\"competition_scope_id\\\" varchar(100)\\n  \\\"alignment_scope\\\" varchar(100)\\n  \\\"site_scope\\\" varchar(100)\\n  \\\"scoping_label\\\" varchar(100)\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"source\\\" varchar(100)\\n}\\n\\nTable \\\"stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"stat_repository_type\\\" varchar(100)\\n  \\\"stat_repository_id\\\" int4 [not null]\\n  \\\"stat_holder_type\\\" varchar(100)\\n  \\\"stat_holder_id\\\" int4\\n  \\\"stat_coverage_type\\\" varchar(100)\\n  \\\"stat_coverage_id\\\" int4\\n  \\\"context\\\" varchar(40) [not null]\\n}\\n\\nTable \\\"sub_periods\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"period_id\\\" int4 [not null]\\n  \\\"sub_period_value\\\" varchar(100)\\n  \\\"score\\\" varchar(100)\\n}\\n\\nTable \\\"sub_seasons\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sub_season_key\\\" varchar(100) [not null]\\n  \\\"season_id\\\" int4 [not null]\\n  \\\"sub_season_type\\\" varchar(100) [not null]\\n  \\\"start_date_time\\\" timestamp\\n  \\\"end_date_time\\\" timestamp\\n}\\n\\nTable \\\"team_american_football_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"yards_per_attempt\\\" varchar(100)\\n  \\\"average_starting_position\\\" 
varchar(100)\\n  \\\"timeouts\\\" varchar(100)\\n  \\\"time_of_possession\\\" varchar(100)\\n  \\\"turnover_ratio\\\" varchar(100)\\n}\\n\\nTable \\\"team_phases\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"start_season_id\\\" int4\\n  \\\"end_season_id\\\" int4\\n  \\\"affiliation_id\\\" int4 [not null]\\n  \\\"start_date_time\\\" varchar(100)\\n  \\\"end_date_time\\\" varchar(100)\\n  \\\"phase_status\\\" varchar(40)\\n  \\\"role_id\\\" int4\\n}\\n\\nTable \\\"teams\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"team_key\\\" varchar(100) [not null]\\n  \\\"publisher_id\\\" int4 [not null]\\n  \\\"home_site_id\\\" int4\\n}\\n\\nTable \\\"teams_documents\\\" {\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"document_id\\\" int4 [not null]\\n}\\n\\nTable \\\"teams_media\\\" {\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"media_id\\\" int4 [not null]\\n}\\n\\nTable \\\"tennis_action_points\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sub_period_id\\\" varchar(100)\\n  \\\"sequence_number\\\" varchar(100)\\n  \\\"win_type\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_action_volleys\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"sequence_number\\\" varchar(100)\\n  \\\"tennis_action_points_id\\\" int4\\n  \\\"landing_location\\\" varchar(100)\\n  \\\"swing_type\\\" varchar(100)\\n  \\\"result\\\" varchar(100)\\n  \\\"spin_type\\\" varchar(100)\\n  \\\"trajectory_details\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_event_states\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"current_state\\\" int4\\n  \\\"sequence_number\\\" int4\\n  \\\"tennis_set\\\" varchar(100)\\n  \\\"game\\\" varchar(100)\\n  \\\"server_person_id\\\" int4\\n  \\\"server_score\\\" varchar(100)\\n  \\\"receiver_person_id\\\" int4\\n  \\\"receiver_score\\\" varchar(100)\\n  \\\"service_number\\\" varchar(100)\\n  \\\"context\\\" varchar(40)\\n}\\n\\nTable \\\"tennis_return_stats\\\" {\\n  
\\\"id\\\" int4 [not null, increment]\\n  \\\"returns_played\\\" varchar(100)\\n  \\\"matches_played\\\" varchar(100)\\n  \\\"first_service_return_points_won\\\" varchar(100)\\n  \\\"first_service_return_points_won_pct\\\" varchar(100)\\n  \\\"second_service_return_points_won\\\" varchar(100)\\n  \\\"second_service_return_points_won_pct\\\" varchar(100)\\n  \\\"return_games_played\\\" varchar(100)\\n  \\\"return_games_won\\\" varchar(100)\\n  \\\"return_games_won_pct\\\" varchar(100)\\n  \\\"break_points_played\\\" varchar(100)\\n  \\\"break_points_converted\\\" varchar(100)\\n  \\\"break_points_converted_pct\\\" varchar(100)\\n}\\n\\nTable \\\"tennis_service_stats\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"services_played\\\" varchar(100)\\n  \\\"matches_played\\\" varchar(100)\\n  \\\"aces\\\" varchar(100)\\n  \\\"first_services_good\\\" varchar(100)\\n  \\\"first_services_good_pct\\\" varchar(100)\\n  \\\"first_service_points_won\\\" varchar(100)\\n  \\\"first_service_points_won_pct\\\" varchar(100)\\n  \\\"second_service_points_won\\\" varchar(100)\\n  \\\"second_service_points_won_pct\\\" varchar(100)\\n  \\\"service_games_played\\\" varchar(100)\\n  \\\"service_games_won\\\" varchar(100)\\n  \\\"service_games_won_pct\\\" varchar(100)\\n  \\\"break_points_played\\\" varchar(100)\\n  \\\"break_points_saved\\\" varchar(100)\\n  \\\"break_points_saved_pct\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_moneylines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line\\\" varchar(100)\\n  \\\"line_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_odds_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 
[not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"numerator\\\" varchar(100)\\n  \\\"denominator\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n  \\\"payout_calculation\\\" varchar(100)\\n  \\\"payout_amount\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_runlines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line\\\" varchar(100)\\n  \\\"line_opening\\\" varchar(100)\\n  \\\"line_value\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_straight_spread_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line_value\\\" varchar(100)\\n  \\\"line_value_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"wagering_total_score_lines\\\" {\\n  \\\"id\\\" int4 [not null, increment]\\n  \\\"bookmaker_id\\\" int4 [not null]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"date_time\\\" timestamp\\n  \\\"team_id\\\" int4 [not null]\\n  \\\"person_id\\\" int4\\n  \\\"rotation_key\\\" varchar(100)\\n  \\\"comment\\\" varchar(100)\\n  \\\"vigorish\\\" varchar(100)\\n  \\\"line_over\\\" varchar(100)\\n  \\\"line_under\\\" varchar(100)\\n  \\\"total\\\" varchar(100)\\n  \\\"total_opening\\\" varchar(100)\\n  \\\"prediction\\\" varchar(100)\\n}\\n\\nTable \\\"weather_conditions\\\" {\\n  
\\\"id\\\" int4 [not null, increment]\\n  \\\"event_id\\\" int4 [not null]\\n  \\\"temperature\\\" varchar(100)\\n  \\\"temperature_units\\\" varchar(40)\\n  \\\"humidity\\\" varchar(100)\\n  \\\"clouds\\\" varchar(100)\\n  \\\"wind_direction\\\" varchar(100)\\n  \\\"wind_velocity\\\" varchar(100)\\n  \\\"weather_code\\\" varchar(100)\\n}\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": \"https://github.com/yugabyte/yugabyte-db/blob/master/sample/sportsdb_tables.sql\"\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/sports/team_roster_management/verify.py",
    "content": "\"\"\"\nVerification script for PostgreSQL Sports Task 2: Team Roster Management Operations\n\"\"\"\n\nimport os\nimport sys\nimport psycopg2\nfrom decimal import Decimal\n\ndef rows_match(actual_row, expected_row):\n    \"\"\"\n    Compare two rows with appropriate tolerance.\n    For Decimal types: allows 0.001 tolerance\n    For other types: requires exact match\n    \"\"\"\n    if len(actual_row) != len(expected_row):\n        return False\n    \n    for actual, expected in zip(actual_row, expected_row):\n        if isinstance(actual, Decimal) and isinstance(expected, Decimal):\n            if abs(float(actual) - float(expected)) > 0.001:\n                return False\n        elif isinstance(actual, float) and isinstance(expected, float):\n            if abs(actual - expected) > 0.001:\n                return False\n        elif actual != expected:\n            return False\n    \n    return True\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\", \"sports\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\", \"postgres\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\", \"postgres\")\n    }\n\ndef verify_player_evaluation_table(conn) -> bool:\n    \"\"\"Verify the final state of player_evaluation table after all operations.\"\"\"\n    with conn.cursor() as cur:        \n        # Get actual results from the created table\n        cur.execute(\"\"\"\n            SELECT person_id, batting_avg, home_runs, rbis, games_played, performance_score\n            FROM player_evaluation\n            ORDER BY person_id\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query that simulates all steps:\n        # 1. Initial insert (step 2)\n        # 2. 
Update based on injuries (step 4)\n        cur.execute(\"\"\"\n            WITH initial_players AS (\n                SELECT \n                    s.stat_holder_id AS person_id,\n                    SUM(bos.hits)      AS total_hits,\n                    SUM(bos.at_bats)   AS total_at_bats,\n                    CASE \n                        WHEN SUM(bos.at_bats) > 0 \n                        THEN 1.0 * SUM(bos.hits) / SUM(bos.at_bats)\n                        ELSE 0 \n                    END                AS batting_avg,\n                    SUM(bos.home_runs) AS home_runs,\n                    SUM(bos.rbi)       AS rbis\n                FROM stats s\n                JOIN baseball_offensive_stats bos\n                ON s.stat_repository_id = bos.id\n                WHERE s.stat_holder_type = 'persons'\n                AND s.stat_repository_type = 'baseball_offensive_stats'\n                GROUP BY s.stat_holder_id\n            ),\n            game_counts AS (\n                SELECT \n                    person_id,\n                    COUNT(DISTINCT event_id) AS games_played\n                FROM person_event_metadata\n                GROUP BY person_id\n            ),\n            players_with_games AS (\n                SELECT \n                    ip.person_id,\n                    ip.batting_avg,\n                    ip.home_runs,\n                    ip.rbis,\n                    COALESCE(gc.games_played, 0) AS games_played,\n                    (ip.batting_avg * 1000)\n                    + (COALESCE(ip.home_runs, 0) * 5)\n                    + (COALESCE(ip.rbis, 0) * 2) AS initial_score\n                FROM initial_players ip\n                LEFT JOIN game_counts gc ON ip.person_id = gc.person_id\n                WHERE COALESCE(gc.games_played, 0) >= 10\n            ),\n            injury_info AS (\n                SELECT \n                    person_id,\n                    COUNT(*) AS injury_count,\n                    MAX(CASE WHEN end_date_time IS 
NULL THEN 1 ELSE 0 END) AS has_active_injury\n                FROM injury_phases\n                GROUP BY person_id\n            ),\n            adjusted_scores AS (\n                SELECT \n                    pwg.person_id,\n                    pwg.batting_avg,\n                    pwg.home_runs,\n                    pwg.rbis,\n                    pwg.games_played,\n                    GREATEST(\n                        CASE \n                            WHEN COALESCE(ii.has_active_injury, 0) = 1 AND COALESCE(ii.injury_count, 0) > 2 \n                                THEN pwg.initial_score * 0.8 * 0.9\n                            WHEN COALESCE(ii.has_active_injury, 0) = 1 \n                                THEN pwg.initial_score * 0.8\n                            WHEN COALESCE(ii.injury_count, 0) > 2 \n                                THEN pwg.initial_score * 0.9\n                            ELSE pwg.initial_score\n                        END,\n                        0\n                    ) AS performance_score\n                FROM players_with_games pwg\n                LEFT JOIN injury_info ii ON ii.person_id = pwg.person_id\n            )\n            SELECT \n                person_id,\n                batting_avg,\n                home_runs,\n                rbis,\n                games_played,\n                performance_score\n            FROM adjusted_scores\n            ORDER BY person_id;\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} player evaluation records, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:  # Only show first 5 mismatches\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got 
{actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches in player_evaluation: {mismatches}\")\n            return False\n\n        print(f\"✅ Player evaluation table is correct ({len(actual_results)} records)\")\n        return True\n\ndef verify_injury_status_table(conn) -> bool:\n    \"\"\"Verify the player_injury_status table and data.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results\n        cur.execute(\"\"\"\n            SELECT person_id, injury_count, last_injury_date, current_status\n            FROM player_injury_status\n            ORDER BY person_id\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query - get players from player_evaluation\n        cur.execute(\"\"\"\n            WITH player_list AS (\n                SELECT DISTINCT person_id \n                FROM player_evaluation\n            ),\n            injury_counts AS (\n                SELECT \n                    person_id,\n                    COUNT(*) as injury_count,\n                    MAX(start_date_time::date) as last_injury_date,\n                    MAX(CASE WHEN end_date_time IS NULL THEN 1 ELSE 0 END) as has_active_injury\n                FROM injury_phases\n                GROUP BY person_id\n            )\n            SELECT \n                pl.person_id,\n                COALESCE(ic.injury_count, 0) as injury_count,\n                ic.last_injury_date,\n                CASE \n                    WHEN COALESCE(ic.has_active_injury, 0) = 1 THEN 'injured'\n                    ELSE 'healthy'\n                END as current_status\n            FROM player_list pl\n            LEFT JOIN injury_counts ic ON pl.person_id = ic.person_id\n            ORDER BY pl.person_id\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} injury 
status records, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for i, (actual, expected) in enumerate(zip(actual_results, expected_results)):\n            if not rows_match(actual, expected):\n                if mismatches < 5:\n                    print(f\"❌ Row {i+1} mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches in player_injury_status: {mismatches}\")\n            return False\n\n        print(f\"✅ Player injury status table is correct ({len(actual_results)} records)\")\n        return True\n\n\ndef verify_summary_table(conn) -> bool:\n    \"\"\"Verify the team_performance_summary table.\"\"\"\n    with conn.cursor() as cur:\n        # Get actual results\n        cur.execute(\"\"\"\n            SELECT metric_name, metric_value\n            FROM team_performance_summary\n            ORDER BY metric_name\n        \"\"\")\n        actual_results = cur.fetchall()\n        \n        # Execute ground truth query\n        cur.execute(\"\"\"\n            WITH player_data AS (\n                SELECT \n                    COUNT(*) as total_players,\n                    AVG(batting_avg) as avg_batting_average,\n                    SUM(home_runs) as total_home_runs,\n                    AVG(performance_score) as avg_performance_score\n                FROM player_evaluation\n            ),\n            health_data AS (\n                SELECT \n                    SUM(CASE WHEN current_status = 'injured' THEN 1 ELSE 0 END) as injured_count,\n                    SUM(CASE WHEN current_status = 'healthy' THEN 1 ELSE 0 END) as healthy_count\n                FROM player_injury_status\n                WHERE person_id IN (SELECT person_id FROM player_evaluation)\n            )\n            SELECT metric_name, metric_value::DECIMAL\n            FROM (\n                SELECT 'avg_batting_average' as metric_name, avg_batting_average as 
metric_value FROM player_data\n                UNION ALL\n                SELECT 'avg_performance_score', avg_performance_score FROM player_data\n                UNION ALL\n                SELECT 'healthy_player_count', healthy_count FROM health_data\n                UNION ALL\n                SELECT 'injured_player_count', injured_count FROM health_data\n                UNION ALL\n                SELECT 'total_home_runs', total_home_runs FROM player_data\n                UNION ALL\n                SELECT 'total_players', total_players FROM player_data\n            ) metrics\n            ORDER BY metric_name\n        \"\"\")\n        expected_results = cur.fetchall()\n\n        if len(actual_results) != len(expected_results):\n            print(f\"❌ Expected {len(expected_results)} metrics, got {len(actual_results)}\")\n            return False\n\n        mismatches = 0\n        for actual, expected in zip(actual_results, expected_results):\n            if not rows_match(actual, expected):\n                if mismatches < 5:\n                    print(f\"❌ Metric mismatch: expected {expected}, got {actual}\")\n                mismatches += 1\n\n        if mismatches > 0:\n            print(f\"❌ Total mismatches in summary table: {mismatches}\")\n            return False\n        \n        print(f\"✅ Team performance summary table is correct ({len(actual_results)} metrics)\")\n        return True\n\ndef main():\n    \"\"\"Main verification function.\"\"\"\n    print(\"=\" * 50)\n    print(\"Verifying Sports Task 2: Team Roster Management Operations\")\n    print(\"=\" * 50)\n\n    # Get connection parameters\n    conn_params = get_connection_params()\n\n    if not conn_params[\"database\"]:\n        print(\"❌ No database specified\")\n        sys.exit(1)\n\n    try:\n        # Connect to database\n        conn = psycopg2.connect(**conn_params)\n\n        # Verify all steps\n        success = (\n            verify_player_evaluation_table(conn) and \n            
verify_injury_status_table(conn) and\n            verify_summary_table(conn)\n        )\n\n        conn.close()\n\n        if success:\n            print(\"\\n🎉 Task verification: PASS\")\n            sys.exit(0)\n        else:\n            print(\"\\n❌ Task verification: FAIL\")\n            sys.exit(1)\n\n    except psycopg2.Error as e:\n        print(f\"❌ Database error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"❌ Verification error: {e}\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "tasks/postgres/standard/vectors/dba_vector_analysis/description.md",
    "content": "# PostgreSQL Vector Database Analysis\n\n> Analyze and optimize a pgvector-powered database to understand storage patterns, performance characteristics, and data quality for embeddings in production workloads.\n\n## What's this about?\n\nYou've got a PostgreSQL database running with the vector extension that stores embeddings for RAG (document similarity search, image recognition), or other ML workloads.\nYour job is to dive deep into this vector database and figure out what's going on under the hood.\nYou need to understand:\n\n- how vectors are stored\n- how much space they're taking up\n- whether indexes are working properly\n- if there are any data quality issues lurking around\n\n## What you need to investigate\n\nFirst, get familiar with what you're working with:\n\n- Check vector extension status: ensuring it's installed properly, check version, identify any configuration issues\n- Identify all vector columns across entire database: providing me columns, types of columns, and vector dim (dimensions)\n- Map the vector landscape: understand relationships between vector tables and regular tables, foreign keys, dependencies\n\nVectors can eat up a lot of storage, so let's see where the bytes are going:\n\n- Calculate vector storage overhead: measure how much space vectors take compared to regular columns in same tables\n- Analyze table sizes: identify which vector tables are biggest storage consumers, break down by table\n- Understand growth patterns: examine record counts and project future storage needs based on current data\n\nVectors without proper indexes are painfully slow, so investigate:\n\n- Catalog vector indexes: find all HNSW and IVFFlat indexes, document their configurations and parameters\n- Measure index effectiveness: determine if indexes are actually being used and helping query performance\n- Identify optimization opportunities: spot missing indexes, suboptimal configurations, unused indexes\n\nBad vector data makes everything 
worse:\n\n- Hunt for data issues: locate NULL vectors, dimension mismatches, and corrupted embeddings that could break queries\n- Validate consistency: ensure vectors in each column have consistent dimensions across all rows\n- Check for outliers: find vectors that might be skewing similarity calculations or causing performance issues\n\n## Your deliverables\n\nCreate these analysis tables and populate them with your findings:\n\n### `vector_analysis_columns`\n\nComplete catalog of every vector column you find:\n\n```sql\nCREATE TABLE vector_analysis_columns (\n    schema VARCHAR(50),\n    table_name VARCHAR(100),\n    column_name VARCHAR(100),\n    dimensions INTEGER,\n    data_type VARCHAR(50),\n    has_constraints BOOLEAN,\n    rows BIGINT\n);\n```\n\n### `vector_analysis_storage_consumption`\n\nShow exactly where storage is being consumed:\n\n```sql\nCREATE TABLE vector_analysis_storage_consumption (\n    schema VARCHAR(50),\n    table_name VARCHAR(100),\n    total_size_bytes BIGINT,\n    vector_data_bytes BIGINT,\n    regular_data_bytes BIGINT,\n    vector_storage_pct NUMERIC(5,2),\n    row_count BIGINT\n);\n```\n\n### `vector_analysis_indices`\n\nDocument all vector indexes and their characteristics:\n\n```sql\nCREATE TABLE vector_analysis_indices (\n    schema VARCHAR(50),\n    table_name VARCHAR(100),\n    column_name VARCHAR(100),\n    index_name VARCHAR(100),\n    index_type VARCHAR(50), -- 'hnsw', 'ivfflat', etc.\n    index_size_bytes BIGINT\n);\n```\n\nUse PostgreSQL system catalogs, pgvector-specific views, and storage analysis functions to gather comprehensive metrics about the vector database implementation.\n"
  },
  {
    "path": "tasks/postgres/standard/vectors/dba_vector_analysis/ground_truth.sql",
    "content": "-- Ground Truth Data for Vector Database Analysis Task\n-- This defines the exact expected results that candidates should discover and report\n\n/*\n================================================================================\nEXPECTED VECTOR DATABASE STRUCTURE (created by vectors_setup.py)\n================================================================================\n\nTables with Vector Columns:\n1. documents.embedding (vector(1536))\n2. document_chunks.embedding (vector(1536))\n3. user_queries.embedding (vector(1536))\n\nVector Indexes:\n1. documents_embedding_idx (HNSW on documents.embedding)\n2. chunks_embedding_idx (HNSW on document_chunks.embedding)\n3. queries_embedding_idx (HNSW on user_queries.embedding)\n\nExpected Data Counts:\n- documents: 10 records\n- document_chunks: ~40-70 records (3-7 chunks per document)\n- user_queries: 10 records\n- embedding_models: 5 records (metadata)\n- knowledge_base: 5 records (metadata)\n- search_cache: 5 records (metadata)\n\n================================================================================\nDEFINITIVE GROUND TRUTH VERIFICATION DATA\n================================================================================\n*/\n\nBEGIN;\n\n-- Create expected analysis result structure\nCREATE TABLE IF NOT EXISTS expected_vector_column_inventory (\n    table_schema VARCHAR(50) DEFAULT 'public',\n    table_name VARCHAR(100),\n    column_name VARCHAR(100),\n    vector_dimensions INTEGER,\n    data_type VARCHAR(50) DEFAULT 'USER-DEFINED',\n    has_constraints BOOLEAN DEFAULT false,\n    min_estimated_rows BIGINT\n);\n\n-- Insert expected vector column inventory\nINSERT INTO expected_vector_column_inventory (table_name, column_name, vector_dimensions, min_estimated_rows) VALUES\n('documents', 'embedding', 1536, 10),\n('document_chunks', 'embedding', 1536, 30),\n('user_queries', 'embedding', 1536, 10);\n\n-- Create expected storage analysis structure\nCREATE TABLE IF NOT EXISTS 
expected_vector_storage_analysis (\n    table_name VARCHAR(100),\n    has_vector_data BOOLEAN,\n    min_row_count BIGINT,\n    vector_column_exists BOOLEAN,\n    should_have_storage_metrics BOOLEAN DEFAULT true\n);\n\n-- Insert expected storage analysis\nINSERT INTO expected_vector_storage_analysis (table_name, has_vector_data, min_row_count, vector_column_exists) VALUES\n('documents', true, 10, true),\n('document_chunks', true, 30, true),\n('user_queries', true, 10, true),\n('embedding_models', false, 5, false),\n('knowledge_base', false, 5, false),\n('search_cache', false, 5, false);\n\n-- Create expected index analysis structure\nCREATE TABLE IF NOT EXISTS expected_vector_index_analysis (\n    index_name_pattern VARCHAR(100),\n    table_name VARCHAR(100),\n    column_name VARCHAR(100),\n    expected_index_type VARCHAR(50),\n    should_exist BOOLEAN DEFAULT true\n);\n\n-- Insert expected vector index analysis\nINSERT INTO expected_vector_index_analysis (index_name_pattern, table_name, column_name, expected_index_type) VALUES\n('%documents%embedding%', 'documents', 'embedding', 'hnsw'),\n('%chunks%embedding%', 'document_chunks', 'embedding', 'hnsw'),\n('%queries%embedding%', 'user_queries', 'embedding', 'hnsw');\n\n-- Create storage analysis table\nCREATE TABLE vector_storage_analysis (\n    table_name VARCHAR(100),\n    total_size_bytes BIGINT,\n    vector_data_bytes BIGINT,\n    regular_data_bytes BIGINT,\n    vector_storage_pct NUMERIC(5,2),\n    row_count BIGINT,\n    avg_vector_size_bytes INTEGER\n);\n\n-- Populate storage analysis with actual storage metrics\nDO $$\nDECLARE\n    rec RECORD;\n    total_size BIGINT;\n    row_cnt BIGINT;\n    vector_size INTEGER := 1536 * 4; -- 1536 dimensions * 4 bytes per float\nBEGIN\n    FOR rec IN SELECT tablename FROM pg_tables WHERE tablename IN ('documents', 'document_chunks', 'user_queries') LOOP\n        EXECUTE format('SELECT COUNT(*) FROM %I', rec.tablename) INTO row_cnt;\n        SELECT 
pg_total_relation_size(format('public.%I', rec.tablename)) INTO total_size;\n\n        INSERT INTO vector_storage_analysis (\n            table_name, total_size_bytes, row_count, avg_vector_size_bytes,\n            vector_data_bytes, regular_data_bytes, vector_storage_pct\n        ) VALUES (\n            rec.tablename,\n            total_size,\n            row_cnt,\n            vector_size,\n            row_cnt * vector_size,\n            GREATEST(total_size - (row_cnt * vector_size), 0),\n            ROUND((row_cnt * vector_size * 100.0) / NULLIF(total_size, 0), 2)\n        );\n    END LOOP;\nEND $$;\n\n-- Create index analysis table\nCREATE TABLE vector_index_analysis (\n    index_name VARCHAR(100),\n    table_name VARCHAR(100),\n    column_name VARCHAR(100),\n    index_type VARCHAR(50),\n    index_size_bytes BIGINT,\n    index_parameters TEXT,\n    is_valid BOOLEAN\n);\n\n-- Populate index analysis with actual vector indexes\nINSERT INTO vector_index_analysis (index_name, table_name, column_name, index_type, index_size_bytes, is_valid)\nSELECT\n    i.indexname as index_name,\n    i.tablename as table_name,\n    'embedding' as column_name, -- Known from our setup\n    CASE\n        WHEN i.indexdef ILIKE '%hnsw%' THEN 'hnsw'\n        WHEN i.indexdef ILIKE '%ivfflat%' THEN 'ivfflat'\n        ELSE 'unknown'\n    END as index_type,\n    pg_relation_size(format('public.%I', i.indexname)) as index_size_bytes,\n    true as is_valid\nFROM pg_indexes i\nWHERE (i.indexdef ILIKE '%vector%' OR i.indexdef ILIKE '%hnsw%' OR i.indexdef ILIKE '%ivfflat%')\nAND i.tablename IN ('documents', 'document_chunks', 'user_queries')\nORDER BY i.tablename, i.indexname;\n\n-- Create data quality analysis table\nCREATE TABLE vector_data_quality (\n    table_name VARCHAR(100),\n    column_name VARCHAR(100),\n    quality_check_type VARCHAR(50),\n    total_records BIGINT,\n    issue_count BIGINT,\n    quality_status VARCHAR(20),\n    details TEXT\n);\n\n-- Populate data quality analysis with 
actual checks\nDO $$\nDECLARE\n    rec RECORD;\n    total_cnt BIGINT;\n    null_cnt BIGINT;\nBEGIN\n    FOR rec IN SELECT tablename FROM pg_tables WHERE tablename IN ('documents', 'document_chunks', 'user_queries') LOOP\n        -- Count total records\n        EXECUTE format('SELECT COUNT(*) FROM %I', rec.tablename) INTO total_cnt;\n\n        -- Count NULL vectors\n        EXECUTE format('SELECT COUNT(*) FROM %I WHERE embedding IS NULL', rec.tablename) INTO null_cnt;\n\n        -- Insert NULL_CHECK result\n        INSERT INTO vector_data_quality (\n            table_name, column_name, quality_check_type,\n            total_records, issue_count, quality_status\n        ) VALUES (\n            rec.tablename, 'embedding', 'NULL_CHECK',\n            total_cnt, null_cnt,\n            CASE WHEN null_cnt = 0 THEN 'GOOD' ELSE 'WARNING' END\n        );\n\n        -- Insert DIMENSION_CHECK result (all vectors in our setup are 1536-dimensional)\n        INSERT INTO vector_data_quality (\n            table_name, column_name, quality_check_type,\n            total_records, issue_count, quality_status\n        ) VALUES (\n            rec.tablename, 'embedding', 'DIMENSION_CHECK',\n            total_cnt - null_cnt, 0, 'GOOD'\n        );\n    END LOOP;\nEND $$;\n\n-- ============================================================================\n-- GROUND TRUTH IMPLEMENTATION\n-- ============================================================================\n-- This is the correct analysis implementation that candidates should produce\n\n-- Create vector_analysis_columns table and populate it\nCREATE TABLE vector_analysis_columns (\n    schema VARCHAR(50),\n    table_name VARCHAR(100),\n    column_name VARCHAR(100),\n    dimensions INTEGER,\n    data_type VARCHAR(50),\n    has_constraints BOOLEAN,\n    rows BIGINT\n);\n\n-- Discover and insert vector columns\nINSERT INTO vector_analysis_columns (schema, table_name, column_name, dimensions, data_type, has_constraints, rows)\nSELECT\n   
 'public' as schema,\n    c.table_name,\n    c.column_name,\n    1536 as dimensions, -- pgvector embedding dimension\n    'USER-DEFINED' as data_type,\n    false as has_constraints,\n    -- Get actual row count using dynamic query\n    CASE c.table_name\n        WHEN 'documents' THEN (SELECT COUNT(*) FROM documents)\n        WHEN 'document_chunks' THEN (SELECT COUNT(*) FROM document_chunks)\n        WHEN 'user_queries' THEN (SELECT COUNT(*) FROM user_queries)\n        ELSE 0\n    END as rows\nFROM information_schema.columns c\nWHERE c.data_type = 'USER-DEFINED'\nAND c.udt_name = 'vector'\nORDER BY c.table_name, c.column_name;\n\n-- Create vector_analysis_storage_consumption table\nCREATE TABLE vector_analysis_storage_consumption (\n    schema VARCHAR(50),\n    table_name VARCHAR(100),\n    total_size_bytes BIGINT,\n    vector_data_bytes BIGINT,\n    regular_data_bytes BIGINT,\n    vector_storage_pct NUMERIC(5,2),\n    row_count BIGINT\n);\n\n-- Populate storage analysis for vector tables\nDO $$\nDECLARE\n    rec RECORD;\n    total_size BIGINT;\n    row_cnt BIGINT;\n    vector_size INTEGER := 1536 * 4; -- 1536 dimensions * 4 bytes per float\nBEGIN\n    FOR rec IN\n        SELECT DISTINCT c.table_name\n        FROM information_schema.columns c\n        WHERE c.data_type = 'USER-DEFINED'\n        AND c.udt_name = 'vector'\n    LOOP\n        -- Get actual row count\n        EXECUTE format('SELECT COUNT(*) FROM %I', rec.table_name) INTO row_cnt;\n\n        -- Get actual table size\n        SELECT pg_total_relation_size(format('public.%I', rec.table_name)) INTO total_size;\n\n        -- Insert analysis results\n        INSERT INTO vector_analysis_storage_consumption (\n            schema, table_name, total_size_bytes, vector_data_bytes,\n            regular_data_bytes, vector_storage_pct, row_count\n        ) VALUES (\n            'public',\n            rec.table_name,\n            total_size,\n            row_cnt * vector_size,\n            GREATEST(total_size - 
(row_cnt * vector_size), 0),\n            ROUND((row_cnt * vector_size * 100.0) / NULLIF(total_size, 0), 2),\n            row_cnt\n        );\n    END LOOP;\nEND $$;\n\n-- Create vector_analysis_indices table\nCREATE TABLE vector_analysis_indices (\n    schema VARCHAR(50),\n    table_name VARCHAR(100),\n    column_name VARCHAR(100),\n    index_name VARCHAR(100),\n    index_type VARCHAR(50),\n    index_size_bytes BIGINT\n);\n\n-- Populate index analysis for vector indexes\nINSERT INTO vector_analysis_indices (schema, table_name, column_name, index_name, index_type, index_size_bytes)\nSELECT\n    i.schemaname as schema,\n    i.tablename as table_name,\n    'embedding' as column_name, -- known from our setup\n    i.indexname as index_name,\n    CASE\n        WHEN i.indexdef ILIKE '%hnsw%' THEN 'hnsw'\n        WHEN i.indexdef ILIKE '%ivfflat%' THEN 'ivfflat'\n        ELSE 'unknown'\n    END as index_type,\n    pg_relation_size(format('public.%I', i.indexname)) as index_size_bytes\nFROM pg_indexes i\nWHERE (i.indexdef ILIKE '%hnsw%' OR i.indexdef ILIKE '%ivfflat%')\nAND i.tablename IN (\n    SELECT DISTINCT table_name\n    FROM information_schema.columns\n    WHERE data_type = 'USER-DEFINED' AND udt_name = 'vector'\n)\nORDER BY i.tablename, i.indexname;\n\nCOMMIT;\n\n-- ============================================================================\n-- VERIFICATION HELPER QUERIES\n-- ============================================================================\n\n-- Query to check actual vector columns in the database\n/*\nSELECT\n    table_schema,\n    table_name,\n    column_name,\n    data_type,\n    udt_name\nFROM information_schema.columns\nWHERE data_type = 'USER-DEFINED'\nAND udt_name = 'vector'\nORDER BY table_name, column_name;\n*/\n\n-- Query to check actual vector indexes\n/*\nSELECT\n    schemaname,\n    tablename,\n    indexname,\n    indexdef\nFROM pg_indexes\nWHERE indexdef ILIKE '%vector%'\n   OR indexdef ILIKE '%hnsw%'\n   OR indexdef ILIKE 
'%ivfflat%'\nORDER BY tablename, indexname;\n*/\n\n-- Query to check table row counts\n/*\nSELECT\n    'documents' as table_name, COUNT(*) as row_count FROM documents\nUNION ALL\nSELECT\n    'document_chunks' as table_name, COUNT(*) as row_count FROM document_chunks\nUNION ALL\nSELECT\n    'user_queries' as table_name, COUNT(*) as row_count FROM user_queries\nORDER BY table_name;\n*/\n\n-- Query to check pgvector extension\n/*\nSELECT extname, extversion\nFROM pg_extension\nWHERE extname = 'vector';\n*/\n"
  },
  {
    "path": "tasks/postgres/standard/vectors/dba_vector_analysis/meta.json",
    "content": "{\n  \"task_id\": \"dba_vector_analysis\",\n  \"task_name\": \"DBA Vector Analysis\",\n  \"category_id\": \"vectors\",\n  \"category_name\": \"Vectors\",\n  \"description\": \"Analyze pgvector database storage, identify vector columns, assess space utilization and performance for RAG applications.\",\n  \"author\": \"Fanshi Zhang\",\n  \"created_at\": \"2025-08-18\",\n  \"difficulty\": \"L3\",\n  \"tags\": [\n    \"performance optimization\",\n    \"audit and compliance\",\n    \"statistical aggregation\"\n  ],\n  \"mcp\": [\n    \"postgres\"\n  ],\n  \"meta_data\": {\n    \"stateType\": \"text\",\n    \"stateContent\": \"Table \\\"documents\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"title\\\" text [not null]\\n  \\\"content\\\" text [not null]\\n  \\\"source_url\\\" text\\n  \\\"document_type\\\" varchar(50) [default: 'article']\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"updated_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"word_count\\\" int4\\n  \\\"embedding\\\" public.vector\\n\\n  Indexes {\\n    created_at [type: btree, name: \\\"documents_created_idx\\\"]\\n    embedding [type: hnsw, name: \\\"documents_embedding_idx\\\"]\\n    title [type: btree, name: \\\"documents_title_idx\\\"]\\n    document_type [type: btree, name: \\\"documents_type_idx\\\"]\\n  }\\n}\\n\\nTable \\\"document_chunks\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"document_id\\\" int4\\n  \\\"chunk_index\\\" int4 [not null]\\n  \\\"chunk_text\\\" text [not null]\\n  \\\"chunk_size\\\" int4\\n  \\\"overlap_size\\\" int4 [default: 0]\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"embedding\\\" public.vector\\n\\n  Indexes {\\n    document_id [type: btree, name: \\\"chunks_doc_id_idx\\\"]\\n    embedding [type: hnsw, name: \\\"chunks_embedding_idx\\\"]\\n    chunk_index [type: btree, name: \\\"chunks_index_idx\\\"]\\n  }\\n}\\n\\nTable \\\"user_queries\\\" {\\n  \\\"id\\\" int4 
[pk, not null, increment]\\n  \\\"query_text\\\" text [not null]\\n  \\\"user_id\\\" varchar(100)\\n  \\\"session_id\\\" varchar(100)\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"response_time_ms\\\" int4\\n  \\\"embedding\\\" public.vector\\n\\n  Indexes {\\n    created_at [type: btree, name: \\\"queries_created_idx\\\"]\\n    embedding [type: hnsw, name: \\\"queries_embedding_idx\\\"]\\n    user_id [type: btree, name: \\\"queries_user_idx\\\"]\\n  }\\n}\\n\\nTable \\\"embedding_models\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"model_name\\\" varchar(100) [unique, not null]\\n  \\\"provider\\\" varchar(50) [not null]\\n  \\\"dimensions\\\" int4 [not null]\\n  \\\"max_tokens\\\" int4\\n  \\\"cost_per_token\\\" numeric(10,8)\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"is_active\\\" bool [default: true]\\n}\\n\\nTable \\\"knowledge_base\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"kb_name\\\" varchar(100) [not null]\\n  \\\"description\\\" text\\n  \\\"domain\\\" varchar(50)\\n  \\\"language\\\" varchar(10) [default: 'en']\\n  \\\"total_documents\\\" int4 [default: 0]\\n  \\\"total_chunks\\\" int4 [default: 0]\\n  \\\"total_storage_mb\\\" numeric(10,2)\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"updated_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n}\\n\\nTable \\\"search_cache\\\" {\\n  \\\"id\\\" int4 [pk, not null, increment]\\n  \\\"query_hash\\\" varchar(64) [not null]\\n  \\\"query_text\\\" text [not null]\\n  \\\"results_json\\\" jsonb\\n  \\\"result_count\\\" int4\\n  \\\"search_time_ms\\\" int4\\n  \\\"similarity_threshold\\\" numeric(4,3)\\n  \\\"created_at\\\" timestamp [default: `CURRENT_TIMESTAMP`]\\n  \\\"expires_at\\\" timestamp\\n\\n  Indexes {\\n    expires_at [type: btree, name: \\\"cache_expires_idx\\\"]\\n    query_hash [type: btree, name: \\\"cache_hash_idx\\\"]\\n  }\\n}\\n\\nRef 
\\\"document_chunks_document_id_fkey\\\":\\\"documents\\\".\\\"id\\\" < \\\"document_chunks\\\".\\\"document_id\\\" [delete: cascade]\\n\",\n    \"stateUrl\": null,\n    \"stateOriginalUrl\": null\n  }\n}"
  },
  {
    "path": "tasks/postgres/standard/vectors/dba_vector_analysis/prepare_environment.py",
    "content": "\"\"\"\nEnvironment preparation script for Vector Database DBA Analysis task.\n\nThis script imports and uses the shared vector database setup utilities.\n\"\"\"\n\nimport sys\nimport logging\nfrom pathlib import Path\n\n# Add the vectors directory to import the shared utilities\nsys.path.append(str(Path(__file__).resolve().parents[1]))\n\nfrom vectors_setup import prepare_vector_environment\n\nlogger = logging.getLogger(__name__)\n\n\ndef prepare_environment():\n    \"\"\"Main function to prepare the vector database environment.\"\"\"\n    prepare_vector_environment()\n\n\nif __name__ == \"__main__\":\n    logging.basicConfig(level=logging.INFO)\n    prepare_environment()"
  },
  {
    "path": "tasks/postgres/standard/vectors/dba_vector_analysis/verify.py",
    "content": "\"\"\"\nVerification script for Vector Database DBA Analysis task.\n\nThis script verifies that the candidate has properly analyzed the vector database\nand stored their findings in appropriate result tables.\n\"\"\"\n\nimport logging\nimport psycopg2\nimport os\nimport sys\nfrom typing import Dict, Any\n\nlogger = logging.getLogger(__name__)\n\n\ndef get_connection_params():\n    \"\"\"Get database connection parameters from environment variables.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\"),\n    }\n\n\ndef verify_vector_analysis_columns(conn) -> Dict[str, Any]:\n    \"\"\"Verify the vector_analysis_columns table exists, has correct columns, and contains actual vector columns from the database.\"\"\"\n    results = {'passed': False, 'issues': []}\n    expected_columns = [\n        'schema', 'table_name', 'column_name', 'dimensions', 'data_type', 'has_constraints', 'rows'\n    ]\n    try:\n        with conn.cursor() as cur:\n            # Check if table exists\n            cur.execute(\"\"\"\n                SELECT EXISTS (\n                    SELECT FROM information_schema.tables\n                    WHERE table_name = 'vector_analysis_columns'\n                );\n            \"\"\")\n            if not cur.fetchone()[0]:\n                results['issues'].append(\"vector_analysis_columns table not found\")\n                return results\n\n            # Check columns\n            cur.execute(\"\"\"\n                SELECT column_name FROM information_schema.columns\n                WHERE table_name = 'vector_analysis_columns'\n                ORDER BY column_name;\n            \"\"\")\n            actual_columns = {row[0] for row in cur.fetchall()}\n            missing = 
set(expected_columns) - actual_columns\n            extra = actual_columns - set(expected_columns)\n            if missing:\n                results['issues'].append(f\"Missing columns: {missing}\")\n            if extra:\n                results['issues'].append(f\"Unexpected columns: {extra}\")\n\n            # Check for data\n            cur.execute(\"SELECT COUNT(*) FROM vector_analysis_columns;\")\n            count = cur.fetchone()[0]\n            if count == 0:\n                results['issues'].append(\"No rows found in vector_analysis_columns\")\n                return results\n\n            # Get actual vector columns from the database\n            cur.execute(\"\"\"\n                SELECT table_name, column_name\n                FROM information_schema.columns\n                WHERE data_type = 'USER-DEFINED'\n                AND udt_name = 'vector'\n                ORDER BY table_name, column_name;\n            \"\"\")\n            actual_vector_columns = set(cur.fetchall())\n\n            # Get what the agent found\n            cur.execute(\"\"\"\n                SELECT table_name, column_name\n                FROM vector_analysis_columns\n                ORDER BY table_name, column_name;\n            \"\"\")\n            found_vector_columns = set(cur.fetchall())\n\n            # Check if agent found the actual vector columns\n            missing_vectors = actual_vector_columns - found_vector_columns\n            extra_vectors = found_vector_columns - actual_vector_columns\n\n            if missing_vectors:\n                results['issues'].append(f\"Missing: {missing_vectors}\")\n            if extra_vectors:\n                results['issues'].append(f\"Non-existing: {extra_vectors}\")\n\n            if not missing and not extra and count > 0 and not missing_vectors and not extra_vectors:\n                results['passed'] = True\n\n    except psycopg2.Error as e:\n        results['issues'].append(f\"Database error: {e}\")\n    except Exception as 
e:\n        results['issues'].append(f\"Verification error: {e}\")\n    return results\n\n\ndef verify_vector_analysis_storage_consumption(conn) -> Dict[str, Any]:\n    \"\"\"Verify the vector_analysis_storage_consumption table exists, has correct columns, and analyzes actual vector tables.\"\"\"\n    results = {'passed': False, 'issues': []}\n    expected_columns = [\n        'schema', 'table_name', 'total_size_bytes', 'vector_data_bytes', 'regular_data_bytes', 'vector_storage_pct', 'row_count'\n    ]\n    try:\n        with conn.cursor() as cur:\n            cur.execute(\"\"\"\n                SELECT EXISTS (\n                    SELECT FROM information_schema.tables\n                    WHERE table_name = 'vector_analysis_storage_consumption'\n                );\n            \"\"\")\n            if not cur.fetchone()[0]:\n                results['issues'].append(\"vector_analysis_storage_consumption table not found\")\n                return results\n\n            cur.execute(\"\"\"\n                SELECT column_name FROM information_schema.columns\n                WHERE table_name = 'vector_analysis_storage_consumption'\n                ORDER BY column_name;\n            \"\"\")\n            actual_columns = {row[0] for row in cur.fetchall()}\n            missing = set(expected_columns) - actual_columns\n            extra = actual_columns - set(expected_columns)\n            if missing:\n                results['issues'].append(f\"Missing columns: {missing}\")\n            if extra:\n                results['issues'].append(f\"Unexpected columns: {extra}\")\n\n            cur.execute(\"SELECT COUNT(*) FROM vector_analysis_storage_consumption;\")\n            count = cur.fetchone()[0]\n            if count == 0:\n                results['issues'].append(\"No rows found in vector_analysis_storage_consumption\")\n                return results\n\n            # Get actual tables with vector columns\n            cur.execute(\"\"\"\n                SELECT DISTINCT 
table_name\n                FROM information_schema.columns\n                WHERE data_type = 'USER-DEFINED'\n                AND udt_name = 'vector'\n                ORDER BY table_name;\n            \"\"\")\n            actual_vector_tables = {row[0] for row in cur.fetchall()}\n\n            # Get what the agent analyzed\n            cur.execute(\"\"\"\n                SELECT DISTINCT table_name\n                FROM vector_analysis_storage_consumption\n                ORDER BY table_name;\n            \"\"\")\n            analyzed_tables = {row[0] for row in cur.fetchall()}\n\n            # Check if agent analyzed the actual vector tables\n            missing_tables = actual_vector_tables - analyzed_tables\n            if missing_tables:\n                results['issues'].append(f\"Agent missed analyzing vector tables: {missing_tables}\")\n\n            # Check that analyzed tables actually have vector columns\n            extra_tables = analyzed_tables - actual_vector_tables\n            if extra_tables:\n                results['issues'].append(f\"Agent analyzed non-vector tables: {extra_tables}\")\n\n            if not missing and not extra and count > 0 and not missing_tables and not extra_tables:\n                results['passed'] = True\n\n    except psycopg2.Error as e:\n        results['issues'].append(f\"Database error: {e}\")\n    except Exception as e:\n        results['issues'].append(f\"Verification error: {e}\")\n    return results\n\n\ndef verify_vector_analysis_indices(conn) -> Dict[str, Any]:\n    \"\"\"Verify the vector_analysis_indices table exists, has correct columns, and identifies actual vector indexes.\"\"\"\n    results = {'passed': False, 'issues': []}\n    expected_columns = [\n        'schema', 'table_name', 'column_name', 'index_name', 'index_type', 'index_size_bytes'\n    ]\n    try:\n        with conn.cursor() as cur:\n            cur.execute(\"\"\"\n                SELECT EXISTS (\n                    SELECT FROM 
information_schema.tables\n                    WHERE table_name = 'vector_analysis_indices'\n                );\n            \"\"\")\n            if not cur.fetchone()[0]:\n                results['issues'].append(\"vector_analysis_indices table not found\")\n                return results\n\n            cur.execute(\"\"\"\n                SELECT column_name FROM information_schema.columns\n                WHERE table_name = 'vector_analysis_indices'\n                ORDER BY column_name;\n            \"\"\")\n            actual_columns = {row[0] for row in cur.fetchall()}\n            missing = set(expected_columns) - actual_columns\n            extra = actual_columns - set(expected_columns)\n            if missing:\n                results['issues'].append(f\"Missing columns: {missing}\")\n            if extra:\n                results['issues'].append(f\"Unexpected columns: {extra}\")\n\n            cur.execute(\"SELECT COUNT(*) FROM vector_analysis_indices;\")\n            count = cur.fetchone()[0]\n            if count == 0:\n                results['issues'].append(\"No rows found in vector_analysis_indices\")\n                return results\n\n            # Get actual vector indexes from the database (exclude ground truth table indexes)\n            cur.execute(\"\"\"\n                SELECT schemaname, tablename, indexname\n                FROM pg_indexes\n                WHERE (indexdef ILIKE '%hnsw%' OR indexdef ILIKE '%ivfflat%')\n                AND tablename NOT LIKE '%analysis%'\n                ORDER BY tablename, indexname;\n            \"\"\")\n            actual_vector_indexes = set(cur.fetchall())\n\n            # Get what the agent found\n            cur.execute(\"\"\"\n                SELECT schema, table_name, index_name\n                FROM vector_analysis_indices\n                ORDER BY table_name, index_name;\n            \"\"\")\n            found_indexes = set(cur.fetchall())\n\n            # Check if agent found the actual vector 
indexes\n            missing_indexes = actual_vector_indexes - found_indexes\n            if missing_indexes:\n                results['issues'].append(f\"Agent missed vector indexes: {missing_indexes}\")\n\n            # Allow agent to find more indexes than just vector ones (they might include related indexes)\n            # but at least they should find the vector-specific ones\n\n            if not missing and not extra and count > 0 and not missing_indexes:\n                results['passed'] = True\n\n    except psycopg2.Error as e:\n        results['issues'].append(f\"Database error: {e}\")\n    except Exception as e:\n        results['issues'].append(f\"Verification error: {e}\")\n    return results\n\n\ndef verify_no_extra_analysis_tables(conn) -> Dict[str, Any]:\n    \"\"\"Check that only the required analysis tables exist (no legacy/extra analysis tables).\"\"\"\n    results = {'passed': True, 'issues': []}  # Start with passed=True, more lenient\n    required = {\n        'vector_analysis_columns',\n        'vector_analysis_storage_consumption',\n        'vector_analysis_indices',\n    }\n    try:\n        with conn.cursor() as cur:\n            cur.execute(\"\"\"\n                SELECT table_name FROM information_schema.tables\n                WHERE table_schema = 'public'\n                AND table_name LIKE 'vector_analysis_%';\n            \"\"\")\n            analysis_tables = {row[0] for row in cur.fetchall()}\n\n            # Only flag as issue if there are analysis tables that don't match our required set\n            # Exclude ground truth tables from this check\n            analysis_tables_filtered = {t for t in analysis_tables if not t.startswith('expected_') and not t.startswith('vector_analysis_results')}\n            extra = analysis_tables_filtered - required\n            if extra:\n                results['issues'].append(f\"Found unexpected analysis tables: {extra}\")\n                results['passed'] = False\n\n    except Exception as 
e:\n        results['issues'].append(f\"Verification error: {e}\")\n        results['passed'] = False\n    return results\n\n\n\ndef main():\n    \"\"\"Main verification function for vector analysis deliverables.\"\"\"\n\n    conn_params = get_connection_params()\n    if not conn_params[\"database\"]:\n        print(\"No database specified\")\n        sys.exit(1)\n    try:\n        conn = psycopg2.connect(**conn_params)\n        checks = [\n            (\"vector_analysis_columns\", verify_vector_analysis_columns),\n            (\"vector_analysis_storage_consumption\", verify_vector_analysis_storage_consumption),\n            (\"vector_analysis_indices\", verify_vector_analysis_indices),\n            (\"no_extra_analysis_tables\", verify_no_extra_analysis_tables),\n        ]\n        passed_checks = 0\n        all_issues = []\n        for i, (desc, check_func) in enumerate(checks, 1):\n            result = check_func(conn)\n            if result['passed']:\n                print(f\"  PASSED\")\n                passed_checks += 1\n            else:\n                print(f\"  FAILED\")\n                for issue in result['issues']:\n                    print(f\"    - {issue}\")\n                all_issues.extend(result['issues'])\n            print()\n        conn.close()\n        total_checks = len(checks)\n        print(f\"Results: {passed_checks}/{total_checks} checks passed\")\n        if passed_checks == total_checks:\n            sys.exit(0)\n        elif passed_checks >= total_checks * 0.75:\n            sys.exit(0)\n        else:\n            sys.exit(1)\n    except psycopg2.Error as e:\n        print(f\"Database connection error: {e}\")\n        sys.exit(1)\n    except Exception as e:\n        print(f\"Verification error: {e}\")\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tasks/postgres/standard/vectors/vectors_setup.py",
    "content": "\"\"\"\nShared Vector Database Setup Utilities\n\nThis module provides utilities for setting up a complete PostgreSQL database\nwith pgvector extension and sample RAG-related tables with vector data.\nUsed by all vector database tasks.\n\"\"\"\n\nimport os\nimport logging\nimport psycopg2\nimport json\nimport random\nimport numpy as np\nfrom typing import List\n\nlogger = logging.getLogger(__name__)\n\ndef get_connection_params():\n    \"\"\"Get database connection parameters from environment variables.\"\"\"\n    return {\n        'host': os.getenv('POSTGRES_HOST', 'localhost'),\n        'port': os.getenv('POSTGRES_PORT', '5432'),\n        'user': os.getenv('POSTGRES_USERNAME', 'postgres'),\n        'password': os.getenv('POSTGRES_PASSWORD', 'password'),\n        'database': os.getenv('POSTGRES_DATABASE', 'postgres')\n    }\n\n\ndef generate_mock_embedding(dimensions: int = 1536) -> List[float]:\n    \"\"\"Generate a mock embedding vector with specified dimensions.\"\"\"\n    # Generate random values between -1 and 1, then normalize\n    vector = np.random.uniform(-1, 1, dimensions)\n    # Normalize to unit vector (common practice for embeddings)\n    norm = np.linalg.norm(vector)\n    if norm > 0:\n        vector = vector / norm\n    return vector.tolist()\n\n\ndef create_vector_extension():\n    \"\"\"Create the pgvector extension.\"\"\"\n    conn_params = get_connection_params()\n\n    try:\n        conn = psycopg2.connect(**conn_params)\n        conn.autocommit = True\n\n        with conn.cursor() as cur:\n            logger.info(\"Creating pgvector extension...\")\n            cur.execute(\"CREATE EXTENSION IF NOT EXISTS vector;\")\n            logger.info(\"pgvector extension created successfully\")\n\n        conn.close()\n\n    except psycopg2.Error as e:\n        logger.error(f\"Failed to create pgvector extension: {e}\")\n        raise\n\n\ndef create_vector_tables():\n    \"\"\"Create sample tables with vector columns for RAG 
applications.\"\"\"\n    conn_params = get_connection_params()\n\n    try:\n        conn = psycopg2.connect(**conn_params)\n        conn.autocommit = True\n\n        with conn.cursor() as cur:\n            logger.info(\"Creating vector database tables...\")\n\n            # Create documents table for document embeddings\n            cur.execute(\"\"\"\n                CREATE TABLE IF NOT EXISTS documents (\n                    id SERIAL PRIMARY KEY,\n                    title TEXT NOT NULL,\n                    content TEXT NOT NULL,\n                    source_url TEXT,\n                    document_type VARCHAR(50) DEFAULT 'article',\n                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                    word_count INTEGER,\n                    embedding vector(1536)\n                );\n            \"\"\")\n\n            # Create chunks table for document chunks (common in RAG)\n            cur.execute(\"\"\"\n                CREATE TABLE IF NOT EXISTS document_chunks (\n                    id SERIAL PRIMARY KEY,\n                    document_id INTEGER REFERENCES documents(id) ON DELETE CASCADE,\n                    chunk_index INTEGER NOT NULL,\n                    chunk_text TEXT NOT NULL,\n                    chunk_size INTEGER,\n                    overlap_size INTEGER DEFAULT 0,\n                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                    embedding vector(1536)\n                );\n            \"\"\")\n\n            # Create queries table for storing user queries and their embeddings\n            cur.execute(\"\"\"\n                CREATE TABLE IF NOT EXISTS user_queries (\n                    id SERIAL PRIMARY KEY,\n                    query_text TEXT NOT NULL,\n                    user_id VARCHAR(100),\n                    session_id VARCHAR(100),\n                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                    
response_time_ms INTEGER,\n                    embedding vector(1536)\n                );\n            \"\"\")\n\n            # Create embeddings metadata table\n            cur.execute(\"\"\"\n                CREATE TABLE IF NOT EXISTS embedding_models (\n                    id SERIAL PRIMARY KEY,\n                    model_name VARCHAR(100) NOT NULL UNIQUE,\n                    provider VARCHAR(50) NOT NULL,\n                    dimensions INTEGER NOT NULL,\n                    max_tokens INTEGER,\n                    cost_per_token DECIMAL(10, 8),\n                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                    is_active BOOLEAN DEFAULT TRUE\n                );\n            \"\"\")\n\n            # Create knowledge base table\n            cur.execute(\"\"\"\n                CREATE TABLE IF NOT EXISTS knowledge_base (\n                    id SERIAL PRIMARY KEY,\n                    kb_name VARCHAR(100) NOT NULL,\n                    description TEXT,\n                    domain VARCHAR(50),\n                    language VARCHAR(10) DEFAULT 'en',\n                    total_documents INTEGER DEFAULT 0,\n                    total_chunks INTEGER DEFAULT 0,\n                    total_storage_mb DECIMAL(10, 2),\n                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n                );\n            \"\"\")\n\n            # Create similarity search results cache\n            cur.execute(\"\"\"\n                CREATE TABLE IF NOT EXISTS search_cache (\n                    id SERIAL PRIMARY KEY,\n                    query_hash VARCHAR(64) NOT NULL,\n                    query_text TEXT NOT NULL,\n                    results_json JSONB,\n                    result_count INTEGER,\n                    search_time_ms INTEGER,\n                    similarity_threshold DECIMAL(4, 3),\n                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n                    
expires_at TIMESTAMP\n                );\n            \"\"\")\n\n            logger.info(\"Vector database tables created successfully\")\n\n        conn.close()\n\n    except psycopg2.Error as e:\n        logger.error(f\"Failed to create vector tables: {e}\")\n        raise\n\n\ndef create_vector_indexes():\n    \"\"\"Create indexes for vector columns and other frequently queried fields.\"\"\"\n    conn_params = get_connection_params()\n\n    try:\n        conn = psycopg2.connect(**conn_params)\n        conn.autocommit = True\n\n        with conn.cursor() as cur:\n            logger.info(\"Creating vector indexes...\")\n\n            # Vector indexes using HNSW (Hierarchical Navigable Small World)\n            indexes = [\n                (\"documents_embedding_idx\", \"documents\", \"embedding\", \"hnsw\"),\n                (\"chunks_embedding_idx\", \"document_chunks\", \"embedding\", \"hnsw\"),\n                (\"queries_embedding_idx\", \"user_queries\", \"embedding\", \"hnsw\"),\n            ]\n\n            for idx_name, table_name, column_name, method in indexes:\n                try:\n                    if method == \"hnsw\":\n                        cur.execute(f\"\"\"\n                            CREATE INDEX IF NOT EXISTS {idx_name}\n                            ON {table_name} USING hnsw ({column_name} vector_cosine_ops);\n                        \"\"\")\n                    else:\n                        cur.execute(f\"\"\"\n                            CREATE INDEX IF NOT EXISTS {idx_name}\n                            ON {table_name} USING ivfflat ({column_name} vector_cosine_ops) WITH (lists = 100);\n                        \"\"\")\n                    logger.info(f\"Created index {idx_name} on {table_name}\")\n                except psycopg2.Error as e:\n                    logger.warning(f\"Could not create {method} index {idx_name}: {e}\")\n                    # Try with IVFFlat as fallback\n                    if method == \"hnsw\":\n            
            try:\n                            cur.execute(f\"\"\"\n                                CREATE INDEX IF NOT EXISTS {idx_name}_ivf\n                                ON {table_name} USING ivfflat ({column_name} vector_cosine_ops) WITH (lists = 100);\n                            \"\"\")\n                            logger.info(f\"Created fallback IVFFlat index {idx_name}_ivf on {table_name}\")\n                        except psycopg2.Error as e2:\n                            logger.warning(f\"Could not create fallback index: {e2}\")\n\n            # Regular indexes for performance\n            regular_indexes = [\n                (\"documents_title_idx\", \"documents\", \"title\"),\n                (\"documents_type_idx\", \"documents\", \"document_type\"),\n                (\"documents_created_idx\", \"documents\", \"created_at\"),\n                (\"chunks_doc_id_idx\", \"document_chunks\", \"document_id\"),\n                (\"chunks_index_idx\", \"document_chunks\", \"chunk_index\"),\n                (\"queries_user_idx\", \"user_queries\", \"user_id\"),\n                (\"queries_created_idx\", \"user_queries\", \"created_at\"),\n                (\"cache_hash_idx\", \"search_cache\", \"query_hash\"),\n                (\"cache_expires_idx\", \"search_cache\", \"expires_at\"),\n            ]\n\n            for idx_name, table_name, column_name in regular_indexes:\n                try:\n                    cur.execute(f\"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} ({column_name});\")\n                    logger.debug(f\"Created regular index {idx_name}\")\n                except psycopg2.Error as e:\n                    logger.warning(f\"Could not create regular index {idx_name}: {e}\")\n\n            logger.info(\"Vector indexes created successfully\")\n\n        conn.close()\n\n    except psycopg2.Error as e:\n        logger.error(f\"Failed to create vector indexes: {e}\")\n        raise\n\n\ndef insert_sample_data():\n    \"\"\"Insert sample 
data into vector tables.\"\"\"\n    conn_params = get_connection_params()\n\n    try:\n        conn = psycopg2.connect(**conn_params)\n        conn.autocommit = True\n\n        with conn.cursor() as cur:\n            logger.info(\"Inserting sample data...\")\n\n            # Insert embedding models\n            embedding_models = [\n                ('text-embedding-3-small', 'OpenAI', 1536, 8192, 0.00000002, True),\n                ('text-embedding-3-large', 'OpenAI', 3072, 8192, 0.00000013, True),\n                ('text-embedding-ada-002', 'OpenAI', 1536, 8192, 0.00000010, False),\n                ('all-MiniLM-L6-v2', 'Sentence-Transformers', 384, 512, 0.0, True),\n                ('all-mpnet-base-v2', 'Sentence-Transformers', 768, 514, 0.0, True),\n            ]\n\n            for model_data in embedding_models:\n                cur.execute(\"\"\"\n                    INSERT INTO embedding_models (model_name, provider, dimensions, max_tokens, cost_per_token, is_active)\n                    VALUES (%s, %s, %s, %s, %s, %s)\n                    ON CONFLICT (model_name) DO NOTHING;\n                \"\"\", model_data)\n\n            # Insert knowledge bases\n            knowledge_bases = [\n                ('Technical Documentation', 'Software engineering and API documentation', 'technology'),\n                ('Research Papers', 'Academic papers and research publications', 'research'),\n                ('Customer Support', 'FAQ and troubleshooting guides', 'support'),\n                ('Product Catalog', 'Product descriptions and specifications', 'commerce'),\n                ('Legal Documents', 'Contracts, policies, and legal texts', 'legal'),\n            ]\n\n            kb_ids = []\n            for kb_data in knowledge_bases:\n                cur.execute(\"\"\"\n                    INSERT INTO knowledge_base (kb_name, description, domain, total_documents, total_chunks, total_storage_mb)\n                    VALUES (%s, %s, %s, %s, %s, %s)\n                    
RETURNING id;\n                \"\"\", kb_data + (random.randint(50, 500), random.randint(200, 2000), round(random.uniform(10.5, 250.8), 2)))\n                kb_ids.append(cur.fetchone()[0])\n\n            # Insert sample documents\n            sample_documents = [\n                (\"PostgreSQL Performance Tuning\", \"Comprehensive guide to optimizing PostgreSQL database performance including indexing strategies, query optimization, and configuration tuning.\", \"https://example.com/pg-performance\", \"technical_guide\"),\n                (\"Vector Similarity Search\", \"Understanding vector embeddings and similarity search algorithms for AI applications and recommendation systems.\", \"https://example.com/vector-search\", \"technical_guide\"),\n                (\"RAG Implementation Best Practices\", \"Best practices for implementing Retrieval-Augmented Generation systems using vector databases and large language models.\", \"https://example.com/rag-practices\", \"best_practices\"),\n                (\"Database Security Guidelines\", \"Security considerations and implementation guidelines for PostgreSQL databases in production environments.\", \"https://example.com/db-security\", \"security_guide\"),\n                (\"Machine Learning with SQL\", \"Integrating machine learning workflows with SQL databases and leveraging database extensions for AI applications.\", \"https://example.com/ml-sql\", \"tutorial\"),\n                (\"API Documentation Standards\", \"Standards and best practices for creating comprehensive and user-friendly API documentation.\", \"https://example.com/api-docs\", \"documentation\"),\n                (\"Microservices Architecture\", \"Design patterns and implementation strategies for microservices architecture in modern applications.\", \"https://example.com/microservices\", \"architecture_guide\"),\n                (\"Data Pipeline Optimization\", \"Optimizing data processing pipelines for scalability, reliability, and performance in 
enterprise environments.\", \"https://example.com/data-pipelines\", \"optimization_guide\"),\n                (\"Cloud Database Migration\", \"Step-by-step guide for migrating on-premises databases to cloud infrastructure with minimal downtime.\", \"https://example.com/cloud-migration\", \"migration_guide\"),\n                (\"NoSQL vs SQL Comparison\", \"Detailed comparison of NoSQL and SQL databases, including use cases, performance characteristics, and selection criteria.\", \"https://example.com/nosql-sql\", \"comparison_guide\"),\n            ]\n\n            doc_ids = []\n            for title, content, url, doc_type in sample_documents:\n                embedding = generate_mock_embedding(1536)\n                word_count = len(content.split())\n\n                cur.execute(\"\"\"\n                    INSERT INTO documents (title, content, source_url, document_type, word_count, embedding)\n                    VALUES (%s, %s, %s, %s, %s, %s)\n                    RETURNING id;\n                \"\"\", (title, content, url, doc_type, word_count, embedding))\n                doc_ids.append(cur.fetchone()[0])\n\n            # Insert document chunks\n            chunk_count = 0\n            for doc_id in doc_ids:\n                # Generate 3-7 chunks per document\n                num_chunks = random.randint(3, 7)\n                for chunk_idx in range(num_chunks):\n                    chunk_text = f\"This is chunk {chunk_idx + 1} of document {doc_id}. \" + \\\n                               \"It contains relevant information that would be useful for similarity search and RAG applications. 
\" + \\\n                               \"The content includes technical details, examples, and best practices.\"\n                    chunk_size = len(chunk_text)\n                    overlap_size = random.randint(20, 50) if chunk_idx > 0 else 0\n                    embedding = generate_mock_embedding(1536)\n\n                    cur.execute(\"\"\"\n                        INSERT INTO document_chunks (document_id, chunk_index, chunk_text, chunk_size, overlap_size, embedding)\n                        VALUES (%s, %s, %s, %s, %s, %s);\n                    \"\"\", (doc_id, chunk_idx, chunk_text, chunk_size, overlap_size, embedding))\n                    chunk_count += 1\n\n            # Insert sample user queries\n            sample_queries = [\n                (\"How to optimize PostgreSQL performance?\", \"user123\", \"session_abc1\"),\n                (\"What are vector embeddings?\", \"user456\", \"session_def2\"),\n                (\"Best practices for RAG implementation\", \"user789\", \"session_ghi3\"),\n                (\"Database security checklist\", \"user123\", \"session_abc2\"),\n                (\"Machine learning with databases\", \"user456\", \"session_def3\"),\n                (\"API documentation examples\", \"user321\", \"session_jkl1\"),\n                (\"Microservices design patterns\", \"user654\", \"session_mno2\"),\n                (\"Data pipeline best practices\", \"user987\", \"session_pqr3\"),\n                (\"Cloud migration strategies\", \"user111\", \"session_stu4\"),\n                (\"NoSQL vs SQL databases\", \"user222\", \"session_vwx5\"),\n            ]\n\n            for query_text, user_id, session_id in sample_queries:\n                embedding = generate_mock_embedding(1536)\n                response_time = random.randint(50, 500)\n\n                cur.execute(\"\"\"\n                    INSERT INTO user_queries (query_text, user_id, session_id, response_time_ms, embedding)\n                    VALUES (%s, %s, %s, %s, 
%s);\n                \"\"\", (query_text, user_id, session_id, response_time, embedding))\n\n            # Insert some search cache entries\n            for i in range(5):\n                query_hash = f\"hash_{random.randint(100000, 999999)}\"\n                query_text = f\"Sample cached query {i + 1}\"\n                results = [{\"doc_id\": random.randint(1, len(doc_ids)), \"similarity\": round(random.uniform(0.7, 0.95), 3)} for _ in range(3)]\n                result_count = len(results)\n                search_time = random.randint(10, 100)\n                threshold = round(random.uniform(0.6, 0.8), 3)\n\n                cur.execute(\"\"\"\n                    INSERT INTO search_cache (query_hash, query_text, results_json, result_count, search_time_ms, similarity_threshold)\n                    VALUES (%s, %s, %s, %s, %s, %s);\n                \"\"\", (query_hash, query_text, json.dumps(results), result_count, search_time, threshold))\n\n            logger.info(f\"Sample data inserted successfully:\")\n            logger.info(f\"   {len(sample_documents)} documents\")\n            logger.info(f\"   {chunk_count} document chunks\")\n            logger.info(f\"   {len(sample_queries)} user queries\")\n            logger.info(f\"   {len(embedding_models)} embedding models\")\n            logger.info(f\"   {len(knowledge_bases)} knowledge bases\")\n\n        conn.close()\n\n    except psycopg2.Error as e:\n        logger.error(f\"Failed to insert sample data: {e}\")\n        raise\n\n\ndef verify_vector_setup():\n    \"\"\"Verify that the vector database was set up correctly.\"\"\"\n    conn_params = get_connection_params()\n\n    try:\n        conn = psycopg2.connect(**conn_params)\n\n        with conn.cursor() as cur:\n            logger.info(\"Verifying vector database setup...\")\n\n            # Check extension\n            cur.execute(\"SELECT extname FROM pg_extension WHERE extname = 'vector';\")\n            if cur.fetchone():\n                
logger.info(\"pgvector extension is installed\")\n            else:\n                logger.error(\"pgvector extension not found\")\n                return False\n\n            # Check tables and record counts\n            tables_to_check = [\n                'documents', 'document_chunks', 'user_queries',\n                'embedding_models', 'knowledge_base', 'search_cache'\n            ]\n\n            table_counts = {}\n            for table in tables_to_check:\n                cur.execute(f'SELECT COUNT(*) FROM {table}')\n                count = cur.fetchone()[0]\n                table_counts[table] = count\n                logger.info(f\"Table {table}: {count} records\")\n\n            # Check vector columns\n            cur.execute(\"\"\"\n                SELECT table_name, column_name, data_type\n                FROM information_schema.columns\n                WHERE data_type = 'USER-DEFINED'\n                AND udt_name = 'vector'\n                ORDER BY table_name, column_name;\n            \"\"\")\n\n            vector_columns = cur.fetchall()\n            logger.info(f\"Found {len(vector_columns)} vector columns:\")\n            for table, column, dtype in vector_columns:\n                logger.info(f\"   {table}.{column} ({dtype})\")\n\n            # Check indexes\n            cur.execute(\"\"\"\n                SELECT schemaname, tablename, indexname, indexdef\n                FROM pg_indexes\n                WHERE indexdef LIKE '%vector%' OR indexdef LIKE '%hnsw%' OR indexdef LIKE '%ivfflat%'\n                ORDER BY tablename, indexname;\n            \"\"\")\n\n            vector_indexes = cur.fetchall()\n            logger.info(f\"Found {len(vector_indexes)} vector indexes:\")\n            for schema, table, index, definition in vector_indexes:\n                logger.info(f\"   {index} on {table}\")\n\n            # Test a simple vector similarity query\n            mock_embedding = generate_mock_embedding(1536)\n            
cur.execute(\"\"\"\n                SELECT id, title, embedding <-> %s::vector as distance\n                FROM documents\n                ORDER BY embedding <-> %s::vector\n                LIMIT 3;\n            \"\"\", (mock_embedding, mock_embedding))\n\n            results = cur.fetchall()\n            logger.info(f\"Vector similarity query returned {len(results)} results\")\n\n        conn.close()\n        logger.info(\"Vector database verification completed successfully\")\n        return table_counts, vector_columns, vector_indexes\n\n    except psycopg2.Error as e:\n        logger.error(f\"Verification failed: {e}\")\n        raise\n\n\ndef prepare_vector_environment():\n    \"\"\"Main function to prepare the vector database environment.\"\"\"\n    logger.info(\"Preparing vector database environment...\")\n\n    try:\n        # Create pgvector extension\n        create_vector_extension()\n\n        # Create vector tables\n        create_vector_tables()\n\n        # Insert sample data first\n        insert_sample_data()\n\n        # Create indexes after data insertion for better performance\n        create_vector_indexes()\n\n        # Verify the setup\n        table_counts, vector_columns, vector_indexes = verify_vector_setup()\n\n        logger.info(\"Vector database environment prepared successfully!\")\n        logger.info(f\"Total tables created: {len(table_counts)}\")\n        logger.info(f\"Total vector columns: {len(vector_columns)}\")\n        logger.info(f\"Total vector indexes: {len(vector_indexes)}\")\n\n        return {\n            'table_counts': table_counts,\n            'vector_columns': vector_columns,\n            'vector_indexes': vector_indexes\n        }\n\n    except Exception as e:\n        logger.error(f\"Failed to prepare vector environment: {e}\")\n        raise\n\n\nif __name__ == \"__main__\":\n    # Allow running this module directly for testing\n    logging.basicConfig(level=logging.INFO)\n    prepare_vector_environment()\n"
  },
  {
    "path": "tasks/utils/__init__.py",
    "content": ""
  },
  {
    "path": "tasks/utils/notion_utils.py",
    "content": "import os\nfrom notion_client import Client\nimport sys\nfrom dotenv import load_dotenv\n\n\ndef get_notion_client():\n    # Construct the absolute path to the .env file in the project root\n    load_dotenv(dotenv_path=\".mcp_env\")\n    api_key = os.getenv(\"EVAL_NOTION_API_KEY\")\n    if not api_key:\n        print(\n            \"Error: EVAL_NOTION_API_KEY not found in environment variables.\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n    return Client(auth=api_key)\n\n\ndef _find_object(notion: Client, title: str, object_type: str):\n    \"\"\"Generic helper to find a Notion page or database by title.\n\n    Args:\n        notion: Authenticated Notion Client.\n        title: Title (or partial title) to search for.\n        object_type: Either \"page\" or \"database\".\n\n    Returns:\n        The ID string if found, otherwise None.\n    \"\"\"\n    search_results = (\n        notion.search(\n            query=title, filter={\"property\": \"object\", \"value\": object_type}\n        ).get(\"results\")\n        or []\n    )\n\n    if not search_results:\n        return None\n\n    # Shortcut when there is only one match\n    if len(search_results) == 1:\n        return search_results[0][\"id\"]\n\n    # Attempt to find a case-insensitive match on the title field\n    for result in search_results:\n        if object_type == \"page\":\n            # Pages store their title inside the \"properties.title.title\" rich text list\n            title_rich_texts = (\n                result.get(\"properties\", {}).get(\"title\", {}).get(\"title\", [])\n            )\n        else:  # database\n            title_rich_texts = result.get(\"title\", [])\n\n        for text_obj in title_rich_texts:\n            if title.lower() in text_obj.get(\"plain_text\", \"\").lower():\n                return result[\"id\"]\n\n    # Fallback: return the first result\n    return search_results[0][\"id\"]\n\n\ndef find_page(notion: Client, page_title: 
str):\n    \"\"\"Finds a page by title. Wrapper around _find_object with object_type='page'.\"\"\"\n    return _find_object(notion, page_title, \"page\")\n\n\ndef get_page_by_id(notion: Client, page_id: str):\n    \"\"\"Gets a page by its ID. Returns the page object if found, None otherwise.\"\"\"\n    try:\n        return notion.pages.retrieve(page_id=page_id)\n    except Exception:\n        return None\n\n\ndef find_page_by_id(notion: Client, page_id: str):\n    \"\"\"Finds a page by its ID and returns the ID if it exists, None otherwise.\"\"\"\n    try:\n        notion.pages.retrieve(page_id=page_id)\n        return page_id\n    except Exception:\n        return None\n\n\ndef find_database_by_id(notion: Client, database_id: str):\n    \"\"\"Finds a database by its ID and returns the ID if it exists, None otherwise.\"\"\"\n    try:\n        notion.databases.retrieve(database_id=database_id)\n        return database_id\n    except Exception:\n        return None\n\n\ndef find_page_or_database_by_id(notion: Client, object_id: str):\n    \"\"\"\n    Finds either a page or database by ID. Returns a tuple (object_id, object_type)\n    where object_type is either 'page' or 'database', or (None, None) if not found.\n    \"\"\"\n    # Try as page first\n    try:\n        notion.pages.retrieve(page_id=object_id)\n        return (object_id, \"page\")\n    except Exception:\n        pass\n\n    # Try as database\n    try:\n        notion.databases.retrieve(database_id=object_id)\n        return (object_id, \"database\")\n    except Exception:\n        pass\n\n    return (None, None)\n\n\ndef find_database(notion: Client, db_title: str):\n    \"\"\"Finds a database by title. 
Wrapper around _find_object with object_type='database'.\"\"\"\n    return _find_object(notion, db_title, \"database\")\n\n\ndef find_database_in_block(notion: Client, block_id: str, db_title: str):\n    \"\"\"\n    Recursively find a database by title within a block.\n    \"\"\"\n    blocks = notion.blocks.children.list(block_id=block_id).get(\"results\")\n    for block in blocks:\n        if (\n            block.get(\"type\") == \"child_database\"\n            and block.get(\"child_database\", {}).get(\"title\") == db_title\n        ):\n            return block[\"id\"]\n        if block.get(\"has_children\"):\n            db_id = find_database_in_block(notion, block[\"id\"], db_title)\n            if db_id:\n                return db_id\n    return None\n\n\ndef get_all_blocks_recursively(notion: Client, block_id: str):\n    \"\"\"\n    Recursively fetches all blocks from a starting block ID and its children,\n    returning a single flat list of block objects.\n    \"\"\"\n    all_blocks = []\n    try:\n        direct_children = notion.blocks.children.list(block_id=block_id).get(\n            \"results\", []\n        )\n    except Exception:\n        return []\n\n    for block in direct_children:\n        all_blocks.append(block)\n        if block.get(\"has_children\"):\n            all_blocks.extend(get_all_blocks_recursively(notion, block[\"id\"]))\n\n    return all_blocks\n\n\ndef get_block_plain_text(block):\n    \"\"\"\n    Safely extract plain_text from a block (paragraph, heading, etc.).\n    \"\"\"\n    block_type = block.get(\"type\")\n    if not block_type:\n        return \"\"\n\n    block_content = block.get(block_type)\n    if not block_content:\n        return \"\"\n\n    rich_text_list = block_content.get(\"rich_text\", [])\n    plain_text = \"\".join([rt.get(\"plain_text\", \"\") for rt in rich_text_list])\n\n    return plain_text\n"
  },
  {
    "path": "tasks/utils/postgres_utils.py",
    "content": "\"\"\"\nPostgreSQL Data Loading Utilities for MCPMark Tasks\n===================================================\n\nCommon utilities for loading data into PostgreSQL databases from CSV files\nand setting up schemas in prepare_environment.py scripts.\n\"\"\"\n\nimport csv\nimport os\nimport psycopg2\nfrom pathlib import Path\nfrom typing import Dict, List, Any, Optional\nimport logging\n\nlogger = logging.getLogger(__name__)\n\n\ndef get_connection_params() -> dict:\n    \"\"\"Get database connection parameters from environment variables.\"\"\"\n    return {\n        \"host\": os.getenv(\"POSTGRES_HOST\", \"localhost\"),\n        \"port\": int(os.getenv(\"POSTGRES_PORT\", 5432)),\n        \"database\": os.getenv(\"POSTGRES_DATABASE\"),\n        \"user\": os.getenv(\"POSTGRES_USERNAME\"),\n        \"password\": os.getenv(\"POSTGRES_PASSWORD\"),\n    }\n\n\ndef execute_schema_sql(conn, schema_sql: str):\n    \"\"\"Execute schema SQL with proper error handling.\"\"\"\n    with conn.cursor() as cur:\n        cur.execute(schema_sql)\n        conn.commit()\n        logger.info(\"✅ Database schema created successfully\")\n\n\ndef load_csv_to_table(\n    conn, \n    csv_file_path: Path, \n    table_name: str, \n    columns: Optional[List[str]] = None,\n    skip_header: bool = True\n):\n    \"\"\"\n    Load CSV data into a PostgreSQL table.\n    \n    Args:\n        conn: Database connection\n        csv_file_path: Path to CSV file\n        table_name: Target table name\n        columns: List of column names (if None, uses all columns)\n        skip_header: Whether to skip the first row\n    \"\"\"\n    if not csv_file_path.exists():\n        raise FileNotFoundError(f\"CSV file not found: {csv_file_path}\")\n    \n    with conn.cursor() as cur:\n        with open(csv_file_path, 'r', encoding='utf-8') as f:\n            csv_reader = csv.reader(f)\n            \n            # Skip header if needed\n            if skip_header:\n                next(csv_reader)\n 
           \n            # Build COPY command\n            if columns:\n                copy_sql = f\"COPY {table_name} ({', '.join(columns)}) FROM STDIN WITH CSV\"\n            else:\n                copy_sql = f\"COPY {table_name} FROM STDIN WITH CSV\"\n            \n            # Reset file pointer and copy data\n            f.seek(0)\n            if skip_header:\n                next(csv.reader(f))  # Skip header again\n            \n            cur.copy_expert(copy_sql, f)\n            \n        conn.commit()\n        logger.info(f\"✅ Loaded data from {csv_file_path.name} into {table_name}\")\n\n\ndef insert_data_from_dict(conn, table_name: str, data: List[Dict[str, Any]]):\n    \"\"\"\n    Insert data from a list of dictionaries into a table.\n    \n    Args:\n        conn: Database connection\n        table_name: Target table name\n        data: List of dictionaries with column_name: value pairs\n    \"\"\"\n    if not data:\n        return\n    \n    # Get column names from first record\n    columns = list(data[0].keys())\n    placeholders = ', '.join(['%s'] * len(columns))\n    columns_str = ', '.join(columns)\n    \n    insert_sql = f\"INSERT INTO {table_name} ({columns_str}) VALUES ({placeholders}) ON CONFLICT DO NOTHING\"\n    \n    with conn.cursor() as cur:\n        for row in data:\n            values = [row[col] for col in columns]\n            cur.execute(insert_sql, values)\n        \n        conn.commit()\n        logger.info(f\"✅ Inserted {len(data)} rows into {table_name}\")\n\n\ndef create_table_with_data(\n    conn, \n    table_name: str, \n    schema_sql: str, \n    data: Optional[List[Dict[str, Any]]] = None,\n    data_from_csv: Optional[Path] = None\n):\n    \"\"\"\n    Create a table and optionally load data.\n    \n    Args:\n        conn: Database connection\n        table_name: Table name\n        schema_sql: CREATE TABLE SQL statement\n        data: Optional list of dictionaries to insert\n        data_from_csv: Optional CSV file to 
load\n    \"\"\"\n    with conn.cursor() as cur:\n        # Create table\n        cur.execute(schema_sql)\n        logger.info(f\"✅ Created table {table_name}\")\n        \n        # Load data if provided\n        if data:\n            insert_data_from_dict(conn, table_name, data)\n        elif data_from_csv:\n            load_csv_to_table(conn, data_from_csv, table_name)\n\n\ndef setup_database_with_config(setup_config: Dict[str, Any]):\n    \"\"\"\n    Set up database using a configuration dictionary.\n    \n    Args:\n        setup_config: Dictionary with 'tables' key containing table configurations\n        \n    Example config:\n    {\n        \"tables\": {\n            \"artists\": {\n                \"schema\": \"CREATE TABLE artists (id SERIAL PRIMARY KEY, name VARCHAR(120))\",\n                \"data\": [{\"id\": 1, \"name\": \"Iron Maiden\"}],\n                \"data_from_csv\": \"data/artists.csv\"  # alternative to data\n            }\n        }\n    }\n    \"\"\"\n    conn_params = get_connection_params()\n    if not conn_params[\"database\"]:\n        raise ValueError(\"❌ No database specified in POSTGRES_DATABASE environment variable\")\n    \n    try:\n        conn = psycopg2.connect(**conn_params)\n        \n        for table_name, config in setup_config[\"tables\"].items():\n            schema_sql = config[\"schema\"]\n            data = config.get(\"data\")\n            csv_file_path = None\n            \n            # Handle CSV file path\n            if \"data_from_csv\" in config:\n                csv_file_path = Path(config[\"data_from_csv\"])\n                if not csv_file_path.is_absolute():\n                    # Assume relative to current working directory (task directory)\n                    csv_file_path = Path.cwd() / csv_file_path\n            \n            create_table_with_data(\n                conn, \n                table_name, \n                schema_sql, \n                data=data, \n                
data_from_csv=csv_file_path\n            )\n        \n        conn.close()\n        logger.info(\"🎉 Database setup completed successfully\")\n        \n    except psycopg2.Error as e:\n        logger.error(f\"❌ Database error during setup: {e}\")\n        raise\n    except Exception as e:\n        logger.error(f\"❌ Setup error: {e}\")\n        raise"
  }
]