[
  {
    "path": ".dockerignore",
    "content": ".venv/\nvenv/\npr_agent/settings/.secrets.toml\npics/\npr_agent.egg-info/\nbuild/\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug-report.yml",
    "content": "name: \"\\U0001FAB2 Bug Report\"\ndescription: Submit a bug report\nlabels: [\"bug\"]\nbody:\n\n  - type: dropdown\n    id: information-git-provider\n    attributes:\n      label: Git provider\n      description: 'The problem arises when using:'\n      options:\n        - \"Github Cloud\"\n        - \"Github Enterprise\"\n        - \"Gitlab\"\n        - \"Bitbucket Cloud\"\n        - \"Bitbucket Server\"\n        - \"Azure\"\n        - \"Other\"\n    validations:\n      required: true\n\n  - type: textarea\n    id: system-info\n    attributes:\n      label: System Info\n      description: Please share your system info with us.\n      placeholder: model used, deployment type (action/app/cli/...), etc...\n    validations:\n      required: true\n\n  - type: textarea\n    id: bug-details\n    attributes:\n      label: Bug details\n      description: Please describe the problem.\n      placeholder: Describe the problem\n    validations:\n      required: true\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "content": "blank_issues_enabled: false\nversion: 0.1\ncontact_links:\n  - name: Discussions\n    url: https://github.com/qodo-ai/pr-agent/discussions\n    about: GitHub Discussions\n\n  - name: Discord community\n    url: https://discord.com/channels/1057273017547378788/1126104260430528613\n    about: Join our discord community\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature-request.yml",
    "content": "name: \"\\U0001F4A1 Feature request\"\ndescription: Submit a proposal/request for a new PR-Agent feature\nlabels: [\"feature\"]\nbody:\n  - type: textarea\n    id: feature-request\n    validations:\n      required: true\n    attributes:\n      label: Feature request\n      description: |\n        Description of the feature proposal.\n\n  - type: textarea\n    id: motivation\n    validations:\n      required: true\n    attributes:\n      label: Motivation\n      description: |\n        Outline the motivation for the proposal.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/miscellaneous.yml",
    "content": "name: \"❔ General Issue\"\ndescription: Submit a general issue\nlabels: [\"general\"]\nbody:\n\n  - type: dropdown\n    id: information-git-provider\n    attributes:\n      label: Git provider (optional)\n      description: 'Git Provider:'\n      options:\n        - \"Github Cloud\"\n        - \"Github Enterprise\"\n        - \"Gitlab\"\n        - \"Bitbucket Cloud\"\n        - \"Bitbucket Server\"\n        - \"Azure\"\n        - \"Other\"\n\n  - type: textarea\n    id: system-info\n    attributes:\n      label: System Info (optional)\n      description: Please share your system info with us.\n      placeholder: model used, deployment type (action/app/cli/...), etc...\n    validations:\n      required: false\n\n  - type: textarea\n    id: issues-details\n    attributes:\n      label: Issues details\n      description: Please share the issues details.\n      placeholder: Describe the issue\n    validations:\n      required: true\n"
  },
  {
    "path": ".github/workflows/build-and-test.yaml",
    "content": "name: Build-and-test\n\non:\n  push:\n    branches:\n      - main\n  pull_request:\n    branches:\n      - main\n\njobs:\n  build-and-test:\n    runs-on: ubuntu-latest\n\n    steps:\n      - id: checkout\n        uses: actions/checkout@v6\n\n      - id: dockerx\n        name: Setup Docker Buildx\n        uses: docker/setup-buildx-action@v3\n\n      - id: build\n        name: Build dev docker\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          file: ./docker/Dockerfile\n          push: false\n          load: true\n          tags: codiumai/pr-agent:test\n          cache-from: type=gha,scope=dev\n          cache-to: type=gha,mode=max,scope=dev\n          target: test\n\n      - id: test\n        name: Test dev docker\n        run: |\n          docker run --rm codiumai/pr-agent:test pytest -v tests/unittest\n"
  },
  {
    "path": ".github/workflows/code_coverage.yaml",
    "content": "name: Code-coverage\n\non:\n  workflow_dispatch:\n  # push:\n  #   branches:\n  #     - main\n  pull_request:\n    branches:\n      - main\n\njobs:\n  build-and-test:\n    runs-on: ubuntu-latest\n\n    steps:\n      - id: checkout\n        uses: actions/checkout@v6\n\n      - id: dockerx\n        name: Setup Docker Buildx\n        uses: docker/setup-buildx-action@v3\n\n      - id: build\n        name: Build dev docker\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          file: ./docker/Dockerfile\n          push: false\n          load: true\n          tags: codiumai/pr-agent:test\n          cache-from: type=gha,scope=dev\n          cache-to: type=gha,mode=max,scope=dev\n          target: test\n\n      - id: code_cov\n        name: Test dev docker\n        run: |\n          docker run --name test_container codiumai/pr-agent:test  pytest  tests/unittest --cov=pr_agent --cov-report term --cov-report xml:coverage.xml\n          docker cp test_container:/app/coverage.xml coverage.xml\n          docker rm test_container\n\n      - name: Validate coverage report\n        run: |\n          if [ ! -f coverage.xml ]; then\n            echo \"Coverage report not found\"\n            exit 1\n          fi\n      - name: Upload coverage to Codecov\n        uses: codecov/codecov-action@v5\n        with:\n          token: ${{ secrets.CODECOV_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/docs-ci.yaml",
    "content": "name: docs-ci\non:\n  push:\n    branches:\n      - main\n      - add-docs-portal\n    paths:\n      - docs/**\npermissions:\n  contents: write\njobs:\n  deploy:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n      - name: Configure Git Credentials\n        run: |\n          git config user.name github-actions[bot]\n          git config user.email 41898282+github-actions[bot]@users.noreply.github.com\n      - uses: actions/setup-python@v5\n        with:\n          python-version: 3.x\n      - run: echo \"cache_id=$(date --utc '+%V')\" >> $GITHUB_ENV\n      - uses: actions/cache@v4\n        with:\n          key: mkdocs-material-${{ env.cache_id }}\n          path: .cache\n          restore-keys: |\n            mkdocs-material-\n      - run: pip install mkdocs-material\n      - run: pip install \"mkdocs-material[imaging]\"\n      - run: pip install mkdocs-glightbox\n      - run: mkdocs gh-deploy -f docs/mkdocs.yml --force\n"
  },
  {
    "path": ".github/workflows/e2e_tests.yaml",
    "content": "name: PR-Agent E2E tests\n\non:\n  workflow_dispatch:\n#  schedule:\n#    - cron: '0 0 * * *' # This cron expression runs the workflow every night at midnight UTC\n\njobs:\n  pr_agent_job:\n    runs-on: ubuntu-latest\n    name: PR-Agent E2E GitHub App Test\n    steps:\n      - name: Checkout repository\n        uses: actions/checkout@v6\n\n      - name: Setup Docker Buildx\n        uses: docker/setup-buildx-action@v3\n\n      - id: build\n        name: Build dev docker\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          file: ./docker/Dockerfile\n          push: false\n          load: true\n          tags: codiumai/pr-agent:test\n          cache-from: type=gha,scope=dev\n          cache-to: type=gha,mode=max,scope=dev\n          target: test\n\n      - id: test1\n        name: E2E test github app\n        run: |\n          docker run -e GITHUB.USER_TOKEN=${{ secrets.TOKEN_GITHUB }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_github_app.py\n\n      - id: test2\n        name: E2E gitlab webhook\n        run: |\n          docker run -e gitlab.PERSONAL_ACCESS_TOKEN=${{ secrets.TOKEN_GITLAB }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_gitlab_webhook.py\n\n      - id: test3\n        name: E2E bitbucket app\n        run: |\n          docker run -e BITBUCKET.USERNAME=${{ secrets.BITBUCKET_USERNAME }}  -e BITBUCKET.PASSWORD=${{ secrets.BITBUCKET_PASSWORD }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_bitbucket_app.py\n"
  },
  {
    "path": ".github/workflows/pr-agent-review.yaml",
    "content": "# This workflow enables developers to call PR-Agents `/[actions]` in PR's comments and upon PR creation.\n# Learn more at https://www.codium.ai/pr-agent/\n# This is v0.2 of this workflow file\n\nname: PR-Agent\n\non:\n# pull_request:\n# issue_comment:\n  workflow_dispatch:\n\npermissions:\n  issues: write\n  pull-requests: write\n\njobs:\n  pr_agent_job:\n    runs-on: ubuntu-latest\n    name: Run pr agent on every pull request\n    steps:\n      - name: PR Agent action step\n        id: pragent\n        uses: Codium-ai/pr-agent@main\n        env:\n          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n          OPENAI_ORG: ${{ secrets.OPENAI_ORG }} # optional\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          PINECONE.API_KEY: ${{ secrets.PINECONE_API_KEY }}\n          PINECONE.ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}\n          GITHUB_ACTION_CONFIG.AUTO_DESCRIBE: true\n          GITHUB_ACTION_CONFIG.AUTO_REVIEW: true\n          GITHUB_ACTION_CONFIG.AUTO_IMPROVE: true\n"
  },
  {
    "path": ".github/workflows/pre-commit.yml",
    "content": "# disabled. We might run it manually if needed.\nname: pre-commit\n\non:\n  workflow_dispatch:\n#  pull_request:\n#  push:\n#    branches: [main]\n\njobs:\n  pre-commit:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n      - uses: actions/setup-python@v5\n      # SEE https://github.com/pre-commit/action\n      - uses: pre-commit/action@v3.0.1\n"
  },
  {
    "path": ".gitignore",
    "content": ".idea/\n.lsp/\n.vscode/\n.env\n.venv/\nvenv/\npr_agent/settings/.secrets.toml\n__pycache__\ndist/\n*.egg-info/\nbuild/\n.DS_Store\ndocs/.cache/\n.qodo\npoetry.lock\n"
  },
  {
    "path": ".pr_agent.toml",
    "content": "[pr_reviewer]\nenable_review_labels_effort = true\nenable_auto_approval = true\n\n[github_app]\npr_commands = [\n    \"/describe --pr_description.publish_description_as_comment=true\",\n    \"/improve\",\n    \"/agentic_review\"\n]\n\nhandle_push_trigger = true\npush_commands = [\n    \"/improve\",\n    \"/agentic_review\"\n]\n\n[review_agent]\nenabled = true\npublish_output = true\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "# See https://pre-commit.com for more information\n# See https://pre-commit.com/hooks.html for more hooks\n\ndefault_language_version:\n  python: python3\n\nrepos:\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v5.0.0\n    hooks:\n      - id: check-added-large-files\n      - id: check-toml\n      - id: check-yaml\n      - id: end-of-file-fixer\n      - id: trailing-whitespace\n  # - repo: https://github.com/rhysd/actionlint\n  #   rev: v1.7.3\n  #   hooks:\n  #     - id: actionlint\n  - repo: https://github.com/pycqa/isort\n    # rev must match what's in dev-requirements.txt\n    rev: 5.13.2\n    hooks:\n      - id: isort\n  # - repo: https://github.com/PyCQA/bandit\n  #   rev: 1.7.10\n  #   hooks:\n  #     - id: bandit\n  #       args: [\n  #         \"-c\", \"pyproject.toml\",\n  #       ]\n  # - repo: https://github.com/astral-sh/ruff-pre-commit\n  #   rev: v0.7.1\n  #   hooks:\n  #     - id: ruff\n  #       args:\n  #         - --fix\n  #     - id: ruff-format\n  # -   repo: https://github.com/PyCQA/autoflake\n  #     rev: v2.3.1\n  #     hooks:\n  #     -   id: autoflake\n  #         args:\n  #           - --in-place\n  #           - --remove-all-unused-imports\n  #           - --remove-unused-variables\n"
  },
  {
    "path": "AGENTS.md",
    "content": "# Repository Guidelines\n\n## Dos and Don’ts\n\n- **Do** match the interpreter requirement declared in `pyproject.toml` (Python ≥ 3.12) and install `requirements.txt` plus `requirements-dev.txt` before running tools.\n- **Do** run tests with `PYTHONPATH=.` set to keep imports functional (for example `PYTHONPATH=. ./.venv/bin/pytest tests/unittest/test_fix_json_escape_char.py -q`).\n- **Do** adjust configuration through `.pr_agent.toml` or files under `pr_agent/settings/` instead of hard-coding values.\n- **Don’t** commit secrets or access tokens; rely on environment variables as shown in the health and e2e tests.\n- **Don’t** reformat or reorder files globally; match existing 120-character lines, import ordering, and docstring style.\n- **Don’t** delete or rename configuration, prompt, or workflow files without maintainer approval.\n\n## Project Structure and Module Organization\n\nPR-Agent automates AI-assisted reviews for pull requests across multiple git providers.\n\n- `pr_agent/agent/` orchestrates commands (`review`, `describe`, `improve`, etc.) 
via `pr_agent/agent/pr_agent.py`.\n- `pr_agent/tools/` implements individual capabilities such as reviewers, code suggestions, docs updates, and label generation.\n- `pr_agent/git_providers/` and `pr_agent/identity_providers/` handle integrations with GitHub, GitLab, Bitbucket, Azure DevOps, and secrets.\n- `pr_agent/settings/` stores Dynaconf defaults (prompts, configuration templates, ignore lists) respected at runtime; `.pr_agent.toml` overrides repository-level behavior.\n- `tests/unittest/`, `tests/e2e_tests/`, and `tests/health_test/` contain pytest-based unit, end-to-end, and smoke checks.\n- `docs/` holds the MkDocs site (`docs/mkdocs.yml` plus content under `docs/docs/`); overrides live in `docs/overrides/`.\n- `.github/workflows/` defines CI pipelines for unit tests, coverage, docs deployment, pre-commit, and PR-agent self-review.\n- `docker/` and the root Dockerfiles provide build targets for services (`github_app`, `gitlab_webhook`, etc.) and the `test` stage used in CI.\n\n## Build, Test, and Development Commands\n\n- Create or activate a virtual environment, then install runtime dependencies with `pip install -r requirements.txt`; add development tooling via `pip install -r requirements-dev.txt`.\n- Run a single unit test (verified): `PYTHONPATH=. ./.venv/bin/pytest tests/unittest/test_fix_json_escape_char.py -q`.\n- Run the full unit suite: `PYTHONPATH=. 
./.venv/bin/pytest tests/unittest -v`.\n- Execute the CLI locally once dependencies and API keys are available: `python -m pr_agent.cli --pr_url <https://host/org/repo/pull/123> review`.\n- Build the test Docker target mirror of CI when containerizing: `docker build -f docker/Dockerfile --target test .` (loads dev dependencies and copies `tests/`).\n- Generate and deploy documentation with MkDocs after installing the same extras as CI (`mkdocs-material`, `mkdocs-glightbox`): `mkdocs serve -f docs/mkdocs.yml` for previews and `mkdocs gh-deploy -f docs/mkdocs.yml` for publication.\n\n## Coding Style and Naming Conventions\n\n- Python sources follow the Ruff configuration in `pyproject.toml` (`line-length = 120`, Pyflakes plus `flake8-bugbear` checks, and isort ordering). Keep imports grouped as isort would produce and prefer double quotes for strings.\n- Pre-commit (`.pre-commit-config.yaml`) enforces trailing whitespace cleanup, final newlines, TOML/YAML validity, and optional `isort`; run `pre-commit run --all-files` before submitting patches if installed.\n- Match existing docstring and comment style—concise English comments using imperative phrasing only where necessary.\n- Configuration files in `pr_agent/settings/` are TOML; preserve formatting, section order, and comments when editing prompts or defaults.\n- Markdown in `docs/` uses MkDocs conventions (YAML front matter absent; rely on heading hierarchy already in place).\n\n## Testing Guidelines\n\n- Pytest is the standard framework; keep new tests under the closest matching directory (`tests/unittest/` for unit logic, `tests/e2e_tests/` for integration flows, `tests/health_test/` for smoke coverage).\n- Prefer focused unit tests that isolate helpers in `pr_agent/algo/`, `pr_agent/tools/`, or provider adapters; use parameterized tests where existing files already do so.\n- Set `PYTHONPATH=.` when invoking pytest from the repository root to avoid import errors.\n- End-to-end suites require provider tokens 
(`TOKEN_GITHUB`, `TOKEN_GITLAB`, `BITBUCKET_USERNAME`, `BITBUCKET_PASSWORD`) and may take several minutes; run them only when credentials and sandboxes are configured.\n- The health test (`tests/health_test/main.py`) exercises `/describe`, `/review`, and `/improve`; update expected artifacts if prompts change meaningfully.\n\n## Commit and Pull Request Guidelines\n\n- Follow `CONTRIBUTING.md`: keep changes focused, add or update tests, and use Conventional Commit-style messages (e.g., `fix: handle missing repo settings gracefully`).\n- Target branch names follow `feature/<name>` or `fix/<issue>` patterns for substantial work.\n- Reference related issues and update README or docs when user-facing behavior shifts.\n- Ensure CI workflows (`build-and-test`, `code-coverage`, `docs-ci`) succeed locally or in draft PRs before requesting review; reproduce failures with the documented commands above.\n- Include screenshots or terminal captures when modifying user-visible output or documentation previews.\n\n## Safety and Permissions\n\n- Ask for confirmation before adding dependencies, renaming files, or changing workflow definitions; many consumers embed these paths and prompts.\n- Stay within existing formatting and directory conventions—avoid mass refactors, re-sorting of prompts, or reformatting Markdown beyond the touched sections.\n- You may read files, list directories, and run targeted lint/test/doc commands without prior approval; coordinate before launching full Docker builds or e2e suites that rely on external credentials.\n- Never commit cached credentials, API keys, or coverage artifacts; CI already handles secrets through GitHub Actions.\n- Treat prompt and configuration files as single sources of truth—update mirrors (`.pr_agent.toml`, `pr_agent/settings/*.toml`) together when behavior changes.\n\n## Security and Configuration Tips\n\n- Secrets should be supplied through environment variables (see usages in `tests/e2e_tests/test_github_app.py` and 
`tests/health_test/main.py`); do not persist them in code or configuration files.\n- Adjust runtime behavior by overriding keys in `.pr_agent.toml` or by supplying repository-specific Dynaconf files; keep overrides minimal and documented inside the PR description.\n- Review `SECURITY.md` before disclosing vulnerabilities and follow its contact instructions for responsible reporting.\n"
  },
  {
    "path": "CHANGELOG.md",
    "content": "## 2023-08-03\n\n### Optimized\n\n- Optimized PR diff processing by introducing caching for diff files, reducing the number of API calls.\n- Refactored `load_large_diff` function to generate a patch only when necessary.\n- Fixed a bug in the GitLab provider where the new file was not retrieved correctly.\n\n## 2023-08-02\n\n### Enhanced\n\n- Updated several tools in the `pr_agent` package to use commit messages in their functionality.\n- Commit messages are now retrieved and stored in the `vars` dictionary for each tool.\n- Added a section to display the commit messages in the prompts of various tools.\n\n## 2023-08-01\n\n### Enhanced\n\n- Introduced the ability to retrieve commit messages from pull requests across different git providers.\n- Implemented commit messages retrieval for GitHub and GitLab providers.\n- Updated the PR description template to include a section for commit messages if they exist.\n- Added support for repository-specific configuration files (.pr_agent.yaml) for the PR Agent.\n- Implemented this feature for both GitHub and GitLab providers.\n- Added a new configuration option 'use_repo_settings_file' to enable or disable the use of a repo-specific settings file.\n\n## 2023-07-30\n\n### Enhanced\n\n- Added the ability to modify any configuration parameter from 'configuration.toml' on-the-fly.\n- Updated the command line interface and bot commands to accept configuration changes as arguments.\n- Improved the PR agent to handle additional arguments for each action.\n\n## 2023-07-28\n\n### Improved\n\n- Enhanced error handling and logging in the GitLab provider.\n- Improved handling of inline comments and code suggestions in GitLab.\n- Fixed a bug where an additional unneeded line was added to code suggestions in GitLab.\n\n## 2023-07-26\n\n### Added\n\n- New feature for updating the CHANGELOG.md based on the contents of a PR.\n- Added support for this feature for the Github provider.\n- New configuration settings and prompts for 
the changelog update feature.\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Contributor Code of Conduct\n\nAs contributors and maintainers of this project, and in the interest of fostering an open\nand welcoming community, we pledge to respect all people who contribute through reporting\nissues, posting feature requests, updating documentation, submitting pull requests or\npatches, and other activities.\n\nWe are committed to making participation in this project a harassment-free experience for\neveryone, regardless of level of experience, gender, gender identity and expression,\nsexual orientation, disability, personal appearance, body size, race, ethnicity, age,\nreligion, or nationality.\n\nExamples of unacceptable behavior by participants include:\n\n* The use of sexualized language or imagery\n* Personal attacks\n* Trolling or insulting/derogatory comments\n* Public or private harassment\n* Publishing other's private information, such as physical or electronic addresses,\n  without explicit permission\n* Other unethical or unprofessional conduct\n\nProject maintainers have the right and responsibility to remove, edit, or reject comments,\ncommits, code, wiki edits, issues, and other contributions that are not aligned to this\nCode of Conduct, or to ban temporarily or permanently any contributor for other behaviors\nthat they deem inappropriate, threatening, offensive, or harmful.\n\nBy adopting this Code of Conduct, project maintainers commit themselves to fairly and\nconsistently applying these principles to every aspect of managing this project. Project\nmaintainers who do not follow or enforce the Code of Conduct may be permanently removed\nfrom the project team.\n\nThis Code of Conduct applies both within project spaces and in public spaces when an\nindividual is representing the project or its community.\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be reported by\ncontacting a project maintainer at dana.f@qodo.ai . 
All complaints will\nbe reviewed and investigated and will result in a response that is deemed necessary and\nappropriate to the circumstances. Maintainers are obligated to maintain confidentiality\nwith regard to the reporter of an incident.\n\nThis Code of Conduct is adapted from the\n[Contributor Covenant](https://contributor-covenant.org), version 1.3.0, available at\n[contributor-covenant.org/version/1/3/0/](https://contributor-covenant.org/version/1/3/0/)\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing to PR-Agent\n\nThank you for your interest in contributing to the PR-Agent project!\n\n## Getting Started\n\n1. Fork the repository and clone your fork\n2. Install Python 3.10 or higher\n3. Install dependencies (`requirements.txt` and `requirements-dev.txt`)\n4. Create a new branch for your contribution:\n   - For new features: `git checkout -b feature/your-feature-name`\n   - For bug fixes: `git checkout -b fix/issue-description`\n5. Make your changes\n6. Write or update tests as needed\n7. Run tests locally to ensure everything passes\n8. Commit your changes using conventional commit messages\n9. Push to your fork and submit a pull request\n\n## Development Guidelines\n\n- Keep pull requests focused on a single feature or fix\n- Follow the existing code style and formatting conventions\n- Add unit tests for any new functionality using pytest\n- Ensure test coverage for your changes\n- Update documentation as needed\n\n## Pull Request Process\n\n1. Ensure your PR includes a clear description of the changes\n2. Link any related issues\n3. Update the README.md if needed\n4. Wait for review from maintainers\n\n## Questions or Need Help?\n\n- Join our [Discord community](https://discord.com/channels/1057273017547378788/1126104260430528613) for questions and discussions\n- Check the [documentation](https://qodo-merge-docs.qodo.ai/) for detailed information\n- Report bugs or request features through [GitHub Issues](https://github.com/qodo-ai/pr-agent/issues)\n"
  },
  {
    "path": "Dockerfile.github_action",
    "content": "FROM python:3.12.10-slim AS base\n\nRUN apt-get update && apt-get install --no-install-recommends -y git curl && apt-get clean && rm -rf /var/lib/apt/lists/*\n\nWORKDIR /app\nADD pyproject.toml .\nADD requirements.txt .\nRUN pip install --no-cache-dir . && rm pyproject.toml requirements.txt\nENV PYTHONPATH=/app\nADD docs docs\nADD pr_agent pr_agent\nADD github_action/entrypoint.sh /\nRUN chmod +x /entrypoint.sh\nENTRYPOINT [\"/entrypoint.sh\"]\n"
  },
  {
    "path": "Dockerfile.github_action_dockerhub",
    "content": "FROM codiumai/pr-agent:github_action\n"
  },
  {
    "path": "LICENSE",
    "content": "                    GNU AFFERO GENERAL PUBLIC LICENSE\n                       Version 3, 19 November 2007\n\n Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n                            Preamble\n\n  The GNU Affero General Public License is a free, copyleft license for\nsoftware and other kinds of works, specifically designed to ensure\ncooperation with the community in the case of network server software.\n\n  The licenses for most software and other practical works are designed\nto take away your freedom to share and change the works.  By contrast,\nour General Public Licenses are intended to guarantee your freedom to\nshare and change all versions of a program--to make sure it remains free\nsoftware for all its users.\n\n  When we speak of free software, we are referring to freedom, not\nprice.  Our General Public Licenses are designed to make sure that you\nhave the freedom to distribute copies of free software (and charge for\nthem if you wish), that you receive source code or can get it if you\nwant it, that you can change the software or use pieces of it in new\nfree programs, and that you know you can do these things.\n\n  Developers that use our General Public Licenses protect your rights\nwith two steps: (1) assert copyright on the software, and (2) offer\nyou this License which gives you legal permission to copy, distribute\nand/or modify the software.\n\n  A secondary benefit of defending all users' freedom is that\nimprovements made in alternate versions of the program, if they\nreceive widespread use, become available for other developers to\nincorporate.  Many developers of free software are heartened and\nencouraged by the resulting cooperation.  
However, in the case of\nsoftware used on network servers, this result may fail to come about.\nThe GNU General Public License permits making a modified version and\nletting the public access it on a server without ever releasing its\nsource code to the public.\n\n  The GNU Affero General Public License is designed specifically to\nensure that, in such cases, the modified source code becomes available\nto the community.  It requires the operator of a network server to\nprovide the source code of the modified version running there to the\nusers of that server.  Therefore, public use of a modified version, on\na publicly accessible server, gives the public access to the source\ncode of the modified version.\n\n  An older license, called the Affero General Public License and\npublished by Affero, was designed to accomplish similar goals.  This is\na different license, not a version of the Affero GPL, but Affero has\nreleased a new version of the Affero GPL which permits relicensing under\nthis license.\n\n  The precise terms and conditions for copying, distribution and\nmodification follow.\n\n                       TERMS AND CONDITIONS\n\n  0. Definitions.\n\n  \"This License\" refers to version 3 of the GNU Affero General Public License.\n\n  \"Copyright\" also means copyright-like laws that apply to other kinds of\nworks, such as semiconductor masks.\n\n  \"The Program\" refers to any copyrightable work licensed under this\nLicense.  Each licensee is addressed as \"you\".  \"Licensees\" and\n\"recipients\" may be individuals or organizations.\n\n  To \"modify\" a work means to copy from or adapt all or part of the work\nin a fashion requiring copyright permission, other than the making of an\nexact copy.  
The resulting work is called a \"modified version\" of the\nearlier work or a work \"based on\" the earlier work.\n\n  A \"covered work\" means either the unmodified Program or a work based\non the Program.\n\n  To \"propagate\" a work means to do anything with it that, without\npermission, would make you directly or secondarily liable for\ninfringement under applicable copyright law, except executing it on a\ncomputer or modifying a private copy.  Propagation includes copying,\ndistribution (with or without modification), making available to the\npublic, and in some countries other activities as well.\n\n  To \"convey\" a work means any kind of propagation that enables other\nparties to make or receive copies.  Mere interaction with a user through\na computer network, with no transfer of a copy, is not conveying.\n\n  An interactive user interface displays \"Appropriate Legal Notices\"\nto the extent that it includes a convenient and prominently visible\nfeature that (1) displays an appropriate copyright notice, and (2)\ntells the user that there is no warranty for the work (except to the\nextent that warranties are provided), that licensees may convey the\nwork under this License, and how to view a copy of this License.  If\nthe interface presents a list of user commands or options, such as a\nmenu, a prominent item in the list meets this criterion.\n\n  1. Source Code.\n\n  The \"source code\" for a work means the preferred form of the work\nfor making modifications to it.  
\"Object code\" means any non-source\nform of a work.\n\n  A \"Standard Interface\" means an interface that either is an official\nstandard defined by a recognized standards body, or, in the case of\ninterfaces specified for a particular programming language, one that\nis widely used among developers working in that language.\n\n  The \"System Libraries\" of an executable work include anything, other\nthan the work as a whole, that (a) is included in the normal form of\npackaging a Major Component, but which is not part of that Major\nComponent, and (b) serves only to enable use of the work with that\nMajor Component, or to implement a Standard Interface for which an\nimplementation is available to the public in source code form.  A\n\"Major Component\", in this context, means a major essential component\n(kernel, window system, and so on) of the specific operating system\n(if any) on which the executable work runs, or a compiler used to\nproduce the work, or an object code interpreter used to run it.\n\n  The \"Corresponding Source\" for a work in object code form means all\nthe source code needed to generate, install, and (for an executable\nwork) run the object code and to modify the work, including scripts to\ncontrol those activities.  However, it does not include the work's\nSystem Libraries, or general-purpose tools or generally available free\nprograms which are used unmodified in performing those activities but\nwhich are not part of the work.  
For example, Corresponding Source\nincludes interface definition files associated with source files for\nthe work, and the source code for shared libraries and dynamically\nlinked subprograms that the work is specifically designed to require,\nsuch as by intimate data communication or control flow between those\nsubprograms and other parts of the work.\n\n  The Corresponding Source need not include anything that users\ncan regenerate automatically from other parts of the Corresponding\nSource.\n\n  The Corresponding Source for a work in source code form is that\nsame work.\n\n  2. Basic Permissions.\n\n  All rights granted under this License are granted for the term of\ncopyright on the Program, and are irrevocable provided the stated\nconditions are met.  This License explicitly affirms your unlimited\npermission to run the unmodified Program.  The output from running a\ncovered work is covered by this License only if the output, given its\ncontent, constitutes a covered work.  This License acknowledges your\nrights of fair use or other equivalent, as provided by copyright law.\n\n  You may make, run and propagate covered works that you do not\nconvey, without conditions so long as your license otherwise remains\nin force.  You may convey covered works to others for the sole purpose\nof having them make modifications exclusively for you, or provide you\nwith facilities for running those works, provided that you comply with\nthe terms of this License in conveying all material for which you do\nnot control copyright.  Those thus making or running the covered works\nfor you must do so exclusively on your behalf, under your direction\nand control, on terms that prohibit them from making any copies of\nyour copyrighted material outside their relationship with you.\n\n  Conveying under any other circumstances is permitted solely under\nthe conditions stated below.  Sublicensing is not allowed; section 10\nmakes it unnecessary.\n\n  3. 
Protecting Users' Legal Rights From Anti-Circumvention Law.\n\n  No covered work shall be deemed part of an effective technological\nmeasure under any applicable law fulfilling obligations under article\n11 of the WIPO copyright treaty adopted on 20 December 1996, or\nsimilar laws prohibiting or restricting circumvention of such\nmeasures.\n\n  When you convey a covered work, you waive any legal power to forbid\ncircumvention of technological measures to the extent such circumvention\nis effected by exercising rights under this License with respect to\nthe covered work, and you disclaim any intention to limit operation or\nmodification of the work as a means of enforcing, against the work's\nusers, your or third parties' legal rights to forbid circumvention of\ntechnological measures.\n\n  4. Conveying Verbatim Copies.\n\n  You may convey verbatim copies of the Program's source code as you\nreceive it, in any medium, provided that you conspicuously and\nappropriately publish on each copy an appropriate copyright notice;\nkeep intact all notices stating that this License and any\nnon-permissive terms added in accord with section 7 apply to the code;\nkeep intact all notices of the absence of any warranty; and give all\nrecipients a copy of this License along with the Program.\n\n  You may charge any price or no price for each copy that you convey,\nand you may offer support or warranty protection for a fee.\n\n  5. Conveying Modified Source Versions.\n\n  You may convey a work based on the Program, or the modifications to\nproduce it from the Program, in the form of source code under the\nterms of section 4, provided that you also meet all of these conditions:\n\n    a) The work must carry prominent notices stating that you modified\n    it, and giving a relevant date.\n\n    b) The work must carry prominent notices stating that it is\n    released under this License and any conditions added under section\n    7.  
This requirement modifies the requirement in section 4 to\n    \"keep intact all notices\".\n\n    c) You must license the entire work, as a whole, under this\n    License to anyone who comes into possession of a copy.  This\n    License will therefore apply, along with any applicable section 7\n    additional terms, to the whole of the work, and all its parts,\n    regardless of how they are packaged.  This License gives no\n    permission to license the work in any other way, but it does not\n    invalidate such permission if you have separately received it.\n\n    d) If the work has interactive user interfaces, each must display\n    Appropriate Legal Notices; however, if the Program has interactive\n    interfaces that do not display Appropriate Legal Notices, your\n    work need not make them do so.\n\n  A compilation of a covered work with other separate and independent\nworks, which are not by their nature extensions of the covered work,\nand which are not combined with it such as to form a larger program,\nin or on a volume of a storage or distribution medium, is called an\n\"aggregate\" if the compilation and its resulting copyright are not\nused to limit the access or legal rights of the compilation's users\nbeyond what the individual works permit.  Inclusion of a covered work\nin an aggregate does not cause this License to apply to the other\nparts of the aggregate.\n\n  6. 
Conveying Non-Source Forms.\n\n  You may convey a covered work in object code form under the terms\nof sections 4 and 5, provided that you also convey the\nmachine-readable Corresponding Source under the terms of this License,\nin one of these ways:\n\n    a) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by the\n    Corresponding Source fixed on a durable physical medium\n    customarily used for software interchange.\n\n    b) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by a\n    written offer, valid for at least three years and valid for as\n    long as you offer spare parts or customer support for that product\n    model, to give anyone who possesses the object code either (1) a\n    copy of the Corresponding Source for all the software in the\n    product that is covered by this License, on a durable physical\n    medium customarily used for software interchange, for a price no\n    more than your reasonable cost of physically performing this\n    conveying of source, or (2) access to copy the\n    Corresponding Source from a network server at no charge.\n\n    c) Convey individual copies of the object code with a copy of the\n    written offer to provide the Corresponding Source.  This\n    alternative is allowed only occasionally and noncommercially, and\n    only if you received the object code with such an offer, in accord\n    with subsection 6b.\n\n    d) Convey the object code by offering access from a designated\n    place (gratis or for a charge), and offer equivalent access to the\n    Corresponding Source in the same way through the same place at no\n    further charge.  You need not require recipients to copy the\n    Corresponding Source along with the object code.  
If the place to\n    copy the object code is a network server, the Corresponding Source\n    may be on a different server (operated by you or a third party)\n    that supports equivalent copying facilities, provided you maintain\n    clear directions next to the object code saying where to find the\n    Corresponding Source.  Regardless of what server hosts the\n    Corresponding Source, you remain obligated to ensure that it is\n    available for as long as needed to satisfy these requirements.\n\n    e) Convey the object code using peer-to-peer transmission, provided\n    you inform other peers where the object code and Corresponding\n    Source of the work are being offered to the general public at no\n    charge under subsection 6d.\n\n  A separable portion of the object code, whose source code is excluded\nfrom the Corresponding Source as a System Library, need not be\nincluded in conveying the object code work.\n\n  A \"User Product\" is either (1) a \"consumer product\", which means any\ntangible personal property which is normally used for personal, family,\nor household purposes, or (2) anything designed or sold for incorporation\ninto a dwelling.  In determining whether a product is a consumer product,\ndoubtful cases shall be resolved in favor of coverage.  For a particular\nproduct received by a particular user, \"normally used\" refers to a\ntypical or common use of that class of product, regardless of the status\nof the particular user or of the way in which the particular user\nactually uses, or expects or is expected to use, the product.  
A product\nis a consumer product regardless of whether the product has substantial\ncommercial, industrial or non-consumer uses, unless such uses represent\nthe only significant mode of use of the product.\n\n  \"Installation Information\" for a User Product means any methods,\nprocedures, authorization keys, or other information required to install\nand execute modified versions of a covered work in that User Product from\na modified version of its Corresponding Source.  The information must\nsuffice to ensure that the continued functioning of the modified object\ncode is in no case prevented or interfered with solely because\nmodification has been made.\n\n  If you convey an object code work under this section in, or with, or\nspecifically for use in, a User Product, and the conveying occurs as\npart of a transaction in which the right of possession and use of the\nUser Product is transferred to the recipient in perpetuity or for a\nfixed term (regardless of how the transaction is characterized), the\nCorresponding Source conveyed under this section must be accompanied\nby the Installation Information.  But this requirement does not apply\nif neither you nor any third party retains the ability to install\nmodified object code on the User Product (for example, the work has\nbeen installed in ROM).\n\n  The requirement to provide Installation Information does not include a\nrequirement to continue to provide support service, warranty, or updates\nfor a work that has been modified or installed by the recipient, or for\nthe User Product in which it has been modified or installed.  
Access to a\nnetwork may be denied when the modification itself materially and\nadversely affects the operation of the network or violates the rules and\nprotocols for communication across the network.\n\n  Corresponding Source conveyed, and Installation Information provided,\nin accord with this section must be in a format that is publicly\ndocumented (and with an implementation available to the public in\nsource code form), and must require no special password or key for\nunpacking, reading or copying.\n\n  7. Additional Terms.\n\n  \"Additional permissions\" are terms that supplement the terms of this\nLicense by making exceptions from one or more of its conditions.\nAdditional permissions that are applicable to the entire Program shall\nbe treated as though they were included in this License, to the extent\nthat they are valid under applicable law.  If additional permissions\napply only to part of the Program, that part may be used separately\nunder those permissions, but the entire Program remains governed by\nthis License without regard to the additional permissions.\n\n  When you convey a copy of a covered work, you may at your option\nremove any additional permissions from that copy, or from any part of\nit.  (Additional permissions may be written to require their own\nremoval in certain cases when you modify the work.)  
You may place\nadditional permissions on material, added by you to a covered work,\nfor which you have or can give appropriate copyright permission.\n\n  Notwithstanding any other provision of this License, for material you\nadd to a covered work, you may (if authorized by the copyright holders of\nthat material) supplement the terms of this License with terms:\n\n    a) Disclaiming warranty or limiting liability differently from the\n    terms of sections 15 and 16 of this License; or\n\n    b) Requiring preservation of specified reasonable legal notices or\n    author attributions in that material or in the Appropriate Legal\n    Notices displayed by works containing it; or\n\n    c) Prohibiting misrepresentation of the origin of that material, or\n    requiring that modified versions of such material be marked in\n    reasonable ways as different from the original version; or\n\n    d) Limiting the use for publicity purposes of names of licensors or\n    authors of the material; or\n\n    e) Declining to grant rights under trademark law for use of some\n    trade names, trademarks, or service marks; or\n\n    f) Requiring indemnification of licensors and authors of that\n    material by anyone who conveys the material (or modified versions of\n    it) with contractual assumptions of liability to the recipient, for\n    any liability that these contractual assumptions directly impose on\n    those licensors and authors.\n\n  All other non-permissive additional terms are considered \"further\nrestrictions\" within the meaning of section 10.  If the Program as you\nreceived it, or any part of it, contains a notice stating that it is\ngoverned by this License along with a term that is a further\nrestriction, you may remove that term.  
If a license document contains\na further restriction but permits relicensing or conveying under this\nLicense, you may add to a covered work material governed by the terms\nof that license document, provided that the further restriction does\nnot survive such relicensing or conveying.\n\n  If you add terms to a covered work in accord with this section, you\nmust place, in the relevant source files, a statement of the\nadditional terms that apply to those files, or a notice indicating\nwhere to find the applicable terms.\n\n  Additional terms, permissive or non-permissive, may be stated in the\nform of a separately written license, or stated as exceptions;\nthe above requirements apply either way.\n\n  8. Termination.\n\n  You may not propagate or modify a covered work except as expressly\nprovided under this License.  Any attempt otherwise to propagate or\nmodify it is void, and will automatically terminate your rights under\nthis License (including any patent licenses granted under the third\nparagraph of section 11).\n\n  However, if you cease all violation of this License, then your\nlicense from a particular copyright holder is reinstated (a)\nprovisionally, unless and until the copyright holder explicitly and\nfinally terminates your license, and (b) permanently, if the copyright\nholder fails to notify you of the violation by some reasonable means\nprior to 60 days after the cessation.\n\n  Moreover, your license from a particular copyright holder is\nreinstated permanently if the copyright holder notifies you of the\nviolation by some reasonable means, this is the first time you have\nreceived notice of violation of this License (for any work) from that\ncopyright holder, and you cure the violation prior to 30 days after\nyour receipt of the notice.\n\n  Termination of your rights under this section does not terminate the\nlicenses of parties who have received copies or rights from you under\nthis License.  
If your rights have been terminated and not permanently\nreinstated, you do not qualify to receive new licenses for the same\nmaterial under section 10.\n\n  9. Acceptance Not Required for Having Copies.\n\n  You are not required to accept this License in order to receive or\nrun a copy of the Program.  Ancillary propagation of a covered work\noccurring solely as a consequence of using peer-to-peer transmission\nto receive a copy likewise does not require acceptance.  However,\nnothing other than this License grants you permission to propagate or\nmodify any covered work.  These actions infringe copyright if you do\nnot accept this License.  Therefore, by modifying or propagating a\ncovered work, you indicate your acceptance of this License to do so.\n\n  10. Automatic Licensing of Downstream Recipients.\n\n  Each time you convey a covered work, the recipient automatically\nreceives a license from the original licensors, to run, modify and\npropagate that work, subject to this License.  You are not responsible\nfor enforcing compliance by third parties with this License.\n\n  An \"entity transaction\" is a transaction transferring control of an\norganization, or substantially all assets of one, or subdividing an\norganization, or merging organizations.  If propagation of a covered\nwork results from an entity transaction, each party to that\ntransaction who receives a copy of the work also receives whatever\nlicenses to the work the party's predecessor in interest had or could\ngive under the previous paragraph, plus a right to possession of the\nCorresponding Source of the work from the predecessor in interest, if\nthe predecessor has it or can get it with reasonable efforts.\n\n  You may not impose any further restrictions on the exercise of the\nrights granted or affirmed under this License.  
For example, you may\nnot impose a license fee, royalty, or other charge for exercise of\nrights granted under this License, and you may not initiate litigation\n(including a cross-claim or counterclaim in a lawsuit) alleging that\nany patent claim is infringed by making, using, selling, offering for\nsale, or importing the Program or any portion of it.\n\n  11. Patents.\n\n  A \"contributor\" is a copyright holder who authorizes use under this\nLicense of the Program or a work on which the Program is based.  The\nwork thus licensed is called the contributor's \"contributor version\".\n\n  A contributor's \"essential patent claims\" are all patent claims\nowned or controlled by the contributor, whether already acquired or\nhereafter acquired, that would be infringed by some manner, permitted\nby this License, of making, using, or selling its contributor version,\nbut do not include claims that would be infringed only as a\nconsequence of further modification of the contributor version.  For\npurposes of this definition, \"control\" includes the right to grant\npatent sublicenses in a manner consistent with the requirements of\nthis License.\n\n  Each contributor grants you a non-exclusive, worldwide, royalty-free\npatent license under the contributor's essential patent claims, to\nmake, use, sell, offer for sale, import and otherwise run, modify and\npropagate the contents of its contributor version.\n\n  In the following three paragraphs, a \"patent license\" is any express\nagreement or commitment, however denominated, not to enforce a patent\n(such as an express permission to practice a patent or covenant not to\nsue for patent infringement).  
To \"grant\" such a patent license to a\nparty means to make such an agreement or commitment not to enforce a\npatent against the party.\n\n  If you convey a covered work, knowingly relying on a patent license,\nand the Corresponding Source of the work is not available for anyone\nto copy, free of charge and under the terms of this License, through a\npublicly available network server or other readily accessible means,\nthen you must either (1) cause the Corresponding Source to be so\navailable, or (2) arrange to deprive yourself of the benefit of the\npatent license for this particular work, or (3) arrange, in a manner\nconsistent with the requirements of this License, to extend the patent\nlicense to downstream recipients.  \"Knowingly relying\" means you have\nactual knowledge that, but for the patent license, your conveying the\ncovered work in a country, or your recipient's use of the covered work\nin a country, would infringe one or more identifiable patents in that\ncountry that you have reason to believe are valid.\n\n  If, pursuant to or in connection with a single transaction or\narrangement, you convey, or propagate by procuring conveyance of, a\ncovered work, and grant a patent license to some of the parties\nreceiving the covered work authorizing them to use, propagate, modify\nor convey a specific copy of the covered work, then the patent license\nyou grant is automatically extended to all recipients of the covered\nwork and works based on it.\n\n  A patent license is \"discriminatory\" if it does not include within\nthe scope of its coverage, prohibits the exercise of, or is\nconditioned on the non-exercise of one or more of the rights that are\nspecifically granted under this License.  
You may not convey a covered\nwork if you are a party to an arrangement with a third party that is\nin the business of distributing software, under which you make payment\nto the third party based on the extent of your activity of conveying\nthe work, and under which the third party grants, to any of the\nparties who would receive the covered work from you, a discriminatory\npatent license (a) in connection with copies of the covered work\nconveyed by you (or copies made from those copies), or (b) primarily\nfor and in connection with specific products or compilations that\ncontain the covered work, unless you entered into that arrangement,\nor that patent license was granted, prior to 28 March 2007.\n\n  Nothing in this License shall be construed as excluding or limiting\nany implied license or other defenses to infringement that may\notherwise be available to you under applicable patent law.\n\n  12. No Surrender of Others' Freedom.\n\n  If conditions are imposed on you (whether by court order, agreement or\notherwise) that contradict the conditions of this License, they do not\nexcuse you from the conditions of this License.  If you cannot convey a\ncovered work so as to satisfy simultaneously your obligations under this\nLicense and any other pertinent obligations, then as a consequence you may\nnot convey it at all.  For example, if you agree to terms that obligate you\nto collect a royalty for further conveying from those to whom you convey\nthe Program, the only way you could satisfy both those terms and this\nLicense would be to refrain entirely from conveying the Program.\n\n  13. 
Remote Network Interaction; Use with the GNU General Public License.\n\n  Notwithstanding any other provision of this License, if you modify the\nProgram, your modified version must prominently offer all users\ninteracting with it remotely through a computer network (if your version\nsupports such interaction) an opportunity to receive the Corresponding\nSource of your version by providing access to the Corresponding Source\nfrom a network server at no charge, through some standard or customary\nmeans of facilitating copying of software.  This Corresponding Source\nshall include the Corresponding Source for any work covered by version 3\nof the GNU General Public License that is incorporated pursuant to the\nfollowing paragraph.\n\n  Notwithstanding any other provision of this License, you have\npermission to link or combine any covered work with a work licensed\nunder version 3 of the GNU General Public License into a single\ncombined work, and to convey the resulting work.  The terms of this\nLicense will continue to apply to the part which is the covered work,\nbut the work with which it is combined will remain governed by version\n3 of the GNU General Public License.\n\n  14. Revised Versions of this License.\n\n  The Free Software Foundation may publish revised and/or new versions of\nthe GNU Affero General Public License from time to time.  Such new versions\nwill be similar in spirit to the present version, but may differ in detail to\naddress new problems or concerns.\n\n  Each version is given a distinguishing version number.  If the\nProgram specifies that a certain numbered version of the GNU Affero General\nPublic License \"or any later version\" applies to it, you have the\noption of following the terms and conditions either of that numbered\nversion or of any later version published by the Free Software\nFoundation.  
If the Program does not specify a version number of the\nGNU Affero General Public License, you may choose any version ever published\nby the Free Software Foundation.\n\n  If the Program specifies that a proxy can decide which future\nversions of the GNU Affero General Public License can be used, that proxy's\npublic statement of acceptance of a version permanently authorizes you\nto choose that version for the Program.\n\n  Later license versions may give you additional or different\npermissions.  However, no additional obligations are imposed on any\nauthor or copyright holder as a result of your choosing to follow a\nlater version.\n\n  15. Disclaimer of Warranty.\n\n  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY\nAPPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT\nHOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY\nOF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,\nTHE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\nPURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM\nIS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\nALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n\n  16. Limitation of Liability.\n\n  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\nWILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\nTHE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\nGENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\nUSE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\nDATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\nPARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\nEVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\nSUCH DAMAGES.\n\n  17. 
Interpretation of Sections 15 and 16.\n\n  If the disclaimer of warranty and limitation of liability provided\nabove cannot be given local legal effect according to their terms,\nreviewing courts shall apply local law that most closely approximates\nan absolute waiver of all civil liability in connection with the\nProgram, unless a warranty or assumption of liability accompanies a\ncopy of the Program in return for a fee.\n\n                     END OF TERMS AND CONDITIONS\n\n            How to Apply These Terms to Your New Programs\n\n  If you develop a new program, and you want it to be of the greatest\npossible use to the public, the best way to achieve this is to make it\nfree software which everyone can redistribute and change under these terms.\n\n  To do so, attach the following notices to the program.  It is safest\nto attach them to the start of each source file to most effectively\nstate the exclusion of warranty; and each file should have at least\nthe \"copyright\" line and a pointer to where the full notice is found.\n\n    <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software: you can redistribute it and/or modify\n    it under the terms of the GNU Affero General Public License as published\n    by the Free Software Foundation, either version 3 of the License, or\n    (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU Affero General Public License for more details.\n\n    You should have received a copy of the GNU Affero General Public License\n    along with this program.  
If not, see <https://www.gnu.org/licenses/>.\n\nAlso add information on how to contact you by electronic and paper mail.\n\n  If your software can interact with users remotely through a computer\nnetwork, you should also make sure that it provides a way for users to\nget its source.  For example, if your program is a web application, its\ninterface could display a \"Source\" link that leads users to an archive\nof the code.  There are many ways you could offer source, and different\nsolutions will be better for different programs; see section 13 for the\nspecific requirements.\n\n  You should also get your employer (if you work as a programmer) or school,\nif any, to sign a \"copyright disclaimer\" for the program, if necessary.\nFor more information on this, and how to apply and follow the GNU AGPL, see\n<https://www.gnu.org/licenses/>.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "recursive-include pr_agent *.toml\nrecursive-exclude pr_agent *.secrets.toml\n"
  },
  {
    "path": "README.md",
    "content": "<a href=\"https://github.com/Codium-ai/pr-agent/commits/main\">\n<img alt=\"GitHub\" src=\"https://img.shields.io/github/last-commit/Codium-ai/pr-agent/main?style=for-the-badge\" height=\"20\">\n</a>\n\n<br />\n\n# 🚀 PR Agent - The Original Open-Source PR Reviewer.\n\n This repository contains the open-source PR Agent Project. \n It is not the Qodo free tier.\n \nTry the free version on our website.\n\n👉[Get Started Now](www.qodo.ai/get-started/)\n\nPR-Agent is an open-source, AI-powered code review agent and a community-maintained legacy project of Qodo. It is distinct from Qodo’s primary AI code review offering, which provides a feature-rich, context-aware experience. Qodo now offers a free tier that integrates seamlessly with GitHub, GitLab, Bitbucket, and Azure DevOps for high-quality automated reviews.\n\n## Table of Contents\n\n- [Getting Started](#getting-started)\n- [Why Use PR-Agent?](#why-use-pr-agent)\n- [Features](#features)\n- [See It in Action](#see-it-in-action)\n- [Try It Now](#try-it-now)\n- [How It Works](#how-it-works)\n- [Data Privacy](#data-privacy)\n- [Contributing](#contributing)\n\n## Getting Started\n\n### 🚀 Quick Start for PR-Agent\n\n#### 1. Try it Instantly (No Setup)\nTest PR-Agent on any public GitHub repository by commenting `@CodiumAI-Agent /improve`\n\n#### 2. GitHub Action (Recommended)\nAdd automated PR reviews to your repository with a simple workflow file:\n```yaml\n# .github/workflows/pr-agent.yml\nname: PR Agent\non:\n  pull_request:\n    types: [opened, synchronize]\njobs:\n  pr_agent_job:\n    runs-on: ubuntu-latest\n    steps:\n    - name: PR Agent action step\n      uses: Codium-ai/pr-agent@main\n      env:\n        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n```\n[Full GitHub Action setup guide](https://qodo-merge-docs.qodo.ai/installation/github/#run-as-a-github-action)\n\n#### 3. 
CLI Usage (Local Development)\nRun PR-Agent locally on your repository:\n```bash\npip install pr-agent\nexport OPENAI_KEY=your_key_here\npr-agent --pr_url https://github.com/owner/repo/pull/123 review\n```\n[Complete CLI setup guide](https://qodo-merge-docs.qodo.ai/usage-guide/automations_and_usage/#local-repo-cli)\n\n#### 4. Other Platforms\n- [GitLab webhook setup](https://qodo-merge-docs.qodo.ai/installation/gitlab/)\n- [Bitbucket app installation](https://qodo-merge-docs.qodo.ai/installation/bitbucket/)\n- [Azure DevOps setup](https://qodo-merge-docs.qodo.ai/installation/azure/)\n\n[//]: # (## News and Updates)\n\n[//]: # ()\n[//]: # (## Aug 8, 2025)\n\n[//]: # ()\n[//]: # ()\n[//]: # ()\n[//]: # (## Jul 17, 2025)\n\n[//]: # ()\n[//]: # (Introducing `/compliance`, a new Qodo Merge 💎 tool that runs comprehensive checks for security, ticket requirements, codebase duplication, and custom organizational rules. )\n\n[//]: # ()\n[//]: # (<img width=\"384\" alt=\"compliance-image\" src=\"https://codium.ai/images/pr_agent/compliance_partial.png\"/>)\n\n[//]: # ()\n[//]: # (Read more about it [here]&#40;https://qodo-merge-docs.qodo.ai/tools/compliance/&#41;)\n\n[//]: # ()\n[//]: # ()\n[//]: # (## Jul 1, 2025)\n\n[//]: # (You can now receive automatic feedback from Qodo Merge in your local IDE after each commit. 
Read more about it [here]&#40;https://github.com/qodo-ai/agents/tree/main/agents/qodo-merge-post-commit&#41;.)\n\n[//]: # ()\n[//]: # ()\n[//]: # (## Jun 21, 2025)\n\n[//]: # ()\n[//]: # (v0.30 was [released]&#40;https://github.com/qodo-ai/pr-agent/releases&#41;)\n\n[//]: # ()\n[//]: # ()\n[//]: # (## Jun 3, 2025)\n\n[//]: # ()\n[//]: # (Qodo Merge now offers a simplified free tier 💎.)\n\n[//]: # (Organizations can use Qodo Merge at no cost, with a [monthly limit]&#40;https://qodo-merge-docs.qodo.ai/installation/qodo_merge/#cloud-users&#41; of 75 PR reviews per organization.)\n\n[//]: # ()\n[//]: # ()\n[//]: # (## Apr 30, 2025)\n\n[//]: # ()\n[//]: # (A new feature is now available in the `/improve` tool for Qodo Merge 💎 - Chat on code suggestions.)\n\n[//]: # ()\n[//]: # (<img width=\"512\" alt=\"image\" src=\"https://codium.ai/images/pr_agent/improve_chat_on_code_suggestions_ask.png\" />)\n\n[//]: # ()\n[//]: # (Read more about it [here]&#40;https://qodo-merge-docs.qodo.ai/tools/improve/#chat-on-code-suggestions&#41;.)\n\n[//]: # ()\n[//]: # ()\n[//]: # (## Apr 16, 2025)\n\n[//]: # ()\n[//]: # (New tool for Qodo Merge 💎 - `/scan_repo_discussions`.)\n\n[//]: # ()\n[//]: # (<img width=\"635\" alt=\"image\" src=\"https://codium.ai/images/pr_agent/scan_repo_discussions_2.png\" />)\n\n[//]: # ()\n[//]: # (Read more about it [here]&#40;https://qodo-merge-docs.qodo.ai/tools/scan_repo_discussions/&#41;.)\n\n## Why Use PR-Agent?\n\n### 🎯 Built for Real Development Teams\n\n**Fast & Affordable**: Each tool (`/review`, `/improve`, `/ask`) uses a single LLM call (~30 seconds, low cost)\n\n**Handles Any PR Size**: Our [PR Compression strategy](https://qodo-merge-docs.qodo.ai/core-abilities/#pr-compression-strategy) effectively processes both small and large PRs\n\n**Highly Customizable**: JSON-based prompting allows easy customization of review categories and behavior via [configuration files](pr_agent/settings/configuration.toml)\n\n**Platform Agnostic**: \n- **Git 
Providers**: GitHub, GitLab, BitBucket, Azure DevOps, Gitea\n- **Deployment**: CLI, GitHub Actions, Docker, self-hosted, webhooks\n- **AI Models**: OpenAI GPT, Claude, Deepseek, and more\n\n**Open Source Benefits**:\n- Full control over your data and infrastructure\n- Customize prompts and behavior for your team's needs\n- No vendor lock-in\n- Community-driven development\n\n## Features\n\n<div style=\"text-align:left;\">\n\nPR-Agent offers comprehensive pull request functionalities integrated with various git providers:\n\n|                                                         |                                                                                        | GitHub | GitLab | Bitbucket | Azure DevOps | Gitea |\n|---------------------------------------------------------|----------------------------------------------------------------------------------------|:------:|:------:|:---------:|:------------:|:-----:|\n| [TOOLS](https://qodo-merge-docs.qodo.ai/tools/)         | [Describe](https://qodo-merge-docs.qodo.ai/tools/describe/)                            |   ✅   |   ✅   |    ✅     |      ✅      |  ✅   |\n|                                                         | [Review](https://qodo-merge-docs.qodo.ai/tools/review/)                                |   ✅   |   ✅   |    ✅     |      ✅      |  ✅   |\n|                                                         | [Improve](https://qodo-merge-docs.qodo.ai/tools/improve/)                              |   ✅   |   ✅   |    ✅     |      ✅      |  ✅   |\n|                                                         | [Ask](https://qodo-merge-docs.qodo.ai/tools/ask/)                                      |   ✅   |   ✅   |    ✅     |      ✅      |       |\n|                                                         | ⮑ [Ask on code lines](https://qodo-merge-docs.qodo.ai/tools/ask/#ask-lines)            |   ✅   |   ✅   |           |              |       |\n|                                                         | [Help 
Docs](https://qodo-merge-docs.qodo.ai/tools/help_docs/?h=auto#auto-approval)     |   ✅   |   ✅   |    ✅     |              |       |\n|                                                         | [Update CHANGELOG](https://qodo-merge-docs.qodo.ai/tools/update_changelog/)            |   ✅   |   ✅   |    ✅     |      ✅      |       |\n|                                                         |                                                                                                                     |        |        |           |              |       |\n| [USAGE](https://qodo-merge-docs.qodo.ai/usage-guide/)   | [CLI](https://qodo-merge-docs.qodo.ai/usage-guide/automations_and_usage/#local-repo-cli)                            |   ✅   |   ✅   |    ✅     |      ✅      |  ✅   |\n|                                                         | [App / webhook](https://qodo-merge-docs.qodo.ai/usage-guide/automations_and_usage/#github-app)                      |   ✅   |   ✅   |    ✅     |      ✅      |  ✅   |\n|                                                         | [Tagging bot](https://github.com/Codium-ai/pr-agent#try-it-now)                                                     |   ✅   |        |           |              |       |\n|                                                         | [Actions](https://qodo-merge-docs.qodo.ai/installation/github/#run-as-a-github-action)                              |   ✅   |   ✅   |    ✅     |      ✅      |       |\n|                                                         |                                                                                                                     |        |        |           |              |       |\n| [CORE](https://qodo-merge-docs.qodo.ai/core-abilities/) | [Adaptive and token-aware file patch fitting](https://qodo-merge-docs.qodo.ai/core-abilities/compression_strategy/) |   ✅   |   ✅   |    ✅     |      ✅      |       |\n|                                                         | 
[Chat on code suggestions](https://qodo-merge-docs.qodo.ai/core-abilities/chat_on_code_suggestions/)                |   ✅   |  ✅   |           |              |       |\n|                                                         | [Dynamic context](https://qodo-merge-docs.qodo.ai/core-abilities/dynamic_context/)                                  |   ✅   |   ✅   |    ✅     |      ✅      |       |\n|                                                         | [Fetching ticket context](https://qodo-merge-docs.qodo.ai/core-abilities/fetching_ticket_context/)                  |   ✅    |  ✅    |     ✅     |              |       |\n|                                                         | [Incremental Update](https://qodo-merge-docs.qodo.ai/core-abilities/incremental_update/)                            |   ✅    |       |           |              |       |\n|                                                         | [Interactivity](https://qodo-merge-docs.qodo.ai/core-abilities/interactivity/)                                      |   ✅   |  ✅   |           |              |       |\n|                                                         | [Local and global metadata](https://qodo-merge-docs.qodo.ai/core-abilities/metadata/)                               |   ✅   |   ✅   |    ✅     |      ✅      |       |\n|                                                         | [Multiple models support](https://qodo-merge-docs.qodo.ai/usage-guide/changing_a_model/)                            |   ✅   |   ✅   |    ✅     |      ✅      |       |\n|                                                         | [PR compression](https://qodo-merge-docs.qodo.ai/core-abilities/compression_strategy/)                              |   ✅   |   ✅   |    ✅     |      ✅      |       |\n|                                                         | [RAG context enrichment](https://qodo-merge-docs.qodo.ai/core-abilities/rag_context_enrichment/)                    |   ✅    |       |    ✅     |              |       
|\n|                                                         | [Self reflection](https://qodo-merge-docs.qodo.ai/core-abilities/self_reflection/)                                  |   ✅   |   ✅   |    ✅     |      ✅      |       |\n\n[//]: # (- Support for additional git providers is described in [here]&#40;./docs/Full_environments.md&#41;)\n___\n\n## See It in Action\n\n</div>\n<h4><a href=\"https://github.com/Codium-ai/pr-agent/pull/530\">/describe</a></h4>\n<div align=\"center\">\n<p float=\"center\">\n<img src=\"https://www.codium.ai/images/pr_agent/describe_new_short_main.png\" width=\"512\">\n</p>\n</div>\n<hr>\n\n<h4><a href=\"https://github.com/Codium-ai/pr-agent/pull/732#issuecomment-1975099151\">/review</a></h4>\n<div align=\"center\">\n<p float=\"center\">\n<kbd>\n<img src=\"https://www.codium.ai/images/pr_agent/review_new_short_main.png\" width=\"512\">\n</kbd>\n</p>\n</div>\n<hr>\n\n<h4><a href=\"https://github.com/Codium-ai/pr-agent/pull/732#issuecomment-1975099159\">/improve</a></h4>\n<div align=\"center\">\n<p float=\"center\">\n<kbd>\n<img src=\"https://www.codium.ai/images/pr_agent/improve_new_short_main.png\" width=\"512\">\n</kbd>\n</p>\n</div>\n\n<div align=\"left\">\n\n</div>\n<hr>\n\n## Try It Now\n\nTry the GPT-5 powered PR-Agent instantly on _your public GitHub repository_. Just mention `@CodiumAI-Agent` and add the desired command in any PR comment. The agent will generate a response based on your command.\nFor example, add a comment to any pull request with the following text:\n\n```\n@CodiumAI-Agent /review\n```\n\nand the agent will respond with a review of your PR.\n\nNote that this is a promotional bot, suitable only for initial experimentation.\nIt does not have 'edit' access to your repo, for example, so it cannot update the PR description or add labels (`@CodiumAI-Agent /describe` will publish PR description as a comment). 
In addition, the bot cannot be used on private repositories, as it does not have access to the files there.\n\n\n## How It Works\n\nThe following diagram illustrates PR-Agent tools and their flow:\n\n![PR-Agent Tools](https://www.qodo.ai/images/pr_agent/diagram-v0.9.png)\n\n## Data Privacy\n\n### Self-hosted PR-Agent\n\n- If you host PR-Agent with your OpenAI API key, it is between you and OpenAI. You can read their API data privacy policy here:\nhttps://openai.com/enterprise-privacy\n\n## Contributing\n\nTo contribute to the project, get started by reading our [Contributing Guide](https://github.com/qodo-ai/pr-agent/blob/b09eec265ef7d36c232063f76553efb6b53979ff/CONTRIBUTING.md).\n\n\n## ❤️ Community\n\nThis open-source release remains here as a community contribution from Qodo — the origin of modern AI-powered code collaboration. We’re proud to share it and inspire developers worldwide.\n\nThe project now has its first external maintainer, Naor ([@naorpeled](https://github.com/naorpeled)), and is currently in the process of being donated to an open-source foundation.\n"
  },
  {
    "path": "RELEASE_NOTES.md",
    "content": "## [Version 0.11] - 2023-12-07\n\n- codiumai/pr-agent:0.11\n- codiumai/pr-agent:0.11-github_app\n- codiumai/pr-agent:0.11-bitbucket-app\n- codiumai/pr-agent:0.11-gitlab_webhook\n- codiumai/pr-agent:0.11-github_polling\n- codiumai/pr-agent:0.11-github_action\n\n### Added::Algo\n\n- New section in `/describe` tool - [PR changes walkthrough](https://github.com/Codium-ai/pr-agent/pull/509)\n- Improving PR Agent [prompts](https://github.com/Codium-ai/pr-agent/pull/501)\n- Persistent tools (`/review`, `/describe`) now send an [update message](https://github.com/Codium-ai/pr-agent/pull/499) after finishing\n- Add Amazon Bedrock [support](https://github.com/Codium-ai/pr-agent/pull/483)\n\n### Fixed\n\n- Update [dependencies](https://github.com/Codium-ai/pr-agent/pull/503) in requirements.txt for Python 3.12\n\n## [Version 0.10] - 2023-11-15\n\n- codiumai/pr-agent:0.10\n- codiumai/pr-agent:0.10-github_app\n- codiumai/pr-agent:0.10-bitbucket-app\n- codiumai/pr-agent:0.10-gitlab_webhook\n- codiumai/pr-agent:0.10-github_polling\n- codiumai/pr-agent:0.10-github_action\n\n### Added::Algo\n\n- Review tool now works with [persistent comments](https://github.com/Codium-ai/pr-agent/pull/451) by default\n- Bitbucket now publishes review suggestions with [code links](https://github.com/Codium-ai/pr-agent/pull/428)\n- Enabling to limit [max number of tokens](https://github.com/Codium-ai/pr-agent/pull/437/files)\n- Support ['gpt-4-1106-preview'](https://github.com/Codium-ai/pr-agent/pull/437/files) model\n- Support for Google's [Vertex AI](https://github.com/Codium-ai/pr-agent/pull/436)\n- Implementing [thresholds](https://github.com/Codium-ai/pr-agent/pull/423) for incremental PR reviews\n- Decoupled custom labels from [PR type](https://github.com/Codium-ai/pr-agent/pull/431)\n\n### Fixed\n\n- Fixed bug in [parsing quotes](https://github.com/Codium-ai/pr-agent/pull/446) in CLI\n- Preserve [user-added labels](https://github.com/Codium-ai/pr-agent/pull/433) in pull 
requests\n- Bug fixes in GitLab and BitBucket\n\n## [Version 0.9] - 2023-10-29\n\n- codiumai/pr-agent:0.9\n- codiumai/pr-agent:0.9-github_app\n- codiumai/pr-agent:0.9-bitbucket-app\n- codiumai/pr-agent:0.9-gitlab_webhook\n- codiumai/pr-agent:0.9-github_polling\n- codiumai/pr-agent:0.9-github_action\n\n### Added::Algo\n\n- New tool - [generate_labels](https://github.com/Codium-ai/pr-agent/blob/main/docs/GENERATE_CUSTOM_LABELS.md)\n- New ability to use [customize labels](https://github.com/Codium-ai/pr-agent/blob/main/docs/GENERATE_CUSTOM_LABELS.md#how-to-enable-custom-labels) on the `review` and `describe` tools.\n- New tool - [add_docs](https://github.com/Codium-ai/pr-agent/blob/main/docs/ADD_DOCUMENTATION.md)\n- GitHub Action: Can now use a `.pr_agent.toml` file to control configuration parameters (see [Usage Guide](./Usage.md#working-with-github-action)).\n- GitHub App: Added ability to trigger tools on [push events](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#github-app-automatic-tools-for-new-code-pr-push)\n- Support custom domain URLs for Azure devops integration (see [link](https://github.com/Codium-ai/pr-agent/pull/381)).\n- PR Description default mode is now in [bullet points](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L35).\n\n### Added::Documentation\n\nSignificant documentation updates (see [Installation Guide](https://github.com/Codium-ai/pr-agent/blob/main/INSTALL.md), [Usage Guide](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md), and [Tools Guide](https://github.com/Codium-ai/pr-agent/blob/main/docs/TOOLS_GUIDE.md))\n\n### Fixed\n\n- Fixed support for BitBucket pipeline (see [link](https://github.com/Codium-ai/pr-agent/pull/386))\n- Fixed a bug in `review -i` tool\n- Added blacklist for specific file extensions in `add_docs` tool (see [link](https://github.com/Codium-ai/pr-agent/pull/385/))\n\n## [Version 0.8] - 2023-09-27\n\n- codiumai/pr-agent:0.8\n- codiumai/pr-agent:0.8-github_app\n- 
codiumai/pr-agent:0.8-bitbucket-app\n- codiumai/pr-agent:0.8-gitlab_webhook\n- codiumai/pr-agent:0.8-github_polling\n- codiumai/pr-agent:0.8-github_action\n\n### Added::Algo\n\n- GitHub Action: Can control which tools will run automatically when a new PR is created. (see usage guide: https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#working-with-github-action)\n- Code suggestion tool: Will try to avoid an 'add comments' suggestion  (see https://github.com/Codium-ai/pr-agent/pull/327)\n\n### Fixed\n\n- Gitlab: Fixed a bug of improper usage of pr_id\n\n## [Version 0.7] - 2023-09-20\n\n### Docker Tags\n\n- codiumai/pr-agent:0.7\n- codiumai/pr-agent:0.7-github_app\n- codiumai/pr-agent:0.7-bitbucket-app\n- codiumai/pr-agent:0.7-gitlab_webhook\n- codiumai/pr-agent:0.7-github_polling\n- codiumai/pr-agent:0.7-github_action\n\n### Added::Algo\n\n- New tool /similar_issue - Currently on GitHub app and CLI: indexes the issues in the repo and finds the most similar issues to the target issue.\n- Describe markers: Empower the /describe tool with a templating capability (see more details in https://github.com/Codium-ai/pr-agent/pull/273).\n- New feature in the /review tool - added an effort estimation to the review (https://github.com/Codium-ai/pr-agent/pull/306).\n\n### Added::Infrastructure\n\n- Implementation of a GitLab webhook.\n- Implementation of a BitBucket app.\n\n### Fixed\n\n- Protection against no code suggestions generated.\n- Resilience to repositories where the languages cannot be automatically detected.\n"
  },
  {
    "path": "SECURITY.md",
    "content": "# Security Policy\n\nPR-Agent is an open-source tool to help efficiently review and handle pull requests. Qodo Merge is a paid version of PR-Agent, designed for companies and teams that require additional features and capabilities.\n\nThis document describes the security policy of PR-Agent. For Qodo Merge's security policy, see [here](https://qodo-merge-docs.qodo.ai/overview/data_privacy/#qodo-merge).\n\n## PR-Agent Self-Hosted Solutions\n\nWhen using PR-Agent with your OpenAI (or other LLM provider) API key, the security relationship is directly between you and the provider. We do not send your code to Qodo servers.\n\nTypes of [self-hosted solutions](https://qodo-merge-docs.qodo.ai/installation):\n\n- Locally\n- GitHub integration\n- GitLab integration\n- BitBucket integration\n- Azure DevOps integration\n\n## PR-Agent Supported Versions\n\nThis section outlines which versions of PR-Agent are currently supported with security updates.\n\n### Docker Deployment Options\n\n#### Latest Version\n\nFor the most recent updates, use our latest Docker image which is automatically built nightly:\n\n```yaml\nuses: qodo-ai/pr-agent@main\n```\n\n#### Specific Release Version\n\nFor a fixed version, you can pin your action to a specific release version. Browse available releases at:\n[PR-Agent Releases](https://github.com/qodo-ai/pr-agent/releases)\n\nFor example, in a GitHub Action workflow:\n\n```yaml\nsteps:\n  - name: PR Agent action step\n    id: pragent\n    uses: docker://codiumai/pr-agent:0.26-github_action\n```\n\n#### Enhanced Security with Docker Digest\n\nFor maximum security, you can specify the Docker image using its digest:\n\n```yaml\nsteps:\n  - name: PR Agent action step\n    id: pragent\n    uses: docker://codiumai/pr-agent@sha256:14165e525678ace7d9b51cda8652c2d74abb4e1d76b57c4a6ccaeba84663cc64\n```\n\n## Reporting a Vulnerability\n\nWe take the security of PR-Agent seriously. 
If you discover a security vulnerability, please report it immediately to:\n\nEmail: security@qodo.ai\n\nPlease include a description of the vulnerability, steps to reproduce, and the affected PR-Agent version.\n"
  },
  {
    "path": "action.yaml",
    "content": "name: 'Codium PR Agent'\ndescription: 'Summarize, review and suggest improvements for pull requests'\nbranding:\n  icon: 'award'\n  color: 'green'\nruns:\n  using: 'docker'\n  image: 'Dockerfile.github_action_dockerhub'\n"
  },
  {
    "path": "codecov.yml",
    "content": "comment: false\ncoverage:\n  status:\n    patch: false\n    project: false\n"
  },
  {
    "path": "docker/Dockerfile",
    "content": "FROM python:3.12.10-slim AS base\n\nRUN apt update && apt install --no-install-recommends -y git curl && apt-get clean && rm -rf /var/lib/apt/lists/*\n\nWORKDIR /app\nADD pyproject.toml .\nADD requirements.txt .\nADD docs docs\nRUN pip install --no-cache-dir . && rm pyproject.toml requirements.txt\nENV PYTHONPATH=/app\n\nFROM base AS github_app\nADD pr_agent pr_agent\nCMD [\"python\", \"-m\", \"gunicorn\", \"-k\", \"uvicorn.workers.UvicornWorker\", \"-c\", \"pr_agent/servers/gunicorn_config.py\", \"--forwarded-allow-ips\", \"*\", \"pr_agent.servers.github_app:app\"]\n\nFROM base AS bitbucket_app\nADD pr_agent pr_agent\nCMD [\"python\", \"pr_agent/servers/bitbucket_app.py\"]\n\nFROM base AS bitbucket_server_webhook\nADD pr_agent pr_agent\nCMD [\"python\", \"pr_agent/servers/bitbucket_server_webhook.py\"]\n\nFROM base AS github_polling\nADD pr_agent pr_agent\nCMD [\"python\", \"pr_agent/servers/github_polling.py\"]\n\nFROM base AS gitlab_webhook\nADD pr_agent pr_agent\nCMD [\"python\", \"pr_agent/servers/gitlab_webhook.py\"]\n\nFROM base AS azure_devops_webhook\nADD pr_agent pr_agent\nCMD [\"python\", \"pr_agent/servers/azuredevops_server_webhook.py\"]\n\nFROM base AS gitea_app\nADD pr_agent pr_agent\nCMD [\"python\", \"-m\", \"gunicorn\", \"-k\", \"uvicorn.workers.UvicornWorker\", \"-c\", \"pr_agent/servers/gunicorn_config.py\",\"pr_agent.servers.gitea_app:app\"]\n\n\nFROM base AS test\nADD requirements-dev.txt .\nRUN pip install --no-cache-dir -r requirements-dev.txt && rm requirements-dev.txt\nADD pr_agent pr_agent\nADD tests tests\n\nFROM base AS cli\nADD pr_agent pr_agent\nENTRYPOINT [\"python\", \"pr_agent/cli.py\"]\n"
  },
  {
    "path": "docker/Dockerfile.lambda",
    "content": "FROM public.ecr.aws/lambda/python:3.12 AS base\n\nRUN dnf update -y && \\\n    dnf install -y gcc python3-devel git && \\\n    dnf clean all\n\nADD pyproject.toml requirements.txt ./\nRUN pip install --no-cache-dir . && rm pyproject.toml\nRUN pip install --no-cache-dir mangum==0.17.0\nCOPY pr_agent/ ${LAMBDA_TASK_ROOT}/pr_agent/\n\nFROM base AS github_lambda\nCMD [\"pr_agent.servers.github_lambda_webhook.lambda_handler\"]\n\nFROM base AS gitlab_lambda\nCMD [\"pr_agent.servers.gitlab_lambda_webhook.lambda_handler\"]\n\nFROM github_lambda\n"
  },
  {
    "path": "docs/README.md",
    "content": "# [Visit Our Docs Portal](https://qodo-merge-docs.qodo.ai/)\n"
  },
  {
    "path": "docs/docs/.gitbook.yaml",
    "content": "root: ./\n\nstructure:\n  readme: ../README.md\n  summary: ./summary.md\n"
  },
  {
    "path": "docs/docs/CNAME",
    "content": "qodo-merge-docs.qodo.ai\n"
  },
  {
    "path": "docs/docs/core-abilities/compression_strategy.md",
    "content": "\n`Supported Git Platforms: GitHub, GitLab, Bitbucket`\n\n\n## Overview\n\nThere are two scenarios:\n\n1. The PR is small enough to fit in a single prompt (including system and user prompt)\n2. The PR is too large to fit in a single prompt (including system and user prompt)\n\nFor both scenarios, we first use the following strategy\n\n#### Repo language prioritization strategy\n\nWe prioritize the languages of the repo based on the following criteria:\n\n1. Exclude binary files and non code files (e.g. images, pdfs, etc)\n2. Given the main languages used in the repo\n3. We sort the PR files by the most common languages in the repo (in descending order):\n   * ```[[file.py, file2.py],[file3.js, file4.jsx],[readme.md]]```\n\n### Small PR\n\nIn this case, we can fit the entire PR in a single prompt:\n\n1. Exclude binary files and non code files (e.g. images, pdfs, etc)\n2. We expand the surrounding context of each patch to 3 lines above and below the patch\n\n### Large PR\n\n#### Motivation\n\nPull Requests can be very long and contain a lot of information with varying degrees of relevance to the pr-agent.\nWe want to be able to pack as much information as possible in a single LLM prompt, while keeping the information relevant to the pr-agent.\n\n#### Compression strategy\n\nWe prioritize additions over deletions:\n\n* Combine all deleted files into a single list (`deleted files`)\n* File patches are a list of hunks, remove all hunks of type deletion-only from the hunks in the file patch\n\n#### Adaptive and token-aware file patch fitting\n\nWe use [tiktoken](https://github.com/openai/tiktoken) to tokenize the patches after the modifications described above, and we use the following strategy to fit the patches into the prompt:\n\n1. Within each language we sort the files by the number of tokens in the file (in descending order):\n    * ```[[file2.py, file.py],[file4.jsx, file3.js],[readme.md]]```\n2. 
Iterate through the patches in the order described above\n3. Add the patches to the prompt until the prompt reaches a certain buffer from the max token length\n4. If there are still patches left, add the remaining patches as a list called `other modified files` to the prompt until the prompt reaches the max token length (hard stop), skip the rest of the patches.\n5. If we haven't reached the max token length, add the `deleted files` to the prompt until the prompt reaches the max token length (hard stop), skip the rest of the patches.\n\n#### Example\n\n![Core Abilities](https://codium.ai/images/git_patch_logic.png){width=768}\n"
  },
  {
    "path": "docs/docs/core-abilities/dynamic_context.md",
    "content": "\n`Supported Git Platforms: GitHub, GitLab, Bitbucket`\n\nPR-Agent uses an **asymmetric and dynamic context strategy** to improve AI analysis of code changes in pull requests.\nIt provides more context before changes than after, and dynamically adjusts the context based on code structure (e.g., enclosing functions or classes).\nThis approach balances providing sufficient context for accurate analysis, while avoiding needle-in-the-haystack information overload that could degrade AI performance or exceed token limits.\n\n## Introduction\n\nPull request code changes are retrieved in a unified diff format, showing three lines of context before and after each modified section, with additions marked by '+' and deletions by '-'.\n\n```diff\n@@ -12,5 +12,5 @@ def func1():\n code line that already existed in the file...\n code line that already existed in the file...\n code line that already existed in the file....\n-code line that was removed in the PR\n+new code line added in the PR\n code line that already existed in the file...\n code line that already existed in the file...\n code line that already existed in the file...\n\n@@ -26,2 +26,4 @@ def func2():\n...\n```\n\nThis unified diff format can be challenging for AI models to interpret accurately, as it provides limited context for understanding the full scope of code changes.\nThe presentation of code using '+', '-', and ' ' symbols to indicate additions, deletions, and unchanged lines respectively also differs from the standard code formatting typically used to train AI models.\n\n## Challenges of expanding the context window\n\nWhile expanding the context window is technically feasible, it presents a more fundamental trade-off:\n\nPros:\n\n- Enhanced context allows the model to better comprehend and localize the code changes, (potentially) resulting in more precise analysis and suggestions. 
Without enough context, the model may struggle to understand the code changes and provide relevant feedback.\n\nCons:\n\n- Excessive context may overwhelm the model with extraneous information, creating a \"needle in a haystack\" scenario where focusing on the relevant details (the code that actually changed) becomes challenging.\nLLM quality is known to degrade when the context gets larger.\nPull requests often encompass multiple changes across many files, potentially spanning hundreds of lines of modified code. This complexity presents a genuine risk of overwhelming the model with excessive context.\n\n- Increased context expands the token count, increasing processing time and cost, and may prevent the model from processing the entire pull request in a single pass.\n\n## Asymmetric and dynamic context\n\nTo address these challenges, PR-Agent employs an **asymmetric** and **dynamic** context strategy, providing the model with more focused and relevant context information for each code change.\n\n**Asymmetric:**\n\nWe start by recognizing that the context preceding a code change is typically more crucial for understanding the modification than the context following it.\nConsequently, PR-Agent implements an asymmetric context policy, decoupling the context window into two distinct segments: one for the code before the change and another for the code after.\n\nBy independently adjusting each context window, PR-Agent can supply the model with a more tailored and pertinent context for individual code changes.\n\n**Dynamic:**\n\nWe also employ a \"dynamic\" context strategy.\nWe start by recognizing that the optimal context for a code change often corresponds to its enclosing code component (e.g., function, class), rather than a fixed number of lines.\nConsequently, we dynamically adjust the context window based on the code's structure, ensuring the model receives the most pertinent information for each modification.\n\nTo prevent overwhelming the model with excessive 
context, we impose a limit on the number of lines searched when identifying the enclosing component.\nThis balance allows for comprehensive understanding while maintaining efficiency and limiting context token usage.\n\n## Appendix - relevant configuration options\n\n```toml\n[config]\npatch_extension_skip_types =[\".md\",\".txt\"]  # Skip files with these extensions when trying to extend the context\nallow_dynamic_context=true                  # Allow dynamic context extension\nmax_extra_lines_before_dynamic_context = 8  # will try to include up to X extra lines before the hunk in the patch, until we reach an enclosing function or class\npatch_extra_lines_before = 3                # Number of extra lines (+3 default ones) to include before each hunk in the patch\npatch_extra_lines_after = 1                 # Number of extra lines (+3 default ones) to include after each hunk in the patch\n```\n"
  },
  {
    "path": "docs/docs/core-abilities/fetching_ticket_context.md",
    "content": "# Fetching Ticket Context for PRs\n\n`Supported Git Platforms: GitHub, GitLab, Bitbucket`\n\n!!! note \"Branch-name issue linking: GitHub only (for now)\"\n    Extracting issue links from the **branch name** (and the optional `branch_issue_regex` setting) is currently implemented for **GitHub only**. Support for GitLab, Bitbucket, and other platforms is planned for a later release. The GitHub flow was the most relevant to implement first; other providers will follow.\n\n## Overview\n\nPR-Agent streamlines code review workflows by seamlessly connecting with multiple ticket management systems.\nThis integration enriches the review process by automatically surfacing relevant ticket information and context alongside code changes.\n\n**Ticket systems supported**:\n\n- [GitHub/Gitlab Issues](#githubgitlab-issues-integration)\n- [Jira](#jira-integration)\n\n**Ticket data fetched:**\n\n1. Ticket Title\n2. Ticket Description\n3. Custom Fields (Acceptance criteria)\n4. Subtasks (linked tasks)\n5. Labels\n6. 
Attached Images/Screenshots\n\n## Affected Tools\n\nTicket Recognition Requirements:\n\n- The PR description should contain a link to the ticket, or the branch name should start with the ticket id / number.\n- For Jira tickets, you should follow the instructions in [Jira Integration](#jira-integration) in order to authenticate with Jira.\n\n### Describe tool\n\nPR-Agent will recognize the ticket and use the ticket content (title, description, labels) to provide additional context for the code changes.\nBy understanding the reasoning and intent behind modifications, the LLM can offer more insightful and relevant code analysis.\n\n### Review tool\n\nSimilarly to the `describe` tool, the `review` tool will use the ticket content to provide additional context for the code changes.\n\nIn addition, this feature will evaluate how well a Pull Request (PR) adheres to its original purpose/intent as defined by the associated ticket or issue mentioned in the PR description.\nEach ticket will be assigned a label (Compliance/Alignment level) indicating the degree to which the PR fulfills its original purpose:\n\n- Fully Compliant\n- Partially Compliant\n- Not Compliant\n- PR Code Verified\n\n![Ticket Compliance](https://www.qodo.ai/images/pr_agent/ticket_compliance_review.png){width=768}\n\nA `PR Code Verified` label indicates the PR code meets ticket requirements, but requires additional manual testing beyond the code scope. 
For example - validating UI display across different environments (Mac, Windows, mobile, etc.).\n\n\n#### Configuration options\n\n-\n\n    By default, the `review` tool will automatically validate if the PR complies with the referenced ticket.\n    If you want to disable this feedback, add the following line to your configuration file:\n\n    ```toml\n    [pr_reviewer]\n    require_ticket_analysis_review=false\n    ```\n\n-\n\n    If you set:\n    ```toml\n    [pr_reviewer]\n    check_pr_additional_content=true\n    ```\n    (default: `false`)\n\n    the `review` tool will also validate that the PR code doesn't contain any additional content that is not related to the ticket. If it does, the PR will be labeled at best as `PR Code Verified`, and the `review` tool will provide a comment with the additional unrelated content found in the PR code.\n\n## GitHub/Gitlab Issues Integration\n\nPR-Agent will automatically recognize GitHub/Gitlab issues mentioned in the PR description and fetch the issue content.\nExamples of valid GitHub/Gitlab issue references:\n\n- `https://github.com/<ORG_NAME>/<REPO_NAME>/issues/<ISSUE_NUMBER>` or `https://gitlab.com/<ORG_NAME>/<REPO_NAME>/-/issues/<ISSUE_NUMBER>`\n- `#<ISSUE_NUMBER>`\n- `<ORG_NAME>/<REPO_NAME>#<ISSUE_NUMBER>`\n\nBranch names can also be used to link issues, for example:\n- `123-fix-bug` (where `123` is the issue number)\n\nThis branch-name detection applies **only when the git provider is GitHub**. Support for other platforms is planned for later.\n\nSince PR-Agent is integrated with GitHub, it doesn't require any additional configuration to fetch GitHub issues.\n\n## Jira Integration\n\nWe support both Jira Cloud and Jira Server/Data Center.\n\n### Jira Cloud\n\n#### Email/Token Authentication\n\nYou can create an API token from your Atlassian account:\n\n1. Log in to https://id.atlassian.com/manage-profile/security/api-tokens.\n\n2. Click Create API token.\n\n3. 
From the dialog that appears, enter a name for your new token and click Create.\n\n4. Click Copy to clipboard.\n\n![Jira Cloud API Token](https://images.ctfassets.net/zsv3d0ugroxu/1RYvh9lqgeZjjNe5S3Hbfb/155e846a1cb38f30bf17512b6dfd2229/screenshot_NewAPIToken){width=384}\n\n5. In your [configuration file](../usage-guide/configuration_options.md) add the following lines:\n\n```toml\n[jira]\njira_api_token = \"YOUR_API_TOKEN\"\njira_api_email = \"YOUR_EMAIL\"\n```\n\n### Jira Data Center/Server\n\n#### Using Basic Authentication for Jira Data Center/Server\n\nYou can use your Jira username and password to authenticate with Jira Data Center/Server.\n\nIn your Configuration file/Environment variables/Secrets file, add the following lines:\n\n```toml\njira_api_email = \"your_username\"\njira_api_token = \"your_password\"\n```\n\n(Note that indeed the 'jira_api_email' field is used for the username, and the 'jira_api_token' field is used for the user password.)\n\n##### Validating Basic authentication via Python script\n\nIf you are facing issues retrieving tickets in PR-Agent with Basic auth, you can validate the flow using a Python script.\nThe following steps will help you check if the basic auth is working correctly, and if you can access the Jira ticket details:\n\n1. run `pip install jira==3.8.0`\n\n2. run the following Python script (after replacing the placeholders with your actual values):\n\n???- example \"Script to validate basic auth\"\n\n    ```python\n    from jira import JIRA\n    \n    \n    if __name__ == \"__main__\":\n        try:\n            # Jira server URL\n            server = \"https://...\"\n            # Basic auth\n            username = \"...\"\n            password = \"...\"\n            # Jira ticket code (e.g. 
\"PROJ-123\")\n            ticket_id = \"...\"\n    \n            print(\"Initializing JiraServerTicketProvider with JIRA server\")\n            # Initialize JIRA client\n            jira = JIRA(\n                server=server,\n                basic_auth=(username, password),\n                timeout=30\n            )\n            if jira:\n                print(f\"JIRA client initialized successfully\")\n            else:\n                print(\"Error initializing JIRA client\")\n    \n            # Fetch ticket details\n            ticket = jira.issue(ticket_id)\n            print(f\"Ticket title: {ticket.fields.summary}\")\n    \n        except Exception as e:\n            print(f\"Error fetching JIRA ticket details: {e}\")\n    ```\n\n#### Using a Personal Access Token (PAT) for Jira Data Center/Server\n\n1. Create a [Personal Access Token (PAT)](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html) in your Jira account\n2. In your Configuration file/Environment variables/Secrets file, add the following lines:\n\n```toml\n[jira]\njira_base_url = \"YOUR_JIRA_BASE_URL\" # e.g. https://jira.example.com\njira_api_token = \"YOUR_API_TOKEN\"\n```\n\n##### Validating PAT token via Python script\n\nIf you are facing issues retrieving tickets in PR-Agent with PAT token, you can validate the flow using a Python script.\nThe following steps will help you check if the token is working correctly, and if you can access the Jira ticket details:\n\n1. run `pip install jira==3.8.0`\n\n2. run the following Python script (after replacing the placeholders with your actual values):\n\n???- example \"Script to validate PAT token\"\n\n    ```python\n    from jira import JIRA\n    \n    \n    if __name__ == \"__main__\":\n        try:\n            # Jira server URL\n            server = \"https://...\"\n            # Jira PAT token\n            token_auth = \"...\"\n            # Jira ticket code (e.g. 
\"PROJ-123\")\n            ticket_id = \"...\"\n    \n            print(\"Initializing JiraServerTicketProvider with JIRA server\")\n            # Initialize JIRA client\n            jira = JIRA(\n                server=server,\n                token_auth=token_auth,\n                timeout=30\n            )\n            if jira:\n                print(f\"JIRA client initialized successfully\")\n            else:\n                print(\"Error initializing JIRA client\")\n    \n            # Fetch ticket details\n            ticket = jira.issue(ticket_id)\n            print(f\"Ticket title: {ticket.fields.summary}\")\n    \n        except Exception as e:\n            print(f\"Error fetching JIRA ticket details: {e}\")\n    ```\n\n\n### Multi-JIRA Server Configuration\n\nPR-Agent supports connecting to multiple JIRA servers using different authentication methods.\n\n=== \"Email/Token (Basic Auth)\"\n\n    Configure multiple servers using Email/Token authentication:\n\n    - `jira_servers`: List of JIRA server URLs\n    - `jira_api_token`: List of API tokens (for Cloud) or passwords (for Data Center)\n    - `jira_api_email`: List of emails (for Cloud) or usernames (for Data Center)\n    - `jira_base_url`: Default server for ticket IDs like `PROJ-123`, Each repository can configure (local config file) its own `jira_base_url` to choose which server to use by default.\n\n    **Example Configuration:**\n    ```toml\n    [jira]\n    # Server URLs\n    jira_servers = [\"https://company.atlassian.net\", \"https://datacenter.jira.com\"]\n\n    # API tokens/passwords\n    jira_api_token = [\"cloud_api_token_here\", \"datacenter_password\"]\n\n    # Emails/usernames (both required)\n    jira_api_email = [\"user@company.com\", \"datacenter_username\"]\n\n    # Default server for ticket IDs\n    jira_base_url = \"https://company.atlassian.net\"\n    ```\n\n=== \"PAT Auth\"\n\n    Configure multiple servers using Personal Access Token authentication:\n\n    - `jira_servers`: 
List of JIRA server URLs\n    - `jira_api_token`: List of PAT tokens\n    - `jira_api_email`: Not needed (can be omitted or left empty)\n    - `jira_base_url`: Default server for ticket IDs like `PROJ-123`. Each repository can configure (local config file) its own `jira_base_url` to choose which server to use by default.\n\n    **Example Configuration:**\n    ```toml\n    [jira]\n    # Server URLs\n    jira_servers = [\"https://server1.jira.com\", \"https://server2.jira.com\"]\n\n    # PAT tokens only\n    jira_api_token = [\"pat_token_1\", \"pat_token_2\"]\n\n    # Default server for ticket IDs\n    jira_base_url = \"https://server1.jira.com\"\n    ```\n\n    **Mixed Authentication (Email/Token + PAT):**\n    ```toml\n    [jira]\n    jira_servers = [\"https://company.atlassian.net\", \"https://server.jira.com\"]\n    jira_api_token = [\"cloud_api_token\", \"server_pat_token\"]\n    jira_api_email = [\"user@company.com\", \"\"]  # Empty for PAT\n    ```\n\n\n\n\n### How to link a PR to a Jira ticket\n\nTo integrate with Jira, you can link your PR to a ticket using either of these methods:\n\n**Method 1: Description Reference:**\n\nInclude a ticket reference in your PR description, using either the complete URL format `https://<JIRA_ORG>.atlassian.net/browse/ISSUE-123` or the shortened ticket ID `ISSUE-123` (without prefix or suffix for the shortened ID).\n\n**Method 2: Branch Name Detection:**\n\nName your branch with the ticket ID as a prefix (e.g., `ISSUE-123-feature-description` or `ISSUE-123/feature-description`).\n\n!!! note \"Jira Base URL\"\n    For shortened ticket IDs or branch detection (method 2 for JIRA cloud), you must configure the Jira base URL in your configuration file under the [jira] section:\n\n    ```toml\n    [jira]\n    jira_base_url = \"https://<JIRA_ORG>.atlassian.net\"\n    ```\n    Where `<JIRA_ORG>` is your Jira organization identifier (e.g., `mycompany` for `https://mycompany.atlassian.net`).\n"
  },
  {
    "path": "docs/docs/core-abilities/index.md",
    "content": "# Core Abilities\n\nPR-Agent utilizes a variety of core abilities to provide a comprehensive and efficient code review experience. These abilities include:\n\n- [Compression strategy](./compression_strategy.md)\n- [Dynamic context](./dynamic_context.md)\n- [Fetching ticket context](./fetching_ticket_context.md)\n- [Interactivity](./interactivity.md)\n- [Local and global metadata](./metadata.md)\n- [Self-reflection](./self_reflection.md)\n\n## Blogs\n\nHere are some additional technical blogs from Qodo, that delve deeper into the core capabilities and features of Large Language Models (LLMs) when applied to coding tasks.\nThese resources provide more comprehensive insights into leveraging LLMs for software development.\n\n### Code Generation and LLMs\n\n- [Effective AI code suggestions: less is more](https://www.codium.ai/blog/effective-code-suggestions-llms-less-is-more/)\n- [State-of-the-art Code Generation with AlphaCodium – From Prompt Engineering to Flow Engineering](https://www.codium.ai/blog/qodoflow-state-of-the-art-code-generation-for-code-contests/)\n- [RAG for a Codebase with 10k Repos](https://www.codium.ai/blog/rag-for-large-scale-code-repos/)\n\n### Development Processes\n\n- [Understanding the Challenges and Pain Points of the Pull Request Cycle](https://www.codium.ai/blog/understanding-the-challenges-and-pain-points-of-the-pull-request-cycle/)\n- [Introduction to Code Coverage Testing](https://www.codium.ai/blog/introduction-to-code-coverage-testing/)\n\n### Cost Optimization\n\n- [Reduce Your Costs by 30% When Using GPT for Python Code](https://www.codium.ai/blog/reduce-your-costs-by-30-when-using-gpt-3-for-python-code/)\n"
  },
  {
    "path": "docs/docs/core-abilities/interactivity.md",
    "content": "# Interactivity\n\n`Supported Git Platforms: GitHub, GitLab`\n\n## Overview\n\nPR-Agent transforms static code reviews into interactive experiences by enabling direct actions from pull request (PR) comments.\nDevelopers can immediately trigger actions and apply changes with simple checkbox clicks.\n\nThis focused workflow maintains context while dramatically reducing the time between PR creation and final merge.\nThe approach eliminates manual steps, provides clear visual indicators, and creates immediate feedback loops all within the same interface.\n\n## Key Interactive Features\n\n### 1\\. Interactive `/improve` Tool\n\nThe [`/improve`](../tools/improve.md) command delivers a comprehensive interactive experience:\n\n- _**Apply this suggestion**_: Clicking this checkbox instantly converts a suggestion into a committable code change. When committed to the PR, changes made to code that was flagged for improvement will be marked with a check mark, allowing developers to easily track and review implemented recommendations.\n\n- _**More**_: Triggers additional suggestions generation while keeping each suggestion focused and relevant as the original set\n\n- _**Update**_: Triggers a re-analysis of the code, providing updated suggestions based on the latest changes\n\n- _**Author self-review**_: Interactive acknowledgment that developers have opened and reviewed collapsed suggestions\n\n### 2\\. Interactive `/help` Tool\n\nThe [`/help`](../tools/help.md) command not only lists available tools and their descriptions but also enables immediate tool invocation through interactive checkboxes.\nWhen a user checks a tool's checkbox, PR-Agent instantly triggers that tool without requiring additional commands.\nThis transforms the standard help menu into an interactive launch pad for all PR-Agent capabilities, eliminating context switching by keeping developers within their PR workflow.\n"
  },
  {
    "path": "docs/docs/core-abilities/metadata.md",
    "content": "# Local and global metadata injection with multi-stage analysis\n\n`Supported Git Platforms: GitHub, GitLab, Bitbucket`\n\n1\\.\nPR-Agent initially retrieves for each PR the following data:\n\n- PR title and branch name\n- PR original description\n- Commit messages history\n- PR diff patches, in [hunk diff](https://loicpefferkorn.net/2014/02/diff-files-what-are-hunks-and-how-to-extract-them/) format\n- The entire content of the files that were modified in the PR\n\n!!! tip \"Tip: Organization-level metadata\"\n    In addition to the inputs above, PR-Agent can incorporate supplementary preferences provided by the user, like [`extra_instructions` and `organization best practices`](../tools/improve.md#extra-instructions-and-best-practices). This information can be used to enhance the PR analysis.\n\n2\\.\nBy default, the first command that PR-Agent executes is [`describe`](../tools/describe.md), which generates three types of outputs:\n\n- PR Type (e.g. bug fix, feature, refactor, etc)\n- PR Description - a bullet point summary of the PR\n- Changes walkthrough - for each modified file, provide a one-line summary followed by a detailed bullet point list of the changes.\n\nThese AI-generated outputs are now considered as part of the PR metadata, and can be used in subsequent commands like `review` and `improve`.\nThis effectively enables multi-stage chain-of-thought analysis, without doing any additional API calls which will cost time and money.\n\nFor example, when generating code suggestions for different files, PR-Agent can inject the AI-generated [\"Changes walkthrough\"](https://github.com/qodo-ai/pr-agent/pull/1202#issue-2511546839) file summary in the prompt:\n\n```diff\n## File: 'src/file1.py'\n### AI-generated file summary:\n- edited function `func1` that does X\n- Removed function `func2` that was not used\n- ....\n\n@@ ... 
@@ def func1():\n__new hunk__\n11  unchanged code line0\n12  unchanged code line1\n13 +new code line2 added\n14  unchanged code line3\n__old hunk__\n unchanged code line0\n unchanged code line1\n-old code line2 removed\n unchanged code line3\n\n@@ ... @@ def func2():\n__new hunk__\n...\n__old hunk__\n...\n```\n\n3\\. The entire PR files that were retrieved are also used to expand and enhance the PR context (see [Dynamic Context](./dynamic_context.md)).\n\n4\\. All the metadata described above represents several levels of cumulative analysis - ranging from hunk level, to file level, to PR level, to organization level.\nThis comprehensive approach enables PR-Agent AI models to generate more precise and contextually relevant suggestions and feedback.\n"
  },
  {
    "path": "docs/docs/core-abilities/self_reflection.md",
    "content": "`Supported Git Platforms: GitHub, GitLab, Bitbucket`\n\nPR-Agent implements a **self-reflection** process where the AI model reflects, scores, and re-ranks its own suggestions, eliminating irrelevant or incorrect ones.\nThis approach improves the quality and relevance of suggestions, saving users time and enhancing their experience.\nConfiguration options allow users to set a score threshold for further filtering out suggestions.\n\n## Introduction - Efficient Review with Hierarchical Presentation\n\nGiven that not all generated code suggestions will be relevant, it is crucial to enable users to review them in a fast and efficient way, allowing quick identification and filtering of non-applicable ones.\n\nTo achieve this goal, PR-Agent offers a dedicated hierarchical structure when presenting suggestions to users:\n\n- A \"category\" section groups suggestions by their category, allowing users to quickly dismiss irrelevant suggestions.\n- Each suggestion is first described by a one-line summary, which can be expanded to a full description by clicking on a collapsible.\n- Upon expanding a suggestion, the user receives a more comprehensive description, and a code snippet demonstrating the recommendation.\n\n!!! 
note \"Fast Review\"\n    This hierarchical structure is designed to facilitate rapid review of each suggestion, with users spending an average of ~5-10 seconds per item.\n\n## Self-reflection and Re-ranking\n\nThe AI model is initially tasked with generating suggestions, and outputting them in order of importance.\nHowever, in practice we observe that models often struggle to simultaneously generate high-quality code suggestions and rank them well in a single pass.\nFurthermore, the initial set of generated suggestions sometimes contains easily identifiable errors.\n\nTo address these issues, we implemented a \"self-reflection\" process that refines suggestion ranking and eliminates irrelevant or incorrect proposals.\nThis process consists of the following steps:\n\n1. Presenting the generated suggestions to the model in a follow-up call.\n2. Instructing the model to score each suggestion on a scale of 0-10 and provide a rationale for the assigned score.\n3. Utilizing these scores to re-rank the suggestions and filter out incorrect ones (with a score of 0).\n4. 
Optionally, filtering out all suggestions below a user-defined score threshold.\n\nNote that presenting all generated suggestions simultaneously provides the model with a comprehensive context, enabling it to make more informed decisions compared to evaluating each suggestion individually.\n\nTo conclude, the self-reflection process enables PR-Agent to prioritize suggestions based on their importance, eliminate inaccurate or irrelevant proposals, and optionally exclude suggestions that fall below a specified threshold of significance.\nThis results in a more refined and valuable set of suggestions for the user, saving time and improving the overall experience.\n\n## Example Results\n\n![self_reflection](https://codium.ai/images/pr_agent/self_reflection1.png){width=768}\n![self_reflection](https://codium.ai/images/pr_agent/self_reflection2.png){width=768}\n\n## Appendix - Relevant Configuration Options\n\n```toml\n[pr_code_suggestions]\nsuggestions_score_threshold = 0 # Filter out suggestions with a score below this threshold (0-10)\n```\n"
  },
  {
    "path": "docs/docs/css/custom.css",
    "content": "/* Neutral color scheme - ready for future branding */\n:root {\n    --md-primary-fg-color: #0f172a;\n    --md-accent-fg-color: #1d4ed8;\n    --md-typeset-a-color: #1e40af;\n}\n\n[data-md-color-scheme=\"slate\"] {\n    --md-primary-fg-color: #0b1220;\n    --md-accent-fg-color: #38bdf8;\n    --md-typeset-a-color: #7dd3fc;\n    --md-default-bg-color: #0b1220;\n    --md-default-fg-color: #e5e7eb;\n    --md-default-fg-color--light: rgba(229, 231, 235, 0.7);\n    --md-default-fg-color--lighter: rgba(229, 231, 235, 0.5);\n    --md-default-fg-color--lightest: rgba(229, 231, 235, 0.3);\n    --md-code-bg-color: #0f172a;\n}\n\n.md-nav--primary {\n    .md-nav__link {\n    font-size: 18px;\n    }\n}\n\n.md-nav--primary {\n    position: relative;\n}\n\n.md-nav--primary::before {\n    content: \"\";\n    position: absolute;\n    top: 0;\n    right: 10px;\n    width: 2px;\n    height: 100%;\n    background-color: #e5e7eb;\n}\n\n[data-md-color-scheme=\"slate\"] .md-nav--primary::before {\n    background-color: #1f2937;\n}\n\n[data-md-color-scheme=\"slate\"] .md-header {\n    background-color: #0d1b36;\n}\n\n[data-md-color-scheme=\"slate\"] .md-tabs {\n    background-color: #0b1220;\n    border-top: 1px solid rgba(148, 163, 184, 0.25);\n}\n\n[data-md-color-scheme=\"slate\"] .md-tabs__link {\n    color: #e2e8f0;\n}\n\n[data-md-color-scheme=\"slate\"] .md-tabs__link--active,\n[data-md-color-scheme=\"slate\"] .md-tabs__link:hover {\n    color: #ffffff;\n    text-decoration: underline;\n    text-underline-offset: 0.25rem;\n}\n\n[data-md-color-scheme=\"slate\"] .md-search__form {\n    background-color: #0f172a;\n    border: 1px solid rgba(148, 163, 184, 0.4);\n}\n\n[data-md-color-scheme=\"slate\"] .md-search__input {\n    color: #e2e8f0;\n}\n\n[data-md-color-scheme=\"slate\"] .md-search__input::placeholder {\n    color: rgba(226, 232, 240, 0.7);\n}\n\n[data-md-color-scheme=\"slate\"] .md-search__icon {\n    color: rgba(226, 232, 240, 0.85);\n}\n\n.md-tabs__link  {\n    
font-size: 18px;\n}\n\n.md-header__title {\n    font-size: 20px;\n    margin-left: 12px !important;\n}\n\n.md-header__button.md-logo,\n.md-nav__title .md-logo {\n    display: none;\n}\n\n.md-content img {\n    border-width: 1px;\n    border-style: solid;\n    border-color: rgba(15, 23, 42, 0.2);\n    outline-width: 1px;\n    outline-style: solid;\n    outline-color: rgba(15, 23, 42, 0.25);\n}\n\n[data-md-color-scheme=\"slate\"] .md-content img {\n    border-color: rgba(226, 232, 240, 0.2);\n    outline-color: rgba(226, 232, 240, 0.3);\n}\n\n.md-banner {\n    background-color: #1d4ed8;\n}\n\n[data-md-color-scheme=\"slate\"] .md-banner {\n    background-color: #2563eb;\n}\n\n.md-banner .md-typeset a,\n.md-banner .md-typeset a:hover,\n.md-banner .md-typeset a:focus {\n    color: currentColor;\n    text-decoration: underline;\n}\n"
  },
  {
    "path": "docs/docs/faq/index.md",
    "content": "# FAQ\n\n??? note \"Q: Can PR-Agent serve as a substitute for a human reviewer?\"\n    #### Answer:<span style=\"display:none;\">1</span>\n\n    PR-Agent is designed to assist, not replace, human reviewers.\n\n    Reviewing PRs is a tedious and time-consuming task often seen as a \"chore\". In addition, the longer the PR – the shorter the relative feedback, since long PRs can overwhelm reviewers, both in terms of technical difficulty, and the actual review time.\n    PR-Agent aims to address these pain points, and to assist and empower both the PR author and reviewer.\n\n    However, PR-Agent has built-in safeguards to ensure the developer remains in the driver's seat. For example:\n\n    1. Preserves user's original PR header\n    2. Places user's description above the AI-generated PR description\n    3. Won't approve PRs; approval remains reviewer's responsibility\n    4. The code suggestions are optional, and aim to:\n        - Encourage self-review and self-reflection\n        - Highlight potential bugs or oversights\n        - Enhance code quality and promote best practices\n\n    Read more about this issue in our [blog](https://www.qodo.ai/blog/understanding-the-challenges-and-pain-points-of-the-pull-request-cycle/)\n\n___\n\n??? note \"Q: I received an incorrect or irrelevant suggestion. Why?\"\n\n    #### Answer:<span style=\"display:none;\">2</span>\n\n    - Modern AI models, like Claude Sonnet and GPT-5, are improving rapidly but remain imperfect. Users should critically evaluate all suggestions rather than accepting them automatically.\n    - AI errors are rare, but possible. A main value from reviewing the code suggestions lies in their high probability of catching **mistakes or bugs made by the PR author**. 
We believe it's worth spending 30-60 seconds reviewing suggestions, even if some aren't relevant, as this practice can enhance code quality and prevent bugs in production.\n\n\n    - The hierarchical structure of the suggestions is designed to help the user _quickly_ understand them, and to decide which ones are relevant and which are not:\n\n        - Only if the `Category` header is relevant, the user should move to the summarized suggestion description.\n        - Only if the summarized suggestion description is relevant, the user should click on the collapsible, to read the full suggestion description with a code preview example.\n\n    - In addition, we recommend to use the [`extra_instructions`](../tools/improve.md#extra-instructions-and-best-practices) field to guide the model to suggestions that are more relevant to the specific needs of the project.\n\n___\n\n??? note \"Q: How can I get more tailored suggestions?\"\n    #### Answer:<span style=\"display:none;\">3</span>\n\n    See [here](../tools/improve.md#extra-instructions-and-best-practices) for more information on how to use the `extra_instructions` and `best_practices` configuration options, to guide the model to more tailored suggestions.\n\n___\n\n??? note \"Q: Will you store my code? Are you using my code to train models?\"\n    #### Answer:<span style=\"display:none;\">4</span>\n\n    No. PR-Agent strict privacy policy ensures that your code is not stored or used for training purposes.\n\n    For a detailed overview of our data privacy policy, please refer to [this link](../overview/data_privacy.md)\n\n___\n\n??? note \"Q: Can PR-Agent review draft/offline PRs?\"\n    #### Answer:<span style=\"display:none;\">6</span>\n\n    Yes. 
While PR-Agent won't automatically review draft PRs, you can still get feedback by manually requesting it through [online commenting](../usage-guide/automations_and_usage.md#online-usage).\n\n    For active PRs, you can customize the automatic feedback settings [here](../usage-guide/automations_and_usage.md#pr-agent-automatic-feedback) to match your team's workflow.\n___\n\n??? note \"Q: Can the 'Review effort' feedback be calibrated or customized?\"\n    #### Answer:<span style=\"display:none;\">7</span>\n\n    Yes, you can customize review effort estimates using the `extra_instructions` configuration option (see [documentation](../tools/review.md#configuration-options)).\n    \n    Example mapping:\n\n    - Effort 1: < 30 minutes review time\n    - Effort 2: 30-60 minutes review time\n    - Effort 3: 60-90 minutes review time\n    - ...\n    \n    Note: The effort levels (1-5) are primarily meant for _comparative_ purposes, helping teams prioritize reviewing smaller PRs first. The actual review duration may vary, as the focus is on providing consistent relative effort estimates.\n\n___\n\n??? note \"Q: How to reduce the noise generated by PR-Agent?\"\n    #### Answer:<span style=\"display:none;\">3</span>\n\n    The default configuration of PR-Agent is designed to balance helpful feedback with noise reduction. 
It reduces noise through several approaches:\n\n    - Auto-feedback uses three highly structured tools (`/describe`, `/review`, and `/improve`), designed to be accessible at a glance without creating large visual overload\n    - Suggestions are presented in a table format rather than as committable comments, which are far noisier\n    - The 'File Walkthrough' section is folded by default, as it tends to be verbose\n    - Intermediate comments are avoided when creating new PRs (like \"PR-Agent is now reviewing your PR...\"), which would generate email noise\n    \n    From our experience, especially in large teams or organizations, complaints about \"noise\" sometimes stem from the following issues:\n\n    - **Feedback from multiple bots**: When multiple bots provide feedback on the same PR, it creates confusion and noise. We recommend using PR-Agent as the primary feedback tool to streamline the process and reduce redundancy.\n    - **Getting familiar with the tool**: Unlike many tools that provide feedback only on demand, PR-Agent automatically analyzes and suggests improvements for every code change. While this proactive approach can feel intimidating at first, it's designed to continuously enhance code quality and catch bugs and problems when they occur. We recommend reviewing [this guide](../tools/improve.md#understanding-ai-code-suggestions) to help align expectations and maximize the value of PR-Agent's auto-feedback.\n\n    Therefore, at a global configuration level, we recommend using the default configuration, which is designed to reduce noise while providing valuable feedback.\n    \n    However, if you still find the feedback too noisy, you can adjust the configuration. 
Since each user and team has different needs, it's definitely possible - and even recommended - to adjust configurations for specific repos as needed.\n    Ways to adjust the configuration for noise reduction include, for example:\n\n    - [Score thresholds for code suggestions](../tools/improve.md#configuration-options)\n    - [Utilizing the `extra_instructions` field for more tailored feedback](../tools/improve.md#extra-instructions)\n    - [Controlling which tools run automatically](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened)\n\n    Note that some users may prefer the opposite - more thorough and detailed feedback. PR-Agent is designed to be flexible and customizable, allowing you to tailor the feedback to your team's specific needs and preferences.\n    Examples of ways to increase feedback include:\n\n    - [Dual-publishing mode](../tools/improve.md#dual-publishing-mode)\n    - [Interactive usage](../core-abilities/interactivity.md)\n___\n"
  },
  {
    "path": "docs/docs/index.md",
    "content": "# Overview\n\n[PR-Agent](https://github.com/qodo-ai/pr-agent) is an open-source, AI-powered code review agent and a community-maintained legacy project of Qodo. It is distinct from Qodo's primary AI code review offering, which provides a feature-rich, context-aware experience. Qodo now offers a free tier that integrates seamlessly with GitHub, GitLab, Bitbucket, and Azure DevOps for high-quality automated reviews.\n\n- See the [Installation Guide](./installation/index.md) for instructions on installing and running the tool on different git platforms.\n\n- See the [Usage Guide](./usage-guide/index.md) for instructions on running commands via different interfaces, including _CLI_, _online usage_, or by _automatically triggering_ them when a new PR is opened.\n\n- See the [Tools Guide](./tools/index.md) for a detailed description of the different tools.\n\n## Docs Smart Search\n\nTo search the documentation site using natural language:\n\n1) Comment `/help \"your question\"` in a pull request where PR-Agent is installed\n\n2) The bot will respond with an [answer](https://github.com/qodo-ai/pr-agent/pull/1241#issuecomment-2365259334) that includes relevant documentation links.\n\n## Features\n\nPR-Agent offers comprehensive pull request functionalities integrated with various git providers:\n\n|       |                                                                                       | GitHub | GitLab | Bitbucket | Azure DevOps | Gitea |\n| ----- |---------------------------------------------------------------------------------------|:------:|:------:|:---------:|:------------:|:-----:|\n| [TOOLS](./tools/index.md) | [Describe](./tools/describe.md)                                     |   ✅   |   ✅   |    ✅     |      ✅       |  ✅   |\n|       | [Review](./tools/review.md)                                                           |   ✅   |   ✅   |    ✅     |      ✅       |  ✅   |\n|       | [Improve](./tools/improve.md)                                
                         |   ✅   |   ✅   |    ✅     |      ✅       |  ✅   |\n|       | [Ask](./tools/ask.md)                                                                 |   ✅   |   ✅   |    ✅     |      ✅       |       |\n|       | ⮑ [Ask on code lines](./tools/ask.md#ask-lines)                                       |   ✅   |   ✅   |           |              |       |\n|       | [Add Docs](./tools/add_docs.md)                                                       |   ✅   |   ✅   |    ✅     |      ✅       |       |\n|       | [Generate Labels](./tools/generate_labels.md)                                         |   ✅   |   ✅   |    ✅     |      ✅       |       |\n|       | [Similar Issues](./tools/similar_issues.md)                                           |   ✅   |        |           |              |       |\n|       | [Help](./tools/help.md)                                                               |   ✅   |   ✅   |    ✅     |      ✅       |       |\n|       | [Help Docs](./tools/help_docs.md)                                                     |   ✅   |   ✅   |    ✅     |              |       |\n|       | [Update CHANGELOG](./tools/update_changelog.md)                                       |   ✅   |   ✅   |    ✅     |      ✅       |       |\n|       |                                                                                       |        |        |           |              |       |\n| [USAGE](./usage-guide/index.md) | [CLI](./usage-guide/automations_and_usage.md#local-repo-cli)      |   ✅   |   ✅   |    ✅     |      ✅       |  ✅   |\n|       | [App / webhook](./usage-guide/automations_and_usage.md#github-app)                    |   ✅   |   ✅   |    ✅     |      ✅       |  ✅   |\n|       | [Tagging bot](https://github.com/qodo-ai/pr-agent#try-it-now)                       |   ✅   |        |           |              |       |\n|       | [Actions](./installation/github.md#run-as-a-github-action)                            |   ✅   |   ✅   |    ✅     | 
     ✅       |       |\n|       |                                                                                       |        |        |           |              |       |\n| [CORE](./core-abilities/index.md) | [Adaptive and token-aware file patch fitting](./core-abilities/compression_strategy.md) |   ✅   |   ✅   |    ✅     |      ✅       |       |\n|       | [Chat on code suggestions](./core-abilities/interactivity.md)                         |   ✅   |  ✅   |           |              |       |\n|       | [Compression strategy](./core-abilities/compression_strategy.md)                      |   ✅   |   ✅   |    ✅     |      ✅       |       |\n|       | [Dynamic context](./core-abilities/dynamic_context.md)                                |   ✅   |   ✅   |    ✅     |      ✅       |       |\n|       | [Fetching ticket context](./core-abilities/fetching_ticket_context.md)                |   ✅   |  ✅   |    ✅     |              |       |\n|       | [Interactivity](./core-abilities/interactivity.md)                                    |   ✅   |  ✅   |           |              |       |\n|       | [Local and global metadata](./core-abilities/metadata.md)                             |   ✅   |   ✅   |    ✅     |      ✅       |       |\n|       | [Multiple models support](./usage-guide/changing_a_model.md)                          |   ✅   |   ✅   |    ✅     |      ✅       |       |\n|       | [Self reflection](./core-abilities/self_reflection.md)                                |   ✅   |   ✅   |    ✅     |      ✅       |       |\n\n## Example Results\n\n<hr>\n\n#### [/describe](https://github.com/qodo-ai/pr-agent/pull/530)\n\n<figure markdown=\"1\">\n![/describe](https://www.codium.ai/images/pr_agent/describe_new_short_main.png){width=512}\n</figure>\n<hr>\n\n#### [/review](https://github.com/qodo-ai/pr-agent/pull/732#issuecomment-1975099151)\n\n<figure 
markdown=\"1\">\n![/review](https://www.codium.ai/images/pr_agent/review_new_short_main.png){width=512}\n</figure>\n<hr>\n\n#### [/improve](https://github.com/qodo-ai/pr-agent/pull/732#issuecomment-1975099159)\n\n<figure markdown=\"1\">\n![/improve](https://www.codium.ai/images/pr_agent/improve_new_short_main.png){width=512}\n</figure>\n<hr>\n\n## How it Works\n\nThe following diagram illustrates PR-Agent tools and their flow:\n\n![PR-Agent Tools](https://codium.ai/images/pr_agent/diagram-v0.9.png)\n\nCheck out the [PR Compression strategy](core-abilities/index.md) page for more details on how we convert a code diff to a manageable LLM prompt\n"
  },
  {
    "path": "docs/docs/installation/azure.md",
    "content": "## Azure DevOps Pipeline\n\nYou can use a pre-built Action Docker image to run PR-Agent as an Azure DevOps pipeline.\nAdd the following file to your repository as `azure-pipelines.yml`:\n\n```yaml\n# Opt out of CI triggers\ntrigger: none\n\n# Configure PR trigger\n# pr:\n#   branches:\n#     include:\n#     - '*'\n#   autoCancel: true\n#   drafts: false\n\n# NOTE for Azure Repos Git:\n# Azure Repos does not honor YAML pr: triggers. Configure Build Validation\n# via Branch Policies instead (see note below). You can safely omit pr:.\n\nstages:\n- stage: pr_agent\n  displayName: 'PR Agent Stage'\n  jobs:\n  - job: pr_agent_job\n    displayName: 'PR Agent Job'\n    pool:\n      vmImage: 'ubuntu-latest'\n    container:\n      image: codiumai/pr-agent:latest\n      options: --entrypoint \"\"\n    variables:\n      - group: pr_agent\n    steps:\n    - script: |\n        echo \"Running PR Agent action step\"\n\n        # Construct PR_URL\n        PR_URL=\"${SYSTEM_COLLECTIONURI}${SYSTEM_TEAMPROJECT}/_git/${BUILD_REPOSITORY_NAME}/pullrequest/${SYSTEM_PULLREQUEST_PULLREQUESTID}\"\n        echo \"PR_URL=$PR_URL\"\n\n        # Extract organization URL from System.CollectionUri\n        ORG_URL=$(echo \"$(System.CollectionUri)\" | sed 's/\\/$//') # Remove trailing slash if present\n        echo \"Organization URL: $ORG_URL\"\n\n        export azure_devops__org=\"$ORG_URL\"\n        export config__git_provider=\"azure\"\n\n        pr-agent --pr_url=\"$PR_URL\" describe\n        pr-agent --pr_url=\"$PR_URL\" review\n        pr-agent --pr_url=\"$PR_URL\" improve\n      env:\n        azure_devops__pat: $(azure_devops_pat)\n        openai__key: $(OPENAI_KEY)\n      displayName: 'Run PR-Agent'\n```\n\nThis script will run PR-Agent on every new pull request, with the `describe`, `review`, and `improve` commands.\nNote that you need to define the `azure_devops_pat` and `OPENAI_KEY` variables in the Azure DevOps pipeline settings (Pipelines -> Library -> + Variable 
group):\n\n![PR-Agent](https://codium.ai/images/pr_agent/azure_devops_pipeline_secrets.png){width=468}\n\nMake sure to give pipeline permissions to the `pr_agent` variable group.\n\n> Note that Azure Pipelines lacks support for triggering workflows from PR comments. If you find a viable solution, please contribute it to our [issue tracker](https://github.com/qodo-ai/pr-agent/issues)\n\n### Azure Repos Git PR triggers and Build Validation\n\nAzure Repos Git does not use YAML `pr:` triggers for pipelines. Instead, configure Build Validation on the target branch to run the PR Agent pipeline for pull requests:\n\n1. Go to Project Settings → Repositories → Branches.\n2. Select the target branch and open Branch Policies.\n3. Under Build Validation, add a policy:\n   - Select the PR Agent pipeline (the `azure-pipelines.yml` above).\n   - Set it as Required.\n4. Remove the `pr:` section from your YAML (not needed for Azure Repos Git).\n\nThis distinction applies specifically to Azure Repos Git. 
Other providers like GitHub and Bitbucket Cloud can use YAML-based PR triggers.\n\n## Azure DevOps from CLI\n\nTo use the Azure DevOps provider, use the following settings in configuration.toml:\n\n```toml\n[config]\ngit_provider=\"azure\"\n```\n\nAzure DevOps provider supports [PAT token](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows) or [DefaultAzureCredential](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication-overview#authentication-in-server-environments) authentication.\nPAT is faster to create, but has a built-in expiration date, and will use the user identity for API calls.\nUsing DefaultAzureCredential you can use a managed identity or service principal, which are more secure and will create a separate ADO user identity (via AAD) for the agent.\n\nIf PAT was chosen, you can assign the value in .secrets.toml.\nIf DefaultAzureCredential was chosen, you can assign the additional env vars like AZURE_CLIENT_SECRET directly,\nor use managed identity/az cli (for local development) without any additional configuration.\nIn any case, the 'org' value must be assigned in .secrets.toml:\n\n```toml\n[azure_devops]\norg = \"https://dev.azure.com/YOUR_ORGANIZATION/\"\n# pat = \"YOUR_PAT_TOKEN\" needed only if using PAT for authentication\n```\n\n## Azure DevOps Webhook\n\nTo trigger from an Azure webhook, you need to manually [add a webhook](https://learn.microsoft.com/en-us/azure/devops/service-hooks/services/webhooks?view=azure-devops).\nUse the \"Pull request created\" type to trigger a review, or \"Pull request commented on\" to trigger any supported command via a `/<command> <args>` comment on the relevant PR. Note that for the \"Pull request commented on\" trigger, only API v2.0 is supported.\n\nFor webhook security, create a random username/password pair and configure the webhook username and password on both the server and Azure DevOps webhook. 
These will be sent as basic Auth data by the webhook with each request:\n\n```toml\n[azure_devops_server]\nwebhook_username = \"<basic auth user>\"\nwebhook_password = \"<basic auth password>\"\n```\n\n> :warning: **Ensure that the webhook endpoint is only accessible over HTTPS** to mitigate the risk of credential interception when using basic authentication.\n"
  },
  {
    "path": "docs/docs/installation/bitbucket.md",
    "content": "## Run as a Bitbucket Pipeline\n\nYou can use the Bitbucket Pipeline system to run PR-Agent on every pull request open or update.\n\n1. Add the following `bitbucket-pipelines.yml` file to your repository:\n\n```yaml\npipelines:\n    pull-requests:\n      '**':\n        - step:\n            name: PR Agent Review\n            image: codiumai/pr-agent:latest\n            script:\n              - pr-agent --pr_url=https://bitbucket.org/$BITBUCKET_WORKSPACE/$BITBUCKET_REPO_SLUG/pull-requests/$BITBUCKET_PR_ID review\n```\n\n2. Add the following secure variables to your repository under Repository settings > Pipelines > Repository variables.\n\n   - CONFIG__GIT_PROVIDER: `bitbucket`\n   - OPENAI__KEY: `<your key>`\n   - BITBUCKET__AUTH_TYPE: `basic` or `bearer` (default is `bearer`)\n   - BITBUCKET__BEARER_TOKEN: `<your token>` (required when auth_type is bearer)\n   - BITBUCKET__BASIC_TOKEN: `<your token>` (required when auth_type is basic)\n\nYou can get a Bitbucket token for your repository by following Repository Settings -> Security -> Access Tokens.\nFor basic auth, you can generate a base64 encoded token from your username:password combination.\n\nNote that comments on a PR are not supported in Bitbucket Pipeline.\n\n## Bitbucket Server and Data Center\n\nLog in to your on-prem instance of Bitbucket with your service account username and password.\nNavigate to `Manage account`, `HTTP Access tokens`, `Create Token`.\nGenerate the token and add it to `.secrets.toml` under the `bitbucket_server` section:\n\n```toml\n[bitbucket_server]\nbearer_token = \"<your key>\"\n```\n\nDon't forget to also set the URL of your Bitbucket Server instance (either in `.secrets.toml` or in `configuration.toml`):\n\n```toml\n[bitbucket_server]\nurl = \"<full URL to your Bitbucket instance, e.g.: https://git.bitbucket.com>\"\n```\n\n### Run it as CLI\n\nModify `configuration.toml`:\n\n```toml\ngit_provider=\"bitbucket_server\"\n```\n\n\n\nand pass the Pull request 
URL:\n\n```shell\npython cli.py --pr_url https://git.on-prem-instance-of-bitbucket.com/projects/PROJECT/repos/REPO/pull-requests/1 review\n```\n\n### Run it as service\n\nTo run PR-Agent as webhook, build the docker image:\n\n```bash\ndocker build . -t codiumai/pr-agent:bitbucket_server_webhook --target bitbucket_server_webhook -f docker/Dockerfile\ndocker push codiumai/pr-agent:bitbucket_server_webhook  # Push to your Docker repository\n```\n\nNavigate to `Projects` or `Repositories`, `Settings`, `Webhooks`, `Create Webhook`.\nFill in the name and URL. For Authentication, select 'None'. Select the 'Pull Request Opened' checkbox to receive that event as a webhook.\n\nThe URL should end with `/webhook`, for example: https://domain.com/webhook\n"
  },
  {
    "path": "docs/docs/installation/gitea.md",
    "content": "## Run a Gitea webhook server\n\n1. In Gitea, create a new user and give it the \"Reporter\" role for the intended group or project.\n\n2. For the user from step 1, generate a `personal_access_token` with `api` access.\n\n3. Generate a random secret for your app, and save it for later (`webhook_secret`). For example, you can use:\n\n```bash\nWEBHOOK_SECRET=$(python -c \"import secrets; print(secrets.token_hex(10))\")\n```\n\n4. Clone this repository:\n\n```bash\ngit clone https://github.com/qodo-ai/pr-agent.git\n```\n\n5. Prepare variables and secrets. Skip this step if you plan on setting these as environment variables when running the agent:\n    - In the configuration file/variables:\n        - Set `config.git_provider` to \"gitea\"\n    - In the secrets file/variables:\n        - Set your AI model key in the respective section\n        - In the [Gitea] section, set `personal_access_token` (with token from step 2) and `webhook_secret` (with secret from step 3)\n\n6. Build a Docker image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example:\n\n```bash\ndocker build -f docker/Dockerfile -t codiumai/pr-agent:gitea_app --target gitea_app .\ndocker push codiumai/pr-agent:gitea_app  # Push to your Docker repository\n```\n\n7. Set the environment variables; the method depends on your Docker runtime. Skip this step if you included your secrets/configuration directly in the Docker image.\n\n```bash\nCONFIG__GIT_PROVIDER=gitea\nGITEA__PERSONAL_ACCESS_TOKEN=<personal_access_token>\nGITEA__WEBHOOK_SECRET=<webhook_secret>\nGITEA__URL=https://gitea.com # Or self host\nOPENAI__KEY=<your_openai_api_key>\nGITEA__SKIP_SSL_VERIFICATION=false # or true\nGITEA__SSL_CA_CERT=/path/to/cacert.pem\n```\n\n8. Create a webhook in your Gitea project. 
Set the URL to `http[s]://<PR_AGENT_HOSTNAME>/api/v1/gitea_webhooks`, the secret token to the generated secret from step 3, and enable the triggers `push`, `comments` and `merge request events`.\n\n9. Test your installation by opening a merge request or commenting on a merge request using one of PR Agent's commands.\n"
  },
  {
    "path": "docs/docs/installation/github.md",
    "content": "In this page we will cover how to install and run PR-Agent as a GitHub Action or GitHub App, and how to configure it for your needs.\n\n## Run as a GitHub Action\n\nYou can use our pre-built Github Action Docker image to run PR-Agent as a Github Action.\n\n1) Add the following file to your repository under `.github/workflows/pr_agent.yml`:\n\n```yaml\non:\n  pull_request:\n    types: [opened, reopened, ready_for_review]\n  issue_comment:\njobs:\n  pr_agent_job:\n    if: ${{ github.event.sender.type != 'Bot' }}\n    runs-on: ubuntu-latest\n    permissions:\n      issues: write\n      pull-requests: write\n      contents: write\n    name: Run pr agent on every pull request, respond to user comments\n    steps:\n      - name: PR Agent action step\n        id: pragent\n        uses: qodo-ai/pr-agent@main\n        env:\n          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n```\n\n2) Add the following secret to your repository under `Settings > Secrets and variables > Actions > New repository secret > Add secret`:\n\n```\nName = OPENAI_KEY\nSecret = <your key>\n```\n\nThe GITHUB_TOKEN secret is automatically created by GitHub.\n\n3) Merge this change to your main branch.\nWhen you open your next PR, you should see a comment from `github-actions` bot with a review of your PR, and instructions on how to use the rest of the tools.\n\n4) You may configure PR-Agent by adding environment variables under the env section corresponding to any configurable property in the [configuration](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) file. Some examples:\n\n```yaml\n      env:\n        # ... 
previous environment values\n        OPENAI.ORG: \"<Your organization name under your OpenAI account>\"\n        PR_REVIEWER.REQUIRE_TESTS_REVIEW: \"false\" # Disable tests review\n        PR_CODE_SUGGESTIONS.NUM_CODE_SUGGESTIONS: 6 # Increase number of code suggestions\n```\n\nSee detailed usage instructions in the [USAGE GUIDE](../usage-guide/automations_and_usage.md#github-action)\n\n### Configuration Examples\n\nThis section provides detailed, step-by-step examples for configuring PR-Agent with different models and advanced options in GitHub Actions.\n\n#### Quick Start Examples\n\n##### Basic Setup (OpenAI Default)\n\nCopy this minimal workflow to get started with the default OpenAI models:\n\n```yaml\nname: PR Agent\non:\n  pull_request:\n    types: [opened, reopened, ready_for_review]\n  issue_comment:\njobs:\n  pr_agent_job:\n    if: ${{ github.event.sender.type != 'Bot' }}\n    runs-on: ubuntu-latest\n    permissions:\n      issues: write\n      pull-requests: write\n      contents: write\n    steps:\n      - name: PR Agent action step\n        uses: qodo-ai/pr-agent@main\n        env:\n          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n```\n\n##### Gemini Setup\n\nReady-to-use workflow for Gemini models:\n\n```yaml\nname: PR Agent (Gemini)\non:\n  pull_request:\n    types: [opened, reopened, ready_for_review]\n  issue_comment:\njobs:\n  pr_agent_job:\n    if: ${{ github.event.sender.type != 'Bot' }}\n    runs-on: ubuntu-latest\n    permissions:\n      issues: write\n      pull-requests: write\n      contents: write\n    steps:\n      - name: PR Agent action step\n        uses: qodo-ai/pr-agent@main\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          config.model: \"gemini/gemini-1.5-flash\"\n          config.fallback_models: '[\"gemini/gemini-1.5-flash\"]'\n          GOOGLE_AI_STUDIO.GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}\n          github_action_config.auto_review: 
\"true\"\n          github_action_config.auto_describe: \"true\"\n          github_action_config.auto_improve: \"true\"\n```\n\n#### Claude Setup\n\nReady-to-use workflow for Claude models:\n\n```yaml\nname: PR Agent (Claude)\non:\n  pull_request:\n    types: [opened, reopened, ready_for_review]\n  issue_comment:\njobs:\n  pr_agent_job:\n    if: ${{ github.event.sender.type != 'Bot' }}\n    runs-on: ubuntu-latest\n    permissions:\n      issues: write\n      pull-requests: write\n      contents: write\n    steps:\n      - name: PR Agent action step\n        uses: qodo-ai/pr-agent@main\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          config.model: \"anthropic/claude-3-opus-20240229\"\n          config.fallback_models: '[\"anthropic/claude-3-haiku-20240307\"]'\n          ANTHROPIC.KEY: ${{ secrets.ANTHROPIC_KEY }}\n          github_action_config.auto_review: \"true\"\n          github_action_config.auto_describe: \"true\"\n          github_action_config.auto_improve: \"true\"\n```\n\n#### Basic Configuration with Tool Controls\n\nStart with this enhanced workflow that includes tool configuration:\n\n```yaml\non:\n  pull_request:\n    types: [opened, reopened, ready_for_review]\n  issue_comment:\njobs:\n  pr_agent_job:\n    if: ${{ github.event.sender.type != 'Bot' }}\n    runs-on: ubuntu-latest\n    permissions:\n      issues: write\n      pull-requests: write\n      contents: write\n    name: Run pr agent on every pull request, respond to user comments\n    steps:\n      - name: PR Agent action step\n        id: pragent\n        uses: qodo-ai/pr-agent@main\n        env:\n          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          # Enable/disable automatic tools\n          github_action_config.auto_review: \"true\"\n          github_action_config.auto_describe: \"true\"\n          github_action_config.auto_improve: \"true\"\n          # Configure which PR events trigger the action\n     
     github_action_config.pr_actions: '[\"opened\", \"reopened\", \"ready_for_review\", \"review_requested\"]'\n```\n\n#### Switching Models\n\n##### Using Gemini (Google AI Studio)\n\nTo use Gemini models instead of the default OpenAI models:\n\n```yaml\n      env:\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        # Set the model to Gemini\n        config.model: \"gemini/gemini-1.5-flash\"\n        config.fallback_models: '[\"gemini/gemini-1.5-flash\"]'\n        # Add your Gemini API key\n        GOOGLE_AI_STUDIO.GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}\n        # Tool configuration\n        github_action_config.auto_review: \"true\"\n        github_action_config.auto_describe: \"true\"\n        github_action_config.auto_improve: \"true\"\n```\n\n**Required Secrets:**\n\n- Add `GEMINI_API_KEY` to your repository secrets (get it from [Google AI Studio](https://aistudio.google.com/))\n\n**Note:** When using non-OpenAI models like Gemini, you don't need to set `OPENAI_KEY` - only the model-specific API key is required.\n\n##### Using Claude (Anthropic)\n\nTo use Claude models:\n\n```yaml\n      env:\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        # Set the model to Claude\n        config.model: \"anthropic/claude-3-opus-20240229\"\n        config.fallback_models: '[\"anthropic/claude-3-haiku-20240307\"]'\n        # Add your Anthropic API key\n        ANTHROPIC.KEY: ${{ secrets.ANTHROPIC_KEY }}\n        # Tool configuration\n        github_action_config.auto_review: \"true\"\n        github_action_config.auto_describe: \"true\"\n        github_action_config.auto_improve: \"true\"\n```\n\n**Required Secrets:**\n\n- Add `ANTHROPIC_KEY` to your repository secrets (get it from [Anthropic Console](https://console.anthropic.com/))\n\n**Note:** When using non-OpenAI models like Claude, you don't need to set `OPENAI_KEY` - only the model-specific API key is required.\n\n##### Using Azure OpenAI\n\nTo use Azure OpenAI services:\n\n```yaml\n      
env:\n        OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        # Azure OpenAI configuration\n        OPENAI.API_TYPE: \"azure\"\n        OPENAI.API_VERSION: \"2023-05-15\"\n        OPENAI.API_BASE: ${{ secrets.AZURE_OPENAI_ENDPOINT }}\n        OPENAI.DEPLOYMENT_ID: ${{ secrets.AZURE_OPENAI_DEPLOYMENT }}\n        # Set the model to match your Azure deployment\n        config.model: \"gpt-4o\"\n        config.fallback_models: '[\"gpt-4o\"]'\n        # Tool configuration\n        github_action_config.auto_review: \"true\"\n        github_action_config.auto_describe: \"true\"\n        github_action_config.auto_improve: \"true\"\n```\n\n**Required Secrets:**\n\n- `AZURE_OPENAI_KEY`: Your Azure OpenAI API key\n- `AZURE_OPENAI_ENDPOINT`: Your Azure OpenAI endpoint URL\n- `AZURE_OPENAI_DEPLOYMENT`: Your deployment name\n\n##### Using Local Models (Ollama)\n\nTo use local models via Ollama:\n\n```yaml\n      env:\n        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        # Set the model to a local Ollama model\n        config.model: \"ollama/qwen2.5-coder:32b\"\n        config.fallback_models: '[\"ollama/qwen2.5-coder:32b\"]'\n        config.custom_model_max_tokens: \"128000\"\n        # Ollama configuration\n        OLLAMA.API_BASE: \"http://localhost:11434\"\n        # Tool configuration\n        github_action_config.auto_review: \"true\"\n        github_action_config.auto_describe: \"true\"\n        github_action_config.auto_improve: \"true\"\n```\n\n**Note:** For local models, you'll need to use a self-hosted runner with Ollama installed, as GitHub Actions hosted runners cannot access localhost services.\n\n#### Advanced Configuration Options\n\n##### Custom Review Instructions\n\nAdd specific instructions for the review process:\n\n```yaml\n      env:\n        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        # Custom 
review instructions\n        pr_reviewer.extra_instructions: \"Focus on security vulnerabilities and performance issues. Check for proper error handling.\"\n        # Tool configuration\n        github_action_config.auto_review: \"true\"\n        github_action_config.auto_describe: \"true\"\n        github_action_config.auto_improve: \"true\"\n```\n\n##### Language-Specific Configuration\n\nConfigure for specific programming languages:\n\n```yaml\n      env:\n        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        # Language-specific settings\n        pr_reviewer.extra_instructions: \"Focus on Python best practices, type hints, and docstrings.\"\n        pr_code_suggestions.num_code_suggestions: \"8\"\n        pr_code_suggestions.suggestions_score_threshold: \"7\"\n        # Tool configuration\n        github_action_config.auto_review: \"true\"\n        github_action_config.auto_describe: \"true\"\n        github_action_config.auto_improve: \"true\"\n```\n\n##### Selective Tool Execution\n\nRun only specific tools automatically:\n\n```yaml\n      env:\n        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        # Only run review and describe, skip improve\n        github_action_config.auto_review: \"true\"\n        github_action_config.auto_describe: \"true\"\n        github_action_config.auto_improve: \"false\"\n        # Only trigger on PR open and reopen\n        github_action_config.pr_actions: '[\"opened\", \"reopened\"]'\n```\n\n#### Using Configuration Files\n\nInstead of setting all options via environment variables, you can use a `.pr_agent.toml` file in your repository root:\n\n1. 
Create a `.pr_agent.toml` file in your repository root:\n\n```toml\n[config]\nmodel = \"gemini/gemini-1.5-flash\"\nfallback_models = [\"anthropic/claude-3-opus-20240229\"]\n\n[pr_reviewer]\nextra_instructions = \"Focus on security issues and code quality.\"\n\n[pr_code_suggestions]\nnum_code_suggestions = 6\nsuggestions_score_threshold = 7\n```\n\n2. Use a simpler workflow file:\n\n```yaml\non:\n  pull_request:\n    types: [opened, reopened, ready_for_review]\n  issue_comment:\njobs:\n  pr_agent_job:\n    if: ${{ github.event.sender.type != 'Bot' }}\n    runs-on: ubuntu-latest\n    permissions:\n      issues: write\n      pull-requests: write\n      contents: write\n    name: Run pr agent on every pull request, respond to user comments\n    steps:\n      - name: PR Agent action step\n        id: pragent\n        uses: qodo-ai/pr-agent@main\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          GOOGLE_AI_STUDIO.GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}\n          ANTHROPIC.KEY: ${{ secrets.ANTHROPIC_KEY }}\n          github_action_config.auto_review: \"true\"\n          github_action_config.auto_describe: \"true\"\n          github_action_config.auto_improve: \"true\"\n```\n\n#### Troubleshooting Common Issues\n\n##### Model Not Found Errors\n\nIf you get model not found errors:\n\n1. **Check model name format**: Ensure you're using the correct model identifier format (e.g., `gemini/gemini-1.5-flash`, not just `gemini-1.5-flash`)\n\n2. **Verify API keys**: Make sure your API keys are correctly set as repository secrets\n\n3. 
**Check model availability**: Some models may not be available in all regions or may require specific access\n\n##### Environment Variable Format\n\nRemember these key points about environment variables:\n\n- Use dots (`.`) or double underscores (`__`) to separate sections and keys\n- Boolean values should be strings: `\"true\"` or `\"false\"`\n- Arrays should be JSON strings: `'[\"item1\", \"item2\"]'`\n- Model names are case-sensitive\n\n##### Rate Limiting\n\nIf you encounter rate limiting:\n\n```yaml\n      env:\n        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        # Add fallback models for better reliability\n        config.fallback_models: '[\"gpt-4o\", \"gpt-3.5-turbo\"]'\n        # Increase timeout for slower models\n        config.ai_timeout: \"300\"\n        github_action_config.auto_review: \"true\"\n        github_action_config.auto_describe: \"true\"\n        github_action_config.auto_improve: \"true\"\n```\n\n##### Common Error Messages and Solutions\n\n**Error: \"Model not found\"**\n- **Solution**: Check the model name format and ensure it matches the exact identifier. 
See the [Changing a model in PR-Agent](../usage-guide/changing_a_model.md) guide for supported models and their correct identifiers.\n\n**Error: \"API key not found\"**\n- **Solution**: Verify that your API key is correctly set as a repository secret and the environment variable name matches exactly\n- **Note**: For non-OpenAI models (Gemini, Claude, etc.), you only need the model-specific API key, not `OPENAI_KEY`\n\n**Error: \"Rate limit exceeded\"**\n- **Solution**: Add fallback models or increase the `config.ai_timeout` value\n\n**Error: \"Permission denied\"**\n- **Solution**: Ensure your workflow has the correct permissions set:\n  ```yaml\n  permissions:\n    issues: write\n    pull-requests: write\n    contents: write\n  ```\n\n**Error: \"Invalid JSON format\"**\n\n- **Solution**: Check that arrays are properly formatted as JSON strings:\n\n```yaml\n# Correct:\nconfig.fallback_models: '[\"model1\", \"model2\"]'\n# Incorrect (interpreted as a YAML list, not a string):\nconfig.fallback_models: [\"model1\", \"model2\"]\n```\n\n##### Debugging Tips\n\n1. **Enable verbose logging**: Add `config.verbosity_level: \"2\"` to see detailed logs\n2. **Check GitHub Actions logs**: Look at the step output for specific error messages\n3. **Test with minimal configuration**: Start with just the basic setup and add options one by one\n4. 
**Verify secrets**: Double-check that all required secrets are set in your repository settings\n\n##### Performance Optimization\n\nFor better performance with large repositories:\n\n```yaml\n      env:\n        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        # Optimize for large PRs\n        config.large_patch_policy: \"clip\"\n        config.max_model_tokens: \"32000\"\n        config.patch_extra_lines_before: \"3\"\n        config.patch_extra_lines_after: \"1\"\n        github_action_config.auto_review: \"true\"\n        github_action_config.auto_describe: \"true\"\n        github_action_config.auto_improve: \"true\"\n```\n\n#### Reference\n\nFor more detailed configuration options, see:\n\n- [Changing a model in PR-Agent](../usage-guide/changing_a_model.md)\n- [Configuration options](../usage-guide/configuration_options.md)\n- [Automations and usage](../usage-guide/automations_and_usage.md#github-action)\n\n### Using a specific release\n\n!!! tip \"\"\n    if you want to pin your action to a specific release (v0.23 for example) for stability reasons, use:\n    ```yaml\n    ...\n        steps:\n          - name: PR Agent action step\n            id: pragent\n            uses: docker://codiumai/pr-agent:0.23-github_action\n    ...\n    ```\n\n    For enhanced security, you can also specify the Docker image by its [digest](https://hub.docker.com/repository/docker/codiumai/pr-agent/tags):\n    ```yaml\n    ...\n        steps:\n          - name: PR Agent action step\n            id: pragent\n            uses: docker://codiumai/pr-agent@sha256:14165e525678ace7d9b51cda8652c2d74abb4e1d76b57c4a6ccaeba84663cc64\n    ...\n    ```\n\n### Action for GitHub enterprise server\n\n!!! 
tip \"\"\n    To use the action with a GitHub enterprise server, add an environment variable `GITHUB.BASE_URL` with the API URL of your GitHub server.\n\n    For example, if your GitHub server is at `https://github.mycompany.com`, add the following to your workflow file:\n    ```yaml\n          env:\n            # ... previous environment values\n            GITHUB.BASE_URL: \"https://github.mycompany.com/api/v3\"\n    ```\n\n---\n\n## Run as a GitHub App\n\nAllowing you to automate the review process on your private or public repositories.\n\n1) Create a GitHub App from the [Github Developer Portal](https://docs.github.com/en/developers/apps/creating-a-github-app).\n\n   - Set the following permissions:\n     - Pull requests: Read & write\n     - Issue comment: Read & write\n     - Metadata: Read-only\n     - Contents: Read-only\n   - Set the following events:\n     - Issue comment\n     - Pull request\n     - Push (if you need to enable triggering on PR update)\n\n2) Generate a random secret for your app, and save it for later. 
For example, you can use:\n\n```bash\nWEBHOOK_SECRET=$(python -c \"import secrets; print(secrets.token_hex(10))\")\n```\n\n3) Acquire the following pieces of information from your app's settings page:\n\n   - App private key (click \"Generate a private key\" and save the file)\n   - App ID\n\n4) Clone this repository:\n\n```bash\ngit clone https://github.com/qodo-ai/pr-agent.git\n```\n\n5) Copy the secrets template file and fill in the following:\n\n```bash\ncp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml\n# Edit .secrets.toml file\n```\n\n- Your OpenAI key.\n- Copy your app's private key to the private_key field.\n- Copy your app's ID to the app_id field.\n- Copy your app's webhook secret to the webhook_secret field.\n- Set deployment_type to 'app' in [configuration.toml](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml)\n\n    > The .secrets.toml file is not copied to the Docker image by default, and is only used for local development.\n    > If you want to use the .secrets.toml file in your Docker image, you can remove it from the .dockerignore file.\n    > In most production environments, you would inject the secrets file as environment variables or as mounted volumes.\n    > For example, in order to inject a secrets file as a volume in a Kubernetes environment you can update your pod spec to include the following,\n    > assuming you have a secret named `pr-agent-settings` with a key named `.secrets.toml`:\n\n    ```\n           volumes:\n            - name: settings-volume\n              secret:\n                secretName: pr-agent-settings\n    // ...\n           containers:\n    // ...\n              volumeMounts:\n                - mountPath: /app/pr_agent/settings_prod\n                  name: settings-volume\n    ```\n\n    > Another option is to set the secrets as environment variables in your deployment environment, for example `OPENAI.KEY` and `GITHUB.USER_TOKEN`.\n\n6) Build a Docker 
image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example:\n\n    ```bash\n    docker build . -t codiumai/pr-agent:github_app --target github_app -f docker/Dockerfile\n    docker push codiumai/pr-agent:github_app  # Push to your Docker repository\n    ```\n\n7. Host the app using a server, serverless function, or container environment. Alternatively, for development and\n   debugging, you may use tools like smee.io to forward webhooks to your local machine.\n    You can check [Deploy as a Lambda Function](#deploy-as-a-lambda-function)\n\n8. Go back to your app's settings, and set the following:\n\n   - Webhook URL: The URL of your app's server or the URL of the smee.io channel.\n   - Webhook secret: The secret you generated earlier.\n\n9. Install the app by navigating to the \"Install App\" tab and selecting your desired repositories.\n\n> **Note:** When running PR-Agent from GitHub app, the default configuration file (configuration.toml) will be loaded.\n> However, you can override the default tool parameters by uploading a local configuration file `.pr_agent.toml`\n> For more information please check out the [USAGE GUIDE](../usage-guide/automations_and_usage.md#github-app)\n---\n\n## Additional deployment methods\n\n### Deploy as a Lambda Function\n\nNote that since AWS Lambda env vars cannot have \".\" in the name, you can replace each \".\" in an env variable with \"__\".<br>\nFor example: `GITHUB.WEBHOOK_SECRET` --> `GITHUB__WEBHOOK_SECRET`\n\n1. Follow steps 1-5 from [here](#run-as-a-github-app).\n2. Build a docker image that can be used as a lambda function\n\n    ```shell\n    docker buildx build --platform=linux/amd64 . -t codiumai/pr-agent:github_lambda --target github_lambda -f docker/Dockerfile.lambda\n   ```\n   (Note: --target github_lambda is optional as it's the default target)\n\n\n3. 
Push image to ECR\n\n    ```shell\n    docker tag codiumai/pr-agent:github_lambda <AWS_ACCOUNT>.dkr.ecr.<AWS_REGION>.amazonaws.com/codiumai/pr-agent:github_lambda\n    docker push <AWS_ACCOUNT>.dkr.ecr.<AWS_REGION>.amazonaws.com/codiumai/pr-agent:github_lambda\n    ```\n\n4. Create a lambda function that uses the uploaded image. Set the lambda timeout to be at least 3m.\n5. Configure the lambda function to have a Function URL.\n6. In the environment variables of the Lambda function, specify `AZURE_DEVOPS_CACHE_DIR` to a writable location such as /tmp. (see [link](https://github.com/qodo-ai/pr-agent/pull/450#issuecomment-1840242269))\n7. Go back to steps 8-9 of [Method 5](#run-as-a-github-app) with the function url as your Webhook URL.\n    The Webhook URL would look like `https://<LAMBDA_FUNCTION_URL>/api/v1/github_webhooks`\n\n#### Using AWS Secrets Manager\n\nFor production Lambda deployments, use AWS Secrets Manager instead of environment variables:\n\n1. Create a secret in AWS Secrets Manager with JSON format like this:\n\n```json\n{\n  \"openai.key\": \"sk-proj-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\",\n  \"github.webhook_secret\": \"your-webhook-secret-from-step-2\",\n  \"github.private_key\": \"-----BEGIN RSA PRIVATE KEY-----\\nMIIEpAIBAAKCAQEA...\\n-----END RSA PRIVATE KEY-----\"\n}\n```\n\n2. Add IAM permission `secretsmanager:GetSecretValue` to your Lambda execution role\n3. Set these environment variables in your Lambda:\n\n```bash\nAWS_SECRETS_MANAGER__SECRET_ARN=arn:aws:secretsmanager:us-east-1:123456789012:secret:pr-agent-secrets-AbCdEf\nCONFIG__SECRET_PROVIDER=aws_secrets_manager\n```\n\n---\n\n### AWS CodeCommit Setup\n\nNot all features have been added to CodeCommit yet.  As of right now, CodeCommit has been implemented to run the PR-Agent CLI on the command line, using AWS credentials stored in environment variables.  (More features will be added in the future.)  
The following is a set of instructions to have PR-Agent do a review of your CodeCommit pull request from the command line:\n\n1. Create an IAM user that you will use to read CodeCommit pull requests and post comments\n    - Note: That user should have CLI access only, not Console access\n2. Add IAM permissions to that user, to allow access to CodeCommit (see IAM Role example below)\n3. Generate an Access Key for your IAM user\n4. Set the Access Key and Secret using environment variables (see Access Key example below)\n5. Set the `git_provider` value to `codecommit` in the `pr_agent/settings/configuration.toml` settings file\n6. Set the `PYTHONPATH` to include your `pr-agent` project directory\n    - Option A: Add `PYTHONPATH=\"/PATH/TO/PROJECTS/pr-agent\"` to your `.env` file\n    - Option B: Set `PYTHONPATH` and run the CLI in one command, for example:\n        - `PYTHONPATH=\"/PATH/TO/PROJECTS/pr-agent\" python pr_agent/cli.py [--ARGS]`\n\n---\n\n##### AWS CodeCommit IAM Role Example\n\nExample IAM permissions to that user to allow access to CodeCommit:\n\n- Note: The following is a working example of IAM permissions that has read access to the repositories and write access to allow posting comments\n- Note: If you only want pr-agent to review your pull requests, you can tighten the IAM permissions further, however this IAM example will work, and allow the pr-agent to post comments to the PR\n- Note: You may want to replace the `\"Resource\": \"*\"` with your list of repos, to limit access to only those repos\n\n```json\n{\n    \"Version\": \"2012-10-17\",\n    \"Statement\": [\n        {\n            \"Effect\": \"Allow\",\n            \"Action\": [\n                \"codecommit:BatchDescribe*\",\n                \"codecommit:BatchGet*\",\n                \"codecommit:Describe*\",\n                \"codecommit:EvaluatePullRequestApprovalRules\",\n                \"codecommit:Get*\",\n                \"codecommit:List*\",\n                
\"codecommit:PostComment*\",\n                \"codecommit:PutCommentReaction\",\n                \"codecommit:UpdatePullRequestDescription\",\n                \"codecommit:UpdatePullRequestTitle\"\n            ],\n            \"Resource\": \"*\"\n        }\n    ]\n}\n```\n\n##### AWS CodeCommit Access Key and Secret\n\nExample setting the Access Key and Secret using environment variables\n\n```sh\nexport AWS_ACCESS_KEY_ID=\"XXXXXXXXXXXXXXXX\"\nexport AWS_SECRET_ACCESS_KEY=\"XXXXXXXXXXXXXXXX\"\nexport AWS_DEFAULT_REGION=\"us-east-1\"\n```\n\n##### AWS CodeCommit CLI Example\n\nAfter you set up AWS CodeCommit using the instructions above, here is an example CLI run that tells pr-agent to **review** a given pull request.\n(Replace your specific PYTHONPATH and PR URL in the example)\n\n```sh\nPYTHONPATH=\"/PATH/TO/PROJECTS/pr-agent\" python pr_agent/cli.py \\\n  --pr_url https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/MY_REPO_NAME/pull-requests/321 \\\n  review\n```"
  },
  {
    "path": "docs/docs/installation/gitlab.md",
    "content": "## Run as a GitLab Pipeline\n\nYou can use a pre-built Action Docker image to run PR-Agent as a GitLab pipeline. This is a simple way to get started with PR-Agent without setting up your own server.\n\n(1) Add the following file to your repository under `.gitlab-ci.yml`:\n\n```yaml\nstages:\n  - pr_agent\n\npr_agent_job:\n  stage: pr_agent\n  image:\n    name: codiumai/pr-agent:latest\n    entrypoint: [\"\"]\n  script:\n    - cd /app\n    - echo \"Running PR Agent action step\"\n    - export MR_URL=\"$CI_MERGE_REQUEST_PROJECT_URL/merge_requests/$CI_MERGE_REQUEST_IID\"\n    - echo \"MR_URL=$MR_URL\"\n    - export gitlab__url=$CI_SERVER_PROTOCOL://$CI_SERVER_FQDN\n    - export gitlab__PERSONAL_ACCESS_TOKEN=$GITLAB_PERSONAL_ACCESS_TOKEN\n    - export config__git_provider=\"gitlab\"\n    - export openai__key=$OPENAI_KEY\n    - python -m pr_agent.cli --pr_url=\"$MR_URL\" describe\n    - python -m pr_agent.cli --pr_url=\"$MR_URL\" review\n    - python -m pr_agent.cli --pr_url=\"$MR_URL\" improve\n  rules:\n    - if: '$CI_PIPELINE_SOURCE == \"merge_request_event\"'\n```\n\nThis script will run PR-Agent on every new merge request. You can modify the `rules` section to run PR-Agent on different events.\nYou can also modify the `script` section to run different PR-Agent commands, or with different parameters by exporting different environment variables.\n\n(2) Add the following masked variables to your GitLab repository (CI/CD -> Variables):\n\n- `GITLAB_PERSONAL_ACCESS_TOKEN`: Your GitLab personal access token.\n\n- `OPENAI_KEY`: Your OpenAI key.\n\nNote that if your base branches are not protected, don't set the variables as `protected`, since the pipeline will not have access to them.\n\n> **Note**: The `$CI_SERVER_FQDN` variable is available starting from GitLab version 16.10. If you're using an earlier version, this variable will not be available. However, you can combine `$CI_SERVER_HOST` and `$CI_SERVER_PORT` to achieve the same result. 
Please ensure you're using a compatible version or adjust your configuration.\n\n> **Note**: The `gitlab__SSL_VERIFY` environment variable can be used to specify the path to a custom CA certificate bundle for SSL verification. GitLab exposes the `$CI_SERVER_TLS_CA_FILE` variable, which points to the custom CA certificate file configured in your GitLab instance.\n> Alternatively, SSL verification can be disabled entirely by setting `gitlab__SSL_VERIFY=false`, although this is not recommended.\n\n## Run a GitLab webhook server\n\n1. In GitLab create a new user and give it \"Reporter\" role for the intended group or project.\n\n2. For the user from step 1, generate a `personal_access_token` with `api` access.\n\n3. Generate a random secret for your app, and save it for later (`shared_secret`). For example, you can use:\n\n```bash\nSHARED_SECRET=$(python -c \"import secrets; print(secrets.token_hex(10))\")\n```\n\n4. Clone this repository:\n\n```bash\ngit clone https://github.com/qodo-ai/pr-agent.git\n```\n\n5. Prepare variables and secrets. Skip this step if you plan on setting these as environment variables when running the agent:\n    1. In the configuration file/variables:\n        - Set `config.git_provider` to \"gitlab\"\n\n    2. In the secrets file/variables:\n        - Set your AI model key in the respective section\n        - In the [gitlab] section, set `personal_access_token` (with token from step 2) and `shared_secret` (with secret from step 3)\n        - **Authentication type**: Set `auth_type` to `\"private_token\"` for older GitLab versions (e.g., 11.x) or private deployments. Default is `\"oauth_token\"` for gitlab.com and newer versions.\n\n6. Build a Docker image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example:\n\n```bash\ndocker build . -t codiumai/pr-agent:gitlab_webhook --target gitlab_webhook -f docker/Dockerfile\ndocker push codiumai/pr-agent:gitlab_webhook  # Push to your Docker repository\n```\n\n7. 
Set the environmental variables, the method depends on your docker runtime. Skip this step if you included your secrets/configuration directly in the Docker image.\n\n```bash\nCONFIG__GIT_PROVIDER=gitlab\nGITLAB__PERSONAL_ACCESS_TOKEN=<personal_access_token>\nGITLAB__SHARED_SECRET=<shared_secret>\nGITLAB__URL=https://gitlab.com\nGITLAB__AUTH_TYPE=oauth_token  # Use \"private_token\" for older GitLab versions\nOPENAI__KEY=<your_openai_api_key>\nPORT=3000  # Optional: override the webhook server port\n```\n\n8. Create a webhook in your GitLab project. Set the URL to `http[s]://<PR_AGENT_HOSTNAME>/webhook`, the secret token to the generated secret from step 3, and enable the triggers `push`, `comments` and `merge request events`.\n\n9. Test your installation by opening a merge request or commenting on a merge request using one of PR Agent's commands.\n\n## Deploy as a Lambda Function\n\nNote that since AWS Lambda env vars cannot have \".\" in the name, you can replace each \".\" in an env variable with \"__\".<br>\nFor example: `GITLAB.PERSONAL_ACCESS_TOKEN` --> `GITLAB__PERSONAL_ACCESS_TOKEN`\n\n1. Follow steps 1-5 from [Run a GitLab webhook server](#run-a-gitlab-webhook-server).\n2. Build a docker image that can be used as a lambda function\n\n    ```shell\n    docker buildx build --platform=linux/amd64 . -t codiumai/pr-agent:gitlab_lambda --target gitlab_lambda -f docker/Dockerfile.lambda\n   ```\n\n3. Push image to ECR\n\n    ```shell\n    docker tag codiumai/pr-agent:gitlab_lambda <AWS_ACCOUNT>.dkr.ecr.<AWS_REGION>.amazonaws.com/codiumai/pr-agent:gitlab_lambda\n    docker push <AWS_ACCOUNT>.dkr.ecr.<AWS_REGION>.amazonaws.com/codiumai/pr-agent:gitlab_lambda\n    ```\n\n4. Create a lambda function that uses the uploaded image. Set the lambda timeout to be at least 3m.\n5. Configure the lambda function to have a Function URL.\n6. In the environment variables of the Lambda function, specify `AZURE_DEVOPS_CACHE_DIR` to a writable location such as /tmp. 
(see [link](https://github.com/qodo-ai/pr-agent/pull/450#issuecomment-1840242269))\n7. Go back to steps 8-9 of [Run a GitLab webhook server](#run-a-gitlab-webhook-server) with the function URL as your Webhook URL.\n    The Webhook URL would look like `https://<LAMBDA_FUNCTION_URL>/webhook`\n\n### Using AWS Secrets Manager\n\nFor production Lambda deployments, use AWS Secrets Manager instead of environment variables:\n\n1. Create individual secrets for each GitLab webhook with this JSON format (e.g., secret name: `project-webhook-secret-001`)\n\n```json\n{\n  \"gitlab_token\": \"glpat-xxxxxxxxxxxxxxxxxxxxxxxx\",\n  \"token_name\": \"project-webhook-001\"\n}\n```\n\n2. Create a main configuration secret for common settings (e.g., secret name: `pr-agent-main-config`)\n\n```json\n{\n  \"openai.key\": \"sk-proj-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"\n}\n```\n\n3. Set these environment variables in your Lambda:\n\n```bash\nCONFIG__SECRET_PROVIDER=aws_secrets_manager\nAWS_SECRETS_MANAGER__SECRET_ARN=arn:aws:secretsmanager:us-east-1:123456789012:secret:pr-agent-main-config-AbCdEf\n```\n\n4. In your GitLab webhook configuration, set the **Secret Token** to the **Secret name** created in step 1:\n   - Example: `project-webhook-secret-001`\n\n**Important**: When using Secrets Manager, GitLab's webhook secret must be the Secrets Manager secret name.\n\n5. Add IAM permission `secretsmanager:GetSecretValue` to your Lambda execution role\n"
  },
  {
    "path": "docs/docs/installation/index.md",
    "content": "# Installation\n\nThere are several ways to use PR-Agent:\n\n- [Locally](./locally.md)\n- [GitHub integration](./github.md)\n- [GitLab integration](./gitlab.md)\n- [BitBucket integration](./bitbucket.md)\n- [Azure DevOps integration](./azure.md)\n- [Gitea integration](./gitea.md)\n"
  },
  {
    "path": "docs/docs/installation/locally.md",
    "content": "To run PR-Agent locally, you first need to acquire two keys:\n\n1. An OpenAI key from [here](https://platform.openai.com/api-keys){:target=\"_blank\"}, with access to GPT-4 and o4-mini (or a key for other [language models](../usage-guide/changing_a_model.md), if you prefer).\n2. A personal access token from your Git platform (GitHub, GitLab, BitBucket, Gitea) with repo scope. A GitHub token, for example, can be issued from [here](https://github.com/settings/tokens){:target=\"_blank\"}\n\n## Using Docker image\n\nA list of the relevant tools can be found in the [tools guide](../tools/).\n\nTo invoke a tool (for example `review`), you can run PR-Agent directly from the Docker image. Here's how:\n\n- For GitHub:\n\n    ```bash\n    docker run --rm -it -e OPENAI.KEY=<your_openai_key> -e GITHUB.USER_TOKEN=<your_github_token> codiumai/pr-agent:latest --pr_url <pr_url> review\n    ```\n\n    If you are using GitHub enterprise server, you need to specify the custom url as variable.\n    For example, if your GitHub server is at `https://github.mycompany.com`, add the following to the command:\n\n    ```bash\n    -e GITHUB.BASE_URL=https://github.mycompany.com/api/v3\n    ```\n\n- For GitLab:\n\n    ```bash\n    docker run --rm -it -e OPENAI.KEY=<your key> -e CONFIG.GIT_PROVIDER=gitlab -e GITLAB.PERSONAL_ACCESS_TOKEN=<your token> codiumai/pr-agent:latest --pr_url <pr_url> review\n    ```\n\n    If you have a dedicated GitLab instance, you need to specify the custom url as variable:\n\n    ```bash\n    -e GITLAB.URL=<your gitlab instance url>\n    ```\n\n- For BitBucket:\n\n    ```bash\n    docker run --rm -it -e CONFIG.GIT_PROVIDER=bitbucket -e OPENAI.KEY=$OPENAI_API_KEY -e BITBUCKET.BEARER_TOKEN=$BITBUCKET_BEARER_TOKEN codiumai/pr-agent:latest --pr_url=<pr_url> review\n    ```\n\n- For Gitea:\n\n    ```bash\n    docker run --rm -it -e OPENAI.KEY=<your key> -e CONFIG.GIT_PROVIDER=gitea -e GITEA.PERSONAL_ACCESS_TOKEN=<your token> codiumai/pr-agent:latest --pr_url 
<pr_url> review\n    ```\n\n    If you have a dedicated Gitea instance, you need to specify the custom url as variable:\n\n    ```bash\n    -e GITEA.URL=<your gitea instance url>\n    ```\n\n\nFor other git providers, update `CONFIG.GIT_PROVIDER` accordingly and check the [`pr_agent/settings/.secrets_template.toml`](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/.secrets_template.toml) file for environment variables expected names and values.\n\n### Utilizing environment variables\n\nIt is also possible to provide or override the configuration by setting the corresponding environment variables.\nYou can define the corresponding environment variables by following this convention: `<TABLE>__<KEY>=<VALUE>` or `<TABLE>.<KEY>=<VALUE>`.\nThe `<TABLE>` refers to a table/section in a configuration file and `<KEY>=<VALUE>` refers to the key/value pair of a setting in the configuration file.\n\nFor example, suppose you want to run `pr_agent` that connects to a self-hosted GitLab instance similar to an example above.\nYou can define the environment variables in a plain text file named `.env` with the following content:\n\n```bash\nCONFIG__GIT_PROVIDER=\"gitlab\"\nGITLAB__URL=\"<your url>\"\nGITLAB__PERSONAL_ACCESS_TOKEN=\"<your token>\"\nOPENAI__KEY=\"<your key>\"\n```\n\nThen, you can run `pr_agent` using Docker with the following command:\n\n```shell\ndocker run --rm -it --env-file .env codiumai/pr-agent:latest <tool> <tool parameter>\n```\n\n---\n\n### I get an error when running the Docker image. 
What should I do?\n\nIf you encounter an error when running the Docker image, it is almost always due to a misconfiguration of api keys or tokens.\n\nNote that litellm, which is used by pr-agent, sometimes returns non-informative error messages such as `APIError: OpenAIException - Connection error.`\nCarefully check the api keys and tokens you provided and make sure they are correct.\nAdjustments may be needed depending on your llm provider.\n\nFor example, for Azure OpenAI, additional keys are [needed](../usage-guide/changing_a_model.md#azure).\nSame goes for other providers, make sure to check the [documentation](../usage-guide/changing_a_model.md#changing-a-model)\n\n## Using pip package\n\nInstall the package:\n\n```bash\npip install pr-agent\n```\n\nThen run the relevant tool with the script below.\n<br>\nMake sure to fill in the required parameters (`user_token`, `openai_key`, `pr_url`, `command`):\n\n```python\nfrom pr_agent import cli\nfrom pr_agent.config_loader import get_settings\n\ndef main():\n    # Fill in the following values\n    provider = \"github\" # github/gitlab/bitbucket/azure_devops\n    user_token = \"...\"  #  user token\n    openai_key = \"...\"  # OpenAI key\n    pr_url = \"...\"      # PR URL, for example 'https://github.com/qodo-ai/pr-agent/pull/809'\n    command = \"/review\" # Command to run (e.g. '/review', '/describe', '/ask=\"What is the purpose of this PR?\"', ...)\n\n    # Setting the configurations\n    get_settings().set(\"CONFIG.git_provider\", provider)\n    get_settings().set(\"openai.key\", openai_key)\n    get_settings().set(\"github.user_token\", user_token)\n\n    # Run the command. Feedback will appear in GitHub PR comments\n    cli.run_command(pr_url, command)\n\n\nif __name__ == '__main__':\n    main()\n```\n\n## Run from source\n\n1. Clone this repository:\n\n```bash\ngit clone https://github.com/qodo-ai/pr-agent.git\n```\n\n2. 
Navigate to the `/pr-agent` folder and install the requirements in your favorite virtual environment:\n\n```bash\npip install -e .\n```\n\n*Note: If you get an error related to Rust in the dependency installation then make sure Rust is installed and in your `PATH`, instructions: https://rustup.rs*\n\n3. Copy the secrets template file and fill in your OpenAI key and your GitHub user token:\n\n```bash\ncp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml\nchmod 600 pr_agent/settings/.secrets.toml\n# Edit .secrets.toml file\n```\n\n4. Run the cli.py script:\n\n```bash\npython3 -m pr_agent.cli --pr_url <pr_url> review\npython3 -m pr_agent.cli --pr_url <pr_url> ask <your question>\npython3 -m pr_agent.cli --pr_url <pr_url> describe\npython3 -m pr_agent.cli --pr_url <pr_url> improve\npython3 -m pr_agent.cli --pr_url <pr_url> add_docs\npython3 -m pr_agent.cli --pr_url <pr_url> generate_labels\npython3 -m pr_agent.cli --issue_url <issue_url> similar_issue\n...\n```\n\n[Optional] Add the pr_agent folder to your PYTHONPATH\n\n```bash\nexport PYTHONPATH=$PYTHONPATH:<PATH to pr_agent folder>\n```\n"
  },
  {
    "path": "docs/docs/installation/pr_agent.md",
    "content": "# PR-Agent Installation Guide\n\nPR-Agent can be deployed in various environments and platforms. Choose the installation method that best suits your needs:\n\n## 🖥️ Local Installation\n\nLearn how to run PR-Agent locally using:\n\n- Docker image\n- pip package\n- CLI from source code\n\n[View Local Installation Guide →](./locally.md)\n\n## 🐙 GitHub Integration\n\nSet up PR-Agent with GitHub as:\n\n- GitHub Action\n- Local GitHub App\n\n[View GitHub Integration Guide →](./github.md)\n\n## 🦊 GitLab Integration\n\nDeploy PR-Agent on GitLab as:\n\n- GitLab pipeline job\n- Local GitLab webhook server\n\n[View GitLab Integration Guide →](./gitlab.md)\n\n## 🟦 BitBucket Integration\n\nImplement PR-Agent in BitBucket as:\n\n- BitBucket pipeline job\n- Local BitBucket server\n\n[View BitBucket Integration Guide →](./bitbucket.md)\n\n## 🔷  Azure DevOps Integration\n\nConfigure PR-Agent with Azure DevOps as:\n\n- Azure DevOps pipeline job\n- Local Azure DevOps webhook\n\n[View Azure DevOps Integration Guide →](./azure.md)\n"
  },
  {
    "path": "docs/docs/overview/data_privacy.md",
    "content": "## Self-hosted PR-Agent\n\n- If you self-host PR-Agent with your own OpenAI (or other LLM provider) API key, data handling is strictly between you and that provider.\n"
  },
  {
    "path": "docs/docs/summary.md",
    "content": "# Table of contents\n\n* [Overview](index.md)\n  * [Data Privacy](overview/data_privacy.md)\n\n## Installation\n\n* [Installation](installation/index.md)\n* [PR-Agent](installation/pr_agent.md)\n\n## Usage Guide\n\n* [Usage Guide](usage-guide/index.md)\n* [Introduction](usage-guide/introduction.md)\n* [Configuration File](usage-guide/configuration_options.md)\n* [Usage and Automation](usage-guide/automations_and_usage.md)\n* [Managing Mail Notifications](usage-guide/mail_notifications.md)\n* [Changing a Model](usage-guide/changing_a_model.md)\n* [Additional Configurations](usage-guide/additional_configurations.md)\n* [Frequently Asked Questions](faq/index.md)\n\n## Tools\n\n* [Tools](tools/index.md)\n* [Describe](tools/describe.md)\n* [Review](tools/review.md)\n* [Improve](tools/improve.md)\n* [Ask](tools/ask.md)\n* [Add Docs](tools/add_docs.md)\n* [Generate Labels](tools/generate_labels.md)\n* [Similar Issues](tools/similar_issues.md)\n* [Help](tools/help.md)\n* [Help Docs](tools/help_docs.md)\n* [Update Changelog](tools/update_changelog.md)\n\n## Core Abilities\n\n* [Core Abilities](core-abilities/index.md)\n* [Chat on code suggestions](core-abilities/interactivity.md)\n* [Compression strategy](core-abilities/compression_strategy.md)\n* [Dynamic context](core-abilities/dynamic_context.md)\n* [Fetching ticket context](core-abilities/fetching_ticket_context.md)\n* [Interactivity](core-abilities/interactivity.md)\n* [Local and global metadata](core-abilities/metadata.md)\n* [Self-reflection](core-abilities/self_reflection.md)\n"
  },
  {
    "path": "docs/docs/tools/add_docs.md",
    "content": "## Overview\n\nThe `add_docs` tool scans the PR code changes and suggests documentation for any code components that are missing documentation, such as functions, classes, and methods.\n\nIt can be invoked manually by commenting on any PR:\n\n```\n/add_docs\n```\n\n## Example usage\n\nInvoke the tool manually by commenting `/add_docs` on any PR:\n\n![Add Docs](https://codium.ai/images/pr_agent/add_docs_comment.png){width=512}\n\nThe tool will generate documentation suggestions as inline code suggestions:\n\n![Add Docs Result](https://codium.ai/images/pr_agent/add_docs_result.png){width=512}\n\n### Language-specific documentation styles\n\nThe tool automatically detects the programming language and generates documentation in the appropriate format:\n\n| Language | Documentation Format |\n|----------|---------------------|\n| Python | Docstrings (Sphinx, Google, Numpy styles) |\n| Java | Javadocs |\n| JavaScript/TypeScript | JSdocs |\n| C++ | Doxygen |\n| Other | Generic documentation |\n\n## Configuration options\n\nUnder the section `[pr_add_docs]`, the following options are available:\n\n| Option | Type | Default | Description |\n|--------|------|---------|-------------|\n| `extra_instructions` | string | `\"\"` | Additional instructions for the AI model |\n| `docs_style` | string | `\"Sphinx\"` | Documentation style for Python. 
Options: `\"Sphinx\"`, `\"Google Style with Args, Returns, Attributes...etc\"`, `\"Numpy Style\"`, `\"PEP257\"`, `\"reStructuredText\"` |\n| `file` | string | `\"\"` | Specific file to document (useful when multiple components have the same name) |\n| `class_name` | string | `\"\"` | Specific class name to target (useful when methods have the same name in the same file) |\n\n### Example configuration\n\nTo customize the documentation style, add the following to your configuration file:\n\n```toml\n[pr_add_docs]\ndocs_style = \"Google Style with Args, Returns, Attributes...etc\"\nextra_instructions = \"Focus on documenting public methods and include usage examples\"\n```\n\n### Command line options\n\nYou can pass configuration options directly in the command:\n\n```\n/add_docs --pr_add_docs.docs_style=\"Numpy Style\"\n```\n\n## How it works\n\n1. The tool analyzes the PR diff to identify code components (functions, classes, methods) that lack documentation\n2. It uses AI to generate appropriate documentation based on the code context and language\n3. Documentation suggestions are published as inline code suggestions that can be applied with a single click\n"
  },
  {
    "path": "docs/docs/tools/ask.md",
    "content": "## Overview\n\nThe `ask` tool answers questions about the PR, based on the PR code changes. Make sure to be specific and clear in your questions.\nIt can be invoked manually by commenting on any PR:\n\n```\n/ask \"...\"\n```\n\n## Example usage\n\n![Ask Comment](https://codium.ai/images/pr_agent/ask_comment.png){width=512}\n\n![Ask](https://codium.ai/images/pr_agent/ask.png){width=512}\n\n## Ask lines\n\nYou can run `/ask` on specific lines of code in the PR from the PR's diff view. The tool will answer questions based on the code changes in the selected lines.\n\n- Click on the '+' sign next to the line number to select the line.\n- To select multiple lines, click on the '+' sign of the first line and then hold and drag to select the rest of the lines.\n- write `/ask \"...\"` in the comment box and press `Add single comment` button.\n\n![Ask Line](https://codium.ai/images/pr_agent/Ask_line.png){width=512}\n\nNote that the tool does not have \"memory\" of previous questions, and answers each question independently.\n\n## Ask on images\n\nYou can also ask questions about images that appear in the comment, where the entire PR code will be used as context.\n<br>\nThe basic syntax is:\n\n```\n/ask \"...\"\n\n[Image](https://real_link_to_image)\n```\n\nwhere `https://real_link_to_image` is the direct link to the image.\n\nNote that GitHub has a built-in mechanism of pasting images in comments. However, pasted image does not provide a direct link.\nTo get a direct link to an image, we recommend using the following scheme:\n\n1\\. First, post a comment that contains **only** the image:\n\n![Ask image1](https://codium.ai/images/pr_agent/ask_images1.png){width=512}\n\n2\\. Quote reply to that comment:\n\n![Ask image2](https://codium.ai/images/pr_agent/ask_images2.png){width=512}\n\n3\\. 
In the screen opened, type the question below the image:\n\n![Ask image3](https://codium.ai/images/pr_agent/ask_images3.png){width=512}\n![Ask image4](https://codium.ai/images/pr_agent/ask_images4.png){width=512}\n\n4\\. Post the comment, and receive the answer:\n\n![Ask image5](https://codium.ai/images/pr_agent/ask_images5.png){width=512}\n\nSee a full video tutorial [here](https://codium.ai/images/pr_agent/ask_image_video.mov)\n"
  },
  {
    "path": "docs/docs/tools/describe.md",
    "content": "## Overview\n\nThe `describe` tool scans the PR code changes, and generates a description for the PR - title, type, summary, walkthrough and labels.\n\nThe tool can be triggered automatically every time a new PR is [opened](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened), or it can be invoked manually by commenting on any PR:\n\n```\n/describe\n```\n\n## Example usage\n\n### Manual triggering\n\nInvoke the tool manually by commenting `/describe` on any PR:\n\n![Describe comment](https://codium.ai/images/pr_agent/describe_comment.png){width=512}\n\nAfter ~30 seconds, the tool will generate a description for the PR:\n\n![Describe New](https://codium.ai/images/pr_agent/describe_new.png){width=512}\n\nIf you want to edit [configurations](#configuration-options), add the relevant ones to the command:\n\n```\n/describe --pr_description.some_config1=... --pr_description.some_config2=...\n```\n\n### Automatic triggering\n\nTo run the `describe` tool automatically when a PR is opened, define in a [configuration file](../usage-guide/configuration_options.md#wiki-configuration-file):\n\n```\n[github_app]\npr_commands = [\n    \"/describe\",\n    ...\n]\n\n[pr_description]\npublish_labels = true\n...\n```\n\n- The `pr_commands` lists commands that will be executed automatically when a PR is opened.\n- The `[pr_description]` section contains the configurations for the `describe` tool you want to edit (if any).\n\n## Preserving the original user description\n\nBy default, PR-Agent tries to preserve your original PR description by placing it above the generated content.\nThis requires including your description during the initial PR creation.\n\n\"PR-Agent removed the original description from the PR. Why?\"\n\nFrom our experience, there are two possible reasons:\n\n- If you edit the description _while_ the automated tool is running, a race condition may occur, potentially causing your original description to be lost. 
Hence, create a description before launching the PR.\n\n- When _updating_ PR descriptions, the `/describe` tool considers everything above the \"PR Type\" field as user content and will preserve it.\nEverything below this marker is treated as previously auto-generated content and will be replaced.\n\n![Describe comment](https://codium.ai/images/pr_agent/pr_description_user_description.png){width=512}\n\n## Sequence Diagram Support \nThe `/describe` tool includes a Mermaid sequence diagram showing component/function interactions. \n\nThis option is enabled by default via the `pr_description.enable_pr_diagram` param.\n\n\n[//]: # (### How to enable\\disable)\n\n[//]: # ()\n[//]: # (In your configuration:)\n\n[//]: # ()\n[//]: # (```)\n\n[//]: # (toml)\n\n[//]: # ([pr_description])\n\n[//]: # (enable_pr_diagram = true)\n\n[//]: # (```)\n\n## Configuration options\n\n???+ example \"Possible configurations\"\n\n    <table>\n      <tr>\n        <td><b>publish_labels</b></td>\n        <td>If set to true, the tool will publish labels to the PR. Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>publish_description_as_comment</b></td>\n        <td>If set to true, the tool will publish the description as a comment to the PR. If false, it will overwrite the original description. Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>publish_description_as_comment_persistent</b></td>\n        <td>If set to true and `publish_description_as_comment` is true, the tool will publish the description as a persistent comment to the PR. Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>add_original_user_description</b></td>\n        <td>If set to true, the tool will add the original user description to the generated description. Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>generate_ai_title</b></td>\n        <td>If set to true, the tool will also generate an AI title for the PR. 
Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>extra_instructions</b></td>\n        <td>Optional extra instructions to the tool. For example: \"focus on the changes in the file X. Ignore change in ...\"</td>\n      </tr>\n      <tr>\n        <td><b>enable_pr_type</b></td>\n        <td>If set to false, it will not show the `PR type` as a text value in the description content. Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>final_update_message</b></td>\n        <td>If set to true, it will add a comment message [`PR Description updated to latest commit...`](https://github.com/qodo-ai/pr-agent/pull/499#issuecomment-1837412176) after finishing calling `/describe`. Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>enable_semantic_files_types</b></td>\n        <td>If set to true, \"Changes walkthrough\" section will be generated. Default is true.</td>\n      </tr>\n      <tr>\n            <td><b>file_table_collapsible_open_by_default</b></td>\n            <td>If set to true, the file list in the \"Changes walkthrough\" section will be open by default. If set to false, it will be closed by default. Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>collapsible_file_list</b></td>\n        <td>If set to true, the file list in the \"Changes walkthrough\" section will be collapsible. If set to \"adaptive\", the file list will be collapsible only if there are more than 8 files. Default is \"adaptive\".</td>\n      </tr>\n      <tr>\n        <td><b>enable_large_pr_handling</b></td>\n        <td>If set to true, in case of a large PR the tool will make several calls to the AI and combine them to be able to cover more files. Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>enable_help_text</b></td>\n        <td>If set to true, the tool will display a help text in the comment. 
Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>enable_pr_diagram</b></td>\n        <td>If set to true, the tool will generate a horizontal Mermaid flowchart summarizing the main pull request changes. This field remains empty if not applicable. Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>auto_create_ticket</b></td>\n        <td>If set to true, this will automatically create a ticket in the ticketing system when a PR is opened. Default is false.</td>\n      </tr>\n    </table>\n\n## Markers template\n\nTo enable markers, set `pr_description.use_description_markers=true`.\nMarkers enable to easily integrate user's content and auto-generated content, with a template-like mechanism.\n\nFor example, if the PR original description was:\n\n```\nUser content...\n\n## PR Type:\npr_agent:type\n\n## PR Description:\npr_agent:summary\n\n## PR Walkthrough:\npr_agent:walkthrough\n\n## PR Diagram:\npr_agent:diagram\n```\n\nThe marker `pr_agent:type` will be replaced with the PR type, `pr_agent:summary` will be replaced with the PR summary, `pr_agent:walkthrough` will be replaced with the PR walkthrough, and `pr_agent:diagram` will be replaced with the sequence diagram (if enabled).\n\n![Describe markers before](https://codium.ai/images/pr_agent/describe_markers_before.png){width=512}\n\nbecomes\n\n![Describe markers after](https://codium.ai/images/pr_agent/describe_markers_after.png){width=512}\n\n**Configuration params**:\n\n- `use_description_markers`: if set to true, the tool will use markers template. It replaces every marker of the form `pr_agent:marker_name` with the relevant content. Default is false.\n- `include_generated_by_header`: if set to true, the tool will add a dedicated header: 'Generated by PR Agent at ...' to any automatic content. 
Default is true.\n- `diagram`: if present as a marker, will be replaced by the PR sequence diagram (if enabled).\n\n## Custom labels\n\nThe default labels of the describe tool are quite generic, since they are meant to be used in any repo: [`Bug fix`, `Tests`, `Enhancement`, `Documentation`, `Other`].\n\nYou can define custom labels that are relevant for your repo and use cases.\nCustom labels can be defined in a configuration file, or directly in the repo's [labels page](#handle-custom-labels-from-the-repos-labels-page).\n\nMake sure to provide proper title, and a detailed and well-phrased description for each label, so the tool will know when to suggest it.\nEach label description should be a **conditional statement**, that indicates if to add the label to the PR or not, according to the PR content.\n\n???+ tip \"Auto-remove custom label when no longer relevant\"\n    If the custom label is no longer relevant, it will be automatically removed from the PR by running the `generate_labels` tool or the `describe` tool.\n\n\n### Handle custom labels from a configuration file\n\nExample for a custom labels configuration setup in a configuration file:\n\n```\n[config]\nenable_custom_labels=true\n\n\n[custom_labels.\"sql_changes\"]\ndescription = \"Use when a PR contains changes to SQL queries\"\n\n[custom_labels.\"test\"]\ndescription = \"use when a PR primarily contains new tests\"\n\n...\n```\n\n### Handle custom labels from the Repo's labels page\n\nYou can also control the custom labels that will be suggested by the `describe` tool from the repo's labels page:\n\n- GitHub : go to `https://github.com/{owner}/{repo}/labels` (or click on the \"Labels\" tab in the issues or PRs page)\n- GitLab : go to `https://gitlab.com/{owner}/{repo}/-/labels` (or click on \"Manage\" -> \"Labels\" on the left menu)\n\nNow add/edit the custom labels. 
They should be formatted as follows:\n\n- Label name: The name of the custom label.\n- Description: Start the description with the prefix `pr_agent:`, for example: `pr_agent: Description of when AI should suggest this label`.<br>\n\nExamples for custom labels:\n\n- `Main topic:performance` -  pr_agent:The main topic of this PR is performance\n- `New endpoint` -  pr_agent:A new endpoint was added in this PR\n- `SQL query` -  pr_agent:A new SQL query was added in this PR\n- `Dockerfile changes` - pr_agent:The PR contains changes in the Dockerfile\n- ...\n\nThe description should be comprehensive and detailed, indicating when to add the desired label. For example:\n![Add native custom labels](https://codium.ai/images/pr_agent/add_native_custom_labels.png){width=768}\n\n## Usage Tips\n\n!!! tip \"Automation\"\n    - When you first install the PR-Agent app, the [default mode](../usage-guide/automations_and_usage.md#github-app) for the describe tool is:\n    ```\n    pr_commands = [\"/describe\", ...]\n    ```\n    meaning the `describe` tool will run automatically on every PR, with the default configurations.\n\n- Markers are an alternative way to control the generated description, to give maximal control to the user. If you set:\n\n   ```\n   pr_commands = [\"/describe --pr_description.use_description_markers=true\", ...]\n   ```\n\n   the tool will replace every marker of the form `pr_agent:marker_name` in the PR description with the relevant content, where `marker_name` is one of the following:\n         * `type`: the PR type.\n         * `summary`: the PR summary.\n         * `walkthrough`: the PR walkthrough.\n\n- Note that when markers are enabled, if the original PR description does not contain any markers, the tool will not alter the description at all.\n"
  },
  {
    "path": "docs/docs/tools/generate_labels.md",
    "content": "## Overview\n\nThe `generate_labels` tool scans the PR code changes and generates custom labels for the PR based on the content and context of the changes.\n\nIt can be invoked manually by commenting on any PR:\n\n```\n/generate_labels\n```\n\n## Example usage\n\nInvoke the tool manually by commenting `/generate_labels` on any PR:\n\n![Generate Labels](https://codium.ai/images/pr_agent/generate_labels_comment.png){width=512}\n\nThe tool will analyze the PR and add appropriate labels:\n\n![Generate Labels Result](https://codium.ai/images/pr_agent/generate_labels_result.png){width=512}\n\n## Configuration options\n\nThe `generate_labels` tool uses configurations from the `[pr_description]` section for custom labels.\n\n### Enabling custom labels\n\nTo use custom labels, you need to enable them in the configuration:\n\n```toml\n[config]\nenable_custom_labels = true\n```\n\n### Defining custom labels\n\nYou can define your own custom labels in the `[custom_labels]` section. See the [custom_labels.toml](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/custom_labels.toml) file for examples.\n\nExample configuration:\n\n```toml\n[custom_labels.\"Bug fix\"]\ndescription = \"A fix for a bug in the codebase\"\n\n[custom_labels.\"Feature\"]\ndescription = \"A new feature or enhancement\"\n\n[custom_labels.\"Documentation\"]\ndescription = \"Documentation changes only\"\n\n[custom_labels.\"Tests\"]\ndescription = \"Adding or modifying tests\"\n\n[custom_labels.\"Refactoring\"]\ndescription = \"Code refactoring without functional changes\"\n```\n\n### How labels are applied\n\n1. The tool analyzes the PR diff and commit messages\n2. It uses AI to determine which labels best match the PR content\n3. Labels are automatically applied to the PR (if the git provider supports it)\n4. 
If labels cannot be applied directly, they are published as a comment\n\n## Comparison with `/describe` labels\n\nThe `/describe` tool also generates labels as part of its output. The key differences are:\n\n| Feature | `/generate_labels` | `/describe` |\n|---------|-------------------|-------------|\n| Purpose | Dedicated label generation | Full PR description with labels |\n| Output | Labels only | Title, summary, walkthrough, and labels |\n| Custom labels | ✅ Supported | ✅ Supported |\n| Use case | When you only need labels | When you want a complete PR description |\n\n## Tips\n\n- Use custom labels that match your team's workflow and labeling conventions\n- Combine with automation to automatically label PRs when they are opened\n- Review the generated labels and adjust custom label descriptions if the AI consistently misclassifies PRs\n"
  },
  {
    "path": "docs/docs/tools/help.md",
    "content": "## Overview\n\nThe `help` tool provides a list of all the available tools and their descriptions.\nFor PR-Agent users, it also enables to trigger each tool by checking the relevant box.\n\nIt can be invoked manually by commenting on any PR:\n\n```\n/help\n```\n\n## Example usage\n\nInvoke the `help` tool by commenting on a PR with:\n\n![Help tool input](https://codium.ai/images/pr_agent/help1.png){width=750}\n\n\nResponse will include a list of available tools:\n\n![Help tool output](https://codium.ai/images/pr_agent/help2.png){width=750}\n"
  },
  {
    "path": "docs/docs/tools/help_docs.md",
    "content": "## Overview\n\nThe `help_docs` tool can answer a free-text question based on a git documentation folder.\n\nIt can be invoked manually by commenting on any PR or Issue:\n\n```\n/help_docs \"...\"\n```\n\nOr configured to be triggered automatically when a [new issue is opened](#run-as-a-github-action).\n\nThe tool assumes by default that the documentation is located in the root of the repository, at `/docs` folder.\nHowever, this can be customized by setting the `docs_path` configuration option:\n\n```toml\n[pr_help_docs]\nrepo_url = \"\"                 # The repository to use as context\ndocs_path = \"docs\"            # The documentation folder\nrepo_default_branch = \"main\"  # The branch to use in case repo_url overwritten\n\n```\n\nSee more configuration options in the [Configuration options](#configuration-options) section.\n\n## Example usage\n\n[//]: # (#### Asking a question about this repository:)\n\n[//]: # (![help_docs on the documentation of this repository]&#40;https://codium.ai/images/pr_agent/help_docs_comment.png&#41;{width=512})\n\n**Asking a question about another repository**\n\n![help_docs on the documentation of another repository](https://codium.ai/images/pr_agent/help_docs_comment_explicit_git.png){width=512}\n\n**Response**:\n\n![help_docs response](https://codium.ai/images/pr_agent/help_docs_response.png){width=512}\n\n## Run automatically when a new issue is opened\n\nYou can configure PR-Agent to run `help_docs` automatically on any newly created issue.\nThis can be useful, for example, for providing immediate feedback to users who open issues with questions on open-source projects with extensive documentation.\n\nHere's how:\n\n1) Follow the steps depicted under [Run as a Github Action](../installation/github.md#run-as-a-github-action) to create a new workflow, such as:`.github/workflows/help_docs.yml`:\n\n2) Edit your yaml file to the following:\n\n```yaml\nname: Run pr agent on every opened issue, respond to user 
comments on an issue\n\n#When the action is triggered\non:\n  issues:\n    types: [opened] #New issue\n\n# Read env. variables\nenv:\n  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n  GITHUB_API_URL: ${{ github.api_url }}\n  GIT_REPO_URL: ${{ github.event.repository.clone_url }}\n  ISSUE_URL: ${{ github.event.issue.html_url || github.event.comment.html_url }}\n  ISSUE_BODY: ${{ github.event.issue.body || github.event.comment.body }}\n  OPENAI_KEY: ${{ secrets.OPENAI_KEY }}\n\n# The actual set of actions\njobs:\n  issue_agent:\n    runs-on: ubuntu-latest\n    if: ${{ github.event.sender.type != 'Bot' }} #Do not respond to bots\n\n    # Set required permissions\n    permissions:\n      contents: read    # For reading repository contents\n      issues: write     # For commenting on issues\n\n    steps:\n      - name: Run PR Agent on Issues\n        if: ${{ env.ISSUE_URL != '' }}\n        uses: docker://codiumai/pr-agent:latest\n        with:\n          entrypoint: /bin/bash #Replace invoking cli.py directly with a shell\n          args: |\n            -c \"cd /app && \\\n            echo 'Running Issue Agent action step on ISSUE_URL=$ISSUE_URL' && \\\n            export config__git_provider='github' && \\\n                        export github__user_token=$GITHUB_TOKEN && \\\n            export github__base_url=$GITHUB_API_URL && \\\n            export openai__key=$OPENAI_KEY && \\\n            python -m pr_agent.cli --issue_url=$ISSUE_URL --pr_help_docs.repo_url=\"...\" --pr_help_docs.docs_path=\"...\" --pr_help_docs.openai_key=$OPENAI_KEY && \\\n            help_docs \"$ISSUE_BODY\"\n```\n\n3) Following completion of the remaining steps (such as adding secrets and relevant configurations, such as `repo_url` and `docs_path`) merge this change to your main branch.\nWhen a new issue is opened, you should see a comment from `github-actions` bot with an auto response, assuming the question is related to the documentation of the repository.\n\n---\n\n## Configuration 
options\n\nUnder the section `pr_help_docs`, the [configuration file](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L199) contains options to customize the 'help docs' tool:\n\n- `repo_url`: If not overwritten, the tool uses the repo that the context came from (issue or PR); otherwise, it uses the given repo as context.\n- `repo_default_branch`: The branch to use when `repo_url` is overwritten; otherwise, it has no effect.\n- `docs_path`: Relative path from the root of the repository (either the one this PR has been issued for, or the one given in `repo_url` above).\n- `exclude_root_readme`: Whether or not to exclude the root README file when querying the model.\n- `supported_doc_exts`: Which file extensions should be included for the purpose of querying the model.\n\n---\n"
  },
  {
    "path": "docs/docs/tools/improve.md",
    "content": "## Overview\n\nThe `improve` tool scans the PR code changes, and automatically generates meaningful suggestions for improving the PR code.\nThe tool can be triggered automatically every time a new PR is [opened](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened), or it can be invoked manually by commenting on any PR:\n\n```toml\n/improve\n```\n\n## How it looks\n\n=== \"Suggestions Overview\"\n    ![code_suggestions_as_comment_closed](https://codium.ai/images/pr_agent/code_suggestions_as_comment_closed.png){width=512}\n\n=== \"Selecting a specific suggestion\"\n    ![code_suggestions_as_comment_open](https://codium.ai/images/pr_agent/code_suggestions_as_comment_open.png){width=512}\n\n___\n\n## Example usage\n\n### Manual triggering\n\nInvoke the tool manually by commenting `/improve` on any PR. The code suggestions by default are presented as a single comment:\n\nTo edit [configurations](#configuration-options) related to the `improve` tool, use the following template:\n\n```toml\n/improve --pr_code_suggestions.some_config1=... 
--pr_code_suggestions.some_config2=...\n```\n\nFor example, you can choose to present all the suggestions as committable code comments, by running the following command:\n\n```toml\n/improve --pr_code_suggestions.commitable_code_suggestions=true\n```\n\n![improve](https://codium.ai/images/pr_agent/improve.png){width=512}\n\n### Automatic triggering\n\nTo run the `improve` tool automatically when a PR is opened, define in a [configuration file](../usage-guide/configuration_options.md#wiki-configuration-file):\n\n```toml\n[github_app]\npr_commands = [\n    \"/improve\",\n    ...\n]\n\n[pr_code_suggestions]\nnum_code_suggestions_per_chunk = ...\n...\n```\n\n- The `pr_commands` list contains the commands that will be executed automatically when a PR is opened.\n- The `[pr_code_suggestions]` section contains the configurations for the `improve` tool you want to edit (if any)\n\n### Table vs Committable code comments\n\nPR-Agent supports two modes for presenting code suggestions: \n\n1) [Table](https://codium.ai/images/pr_agent/code_suggestions_as_comment_closed.png) mode \n\n2) [Inline Committable](https://codium.ai/images/pr_agent/improve.png) code comments mode.\n\nThe table format offers several key advantages:\n\n- **Reduced noise**: Creates a cleaner PR experience with less clutter\n- **Quick overview and prioritization**: Enables quick review of one-liner summaries, impact levels, and easy prioritization\n- **High-level suggestions**: High-level suggestions that aren't tied to specific code chunks are presented only in the table mode\n- **Interactive features**: Provides 'more' and 'update' functionality via clickable buttons\n- **Centralized tracking**: Shows suggestion implementation status in one place\n- **IDE integration**: Allows applying suggestions directly in your IDE via the CLI tool\n\nTable mode is the default of PR-Agent, and is the recommended approach for most users due to these benefits. 
\n\n![code_suggestions_as_comment_closed.png](https://codium.ai/images/pr_agent/code_suggestions_as_comment_closed.png){width=512}\n\nTeams with specific preferences can enable committable code comments mode in their local configuration, or use [dual publishing mode](#dual-publishing-mode).\n\n> `Note - due to platform limitations, Bitbucket cloud and server supports only committable code comments mode.`\n\n\n## `Extra instructions` and `best practices`\n\nThe `improve` tool can be further customized by providing additional instructions and best practices to the AI model.\n\n### Extra instructions\n\nYou can use the `extra_instructions` configuration option to give the AI model additional instructions for the `improve` tool.\nBe specific, clear, and concise in the instructions. With extra instructions, you are the prompter.\n\nExamples for possible instructions:\n\n```toml\n[pr_code_suggestions]\nextra_instructions=\"\"\"\\\n(1) Answer in Japanese\n(2) Don't suggest to add try-except block\n(3) Ignore changes in toml files\n...\n\"\"\"\n```\n\nUse triple quotes to write multi-line instructions. 
Use bullet points or numbers to make the instructions more readable.\n\n### Best practices\n\n`Platforms supported: GitHub, GitLab, Bitbucket`\n\nPR-Agent supports both simple and hierarchical best practices configurations to provide guidance to the AI model for generating relevant code suggestions.\n\n???- tip \"Writing effective best practices files\"\n    \n    The following guidelines apply to all best practices files:\n    \n    - Write clearly and concisely\n    - Include brief code examples when helpful with before/after patterns\n    - Focus on project-specific guidelines that will result in relevant suggestions you actually want to get\n    - Keep each file relatively short, under 800 lines, since:\n        - AI models may not process effectively very long documents\n        - Long files tend to contain generic guidelines already known to AI\n        - Maximum multiple file accumulated content is limited to 2000 lines.\n    - Use pattern-based structure rather than simple bullet points for better clarity\n\n???- tip \"Example of a best practices file\"\n \n    Pattern 1: Add proper error handling with try-except blocks around external function calls.\n    \n    Example code before:\n\n    ```python\n    # Some code that might raise an exception\n    return process_pr_data(data)\n    ```\n\n    Example code after:\n\n    ```python\n    try:\n        # Some code that might raise an exception\n        return process_pr_data(data)\n    except Exception as e:\n        logger.exception(\"Failed to process request\", extra={\"error\": e})\n    ```\n\n    Pattern 2: Add defensive null/empty checks before accessing object properties or performing operations on potentially null variables to prevent runtime errors.\n    \n    Example code before:\n\n    ```python\n    def get_pr_code(pr_data):\n        if \"changed_code\" in pr_data:\n            return pr_data.get(\"changed_code\", \"\")\n        return \"\"\n    ```\n\n    Example code after:\n\n    ```python\n    
def get_pr_code(pr_data):\n        if pr_data is None:\n            return \"\"\n        if \"changed_code\" in pr_data:\n            return pr_data.get(\"changed_code\", \"\")\n        return \"\"\n    ```\n\n#### Local best practices\n\nFor basic usage, create a `best_practices.md` file in your repository's root directory containing a list of best practices, coding standards, and guidelines specific to your repository.\n\nThe AI model will use this `best_practices.md` file as a reference, and in case the PR code violates any of the guidelines, it will create additional suggestions, with a dedicated label: `Organization best practice`.\n\n### Combining 'extra instructions' and 'best practices'\n\nThe `extra instructions` configuration is more related to the `improve` tool prompt. It can be used, for example, to avoid specific suggestions (\"Don't suggest to add try-except block\", \"Ignore changes in toml files\", ...) or to emphasize specific aspects or formats (\"Answer in Japanese\", \"Give only short suggestions\", ...)\n\nIn contrast, the `best_practices.md` file is a general guideline for the way code should be written in the repo.\n\nUsing a combination of both can help the AI model to provide relevant and tailored suggestions.\n\n## Usage Tips\n\n### Implementing the proposed code suggestions\n\nEach generated suggestion consists of three key elements:\n\n1. A single-line summary of the proposed change\n2. An expandable section containing a comprehensive description of the suggestion\n3. 
A diff snippet showing the recommended code modification (before and after)\n\nWe advise users to apply critical analysis and judgment when implementing the proposed suggestions.\nIn addition to mistakes (which may happen, but are rare), sometimes the presented code modification may serve more as an _illustrative example_ than a directly applicable solution.\nIn such cases, we recommend prioritizing the suggestion's detailed description, using the diff snippet primarily as a supporting reference.\n\n### Dual publishing mode\n\nOur recommended approach for presenting code suggestions is through a [table](./improve.md#overview) (`--pr_code_suggestions.commitable_code_suggestions=false`).\nThis method significantly reduces the PR footprint and allows for quick and easy digestion of multiple suggestions.\n\nWe also offer a complementary **dual publishing mode**. When enabled, suggestions exceeding a certain score threshold are not only displayed in the table, but also presented as committable PR comments.\nThis mode helps highlight suggestions deemed more critical.\n\nTo activate dual publishing mode, use the following setting:\n\n```toml\n[pr_code_suggestions]\ndual_publishing_score_threshold = x\n```\n\nWhere x represents the minimum score threshold (>=) for suggestions to be presented as committable PR comments in addition to the table. Default is -1 (disabled).\n\n### Self-review\n\n`Platforms supported: GitHub, GitLab`\n\nIf you set in a configuration file:\n\n```toml\n[pr_code_suggestions]\ndemand_code_suggestions_self_review = true\n```\n\nThe `improve` tool will add a checkbox below the suggestions, prompting user to acknowledge that they have reviewed the suggestions.\nYou can set the content of the checkbox text via:\n\n```toml\n[pr_code_suggestions]\ncode_suggestions_self_review_text = \"... (your text here) ...\"\n```\n\n![self_review_1](https://codium.ai/images/pr_agent/self_review_1.png){width=512}\n\n!!! 
tip \"Tip - Reducing visual footprint after self-review\"\n\n    The configuration parameter `pr_code_suggestions.fold_suggestions_on_self_review` (default is True)\n    can be used to automatically fold the suggestions after the user clicks the self-review checkbox.\n\n    This reduces the visual footprint of the suggestions, and also indicates to the PR reviewer that the suggestions have been reviewed by the PR author, and don't require further attention.\n\n!!! tip \"Tip - Demanding self-review from the PR author\"\n\n    By setting:\n    ```toml\n    [pr_code_suggestions]\n    approve_pr_on_self_review = true\n    ```\n    the tool can automatically add an approval when the PR author clicks the self-review checkbox.\n\n\n    - If you set the number of required reviewers for a PR to 2, this effectively means that the PR author must click the self-review checkbox before the PR can be merged (in addition to a human reviewer).\n\n    ![self_review_2](https://codium.ai/images/pr_agent/self_review_2.png){width=512}\n\n    - If you keep the number of required reviewers for a PR to 1 and enable this configuration, this effectively means that the PR author can approve the PR by actively clicking the self-review checkbox.\n\n        To prevent unauthorized approvals, this configuration defaults to false, and cannot be altered through online comments; enabling requires a direct update to the configuration file and a commit to the repository. This ensures that utilizing the feature demands a deliberate documented decision by the repository owner.\n\n\n### How many code suggestions are generated?\n\nPR-Agent uses a dynamic strategy to generate code suggestions based on the size of the pull request (PR). Here's how it works:\n\n#### 1. Chunking large PRs\n\n- PR-Agent divides large PRs into 'chunks'.\n- Each chunk contains up to `config.max_model_tokens` tokens (default: 32,000).\n\n#### 2. 
Generating suggestions\n\n- For each chunk, PR-Agent generates up to `pr_code_suggestions.num_code_suggestions_per_chunk` suggestions (default: 3).\n\nThis approach has two main benefits:\n\n- Scalability: The number of suggestions scales with the PR size, rather than being fixed.\n- Quality: By processing smaller chunks, the AI can maintain higher quality suggestions, as larger contexts tend to decrease AI performance.\n\nNote: Chunking is primarily relevant for large PRs. For most PRs (up to 600 lines of code), PR-Agent will be able to process the entire code in a single call.\n\n## Configuration options\n\n???+ example \"General options\"\n\n    <table>\n      <tr>\n        <td><b>extra_instructions</b></td>\n        <td>Optional extra instructions to the tool. For example: \"focus on the changes in the file X. Ignore change in ...\".</td>\n      </tr>\n      <tr>\n        <td><b>commitable_code_suggestions</b></td>\n        <td>If set to true, the tool will display the suggestions as committable code comments. Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>dual_publishing_score_threshold</b></td>\n        <td>Minimum score threshold for suggestions to be presented as committable PR comments in addition to the table. Default is -1 (disabled).</td>\n      </tr>\n      <tr>\n        <td><b>focus_only_on_problems</b></td>\n        <td>If set to true, suggestions will focus primarily on identifying and fixing code problems, and less on style considerations like best practices, maintainability, or readability. Default is true.</td> \n      </tr>\n      <tr>\n        <td><b>persistent_comment</b></td>\n        <td>If set to true, the improve comment will be persistent, meaning that every new improve request will edit the previous one. Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>suggestions_score_threshold</b></td>\n        <td> Any suggestion with importance score less than this threshold will be removed. Default is 0. 
Highly recommend not to set this value above 7-8, since above it may clip relevant suggestions that can be useful. </td>\n      </tr>\n      <tr>\n        <td><b>enable_help_text</b></td>\n        <td>If set to true, the tool will display a help text in the comment. Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>enable_chat_text</b></td>\n        <td>If set to true, the tool will display a reference to the PR chat in the comment. Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>publish_output_no_suggestions</b></td>\n        <td>If set to true, the tool will publish a comment even if no suggestions were found. Default is true.</td>\n      </tr>\n    </table>\n\n???+ example \"Params for number of suggestions and AI calls\"\n\n    <table>\n      <tr>\n        <td><b>num_code_suggestions_per_chunk</b></td>\n        <td>Number of code suggestions provided by the 'improve' tool, per chunk. Default is 3.</td>\n      </tr>\n      <tr>\n        <td><b>max_number_of_calls</b></td>\n        <td>Maximum number of chunks. Default is 3.</td>\n      </tr>\n    </table>\n\n## Understanding AI Code Suggestions\n\n- **AI Limitations:** AI models for code are getting better and better, but they are not flawless. Not all the suggestions will be perfect, and a user should not accept all of them automatically. Critical reading and judgment are required. Mistakes of the AI are rare but can happen, and it is usually quite easy for a human to spot them.\n- **Purpose of Suggestions:**\n    - **Self-reflection:** The suggestions aim to enable developers to _self-reflect_ and improve their pull requests. This process can help to identify blind spots, uncover missed edge cases, and enhance code readability and coherency. 
Even when a specific code suggestion isn't suitable, the underlying issue it highlights often reveals something important that might deserve attention.\n    - **Bug detection:** The suggestions also alert on any _critical bugs_ that may have been identified during the analysis. This provides an additional safety net to catch potential issues before they make it into production. It's perfectly acceptable to implement only the suggestions you find valuable for your specific context.\n- **Hierarchy:** Presenting the suggestions in a structured hierarchical table enables the user to _quickly_ understand them, and to decide which ones are relevant and which are not.\n- **Customization:** To guide the model to suggestions that are more relevant to the specific needs of your project, we recommend using the [`extra_instructions`](./improve.md#extra-instructions-and-best-practices) and [`best practices`](./improve.md#best-practices) fields.\n- **Model Selection:** For specific programming languages or use cases, some models may perform better than others.\n"
  },
  {
    "path": "docs/docs/tools/index.md",
    "content": "# Tools\n\nHere is a list of PR-Agent tools, each with a dedicated page that explains how to use it:\n\n| Tool                                                                                     | Description                                                                                                                                 |\n|------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------|\n| **[PR Description (`/describe`)](./describe.md)**                                        | Automatically generating PR description - title, type, summary, code walkthrough and labels                                                 |\n| **[PR Review (`/review`)](./review.md)**                                                 | Adjustable feedback about the PR, possible issues, security concerns, review effort and more                                                |\n| **[Code Suggestions (`/improve`)](./improve.md)**                                        | Code suggestions for improving the PR                                                                                                       |\n| **[Question Answering (`/ask ...`)](./ask.md)**                                          | Answering free-text questions about the PR, or on specific code lines                                                                       |\n| **[Add Documentation (`/add_docs`)](./add_docs.md)**                                     | Generate documentation for code components that are missing it                                                                              |\n| **[Generate Labels (`/generate_labels`)](./generate_labels.md)**                         | Generate custom labels for the PR based on the code changes                                                                            
     |\n| **[Similar Issues (`/similar_issue`)](./similar_issues.md)**                             | Find similar issues in the repository based on the current issue                                                                            |\n| **[Help (`/help`)](./help.md)**                                                          | Provides a list of all the available tools                                                                                                  |\n| **[Help Docs (`/help_docs`)](./help_docs.md)**                                           | Answer a free-text question based on a git documentation folder                                                                             |\n| **[Update Changelog (`/update_changelog`)](./update_changelog.md)**                      | Automatically updating the CHANGELOG.md file with the PR changes                                                                            |\n"
  },
  {
    "path": "docs/docs/tools/review.md",
    "content": "## Overview\n\nThe `review` tool scans the PR code changes, and generates feedback about the PR, aiming to aid the reviewing process.\n<br>\nThe tool can be triggered automatically every time a new PR is [opened](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened), or can be invoked manually by commenting on any PR:\n\n```\n/review\n```\n\nNote that the main purpose of the `review` tool is to provide the **PR reviewer** with useful feedback and insights. The PR author, in contrast, may prefer to save time and focus on the output of the [improve](./improve.md) tool, which provides actionable code suggestions.\n\n(Read more about the different personas in the PR process and how PR-Agent aims to assist them in our [blog](https://www.codium.ai/blog/understanding-the-challenges-and-pain-points-of-the-pull-request-cycle/))\n\n## Example usage\n\n### Manual triggering\n\nInvoke the tool manually by commenting `/review` on any PR:\n\n![review comment](https://codium.ai/images/pr_agent/review_comment.png){width=512}\n\nAfter ~30 seconds, the tool will generate a review for the PR:\n\n![review](https://codium.ai/images/pr_agent/review3.png){width=512}\n\nIf you want to edit [configurations](#configuration-options), add the relevant ones to the command:\n\n```\n/review --pr_reviewer.some_config1=... 
--pr_reviewer.some_config2=...\n```\n\n### Automatic triggering\n\nTo run the `review` automatically when a PR is opened, define in a [configuration file](../usage-guide/configuration_options.md#wiki-configuration-file):\n\n```\n[github_app]\npr_commands = [\n    \"/review\",\n    ...\n]\n\n[pr_reviewer]\nextra_instructions = \"...\"\n...\n```\n\n- The `pr_commands` lists commands that will be executed automatically when a PR is opened.\n- The `[pr_reviewer]` section contains the configurations for the `review` tool you want to edit (if any).\n\n## Configuration options\n\n???+ example \"General options\"\n\n    <table>\n      <tr>\n        <td><b>persistent_comment</b></td>\n        <td>If set to true, the review comment will be persistent, meaning that every new review request will edit the previous one. Default is true.</td>\n      </tr>\n      <tr>\n      <td><b>final_update_message</b></td>\n      <td>When set to true, updating a persistent review comment during online commenting will automatically add a short comment with a link to the updated review in the pull request. Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>extra_instructions</b></td>\n        <td>Optional extra instructions to the tool. For example: \"focus on the changes in the file X. Ignore change in ...\".</td>\n      </tr>\n      <tr>\n        <td><b>enable_help_text</b></td>\n        <td>If set to true, the tool will display a help text in the comment. Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>num_max_findings</b></td>\n        <td>Number of maximum returned findings. Default is 3.</td>\n      </tr>\n    </table>\n\n???+ example \"Enable\\\\disable specific sub-sections\"\n\n    <table>\n      <tr>\n        <td><b>require_score_review</b></td>\n        <td>If set to true, the tool will add a section that scores the PR. 
Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>require_tests_review</b></td>\n        <td>If set to true, the tool will add a section that checks if the PR contains tests. Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>require_estimate_effort_to_review</b></td>\n        <td>If set to true, the tool will add a section that estimates the effort needed to review the PR. Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>require_estimate_contribution_time_cost</b></td>\n        <td>If set to true, the tool will add a section that estimates the time required for a senior developer to create and submit such changes. Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>require_can_be_split_review</b></td>\n        <td>If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false.</td>\n      </tr>\n      <tr>\n        <td><b>require_security_review</b></td>\n        <td>If set to true, the tool will add a section that checks if the PR contains a possible security or vulnerability issue. Default is true.</td>\n      </tr>\n        <tr>\n        <td><b>require_todo_scan</b></td>\n        <td>If set to true, the tool will add a section that lists TODO comments found in the PR code changes. Default is false.\n        </td>\n      </tr>\n      <tr>\n        <td><b>require_ticket_analysis_review</b></td>\n        <td>If set to true, and the PR contains a GitHub or Jira ticket link, the tool will add a section that checks if the PR in fact fulfilled the ticket requirements. Default is true.</td>\n      </tr>\n    </table>\n\n???+ example \"Adding PR labels\"\n\n    You can enable\\disable the `review` tool to add specific labels to the PR:\n\n    <table>\n      <tr>\n        <td><b>enable_review_labels_security</b></td>\n        <td>If set to true, the tool will publish a 'possible security issue' label if it detects a security issue. 
Default is true.</td>\n      </tr>\n      <tr>\n        <td><b>enable_review_labels_effort</b></td>\n        <td>If set to true, the tool will publish a 'Review effort x/5' label (1–5 scale). Default is true.</td>\n      </tr>\n    </table>\n\n## Usage Tips\n\n### General guidelines\n\n!!! tip \"\"\n\n    The `review` tool provides a collection of configurable feedbacks about a PR.\n    It is recommended to review the [Configuration options](#configuration-options) section, and choose the relevant options for your use case.\n\n    Some of the features that are disabled by default are quite useful, and should be considered for enabling. For example:\n    `require_score_review`, and more.\n\n    On the other hand, if you find one of the enabled features to be irrelevant for your use case, disable it. No default configuration can fit all use cases.\n\n### Automation\n\n!!! tip \"\"\n    When you first install PR-Agent app, the [default mode](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened) for the `review` tool is:\n    ```\n    pr_commands = [\"/review\", ...]\n    ```\n    Meaning the `review` tool will run automatically on every PR, without any additional configurations.\n    Edit this field to enable/disable the tool, or to change the configurations used.\n\n### Auto-generated PR labels by the Review Tool\n\n!!! tip \"\"\n\n    The `review` tool can automatically add labels to your Pull Requests:\n\n    - **`possible security issue`**: This label is applied if the tool detects a potential [security vulnerability](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/pr_reviewer_prompts.toml#L121) in the PR's code. 
This feedback is controlled by the 'enable_review_labels_security' flag (default is true).\n    - **`review effort [x/5]`**: This label estimates the [effort](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/pr_reviewer_prompts.toml#L105) required to review the PR on a relative scale of 1 to 5, where 'x' represents the assessed effort. This feedback is controlled by the 'enable_review_labels_effort' flag (default is true).\n    - **`ticket compliance`**: Adds a label indicating code compliance level (\"Fully compliant\" | \"PR Code Verified\" | \"Partially compliant\" | \"Not compliant\") to any GitHub/Jira/Linear ticket linked in the PR. Controlled by the 'require_ticket_labels' flag (default: false). If 'require_no_ticket_labels' is also enabled, PRs without ticket links will receive a \"No ticket found\" label.\n\n\n### Auto-blocking PRs from being merged based on the generated labels\n\n!!! tip \"\"\n\n    You can configure a CI/CD Action to prevent merging PRs with specific labels. For example, implement a dedicated [GitHub Action](https://medium.com/sequra-tech/quick-tip-block-pull-request-merge-using-labels-6cc326936221).\n\n    This approach helps ensure PRs with potential security issues or ticket compliance problems will not be merged without further review.\n\n    Since AI may make mistakes or lack complete context, use this feature judiciously. For flexibility, users with appropriate permissions can remove generated labels when necessary. When a label is removed, this action will be automatically documented in the PR discussion, clearly indicating it was a deliberate override by an authorized user to allow the merge.\n\n### Extra instructions\n\n!!! tip \"\" \n\n    Extra instructions are important.\n    The `review` tool can be configured with extra instructions, which can be used to guide the model to a feedback tailored to the needs of your project.\n\n    Be specific, clear, and concise in the instructions. 
With extra instructions, you are the prompter. Specify the relevant sub-tool, and the relevant aspects of the PR that you want to emphasize.\n\n    Examples of extra instructions:\n    ```\n    [pr_reviewer]\n    extra_instructions=\"\"\"\\\n    In the code feedback section, emphasize the following:\n    - Does the code logic cover relevant edge cases?\n    - Is the code logic clear and easy to understand?\n    - Is the code logic efficient?\n    ...\n    \"\"\"\n    ```\n    Use triple quotes to write multi-line instructions. Use bullet points to make the instructions more readable.\n"
  },
  {
    "path": "docs/docs/tools/similar_issues.md",
    "content": "## Overview\n\nThe similar issue tool retrieves the most similar issues to the current issue.\nIt can be invoked manually by commenting on any PR:\n\n```\n/similar_issue\n```\n\n## Example usage\n\n![similar_issue_original_issue](https://codium.ai/images/pr_agent/similar_issue_original_issue.png){width=768}\n\n![similar_issue_comment](https://codium.ai/images/pr_agent/similar_issue_comment.png){width=768}\n\n![similar_issue](https://codium.ai/images/pr_agent/similar_issue.png){width=768}\n\nNote that to perform retrieval, the `similar_issue` tool indexes all the repo previous issues (once).\n\n### Selecting a Vector Database\n\nConfigure your preferred database by changing the `pr_similar_issue` parameter in `configuration.toml` file.\n\n#### Available Options\n\nChoose from the following Vector Databases:\n\n1. LanceDB\n2. Pinecone\n3. Qdrant\n\n#### Pinecone Configuration\n\nTo use Pinecone with the `similar issue` tool, add these credentials to `.secrets.toml` (or set as environment variables):\n\n```\n[pinecone]\napi_key = \"...\"\nenvironment = \"...\"\n```\n\nThese parameters can be obtained by registering to [Pinecone](https://app.pinecone.io/?sessionType=signup/).\n\n#### Qdrant Configuration\n\nTo use Qdrant with the `similar issue` tool, add these credentials to `.secrets.toml` (or set as environment variables):\n\n```\n[qdrant]\nurl = \"https://YOUR-QDRANT-URL\" # e.g., https://xxxxxxxx-xxxxxxxx.eu-central-1-0.aws.cloud.qdrant.io\napi_key = \"...\"\n```\n\nThen select Qdrant in `configuration.toml`:\n\n```\n[pr_similar_issue]\nvectordb = \"qdrant\"\n```\n\nYou can get a free managed Qdrant instance from [Qdrant Cloud](https://cloud.qdrant.io/).\n\n## How to use\n\n- To invoke the 'similar issue' tool from **CLI**, run:\n`python3 cli.py --issue_url=... 
similar_issue`\n\n- To invoke the 'similar issue' tool via online usage, [comment](https://github.com/qodo-ai/pr-agent/issues/178#issuecomment-1716934893) on a PR:\n`/similar_issue`\n\n- You can also enable the 'similar issue' tool to run automatically when a new issue is opened, by adding it to the [pr_commands list in the github_app section](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L229)\n"
  },
  {
    "path": "docs/docs/tools/update_changelog.md",
    "content": "## Overview\n\nThe `update_changelog` tool automatically updates the CHANGELOG.md file with the PR changes.\nIt can be invoked manually by commenting on any PR:\n\n```\n/update_changelog\n```\n\n## Example usage\n\n![update_changelog_comment](https://codium.ai/images/pr_agent/update_changelog_comment.png){width=768}\n\n![update_changelog](https://codium.ai/images/pr_agent/update_changelog.png){width=768}\n\n## Configuration options\n\nUnder the section `pr_update_changelog`, the [configuration file](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L169) contains options to customize the 'update changelog' tool:\n\n- `push_changelog_changes`: whether to push the changes to CHANGELOG.md, or just publish them as a comment. Default is false (publish as comment).\n- `extra_instructions`: Optional extra instructions to the tool. For example: \"Use the following structure: ...\"\n- `add_pr_link`: whether the model should try to add a link to the PR in the changelog. Default is true.\n- `skip_ci_on_push`: whether the commit message (when `push_changelog_changes` is true) will include the term \"[skip ci]\", preventing CI tests from being triggered on the changelog commit. Default is true.\n"
  },
  {
    "path": "docs/docs/usage-guide/EXAMPLE_BEST_PRACTICE.md",
    "content": "## Recommend Python Best Practices\n\nThis document outlines a series of recommended best practices for Python development. These guidelines aim to improve code quality, maintainability, and readability.\n\n### Imports\n\nUse  `import`  statements for packages and modules only, not for individual types, classes, or functions.\n\n#### Definition\n\nReusability mechanism for sharing code from one module to another.\n\n#### Decision\n\n- Use  `import x`  for importing packages and modules.\n- Use  `from x import y`  where  `x`  is the package prefix and  `y`  is the module name with no prefix.\n- Use  `from x import y as z`  in any of the following circumstances:\n  - Two modules named  `y`  are to be imported.\n  - `y`  conflicts with a top-level name defined in the current module.\n  - `y`  conflicts with a common parameter name that is part of the public API (e.g.,  `features`).\n  - `y`  is an inconveniently long name, or too generic in the context of your code\n- Use  `import y as z`  only when  `z`  is a standard abbreviation (e.g.,  `import numpy as np`).\n\nFor example the module  `sound.effects.echo`  may be imported as follows:\n\n```\nfrom sound.effects import echo\n...\necho.EchoFilter(input, output, delay=0.7, atten=4)\n\n```\n\nDo not use relative names in imports. Even if the module is in the same package, use the full package name. 
This helps prevent unintentionally importing a package twice.\n\n##### Exemptions\n\nExemptions from this rule:\n\n- Symbols from the following modules are used to support static analysis and type checking:\n  - [`typing`  module](https://google.github.io/styleguide/pyguide.html#typing-imports)\n  - [`collections.abc`  module](https://google.github.io/styleguide/pyguide.html#typing-imports)\n  - [`typing_extensions`  module](https://github.com/python/typing_extensions/blob/main/README.md)\n- Redirects from the  [six.moves module](https://six.readthedocs.io/#module-six.moves).\n\n### Packages\n\nImport each module using the full pathname location of the module.\n\n#### Decision\n\nAll new code should import each module by its full package name.\n\nImports should be as follows:\n\n```\nYes:\n  # Reference absl.flags in code with the complete name (verbose).\n  import absl.flags\n  from doctor.who import jodie\n\n  _FOO = absl.flags.DEFINE_string(...)\n\n```\n\n```\nYes:\n  # Reference flags in code with just the module name (common).\n  from absl import flags\n  from doctor.who import jodie\n\n  _FOO = flags.DEFINE_string(...)\n\n```\n\n_(assume this file lives in  `doctor/who/`  where  `jodie.py`  also exists)_\n\n```\nNo:\n  # Unclear what module the author wanted and what will be imported.  The actual\n  # import behavior depends on external factors controlling sys.path.\n  # Which possible jodie module did the author intend to import?\n  import jodie\n\n```\n\nThe directory the main binary is located in should not be assumed to be in  `sys.path`  despite that happening in some environments. 
This being the case, code should assume that  `import jodie`  refers to a third-party or top-level package named  `jodie`, not a local  `jodie.py`.\n\n### Default Iterators and Operators\n\nUse default iterators and operators for types that support them, like lists, dictionaries, and files.\n\n#### Definition\n\nContainer types, like dictionaries and lists, define default iterators and membership test operators (“in” and “not in”).\n\n#### Decision\n\nUse default iterators and operators for types that support them, like lists, dictionaries, and files. The built-in types define iterator methods, too. Prefer these methods to methods that return lists, except that you should not mutate a container while iterating over it.\n\n```\nYes:  for key in adict: ...\n      if obj in alist: ...\n      for line in afile: ...\n      for k, v in adict.items(): ...\n```\n\n```\nNo:   for key in adict.keys(): ...\n      for line in afile.readlines(): ...\n```\n\n### Lambda Functions\n\nOkay for one-liners. Prefer generator expressions over  `map()`  or  `filter()`  with a  `lambda`.\n\n#### Decision\n\nLambdas are allowed. If the code inside the lambda function spans multiple lines or is longer than 60-80 chars, it might be better to define it as a regular  [nested function](https://google.github.io/styleguide/pyguide.html#lexical-scoping).\n\nFor common operations like multiplication, use the functions from the  `operator`  module instead of lambda functions. For example, prefer  `operator.mul`  to  `lambda x, y: x * y`.\n\n### Default Argument Values\n\nOkay in most cases.\n\n#### Definition\n\nYou can specify values for variables at the end of a function’s parameter list, e.g.,  `def foo(a, b=0):`. If  `foo`  is called with only one argument,  `b`  is set to 0. 
If it is called with two arguments,  `b`  has the value of the second argument.\n\n#### Decision\n\nOkay to use with the following caveat:\n\nDo not use mutable objects as default values in the function or method definition.\n\n```\nYes: def foo(a, b=None):\n         if b is None:\n             b = []\nYes: def foo(a, b: Sequence | None = None):\n         if b is None:\n             b = []\nYes: def foo(a, b: Sequence = ()):  # Empty tuple OK since tuples are immutable.\n         ...\n```\n\n```\nfrom absl import flags\n_FOO = flags.DEFINE_string(...)\n\nNo:  def foo(a, b=[]):\n         ...\nNo:  def foo(a, b=time.time()):  # Is `b` supposed to represent when this module was loaded?\n         ...\nNo:  def foo(a, b=_FOO.value):  # sys.argv has not yet been parsed...\n         ...\nNo:  def foo(a, b: Mapping = {}):  # Could still get passed to unchecked code.\n         ...\n```\n\n### True/False Evaluations\n\nUse the “implicit” false if possible, e.g.,  `if foo:`  rather than  `if foo != []:`\n\n### Lexical Scoping\n\nOkay to use.\n\nAn example of the use of this feature is:\n\n```\ndef get_adder(summand1: float) -> Callable[[float], float]:\n    \"\"\"Returns a function that adds numbers to a given number.\"\"\"\n    def adder(summand2: float) -> float:\n        return summand1 + summand2\n\n    return adder\n```\n\n#### Decision\n\nOkay to use.\n\n### Threading\n\nDo not rely on the atomicity of built-in types.\n\nWhile Python’s built-in data types such as dictionaries appear to have atomic operations, there are corner cases where they aren’t atomic (e.g. if  `__hash__`  or  `__eq__`  are implemented as Python methods) and their atomicity should not be relied upon. Neither should you rely on atomic variable assignment (since this in turn depends on dictionaries).\n\nUse the  `queue`  module’s  `Queue`  data type as the preferred way to communicate data between threads. Otherwise, use the  `threading`  module and its locking primitives. 
Prefer condition variables and  `threading.Condition`  instead of using lower-level locks.\n"
  },
  {
    "path": "docs/docs/usage-guide/additional_configurations.md",
    "content": "## Show possible configurations\n\nThe possible configurations of PR-Agent are stored in [here](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml){:target=\"_blank\"}.\nIn the [tools](../tools/index.md) page you can find explanations on how to use these configurations for each tool.\n\nTo print all the available configurations as a comment on your PR, you can use the following command:\n\n```\n/config\n```\n\n![possible_config1](https://codium.ai/images/pr_agent/possible_config1.png){width=512}\n\nTo view the **actual** configurations used for a specific tool, after all the user settings are applied, you can add for each tool a `--config.output_relevant_configurations=true` suffix.\nFor example:\n\n```\n/improve --config.output_relevant_configurations=true\n```\n\nWill output an additional field showing the actual configurations used for the `improve` tool.\n\n![possible_config2](https://codium.ai/images/pr_agent/possible_config2.png){width=512}\n\n## Ignoring files from analysis\n\nIn some cases, you may want to exclude specific files or directories from the analysis performed by PR-Agent. 
This can be useful, for example, when you have files that are generated automatically or files that shouldn't be reviewed, like vendor code.\n\nYou can ignore files or folders using the following methods:\n\n- `IGNORE.GLOB`\n- `IGNORE.REGEX`\n\nwhich you can edit to ignore files or folders based on glob or regex patterns.\n\n### Example usage\n\nLet's look at an example where we want to ignore all files with `.py` extension from the analysis.\n\nTo ignore Python files in a PR with online usage, comment on a PR:\n`/review --ignore.glob=\"['*.py']\"`\n\nTo ignore Python files in all PRs using `glob` pattern, set in a configuration file:\n\n```\n[ignore]\nglob = ['*.py']\n```\n\nAnd to ignore Python files in all PRs using `regex` pattern, set in a configuration file:\n\n```\n[ignore]\nregex = ['.*\\.py$']\n```\n\n## Extra instructions\n\nAll PR-Agent tools have a parameter called `extra_instructions`, that enables to add free-text extra instructions. Example usage:\n\n```\n/update_changelog --pr_update_changelog.extra_instructions=\"Make sure to update also the version ...\"\n```\n\n## Language Settings\n\nThe default response language for PR-Agent is **U.S. English**. However, some development teams may prefer to display information in a different language. For example, your team's workflow might improve if PR descriptions and code suggestions are set to your country's native language.\n\nTo configure this, set the `response_language` parameter in the configuration file. This will prompt the model to respond in the specified language. Use a **standard locale code** based on [ISO 3166](https://en.wikipedia.org/wiki/ISO_3166) (country codes) and [ISO 639](https://en.wikipedia.org/wiki/ISO_639) (language codes) to define a language-country pair. 
See this [comprehensive list of locale codes](https://simplelocalize.io/data/locales/).\n\nExample:\n\n```toml\n[config]\nresponse_language = \"it-IT\"\n```\n\nThis will set the response language globally for all the commands to Italian.\n\n> **Important:** Note that only dynamic text generated by the AI model is translated to the configured language. Static text such as labels and table headers that are not part of the AI models response will remain in US English. In addition, the model you are using must have good support for the specified language.\n\n[//]: # (## Working with large PRs)\n\n[//]: # ()\n[//]: # (The default mode of CodiumAI is to have a single call per tool, using GPT-4, which has a token limit of 8000 tokens.)\n\n[//]: # (This mode provides a very good speed-quality-cost tradeoff, and can handle most PRs successfully.)\n\n[//]: # (When the PR is above the token limit, it employs a [PR Compression strategy]&#40;../core-abilities/index.md&#41;.)\n\n[//]: # ()\n[//]: # (However, for very large PRs, or in case you want to emphasize quality over speed and cost, there are two possible solutions:)\n\n[//]: # (1&#41; [Use a model]&#40;./changing_a_model.md&#41; with larger context, like GPT-32K, or claude-100K. This solution will be applicable for all the tools.)\n\n[//]: # (2&#41; For the `/improve` tool, there is an ['extended' mode]&#40;../tools/improve.md&#41; &#40;`/improve --extended`&#41;,)\n\n[//]: # (which divides the PR into chunks, and processes each chunk separately. With this mode, regardless of the model, no compression will be done &#40;but for large PRs, multiple model calls may occur&#41;)\n\n\n## Expand GitLab submodule diffs\n\nBy default, GitLab merge requests show submodule updates as `Subproject commit` lines. 
To include the actual file-level changes from those submodules in PR-Agent analysis, enable:\n\n```toml\n[gitlab]\nexpand_submodule_diffs = true\n```\n\nWhen enabled, PR-Agent will fetch and attach diffs from the submodule repositories. The default is `false` to avoid extra GitLab API calls.\n\n## Log Level\n\nPR-Agent allows you to control the verbosity of logging by using the `log_level` configuration parameter. This is particularly useful for troubleshooting and debugging issues with your PR workflows.\n\n```\n[config]\nlog_level = \"DEBUG\"  # Options: \"DEBUG\", \"INFO\", \"WARNING\", \"ERROR\", \"CRITICAL\"\n```\n\nThe default log level is \"DEBUG\", which provides detailed output of all operations. If you prefer less verbose logs, you can set higher log levels like \"INFO\" or \"WARNING\".\n\n## Integrating with Logging Observability Platforms\n\nVarious logging observability tools can be used out-of-the box when using the default LiteLLM AI Handler. Simply configure the LiteLLM callback settings in `configuration.toml` and set environment variables according to the LiteLLM [documentation](https://docs.litellm.ai/docs/).\n\nFor example, to use [LangSmith](https://www.langchain.com/langsmith) you can add the following to your `configuration.toml` file:\n\n```\n[litellm]\nenable_callbacks = true\nsuccess_callback = [\"langsmith\"]\nfailure_callback = [\"langsmith\"]\nservice_callback = []\n```\n\nThen set the following environment variables:\n\n```\nLANGSMITH_API_KEY=<api_key>\nLANGSMITH_PROJECT=<project>\nLANGSMITH_BASE_URL=<url>\n```\n\n## Bringing additional repository metadata to PR-Agent\n\nTo provide PR-Agent tools with additional context about your project, you can enable automatic repository metadata detection. \n\nIf you set:\n\n```toml\n[config]\nadd_repo_metadata = true\n```\n\nPR-Agent automatically searches for repository metadata files in your PR's head branch root directory. 
By default, it looks for:\n[AGENTS.MD](https://agents.md/), [QODO.MD](https://docs.codium.ai/qodo-documentation/qodo-command/getting-started/setup-and-quickstart), [CLAUDE.MD](https://www.anthropic.com/engineering/claude-code-best-practices).\n\nYou can also specify custom filenames to search for:\n\n```toml\n[config]\nadd_repo_metadata_file_list= [\"file1.md\", \"file2.md\", ...]\n```\n\n## Ignoring automatic commands in PRs\n\nPR-Agent allows you to automatically ignore certain PRs based on various criteria:\n\n- PRs with specific titles (using regex matching)\n- PRs between specific branches (using regex matching)\n- PRs from specific repositories (using regex matching)\n- PRs not from specific folders\n- PRs containing specific labels\n- PRs opened by specific users\n\n### Ignoring PRs with specific titles\n\nTo ignore PRs with a specific title such as \"[Bump]: ...\", you can add the following to your `configuration.toml` file:\n\n```toml\n[config]\nignore_pr_title = [\"\\\\[Bump\\\\]\"]\n```\n\nWhere the `ignore_pr_title` is a list of regex patterns to match the PR title you want to ignore. 
Default is `ignore_pr_title = [\"^\\\\[Auto\\\\]\", \"^Auto\"]`.\n\n### Ignoring PRs between specific branches\n\nTo ignore PRs from specific source or target branches, you can add the following to your `configuration.toml` file:\n\n```toml\n[config]\nignore_pr_source_branches = ['develop', 'main', 'master', 'stage']\nignore_pr_target_branches = [\"qa\"]\n```\n\nWhere the `ignore_pr_source_branches` and `ignore_pr_target_branches` are lists of regex patterns to match the source and target branches you want to ignore.\nThey are not mutually exclusive, you can use them together or separately.\n\n### Ignoring PRs from specific repositories\n\nTo ignore PRs from specific repositories, you can add the following to your `configuration.toml` file:\n\n```toml\n[config]\nignore_repositories = [\"my-org/my-repo1\", \"my-org/my-repo2\"]\n```\n\nWhere the `ignore_repositories` is a list of regex patterns to match the repositories you want to ignore. This is useful when you have multiple repositories and want to exclude certain ones from analysis.\n\n\n### Ignoring PRs not from specific folders\n\nTo allow only specific folders (often needed in large monorepos), set:\n\n```\n[config]\nallow_only_specific_folders=['folder1','folder2']\n```\n\nFor the configuration above, automatic feedback will only be triggered when the PR changes include files where 'folder1' or 'folder2' is in the file path\n\n### Ignoring PRs containing specific labels\n\nTo ignore PRs containing specific labels, you can add the following to your `configuration.toml` file:\n\n```\n[config]\nignore_pr_labels = [\"do-not-merge\"]\n```\n\nWhere the `ignore_pr_labels` is a list of labels that when present in the PR, the PR will be ignored.\n\n### Ignoring PRs from specific users\n\nPR-Agent tries to automatically identify and ignore pull requests created by bots using:\n\n- GitHub's native bot detection system\n- Name-based pattern matching\n\nWhile this detection is robust, it may not catch all cases, 
particularly when:\n\n- Bots are registered as regular user accounts\n- Bot names don't match common patterns\n\nTo supplement the automatic bot detection, you can manually specify users to ignore. Add the following to your `configuration.toml` file to ignore PRs from specific users:\n\n```\n[config]\nignore_pr_authors = [\"my-special-bot-user\", ...]\n```\n\nWhere the `ignore_pr_authors` is a regex list of usernames that you want to ignore.\n\n!!! note\n    There is one specific case where bots will receive an automatic response - when they generated a PR with a _failed test_.\n\n### Ignoring Generated Files by Language/Framework\n\nTo automatically exclude files generated by specific languages or frameworks, you can add the following to your `configuration.toml` file:\n\n```\n[config]\nignore_language_framework = ['protobuf', ...]\n```\n\nYou can view the list of auto-generated file patterns in [`generated_code_ignore.toml`](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/generated_code_ignore.toml).\nFiles matching these glob patterns will be automatically excluded from PR Agent analysis.\n\n### Ignoring Tickets with Specific Labels\n\nWhen PR-Agent analyzes tickets (JIRA, GitHub Issues, GitLab Issues, etc.) referenced in your PR, you may want to exclude tickets that have certain labels from the analysis. This is useful for filtering out tickets marked as \"ignore-compliance\", \"skip-review\", or other labels that indicate the ticket should not be considered during PR review.\n\nTo ignore tickets with specific labels, add the following to your `configuration.toml` file:\n\n```toml\n[config]\nignore_ticket_labels = [\"ignore-compliance\", \"skip-review\", \"wont-fix\"]\n```\n\nWhere `ignore_ticket_labels` is a list of label names that should be ignored during ticket analysis.\n"
  },
  {
    "path": "docs/docs/usage-guide/automations_and_usage.md",
    "content": "## Local repo (CLI)\n\nWhen running from your locally cloned PR-Agent repo (CLI), your local configuration file will be used.\nExamples of invoking the different tools via the CLI:\n\n- **Review**:       `python -m pr_agent.cli --pr_url=<pr_url>  review`\n- **Describe**:     `python -m pr_agent.cli --pr_url=<pr_url>  describe`\n- **Improve**:      `python -m pr_agent.cli --pr_url=<pr_url>  improve`\n- **Ask**:          `python -m pr_agent.cli --pr_url=<pr_url>  ask \"Write me a poem about this PR\"`\n- **Update Changelog**:      `python -m pr_agent.cli --pr_url=<pr_url>  update_changelog`\n\n`<pr_url>` is the url of the relevant PR (for example: [#50](https://github.com/qodo-ai/pr-agent/pull/50)).\n\n**Notes:**\n\n1. in addition to editing your local configuration file, you can also change any configuration value by adding it to the command line:\n\n```\npython -m pr_agent.cli --pr_url=<pr_url>  /review --pr_reviewer.extra_instructions=\"focus on the file: ...\"\n```\n\n2. You can print results locally, without publishing them, by setting in `configuration.toml`:\n\n```\n[config]\npublish_output=false\nverbosity_level=2\n```\n\nThis is useful for debugging or experimenting with different tools.\n\n3. **git provider**: The [git_provider](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L12) field in a configuration file determines the GIT provider that will be used by PR-Agent. 
Currently, the following providers are supported:\n`github` **(default)**, `gitlab`, `bitbucket`, `azure`, `codecommit`, `local`, and `gitea`.\n\n### CLI Health Check\n\nTo verify that PR-Agent has been configured correctly, you can run this health check command from the repository root:\n\n```bash\npython -m tests.health_test.main\n```\n\nIf the health check passes, you will see the following output at the end of the run:\n\n```\n========\nHealth test passed successfully\n========\n```\n\nBefore running the health check, ensure you have:\n\n- Configured your [LLM provider](./changing_a_model.md)\n- Added a valid GitHub token to your configuration file\n\n## Online usage\n\nOnline usage means invoking PR-Agent tools by [comments](https://github.com/qodo-ai/pr-agent/pull/229#issuecomment-1695021901) on a PR.\nCommands for invoking the different tools via comments:\n\n- **Review**:       `/review`\n- **Describe**:     `/describe`\n- **Improve**:      `/improve`  (or `/improve_code` for bitbucket, since `/improve` is sometimes reserved)\n- **Ask**:          `/ask \"...\"`\n- **Update Changelog**:      `/update_changelog`\n\nTo edit a specific configuration value, just add `--config_path=<value>` to any command.\nFor example, if you want to edit the `review` tool configurations, you can run:\n\n```\n/review --pr_reviewer.extra_instructions=\"...\" --pr_reviewer.require_score_review=false\n```\n\nAny configuration value in the [configuration file](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) can be similarly edited. 
Comment `/config` to see the list of available configurations.\n\n## PR-Agent Automatic Feedback\n\n### Disabling all automatic feedback\n\nTo easily disable all automatic feedback from PR-Agent (GitHub App, GitLab Webhook, BitBucket App, Azure DevOps Webhook), set in a configuration file:\n\n```toml\n[config]\ndisable_auto_feedback = true\n```\n\nWhen this parameter is set to `true`, PR-Agent will not run any automatic tools (like `describe`, `review`, `improve`) when a new PR is opened, or when new code is pushed to an open PR.\n\n### GitHub App\n\n!!! note \"Configurations for PR-Agent\"\n    PR-Agent for GitHub is an App, hosted by Codium. So all the instructions below are relevant for PR-Agent users.\n    Same goes for [GitLab webhook](#gitlab-webhook) and [BitBucket App](#bitbucket-app) sections.\n\n#### GitHub app automatic tools when a new PR is opened\n\nThe [github_app](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L223) section defines GitHub app specific configurations.\n\nThe configuration parameter `pr_commands` defines the list of tools that will be **run automatically** when a new PR is opened:\n\n```toml\n[github_app]\npr_commands = [\n    \"/describe\",\n    \"/review\",\n    \"/improve\",\n]\n```\n\nThis means that when a new PR is opened/reopened or marked as ready for review, PR-Agent will run the `describe`, `review` and `improve` tools.  
\n\n**Draft PRs:** \n\nBy default, draft PRs are not considered for automatic tools, but you can change this by setting the `feedback_on_draft_pr` parameter to `true` in the configuration file.\n\n```toml\n[github_app]\nfeedback_on_draft_pr = true\n```\n\n**Changing default tool parameters:**\n\nYou can override the default tool parameters by using one of the three options for a [configuration file](./configuration_options.md): **wiki**, **local**, or **global**.\nFor example, if your configuration file contains:\n\n```toml\n[pr_description]\ngenerate_ai_title = true\n```\n\nEvery time you run the `describe` tool (including automatic runs) the PR title will be generated by the AI.\n\n\n**Parameters for automated runs:**\n\nYou can customize configurations specifically for automated runs by using the `--config_path=<value>` parameter.\nFor instance, to modify the `review` tool settings only for newly opened PRs, use:\n\n```toml\n[github_app]\npr_commands = [\n    \"/describe\",\n    \"/review --pr_reviewer.extra_instructions='focus on the file: ...'\",\n    \"/improve\",\n]\n```\n\n#### GitHub app automatic tools for push actions (commits to an open PR)\n\nIn addition to running automatic tools when a PR is opened, the GitHub app can also respond to new code that is pushed to an open PR.\n\nThe configuration toggle `handle_push_trigger` can be used to enable this feature.\nThe configuration parameter `push_commands` defines the list of tools that will be **run automatically** when new code is pushed to the PR.\n\n```toml\n[github_app]\nhandle_push_trigger = true\npush_commands = [\n    \"/describe\",\n    \"/review\",\n]\n```\n\nThis means that when new code is pushed to the PR, PR-Agent will run the `describe` and `review` tools, with the specified parameters.\n\n### GitHub Action\n\n`GitHub Action` is a different way to trigger PR-Agent tools, and uses a different configuration mechanism than `GitHub App`.<br>\nYou can configure settings for `GitHub Action` by adding 
environment variables under the env section in the `.github/workflows/pr_agent.yml` file.\nSpecifically, start by setting the following environment variables:\n\n```yaml\n      env:\n        OPENAI_KEY: ${{ secrets.OPENAI_KEY }} # Make sure to add your OpenAI key to your repo secrets\n        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Make sure to add your GitHub token to your repo secrets\n        github_action_config.auto_review: \"true\" # enable\\disable auto review\n        github_action_config.auto_describe: \"true\" # enable\\disable auto describe\n        github_action_config.auto_improve: \"true\" # enable\\disable auto improve\n        github_action_config.pr_actions: '[\"opened\", \"reopened\", \"ready_for_review\", \"review_requested\"]'\n```\n\n`github_action_config.auto_review`, `github_action_config.auto_describe` and `github_action_config.auto_improve` are used to enable/disable automatic tools that run when a new PR is opened.\nIf not set, the default configuration is for all three tools to run automatically when a new PR is opened.\n\n`github_action_config.pr_actions` is used to configure which `pull_requests` events will trigger the enabled auto flags.\nIf not set, the default configuration is `[\"opened\", \"reopened\", \"ready_for_review\", \"review_requested\"]`.\n\n`github_action_config.enable_output` is used to enable/disable the GitHub Actions [output parameter](https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#outputs-for-docker-container-and-javascript-actions) (default is `true`).\nReview result is output as JSON to `steps.{step-id}.outputs.review` property.\nThe JSON structure is equivalent to the yaml data structure defined in [pr_reviewer_prompts.toml](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/pr_reviewer_prompts.toml).\n\nNote that you can give additional config parameters by adding environment variables to `.github/workflows/pr_agent.yml`, or by using a `.pr_agent.toml` 
[configuration file](./configuration_options.md#global-configuration-file) in the root of your repo\n\nFor example, you can set an environment variable: `pr_description.publish_labels=false`, or add a `.pr_agent.toml` file with the following content:\n\n```toml\n[pr_description]\npublish_labels = false\n```\n\nto prevent PR-Agent from publishing labels when running the `describe` tool.\n\n#### Enable using commands in PR\n\nYou can configure your GitHub Actions workflow to trigger on `issue_comment` [events](https://docs.github.com/en/actions/reference/workflows-and-actions/events-that-trigger-workflows#issue_comment) (`created` and `edited`).\n\nExample GitHub Actions workflow configuration:\n\n```yaml\non:\n  issue_comment:\n    types: [created, edited]\n```\n\nWhen this is configured, PR-Agent can be invoked by commenting on the PR.\n\n#### Quick Reference: Model Configuration in GitHub Actions\n\nFor detailed step-by-step examples of configuring different models (Gemini, Claude, Azure OpenAI, etc.) 
in GitHub Actions, see the [Configuration Examples](../installation/github.md#configuration-examples) section in the installation guide.\n\n**Common Model Configuration Patterns:**\n\n- **OpenAI**: Set `config.model: \"gpt-4o\"` and `OPENAI_KEY`\n- **Gemini**: Set `config.model: \"gemini/gemini-1.5-flash\"` and `GOOGLE_AI_STUDIO.GEMINI_API_KEY` (no `OPENAI_KEY` needed)\n- **Claude**: Set `config.model: \"anthropic/claude-3-opus-20240229\"` and `ANTHROPIC.KEY` (no `OPENAI_KEY` needed)\n- **Azure OpenAI**: Set `OPENAI.API_TYPE: \"azure\"`, `OPENAI.API_BASE`, and `OPENAI.DEPLOYMENT_ID`\n- **Local Models**: Set `config.model: \"ollama/model-name\"` and `OLLAMA.API_BASE`\n\n**Environment Variable Format:**\n- Use dots (`.`) to separate sections and keys: `config.model`, `pr_reviewer.extra_instructions`\n- Boolean values as strings: `\"true\"` or `\"false\"`\n- Arrays as JSON strings: `'[\"item1\", \"item2\"]'`\n\nFor complete model configuration details, see [Changing a model in PR-Agent](changing_a_model.md).\n\n### GitLab Webhook\n\nAfter setting up a GitLab webhook, to control which commands will run automatically when a new MR is opened, you can set the `pr_commands` parameter in the configuration file, similar to the GitHub App:\n\n```toml\n[gitlab]\npr_commands = [\n    \"/describe\",\n    \"/review\",\n    \"/improve\",\n]\n```\n\nthe GitLab webhook can also respond to new code that is pushed to an open MR.\nThe configuration toggle `handle_push_trigger` can be used to enable this feature.\nThe configuration parameter `push_commands` defines the list of tools that will be **run automatically** when new code is pushed to the MR.\n\n```toml\n[gitlab]\nhandle_push_trigger = true\npush_commands = [\n    \"/describe\",\n    \"/review\",\n]\n```\n\nNote that to use the 'handle_push_trigger' feature, you need to give the gitlab webhook also the \"Push events\" scope.\n\n### BitBucket App\n\nSimilar to GitHub app, when running PR-Agent from BitBucket App, the default 
[configuration file](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) will be initially loaded.\n\nBy uploading a local `.pr_agent.toml` file to the root of the repo's default branch, you can edit and customize any configuration parameter. Note that you need to upload `.pr_agent.toml` prior to creating a PR, in order for the configuration to take effect.\n\nFor example, if your local `.pr_agent.toml` file contains:\n\n```toml\n[pr_reviewer]\nextra_instructions = \"Answer in japanese\"\n```\n\nEach time you invoke a `/review` tool, it will use the extra instructions you set in the local configuration file.\n\nNote that among other limitations, BitBucket provides relatively low rate-limits for applications (up to 1000 requests per hour), and does not provide an API to track the actual rate-limit usage.\nIf you experience a lack of responses from PR-Agent, you might want to set: `bitbucket_app.avoid_full_files=true` in your configuration file.\nThis will prevent PR-Agent from acquiring the full file content, and will only use the diff content. 
This will reduce the number of requests made to BitBucket, at the cost of a small decrease in accuracy, as dynamic context will not be applicable.\n\n#### BitBucket Self-Hosted App automatic tools\n\nTo control which commands will run automatically when a new PR is opened, you can set the `pr_commands` parameter in the configuration file:\nSpecifically, set the following values:\n\n```toml\n[bitbucket_app]\npr_commands = [\n    \"/review\",\n    \"/improve --pr_code_suggestions.commitable_code_suggestions=true --pr_code_suggestions.suggestions_score_threshold=7\",\n]\n```\n\nNote that specifically for BitBucket, we recommend using `--pr_code_suggestions.suggestions_score_threshold=7`, which is the default value we set for BitBucket.\nSince this platform only supports inline code suggestions, we want to limit the number of suggestions, and only present a limited number.\n\nTo enable BitBucket app to respond to each **push** to the PR, set (for example):\n\n```toml\n[bitbucket_app]\nhandle_push_trigger = true\npush_commands = [\n    \"/describe\",\n    \"/review\",\n]\n```\n\n### Azure DevOps provider\n\nTo use Azure DevOps provider use the following settings in configuration.toml:\n\n```toml\n[config]\ngit_provider=\"azure\"\n```\n\nAzure DevOps provider supports [PAT token](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows) or [DefaultAzureCredential](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication-overview#authentication-in-server-environments) authentication.\nPAT is faster to create, but has a built-in expiration date, and will use the user identity for API calls.\nUsing DefaultAzureCredential you can use managed identity or a service principal, which are more secure and will create separate ADO user identity (via AAD) to the agent.\n\nIf PAT was chosen, you can assign the value in .secrets.toml.\nIf DefaultAzureCredential was chosen, you 
can assign the additional env vars like AZURE_CLIENT_SECRET directly,\nor use managed identity/az cli (for local development) without any additional configuration.\nIn any case, 'org' value must be assigned in .secrets.toml:\n\n```\n[azure_devops]\norg = \"https://dev.azure.com/YOUR_ORGANIZATION/\"\n# pat = \"YOUR_PAT_TOKEN\" needed only if using PAT for authentication\n```\n\n#### Azure DevOps Webhook\n\nTo control which commands will run automatically when a new PR is opened, you can set the `pr_commands` parameter in the configuration file, similar to the GitHub App:\n\n```toml\n[azure_devops_server]\npr_commands = [\n    \"/describe\",\n    \"/review\",\n    \"/improve\",\n]\n```\n\n### Gitea Webhook\n\nAfter setting up a Gitea webhook, to control which commands will run automatically when a new MR is opened, you can set the `pr_commands` parameter in the configuration file, similar to the GitHub App:\n\n```toml\n[gitea]\npr_commands = [\n    \"/describe\",\n    \"/review\",\n    \"/improve\",\n]\n```\n"
  },
  {
    "path": "docs/docs/usage-guide/changing_a_model.md",
    "content": "## Changing a model in PR-Agent\n\nSee [here](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/algo/__init__.py) for a list of supported models in PR-Agent.\nThe default model of PR-Agent is `GPT-5` from OpenAI.\nTo use a different model than the default, you need to edit in the [configuration file](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L7) the fields:\n\n```toml\n[config]\nmodel = \"...\"\nfallback_models = [\"...\"]\n```\n\nFor models and environments not from OpenAI, you might need to provide additional keys and other parameters.\nYou can give parameters via a configuration file, or from environment variables.\n\n!!! note \"Model-specific environment variables\"\n    See [litellm documentation](https://litellm.vercel.app/docs/proxy/quick_start#supported-llms) for the environment variables needed per model, as they may vary and change over time. Our documentation per-model may not always be up-to-date with the latest changes.\n    Failing to set the needed keys of a specific model will usually result in litellm not identifying the model type, and failing to utilize it.\n\n### OpenAI like API\nTo use an OpenAI like API, set the following in your `.secrets.toml` file:\n\n```toml\n[openai]\napi_base = \"https://api.openai.com/v1\"\napi_key = \"sk-...\"\n```\n\nor use the environment variables (make sure to use double underscores `__`):\n\n```bash\nOPENAI__API_BASE=https://api.openai.com/v1\nOPENAI__KEY=sk-...\n```\n\n### OpenAI Flex Processing\n\nTo reduce costs for non-urgent/background tasks, enable Flex Processing:\n\n```toml\n[litellm]\nextra_body='{\"processing_mode\": \"flex\"}'\n```\n\nSee [OpenAI Flex Processing docs](https://platform.openai.com/docs/guides/flex-processing) for details.\n\n### Azure\n\nTo use Azure, set in your `.secrets.toml` (working from CLI), or in the GitHub `Settings > Secrets and variables` (working from GitHub App or GitHub Action):\n\n```toml\n[openai]\nkey = \"\" # 
your azure api key\napi_type = \"azure\"\napi_version = '2023-05-15'  # Check Azure documentation for the current API version\napi_base = \"\"  # The base URL for your Azure OpenAI resource. e.g. \"https://<your resource name>.openai.azure.com\"\ndeployment_id = \"\"  # The deployment name you chose when you deployed the engine\n```\n\nand set in your configuration file:\n\n```toml\n[config]\nmodel=\"\" # the OpenAI model you've deployed on Azure (e.g. gpt-4o)\nfallback_models=[\"...\"]\n```\n\nTo use Azure AD (Entra id) based authentication set in your `.secrets.toml` (working from CLI), or in the GitHub `Settings > Secrets and variables` (working from GitHub App or GitHub Action):\n\n```toml\n[azure_ad]\nclient_id = \"\"  # Your Azure AD application client ID\nclient_secret = \"\"  # Your Azure AD application client secret\ntenant_id = \"\"  # Your Azure AD tenant ID\napi_base = \"\"  # Your Azure OpenAI service base URL (e.g., https://openai.xyz.com/)\n```\n\nPassing custom headers to the underlying LLM Model API can be done by setting extra_headers parameter to litellm.\n\n```toml\n[litellm]\nextra_headers='{\"projectId\": \"<authorized projectId >\", ...}' # The value of this setting should be a JSON string representing the desired headers; a ValueError is thrown otherwise.\n```\n\nThis enables users to pass authorization tokens or API keys, when routing requests through an API management gateway.\n\n### Ollama\n\nYou can run models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama)\n\nE.g. 
to use a new model locally via Ollama, set in `.secrets.toml` or in a configuration file:\n\n```toml\n[config]\nmodel = \"ollama/qwen2.5-coder:32b\"\nfallback_models=[\"ollama/qwen2.5-coder:32b\"]\ncustom_model_max_tokens=128000 # set the maximal input tokens for the model\nduplicate_examples=true # will duplicate the examples in the prompt, to help the model to generate structured output\n\n[ollama]\napi_base = \"http://localhost:11434\" # or whatever port you're running Ollama on\n```\n\nBy default, Ollama uses a context window size of 2048 tokens. In most cases this is not enough to cover pr-agent prompt and pull-request diff. Context window size can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context length to 8K, use: `OLLAMA_CONTEXT_LENGTH=8192 ollama serve`. More information you can find on the [official ollama faq](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-can-i-specify-the-context-window-size).\n\nPlease note that the `custom_model_max_tokens` setting should be configured in accordance with the `OLLAMA_CONTEXT_LENGTH`. Failure to do so may result in unexpected model output.\n\n!!! note \"Local models vs commercial models\"\n    PR-Agent is compatible with almost any AI model, but analyzing complex code repositories and pull requests requires a model specifically optimized for code analysis.\n\n    Commercial models such as GPT-5, Claude Sonnet, and Gemini have demonstrated robust capabilities in generating structured output for code analysis tasks with large input. 
In contrast, most open-source models currently available (as of January 2025) face challenges with these complex tasks.\n\n    Based on our testing, local open-source models are suitable for experimentation and learning purposes (mainly for the `ask` command), but they are not suitable for production-level code analysis tasks.\n    \n    Hence, for production workflows and real-world usage, we recommend using commercial models.\n\n### Hugging Face\n\nTo use a new model with Hugging Face Inference Endpoints, for example, set:\n\n```toml\n[config] # in configuration.toml\nmodel = \"huggingface/meta-llama/Llama-2-7b-chat-hf\"\nfallback_models=[\"huggingface/meta-llama/Llama-2-7b-chat-hf\"]\ncustom_model_max_tokens=... # set the maximal input tokens for the model\n\n[huggingface] # in .secrets.toml\nkey = ... # your Hugging Face api key\napi_base = ... # the base url for your Hugging Face inference endpoint\n```\n\n(you can obtain a Llama2 key from [here](https://replicate.com/replicate/llama-2-70b-chat/api))\n\n### Replicate\n\nTo use Llama2 model with Replicate, for example, set:\n\n```toml\n[config] # in configuration.toml\nmodel = \"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\"\nfallback_models=[\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\"]\n[replicate] # in .secrets.toml\nkey = ...\n```\n\n(you can obtain a Llama2 key from [here](https://replicate.com/replicate/llama-2-70b-chat/api))\n\nAlso, review the [.secrets_template.toml](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/.secrets_template.toml) file for instructions on how to set keys for other models.\n\n### Groq\n\nTo use Llama3 model with Groq, for example, set:\n\n```toml\n[config] # in configuration.toml\nmodel = \"llama3-70b-8192\"\nfallback_models = [\"groq/llama3-70b-8192\"]\n[groq] # in .secrets.toml\nkey = ... 
# your Groq api key\n```\n\n(you can obtain a Groq key from [here](https://console.groq.com/keys))\n\n### xAI\n\nTo use xAI's models with PR-Agent, set:\n\n```toml\n[config] # in configuration.toml\nmodel = \"xai/grok-2-latest\"\nfallback_models = [\"xai/grok-2-latest\"] # or any other model as fallback\n\n[xai] # in .secrets.toml\nkey = \"...\" # your xAI API key\n```\n\nYou can obtain an xAI API key from [xAI's console](https://console.x.ai/) by creating an account and navigating to the developer settings page.\n\n### Vertex AI\n\nTo use Google's Vertex AI platform and its associated models (chat-bison/codechat-bison) set:\n\n```toml\n[config] # in configuration.toml\nmodel = \"vertex_ai/codechat-bison\"\nfallback_models=[\"vertex_ai/codechat-bison\"]\n\n[vertexai] # in .secrets.toml\nvertex_project = \"my-google-cloud-project\"\nvertex_location = \"\"\n```\n\nYour [application default credentials](https://cloud.google.com/docs/authentication/application-default-credentials) will be used for authentication so there is no need to set explicit credentials in most environments.\n\nIf you do want to set explicit credentials, then you can use the `GOOGLE_APPLICATION_CREDENTIALS` environment variable set to a path to a json credentials file.\n\n### Google AI Studio\n\nTo use [Google AI Studio](https://aistudio.google.com/) models, set the relevant models in the configuration section of the configuration file:\n\n```toml\n[config] # in configuration.toml\nmodel=\"gemini/gemini-1.5-flash\"\nfallback_models=[\"gemini/gemini-1.5-flash\"]\n\n[google_ai_studio] # in .secrets.toml\ngemini_api_key = \"...\"\n```\n\nIf you don't want to set the API key in the .secrets.toml file, you can set the `GOOGLE_AI_STUDIO.GEMINI_API_KEY` environment variable.\n\n### Anthropic\n\nTo use Anthropic models, set the relevant models in the configuration section of the configuration 
file:\n\n```toml\n[config]\nmodel=\"anthropic/claude-3-opus-20240229\"\nfallback_models=[\"anthropic/claude-3-opus-20240229\"]\n```\n\nAnd also set the api key in the .secrets.toml file:\n\n```toml\n[anthropic]\nKEY = \"...\"\n```\n\nSee [litellm](https://docs.litellm.ai/docs/providers/anthropic#usage) documentation for more information about the environment variables required for Anthropic.\n\n### Amazon Bedrock\n\nTo use Amazon Bedrock and its foundational models, add the below configuration:\n\n```toml\n[config] # in configuration.toml\nmodel=\"bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0\"\nfallback_models=[\"bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0\"]\n\n[aws]\nAWS_ACCESS_KEY_ID=\"...\"\nAWS_SECRET_ACCESS_KEY=\"...\"\nAWS_REGION_NAME=\"...\"\n```\n\nYou can also use the new Meta Llama 4 models available on Amazon Bedrock:\n\n```toml\n[config] # in configuration.toml\nmodel=\"bedrock/us.meta.llama4-scout-17b-instruct-v1:0\"\nfallback_models=[\"bedrock/us.meta.llama4-maverick-17b-instruct-v1:0\"]\n```\n\n#### Custom Inference Profiles\n\nTo use a custom inference profile with Amazon Bedrock (for cost allocation tags and other configuration settings), add the `model_id` parameter to your configuration:\n\n```toml\n[config] # in configuration.toml\nmodel=\"bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0\"\nfallback_models=[\"bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0\"]\n\n[aws]\nAWS_ACCESS_KEY_ID=\"...\"\nAWS_SECRET_ACCESS_KEY=\"...\"\nAWS_REGION_NAME=\"...\"\n\n[litellm]\nmodel_id = \"your-custom-inference-profile-id\"\n```\n\nThe `model_id` parameter will be passed to all Bedrock completion calls, allowing you to use custom inference profiles for better cost allocation and reporting.\n\nSee [litellm](https://docs.litellm.ai/docs/providers/bedrock#usage) documentation for more information about the environment variables required for Amazon Bedrock.\n\n### DeepSeek\n\nTo use deepseek-chat model with DeepSeek, for example, 
set:\n\n```toml\n[config] # in configuration.toml\nmodel = \"deepseek/deepseek-chat\"\nfallback_models=[\"deepseek/deepseek-chat\"]\n```\n\nand fill in your key:\n\n```toml\n[deepseek] # in .secrets.toml\nkey = ...\n```\n\n(you can obtain a deepseek-chat key from [here](https://platform.deepseek.com))\n\n### DeepInfra\n\nTo use DeepSeek model with DeepInfra, for example, set:\n\n```toml\n[config] # in configuration.toml\nmodel = \"deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B\"\nfallback_models = [\"deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B\"]\n[deepinfra] # in .secrets.toml\nkey = ... # your DeepInfra api key\n```\n\n(you can obtain a DeepInfra key from [here](https://deepinfra.com/dash/api_keys))\n\n### Mistral\n\nTo use models like Mistral or Codestral with Mistral, for example, set:\n\n```toml\n[config] # in configuration.toml\nmodel = \"mistral/mistral-small-latest\"\nfallback_models = [\"mistral/mistral-medium-latest\"]\n[mistral] # in .secrets.toml\nkey = \"...\" # your Mistral api key\n```\n\n(you can obtain a Mistral key from [here](https://console.mistral.ai/api-keys))\n\n### Codestral\n\nTo use Codestral model with Codestral, for example, set:\n\n```toml\n[config] # in configuration.toml\nmodel = \"codestral/codestral-latest\"\nfallback_models = [\"codestral/codestral-2405\"]\n[codestral] # in .secrets.toml\nkey = \"...\" # your Codestral api key\n```\n\n(you can obtain a Codestral key from [here](https://console.mistral.ai/codestral))\n\n### Openrouter\n\nTo use a model from Openrouter, for example, set:\n\n```toml\n[config] # in configuration.toml \nmodel=\"openrouter/anthropic/claude-3.7-sonnet\"\nfallback_models=[\"openrouter/deepseek/deepseek-chat\"]\ncustom_model_max_tokens=20000\n\n[openrouter]  # in .secrets.toml or passed as an environment variable openrouter__key\nkey = \"...\" # your openrouter api key\n```\n\n(you can obtain an Openrouter API key from [here](https://openrouter.ai/settings/keys))\n\n### Custom models\n\nIf the 
relevant model doesn't appear [here](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/algo/__init__.py), you can still use it as a custom model:\n\n1. Set the model name in the configuration file:\n\n```toml\n[config]\nmodel=\"custom_model_name\"\nfallback_models=[\"custom_model_name\"]\n```\n\n2. Set the maximal tokens for the model:\n\n```toml\n[config]\ncustom_model_max_tokens= ...\n```\n\n3. Go to [litellm documentation](https://litellm.vercel.app/docs/proxy/quick_start#supported-llms), find the model you want to use, and set the relevant environment variables.\n\n4. Most reasoning models do not support chat-style inputs (`system` and `user` messages) or temperature settings.\nTo bypass chat templates and temperature controls, set `config.custom_reasoning_model = true` in your configuration file.\n\n## Dedicated parameters\n\n### OpenAI models\n\n```toml\n[config]\nreasoning_effort = \"medium\" # \"low\", \"medium\", \"high\"\n```\n\nWith the OpenAI models that support reasoning effort (eg: o4-mini), you can specify its reasoning effort via `config` section. The default value is `medium`. You can change it to `high` or `low` based on your usage.\n\n### Anthropic models\n\n```toml\n[config]\nenable_claude_extended_thinking = false # Set to true to enable extended thinking feature\nextended_thinking_budget_tokens = 2048\nextended_thinking_max_output_tokens = 4096\n```\n"
  },
  {
    "path": "docs/docs/usage-guide/configuration_options.md",
    "content": "The different tools and sub-tools used by PR-Agent are adjustable via a Git configuration file.\nThere are three main ways to set persistent configurations:\n\n1. [Wiki](./configuration_options.md#wiki-configuration-file) configuration page\n2. [Local](./configuration_options.md#local-configuration-file) configuration file\n3. [Global](./configuration_options.md#global-configuration-file) configuration file\n\nIn terms of precedence, wiki configurations will override local configurations, and local configurations will override global configurations.\n\n\nFor a list of all possible configurations, see the [configuration options](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) page.\nIn addition to general configuration options, each tool has its own configurations. For example, the `review` tool will use parameters from the [pr_reviewer](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L66) section in the configuration file.\n\n!!! tip \"Tip1: Edit only what you need\"\n    Your configuration file should be minimal, and edit only the relevant values. Don't copy the entire configuration options, since it can lead to legacy problems when something changes.\n!!! tip \"Tip2: Show relevant configurations\"\n    If you set `config.output_relevant_configurations` to True, each tool will also output in a collapsible section its relevant configurations. 
This can be useful for debugging, or getting to know the configurations better.\n\n\n\n## Wiki configuration file\n\n`Platforms supported: GitHub, GitLab, Bitbucket`\n\nWith PR-Agent, you can set configurations by creating a page called `.pr_agent.toml` in the [wiki](https://github.com/qodo-ai/pr-agent/wiki/pr_agent.toml) of the repo.\nThe advantage of this method is that it allows to set configurations without needing to commit new content to the repo - just edit the wiki page and **save**.\n\n![wiki_configuration](https://codium.ai/images/pr_agent/wiki_configuration.png){width=512}\n\nClick [here](https://codium.ai/images/pr_agent/wiki_configuration_pr_agent.mp4) to see a short instructional video. We recommend surrounding the configuration content with triple-quotes (or \\`\\`\\`toml), to allow better presentation when displayed in the wiki as markdown.\nAn example content:\n\n```toml\n[pr_description]\ngenerate_ai_title=true\n```\n\nPR-Agent will know to remove the surrounding quotes when reading the configuration content.\n\n## Local configuration file\n\n`Platforms supported: GitHub, GitLab, Bitbucket, Azure DevOps`\n\nBy uploading a local `.pr_agent.toml` file to the root of the repo's default branch, you can edit and customize any configuration parameter. 
Note that you need to upload or update `.pr_agent.toml` before using the PR Agent tools (either at PR creation or via manual trigger) for the configuration to take effect.\n\nFor example, if you set in `.pr_agent.toml`:\n\n```\n[pr_reviewer]\nextra_instructions=\"\"\"\\\n- instruction a\n- instruction b\n...\n\"\"\"\n```\n\nThen you can give a list of extra instructions to the `review` tool.\n\n## Global configuration file\n\n`Platforms supported: GitHub, GitLab (cloud), Bitbucket (cloud)`\n\nIf you create a repo called `pr-agent-settings` in your **organization**, its configuration file `.pr_agent.toml` will be used as a global configuration file for any other repo that belongs to the same organization.\nParameters from a local `.pr_agent.toml` file, in a specific repo, will override the global configuration parameters.\n\nFor example, in the GitHub organization `qodo-ai`:\n\n- The file [`https://github.com/qodo-ai/pr-agent-settings/.pr_agent.toml`](https://github.com/qodo-ai/pr-agent-settings/blob/main/.pr_agent.toml)  serves as a global configuration file for all the repos in the GitHub organization `qodo-ai`.\n\n- The repo [`https://github.com/qodo-ai/pr-agent`](https://github.com/qodo-ai/pr-agent/blob/main/.pr_agent.toml) inherits the global configuration file from `pr-agent-settings`.\n\n## Project/Group level configuration file\n\n`Platforms supported: GitLab, Bitbucket Data Center`\n\nCreate a repository named `pr-agent-settings` within a specific project (Bitbucket) or a group/subgroup (Gitlab). \nThe configuration file in this repository will apply to all repositories directly under the same project/group/subgroup.\n\n!!! 
note \"Note\"\n    For Gitlab, in case of a repository nested in several sub groups, the lookup for a pr-agent-settings repo will be only on one level above such repository.\n\n\n## Organization level configuration file\n\n`Relevant platforms: Bitbucket Data Center`\n\nCreate a dedicated project to hold a global configuration file that affects all repositories across all projects in your organization.\n\n**Setting up organization-level global configuration:**\n\n1. Create a new project with both the name and key: PR_AGENT_SETTINGS.\n2. Inside the PR_AGENT_SETTINGS project, create a repository named pr-agent-settings.\n3. In this repository, add a `.pr_agent.toml` configuration file—structured similarly to the global configuration file described above.\n4. Optionally, you can add organizational-level [global best practices](../tools/improve.md#global-hierarchical-best-practices).\n\nRepositories across your entire Bitbucket organization will inherit the configuration from this file.\n\n!!! note \"Note\"\n    If both organization-level and project-level global settings are defined, the project-level settings will take precedence over the organization-level configuration. Additionally, parameters from a repository’s local .pr_agent.toml file will always override both global settings.\n"
  },
  {
    "path": "docs/docs/usage-guide/index.md",
    "content": "# Usage guide\n\nThis section provides a detailed guide on how to use PR-Agent.\nIt includes information on how to adjust PR-Agent configurations, define which tools will run automatically, and other advanced configurations.\n\n- [Introduction](./introduction.md)\n- [Configuration File](./configuration_options.md)\n- [Usage and Automation](./automations_and_usage.md)\n    - [Local Repo (CLI)](./automations_and_usage.md#local-repo-cli)\n    - [Online Usage](./automations_and_usage.md#online-usage)\n    - [GitHub App](./automations_and_usage.md#github-app)\n    - [GitHub Action](./automations_and_usage.md#github-action)\n    - [GitLab Webhook](./automations_and_usage.md#gitlab-webhook)\n    - [Gitea Webhook](./automations_and_usage.md#gitea-webhook)\n    - [BitBucket App](./automations_and_usage.md#bitbucket-app)\n    - [Azure DevOps Provider](./automations_and_usage.md#azure-devops-provider)\n- [Managing Mail Notifications](./mail_notifications.md)\n- [Changing a Model](./changing_a_model.md)\n- [Additional Configurations](./additional_configurations.md)\n    - [Ignoring files from analysis](./additional_configurations.md#ignoring-files-from-analysis)\n    - [Extra instructions](./additional_configurations.md#extra-instructions)\n    - [Working with large PRs](./additional_configurations.md#working-with-large-prs)\n    - [Changing a model](./changing_a_model.md)\n- [FAQ](../faq/index.md)\n"
  },
  {
    "path": "docs/docs/usage-guide/introduction.md",
    "content": "After [installation](../installation/index.md), there are three basic ways to invoke PR-Agent:\n\n1. Locally running a CLI command\n2. Online usage - by [commenting](https://github.com/qodo-ai/pr-agent/pull/229#issuecomment-1695021901){:target=\"_blank\"} on a PR\n3. Enabling PR-Agent tools to run automatically when a new PR is opened\n\nSpecifically, CLI commands can be issued by invoking a pre-built [docker image](../installation/locally.md#using-docker-image), or by invoking a [locally cloned repo](../installation/locally.md#run-from-source).\n\nFor online usage, you will need to setup either a [GitHub App](../installation/github.md#run-as-a-github-app) or a [GitHub Action](../installation/github.md#run-as-a-github-action) (GitHub), a [GitLab webhook](../installation/gitlab.md#run-a-gitlab-webhook-server) (GitLab), or a [BitBucket App](../installation/bitbucket.md#run-using-codiumai-hosted-bitbucket-app) (BitBucket).\nThese platforms also enable to run PR-Agent specific tools automatically when a new PR is opened, or on each push to a branch.\n"
  },
  {
    "path": "docs/docs/usage-guide/mail_notifications.md",
    "content": "\nUnfortunately, it is not possible in GitHub to disable mail notifications from a specific user.\nIf you are subscribed to notifications for a repo with PR-Agent, we recommend turning off notifications for PR comments, to avoid lengthy emails:\n\n![notifications](https://codium.ai/images/pr_agent/notifications.png){width=512}\n\nAs an alternative, you can filter in your mail provider the notifications specifically from the PR-Agent bot, [see how](https://www.quora.com/How-can-you-filter-emails-for-specific-people-in-Gmail#:~:text=On%20the%20Filters%20and%20Blocked,the%20body%20of%20the%20email).\n\n![filter_mail_notifications](https://codium.ai/images/pr_agent/filter_mail_notifications.png){width=512}\n\nAnother option to reduce the mail overload, yet still receive notifications on PR-Agent tools, is to disable the help collapsible section in PR-Agent bot comments.\nThis can done by setting `enable_help_text=false` for the relevant tool in the configuration file.\nFor example, to disable the help text for the `pr_reviewer` tool, set:\n\n```\n[pr_reviewer]\nenable_help_text = false\n```\n"
  },
  {
    "path": "docs/mkdocs.yml",
    "content": "site_name: PR-Agent\nrepo_url: https://github.com/qodo-ai/pr-agent\nrepo_name: Qodo-ai/pr-agent\n\nnav:\n  - Overview:\n    - 'index.md'\n    - Data Privacy: 'overview/data_privacy.md'\n  - Installation:\n    - 'installation/index.md'\n    - PR-Agent: 'installation/pr_agent.md'\n  - Usage Guide:\n    - 'usage-guide/index.md'\n    - Introduction: 'usage-guide/introduction.md'\n    - Configuration File: 'usage-guide/configuration_options.md'\n    - Usage and Automation: 'usage-guide/automations_and_usage.md'\n    - Managing Mail Notifications: 'usage-guide/mail_notifications.md'\n    - Changing a Model: 'usage-guide/changing_a_model.md'\n    - Additional Configurations: 'usage-guide/additional_configurations.md'\n    - Frequently Asked Questions: 'faq/index.md'\n  - Tools:\n     - 'tools/index.md'\n     - Describe: 'tools/describe.md'\n     - Review: 'tools/review.md'\n     - Improve: 'tools/improve.md'\n     - Ask: 'tools/ask.md'\n     - Add Docs: 'tools/add_docs.md'\n     - Generate Labels: 'tools/generate_labels.md'\n     - Similar Issues: 'tools/similar_issues.md'\n     - Help: 'tools/help.md'\n     - Help Docs: 'tools/help_docs.md'\n     - Update Changelog: 'tools/update_changelog.md'\n  - Core Abilities:\n      - 'core-abilities/index.md'\n      - Compression strategy: 'core-abilities/compression_strategy.md'\n      - Dynamic context: 'core-abilities/dynamic_context.md'\n      - Fetching ticket context: 'core-abilities/fetching_ticket_context.md'\n      - Interactivity: 'core-abilities/interactivity.md'\n      - Local and global metadata: 'core-abilities/metadata.md'\n      - Self-reflection: 'core-abilities/self_reflection.md'\n#  - Code Fine-tuning Benchmark: 'finetuning_benchmark/index.md'\n\ntheme:\n  logo: assets/favicon.svg\n  favicon: assets/favicon.svg\n  name: material\n  icon:\n    repo: fontawesome/brands/github\n  features:\n    - navigation.tabs\n    - navigation.expand\n    - navigation.path\n    - navigation.top\n    - 
navigation.tracking\n    - navigation.indexes\n    - search.suggest\n    - search.highlight\n    - content.tabs.link\n    - content.code.annotation\n    - content.code.copy\n    - announce.dismiss\n  language: en\n  custom_dir: overrides\n\n  palette:\n    - media: \"(prefers-color-scheme)\"\n      toggle:\n        icon: material/brightness-auto\n        name: Switch to light mode\n    - media: \"(prefers-color-scheme: light)\"\n      scheme: default\n      toggle:\n        icon: material/toggle-switch-off-outline\n        name: Switch to dark mode\n      primary: custom\n      accent: custom\n    - media: \"(prefers-color-scheme: dark)\"\n      scheme: slate\n      toggle:\n        icon: material/toggle-switch\n        name: Switch to light mode\n      primary: custom\n      accent: custom\n\nplugins:\n  - social\n  - search\n  - glightbox\n\nextra:\n  generator: false\n  social:\n    - icon: fontawesome/brands/github\n      link: https://github.com/qodo-ai/pr-agent\n\nextra_css:\n  - css/custom.css\n\nmarkdown_extensions:\n  - pymdownx.highlight:\n      anchor_linenums: true\n  - pymdownx.inlinehilite\n  - pymdownx.snippets\n  - admonition\n  - pymdownx.arithmatex:\n      generic: true\n  - footnotes\n  - pymdownx.details\n  - pymdownx.superfences\n  - pymdownx.mark\n  - md_in_html\n  - attr_list\n  - pymdownx.emoji:\n      emoji_index: !!python/name:material.extensions.emoji.twemoji\n      emoji_generator: !!python/name:material.extensions.emoji.to_svg\n  - pymdownx.tabbed:\n      alternate_style: true\n  - toc:\n      title: On this page\n      toc_depth: 3\n      permalink: true\n\n\ncopyright: |\n  &copy; 2026 PR-Agent Contributors\n"
  },
  {
    "path": "docs/overrides/main.html",
    "content": "{% extends \"base.html\" %}\n\n{% block announce %}\n  Open source PR Agent documentation. For the Qodo free version, Get Started: <a href=\"https://www.qodo.ai/get-started/\">https://www.qodo.ai/get-started/</a>\n{% endblock %}\n\n{% block scripts %}\n  {{ super() }}\n\n    <!-- Google Tag Manager (noscript) -->\n    <noscript><iframe src=\"https://www.googletagmanager.com/ns.html?id=GTM-5C9KZBM3\"\n    height=\"0\" width=\"0\" style=\"display:none;visibility:hidden\"></iframe></noscript>\n    <!-- End Google Tag Manager (noscript) -->\n{% endblock %}\n"
  },
  {
    "path": "docs/overrides/partials/footer.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Footer</title>\n<style>\n  body {\n    margin: 0;\n    padding: 0;\n    font-family: Arial, sans-serif;\n    font-size: 16px;\n  }\n\n  .wrapper {\n    background-color: #1a202c;\n  }\n\n  .container {\n    display: flex;\n    flex-direction: row;\n    align-items: center;\n    justify-content: space-between;\n    color: white;\n    padding: 20px;\n    max-width: 61rem;\n    margin-left: auto;\n    margin-right: auto;\n  }\n\n  .footer-links, .social-icons {\n    padding: 0;\n    list-style-type: none;\n    display: flex;\n    justify-content: center;\n    gap: 20px;\n    align-items: center;\n  }\n\n  .footer-links a:hover, .social-icons a:hover {\n    color: #a0aec0;\n  }\n\n  .social-icons svg {\n    width: 24px;\n    height: auto;\n    fill: white;\n  }\n\n  .footer-text {\n    width: 240px;\n  }\n\n  @media (max-width: 768px) {\n    .container {\n      flex-direction: column;\n      align-items: center;\n      text-align: center;\n    }\n\n    .footer-links, .social-icons, .footer-text {\n      width: 100%;\n      justify-content: center;\n      margin: 10px 0;\n    }\n\n    .footer-links {\n      order: 1;\n    }\n\n    .social-icons {\n      order: 2;\n    }\n\n    .footer-text {\n      order: 3;\n    }\n  }\n</style>\n</head>\n<body>\n\n<footer class=\"wrapper\">\n  <div class=\"container\">\n    <p class=\"footer-text\">&copy; 2026 PR-Agent Contributors</p>\n    <div class=\"footer-links\">\n      <a href=\"https://github.com/qodo-ai/pr-agent\">GitHub</a>\n    </div>\n    <div class=\"social-icons\">\n      <a href=\"https://github.com/qodo-ai/pr-agent\" target=\"_blank\" rel=\"noopener\" title=\"github.com\" class=\"social-link\">\n        <svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 496 512\"><!--! 
Font Awesome Free 6.5.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d=\"M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z\"></path></svg>\n      </a>\n    </div>\n  </div>\n</footer>\n\n</body>\n</html>\n"
  },
  {
    "path": "docs/overrides/partials/integrations/analytics/custom.html",
    "content": "<!-- Google Tag Manager -->\n<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':\n    new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],\n    j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=\n    'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);\n    })(window,document,'script','dataLayer','GTM-5C9KZBM3');</script>\n    <!-- End Google Tag Manager -->\n"
  },
  {
    "path": "github_action/entrypoint.sh",
    "content": "#!/bin/bash\npython /app/pr_agent/servers/github_action_runner.py\n"
  },
  {
    "path": "pr_agent/__init__.py",
    "content": ""
  },
  {
    "path": "pr_agent/agent/__init__.py",
    "content": ""
  },
  {
    "path": "pr_agent/agent/pr_agent.py",
    "content": "import shlex\nfrom functools import partial\n\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.cli_args import CliArgs\nfrom pr_agent.algo.utils import update_settings_from_args\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers.utils import apply_repo_settings\nfrom pr_agent.log import get_logger\nfrom pr_agent.tools.pr_add_docs import PRAddDocs\nfrom pr_agent.tools.pr_code_suggestions import PRCodeSuggestions\nfrom pr_agent.tools.pr_config import PRConfig\nfrom pr_agent.tools.pr_description import PRDescription\nfrom pr_agent.tools.pr_generate_labels import PRGenerateLabels\nfrom pr_agent.tools.pr_help_docs import PRHelpDocs\nfrom pr_agent.tools.pr_help_message import PRHelpMessage\nfrom pr_agent.tools.pr_line_questions import PR_LineQuestions\nfrom pr_agent.tools.pr_questions import PRQuestions\nfrom pr_agent.tools.pr_reviewer import PRReviewer\nfrom pr_agent.tools.pr_similar_issue import PRSimilarIssue\nfrom pr_agent.tools.pr_update_changelog import PRUpdateChangelog\n\ncommand2class = {\n    \"auto_review\": PRReviewer,\n    \"answer\": PRReviewer,\n    \"review\": PRReviewer,\n    \"review_pr\": PRReviewer,\n    \"describe\": PRDescription,\n    \"describe_pr\": PRDescription,\n    \"improve\": PRCodeSuggestions,\n    \"improve_code\": PRCodeSuggestions,\n    \"ask\": PRQuestions,\n    \"ask_question\": PRQuestions,\n    \"ask_line\": PR_LineQuestions,\n    \"update_changelog\": PRUpdateChangelog,\n    \"config\": PRConfig,\n    \"settings\": PRConfig,\n    \"help\": PRHelpMessage,\n    \"similar_issue\": PRSimilarIssue,\n    \"add_docs\": PRAddDocs,\n    \"generate_labels\": PRGenerateLabels,\n    \"help_docs\": PRHelpDocs,\n}\n\ncommands = list(command2class.keys())\n\n\n\nclass PRAgent:\n    def __init__(self, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):\n        self.ai_handler = 
ai_handler  # will be initialized in run_action\n\n    async def _handle_request(self, pr_url, request, notify=None) -> bool:\n        # First, apply repo specific settings if exists\n        apply_repo_settings(pr_url)\n\n        # Then, apply user specific settings if exists\n        if isinstance(request, str):\n            request = request.replace(\"'\", \"\\\\'\")\n            lexer = shlex.shlex(request, posix=True)\n            lexer.whitespace_split = True\n            action, *args = list(lexer)\n        else:\n            action, *args = request\n\n        # validate args\n        is_valid, arg = CliArgs.validate_user_args(args)\n        if not is_valid:\n            get_logger().error(\n                f\"CLI argument for param '{arg}' is forbidden. Use instead a configuration file.\"\n            )\n            return False\n\n        # Update settings from args\n        args = update_settings_from_args(args)\n\n        # Append the response language in the extra instructions\n        response_language = get_settings().config.get('response_language', 'en-us')\n        if response_language.lower() != 'en-us':\n            get_logger().info(f'User has set the response language to: {response_language}')\n            for key in get_settings():\n                setting = get_settings().get(key)\n                if str(type(setting)) == \"<class 'dynaconf.utils.boxing.DynaBox'>\":\n                    if hasattr(setting, 'extra_instructions'):\n                        current_extra_instructions = setting.extra_instructions\n                        \n                        # Define the language-specific instruction and the separator\n                        lang_instruction_text = f\"Your response MUST be written in the language corresponding to locale code: '{response_language}'. 
This is crucial.\"\n                        separator_text = \"\\n======\\n\\nIn addition, \"\n\n                        # Check if the specific language instruction is already present to avoid duplication\n                        if lang_instruction_text not in str(current_extra_instructions):\n                            if current_extra_instructions: # If there's existing text\n                                setting.extra_instructions = str(current_extra_instructions) + separator_text + lang_instruction_text\n                            else: # If extra_instructions was None or empty\n                                setting.extra_instructions = lang_instruction_text\n                        # If lang_instruction_text is already present, do nothing.\n\n        action = action.lstrip(\"/\").lower()\n        if action not in command2class:\n            get_logger().warning(f\"Unknown command: {action}\")\n            return False\n        with get_logger().contextualize(command=action, pr_url=pr_url):\n            get_logger().info(\"PR-Agent request handler started\", analytics=True)\n            if action == \"answer\":\n                if notify:\n                    notify()\n                await PRReviewer(pr_url, is_answer=True, args=args, ai_handler=self.ai_handler).run()\n            elif action == \"auto_review\":\n                await PRReviewer(pr_url, is_auto=True, args=args, ai_handler=self.ai_handler).run()\n            elif action in command2class:\n                if notify:\n                    notify()\n\n                await command2class[action](pr_url, ai_handler=self.ai_handler, args=args).run()\n            else:\n                return False\n            return True\n\n    async def handle_request(self, pr_url, request, notify=None) -> bool:\n        try:\n            return await self._handle_request(pr_url, request, notify)\n        except:\n            get_logger().exception(\"Failed to process the command.\")\n            return 
False\n"
  },
  {
    "path": "pr_agent/algo/__init__.py",
    "content": "MAX_TOKENS = {\n    'text-embedding-ada-002': 8000,\n    'gpt-3.5-turbo': 16000,\n    'gpt-3.5-turbo-0125': 16000,\n    'gpt-3.5-turbo-0613': 4000,\n    'gpt-3.5-turbo-1106': 16000,\n    'gpt-3.5-turbo-16k': 16000,\n    'gpt-3.5-turbo-16k-0613': 16000,\n    'gpt-4': 8000,\n    'gpt-4-0613': 8000,\n    'gpt-4-32k': 32000,\n    'gpt-4-1106-preview': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4-0125-preview': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4o': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4o-2024-05-13': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4-turbo-preview': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4-turbo-2024-04-09': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4-turbo': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4o-mini': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4o-mini-2024-07-18': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4o-2024-08-06': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4o-2024-11-20': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4.5-preview': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4.5-preview-2025-02-27': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-4.1': 1047576,\n    'gpt-4.1-2025-04-14': 1047576,\n    'gpt-4.1-mini': 1047576,\n    'gpt-4.1-mini-2025-04-14': 1047576,\n    'gpt-4.1-nano': 1047576,\n    'gpt-4.1-nano-2025-04-14': 1047576,\n    'gpt-5-nano': 200000,  # 200K, but may be limited by config.max_model_tokens\n    'gpt-5-mini': 200000,  # 200K, but may be limited by config.max_model_tokens\n    'gpt-5': 200000,\n    'gpt-5-2025-08-07': 200000,\n    'gpt-5.1': 200000,\n    'gpt-5.1-2025-11-13': 200000,\n    
'gpt-5.1-chat-latest': 200000,\n    'gpt-5.1-codex': 200000,\n    'gpt-5.1-codex-mini': 200000,\n    'gpt-5.2': 400000,  # 400K, but may be limited by config.max_model_tokens\n    'gpt-5.2-2025-12-11': 400000,  # 400K, but may be limited by config.max_model_tokens\n    'gpt-5.2-chat-latest': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'gpt-5.2-codex': 400000,  # 400K, but may be limited by config.max_model_tokens\n    'gpt-5.3-codex': 400000,  # 400K, but may be limited by config.max_model_tokens\n    'gpt-5.4': 272000,  # 272K safe default without opt-in 1M context parameters\n    'gpt-5.4-2026-03-05': 272000,  # 272K safe default without opt-in 1M context parameters\n    'o1-mini': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'o1-mini-2024-09-12': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'o1-preview': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'o1-preview-2024-09-12': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'o1-2024-12-17': 204800,  # 200K, but may be limited by config.max_model_tokens\n    'o1': 204800,  # 200K, but may be limited by config.max_model_tokens\n    'o3-mini': 204800,  # 200K, but may be limited by config.max_model_tokens\n    'o3-mini-2025-01-31': 204800,  # 200K, but may be limited by config.max_model_tokens\n    'o3': 200000,  # 200K, but may be limited by config.max_model_tokens\n    'o3-2025-04-16': 200000,  # 200K, but may be limited by config.max_model_tokens\n    'o4-mini': 200000, # 200K, but may be limited by config.max_model_tokens\n    'o4-mini-2025-04-16': 200000, # 200K, but may be limited by config.max_model_tokens\n    'claude-instant-1': 100000,\n    'claude-2': 100000,\n    'command-nightly': 4096,\n    'deepseek/deepseek-chat': 128000,  # 128K, but may be limited by config.max_model_tokens\n    'deepseek/deepseek-reasoner': 64000,  # 64K, but may be limited by config.max_model_tokens\n    
'openai/qwq-plus': 131072,  # 131K context length, but may be limited by config.max_model_tokens\n    'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096,\n    'meta-llama/Llama-2-7b-chat-hf': 4096,\n    'vertex_ai/codechat-bison': 6144,\n    'vertex_ai/codechat-bison-32k': 32000,\n    'vertex_ai/claude-3-haiku@20240307': 100000,\n    'vertex_ai/claude-3-5-haiku@20241022': 100000,\n    'vertex_ai/claude-haiku-4-5@20251001': 200000,\n    'vertex_ai/claude-3-sonnet@20240229': 100000,\n    'vertex_ai/claude-3-opus@20240229': 100000,\n    'vertex_ai/claude-opus-4@20250514': 200000,\n    'vertex_ai/claude-opus-4-1@20250805': 200000,\n    'vertex_ai/claude-opus-4-5@20251101': 200000,\n    'vertex_ai/claude-opus-4-6@20260120': 200000,\n    'vertex_ai/claude-opus-4-6': 200000,\n    'vertex_ai/claude-3-5-sonnet@20240620': 100000,\n    'vertex_ai/claude-3-5-sonnet-v2@20241022': 100000,\n    'vertex_ai/claude-3-7-sonnet@20250219': 200000,\n    'vertex_ai/claude-sonnet-4@20250514': 200000,\n    'vertex_ai/claude-sonnet-4-5@20250929': 200000,\n    'vertex_ai/claude-sonnet-4-6': 200000,\n    'vertex_ai/gemini-1.5-pro': 1048576,\n    'vertex_ai/gemini-2.5-pro-preview-03-25': 1048576,\n    'vertex_ai/gemini-2.5-pro-preview-05-06': 1048576,\n    'vertex_ai/gemini-2.5-pro-preview-06-05': 1048576,\n    'vertex_ai/gemini-2.5-pro': 1048576,\n    'vertex_ai/gemini-1.5-flash': 1048576,\n    'vertex_ai/gemini-2.0-flash': 1048576,\n    'vertex_ai/gemini-2.5-flash-preview-04-17': 1048576,\n    'vertex_ai/gemini-2.5-flash-preview-05-20': 1048576,\n    'vertex_ai/gemini-2.5-flash': 1048576,\n    'vertex_ai/gemini-3-flash-preview': 1048576,\n    'vertex_ai/gemini-3-pro-preview': 1048576,\n    'vertex_ai/gemini-3.1-pro-preview': 1048576,\n    'vertex_ai/gemma2': 8200,\n    'gemini/gemini-1.5-pro': 1048576,\n    'gemini/gemini-1.5-flash': 1048576,\n    'gemini/gemini-2.0-flash': 1048576,\n    'gemini/gemini-2.5-flash-preview-04-17': 1048576,\n    
'gemini/gemini-2.5-flash-preview-05-20': 1048576,\n    'gemini/gemini-2.5-flash': 1048576,\n    'gemini/gemini-2.5-pro-preview-03-25': 1048576,\n    'gemini/gemini-2.5-pro-preview-05-06': 1048576,\n    'gemini/gemini-2.5-pro-preview-06-05': 1048576,\n    'gemini/gemini-2.5-pro': 1048576,\n    'gemini/gemini-3-flash-preview': 1048576,\n    'gemini/gemini-3-pro-preview': 1048576,\n    'gemini/gemini-3.1-pro-preview': 1048576,\n    'codechat-bison': 6144,\n    'codechat-bison-32k': 32000,\n    'anthropic.claude-instant-v1': 100000,\n    'anthropic.claude-v1': 100000,\n    'anthropic.claude-v2': 100000,\n    'anthropic/claude-3-opus-20240229': 100000,\n    'anthropic/claude-opus-4-20250514': 200000,\n    'anthropic/claude-opus-4-1-20250805': 200000,\n    'anthropic/claude-opus-4-5-20251101': 200000,\n    'anthropic/claude-opus-4-6': 200000,\n    'anthropic/claude-opus-4-6-20260120': 200000,\n    'anthropic/claude-3-5-sonnet-20240620': 100000,\n    'anthropic/claude-3-5-sonnet-20241022': 100000,\n    'anthropic/claude-3-7-sonnet-20250219': 200000,\n    'anthropic/claude-sonnet-4-20250514': 200000,\n    'anthropic/claude-sonnet-4-5-20250929': 200000,\n    'anthropic/claude-sonnet-4-6': 200000,\n    'claude-opus-4-1-20250805': 200000,\n    'claude-opus-4-5-20251101': 200000,\n    'claude-opus-4-6': 200000,\n    'claude-opus-4-6-20260120': 200000,\n    'claude-3-7-sonnet-20250219': 200000,\n    'claude-sonnet-4-6': 200000,\n    'anthropic/claude-3-5-haiku-20241022': 100000,\n    'anthropic/claude-haiku-4-5-20251001': 200000,\n    'claude-haiku-4-5-20251001': 200000,\n    'bedrock/anthropic.claude-instant-v1': 100000,\n    'bedrock/anthropic.claude-v2': 100000,\n    'bedrock/anthropic.claude-v2:1': 100000,\n    'bedrock/anthropic.claude-3-sonnet-20240229-v1:0': 100000,\n    'bedrock/anthropic.claude-opus-4-20250514-v1:0': 200000,\n    'bedrock/anthropic.claude-opus-4-1-20250805-v1:0': 200000,\n    'bedrock/anthropic.claude-opus-4-6-20260120-v1:0': 200000,\n    
'bedrock/anthropic.claude-opus-4-6-v1:0': 200000,\n    'bedrock/anthropic.claude-3-haiku-20240307-v1:0': 100000,\n    'bedrock/anthropic.claude-3-5-haiku-20241022-v1:0': 100000,\n    'bedrock/anthropic.claude-haiku-4-5-20251001-v1:0': 200000,\n    'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0': 100000,\n    'bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0': 100000,\n    'bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0': 200000,\n    'bedrock/anthropic.claude-sonnet-4-20250514-v1:0': 200000,\n    'bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0': 200000,\n    'bedrock/anthropic.claude-sonnet-4-6': 200000,\n    \"bedrock/us.anthropic.claude-opus-4-20250514-v1:0\": 200000,\n    \"bedrock/us.anthropic.claude-opus-4-1-20250805-v1:0\": 200000,\n    \"bedrock/us.anthropic.claude-opus-4-6-20260120-v1:0\": 200000,\n    \"bedrock/global.anthropic.claude-opus-4-5-20251101-v1:0\": 200000,\n    \"bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0\": 200000,\n    \"bedrock/global.anthropic.claude-opus-4-6-v1:0\": 200000,\n    \"bedrock/us.anthropic.claude-opus-4-6-v1:0\": 200000,\n    \"bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0\": 100000,\n    \"bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0\": 200000,\n    \"bedrock/eu.anthropic.claude-haiku-4-5-20251001-v1:0\": 200000,\n    \"bedrock/au.anthropic.claude-haiku-4-5-20251001-v1:0\": 200000,\n    \"bedrock/jp.anthropic.claude-haiku-4-5-20251001-v1:0\": 200000,\n    \"bedrock/apac.anthropic.claude-haiku-4-5-20251001-v1:0\": 200000,\n    \"bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0\": 200000,\n    \"bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0\": 200000,\n    \"bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0\": 200000,\n    \"bedrock/global.anthropic.claude-sonnet-4-20250514-v1:0\": 200000,\n    \"bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0\": 200000,\n    \"bedrock/au.anthropic.claude-sonnet-4-5-20250929-v1:0\": 200000,\n    \"bedrock/us.anthropic.claude-sonnet-4-6\": 
200000,\n    \"bedrock/au.anthropic.claude-sonnet-4-6\": 200000,\n    \"bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0\": 100000,\n    \"bedrock/apac.anthropic.claude-3-7-sonnet-20250219-v1:0\": 200000,\n    \"bedrock/apac.anthropic.claude-sonnet-4-20250514-v1:0\": 200000,\n    \"bedrock/eu.anthropic.claude-sonnet-4-5-20250929-v1:0\": 200000,\n    \"bedrock/eu.anthropic.claude-sonnet-4-6\": 200000,\n    \"bedrock/jp.anthropic.claude-sonnet-4-5-20250929-v1:0\": 200000,\n    \"bedrock/jp.anthropic.claude-sonnet-4-6\": 200000,\n    \"bedrock/global.anthropic.claude-sonnet-4-5-20250929-v1:0\": 200000,\n    \"bedrock/global.anthropic.claude-sonnet-4-6\": 200000,\n    'claude-3-5-sonnet': 100000,\n    'bedrock/us.meta.llama4-scout-17b-instruct-v1:0': 128000,\n    'bedrock/us.meta.llama4-maverick-17b-instruct-v1:0': 128000,\n    'groq/openai/gpt-oss-120b': 131072,\n    'groq/openai/gpt-oss-20b': 131072,\n    'groq/qwen/qwen3-32b': 131000,\n    'groq/moonshotai/kimi-k2-instruct': 131072,\n    'groq/deepseek-r1-distill-llama-70b': 128000,\n    'groq/meta-llama/llama-4-maverick-17b-128e-instruct': 131072,\n    'groq/meta-llama/llama-4-scout-17b-16e-instruct': 131072,\n    'groq/llama-3.3-70b-versatile': 128000,\n    'groq/llama-3.1-8b-instant': 128000,\n    'xai/grok-2': 131072,\n    'xai/grok-2-1212': 131072,\n    'xai/grok-2-latest': 131072,\n    'xai/grok-3': 131072,\n    'xai/grok-3-beta': 131072,\n    'xai/grok-3-fast': 131072,\n    'xai/grok-3-fast-beta': 131072,\n    'xai/grok-3-mini': 131072,\n    'xai/grok-3-mini-beta': 131072,\n    'xai/grok-3-mini-fast': 131072,\n    'xai/grok-3-mini-fast-beta': 131072,\n    'ollama/llama3': 4096,\n    'watsonx/meta-llama/llama-3-8b-instruct': 4096,\n    \"watsonx/meta-llama/llama-3-70b-instruct\": 4096,\n    \"watsonx/meta-llama/llama-3-405b-instruct\": 16384,\n    \"watsonx/ibm/granite-13b-chat-v2\": 8191,\n    \"watsonx/ibm/granite-34b-code-instruct\": 8191,\n    \"watsonx/mistralai/mistral-large\": 32768,\n    
\"deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B\": 128000,\n    \"deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B\": 128000,\n    \"deepinfra/deepseek-ai/DeepSeek-R1\": 128000,\n    \"mistral/mistral-small-latest\": 8191,\n    \"mistral/mistral-medium-latest\": 8191,\n    \"mistral/mistral-large-2407\": 128000,\n    \"mistral/mistral-large-latest\": 128000,\n    \"mistral/open-mistral-7b\": 8191,\n    \"mistral/open-mixtral-8x7b\": 8191,\n    \"mistral/open-mixtral-8x22b\": 8191,\n    \"mistral/codestral-latest\": 8191,\n    \"mistral/open-mistral-nemo\": 128000,\n    \"mistral/open-mistral-nemo-2407\": 128000,\n    \"mistral/open-codestral-mamba\": 256000,\n    \"mistral/codestral-mamba-latest\": 256000,\n    \"codestral/codestral-latest\": 8191,\n    \"codestral/codestral-2405\": 8191,\n}\n\nUSER_MESSAGE_ONLY_MODELS = [\n    \"deepseek/deepseek-reasoner\",\n    \"o1-mini\",\n    \"o1-mini-2024-09-12\",\n    \"o1-preview\"\n]\n\nNO_SUPPORT_TEMPERATURE_MODELS = [\n    \"deepseek/deepseek-reasoner\",\n    \"o1-mini\",\n    \"o1-mini-2024-09-12\",\n    \"o1\",\n    \"o1-2024-12-17\",\n    \"o3-mini\",\n    \"o3-mini-2025-01-31\",\n    \"o1-preview\",\n    \"o3\",\n    \"o3-2025-04-16\",\n    \"o4-mini\",\n    \"o4-mini-2025-04-16\",\n    \"gpt-5.1-codex\",\n    \"gpt-5.1-codex-mini\",\n    \"gpt-5.2-codex\",\n    \"gpt-5.3-codex\",\n    \"gpt-5-mini\"\n]\n\nSUPPORT_REASONING_EFFORT_MODELS = [\n    \"o3-mini\",\n    \"o3-mini-2025-01-31\",\n    \"o3\",\n    \"o3-2025-04-16\",\n    \"o4-mini\",\n    \"o4-mini-2025-04-16\",\n]\n\nCLAUDE_EXTENDED_THINKING_MODELS = [\n    \"anthropic/claude-3-7-sonnet-20250219\",\n    \"claude-3-7-sonnet-20250219\"\n]\n\n# Models that require streaming mode\nSTREAMING_REQUIRED_MODELS = [\n    \"openai/qwq-plus\"\n]\n"
  },
  {
    "path": "pr_agent/algo/ai_handlers/base_ai_handler.py",
    "content": "from abc import ABC, abstractmethod\n\n\nclass BaseAiHandler(ABC):\n    \"\"\"\n    This class defines the interface for an AI handler to be used by the PR Agents.\n    \"\"\"\n\n    @abstractmethod\n    def __init__(self):\n        pass\n\n    @property\n    @abstractmethod\n    def deployment_id(self):\n        pass\n\n    @abstractmethod\n    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):\n        \"\"\"\n        This method should be implemented to return a chat completion from the AI model.\n        Args:\n            model (str): the name of the model to use for the chat completion\n            system (str): the system message string to use for the chat completion\n            user (str): the user message string to use for the chat completion\n            temperature (float): the temperature to use for the chat completion\n        \"\"\"\n        pass\n"
  },
  {
    "path": "pr_agent/algo/ai_handlers/langchain_ai_handler.py",
    "content": "_LANGCHAIN_INSTALLED = False\n\ntry:\n    from langchain_core.messages import HumanMessage, SystemMessage\n    from langchain_openai import AzureChatOpenAI, ChatOpenAI\n    _LANGCHAIN_INSTALLED = True\nexcept:  # we don't enforce langchain as a dependency, so if it's not installed, just move on\n    pass\n\nimport functools\n\nimport openai\nfrom tenacity import retry, retry_if_exception_type, retry_if_not_exception_type, stop_after_attempt\nfrom langchain_core.runnables import Runnable\n\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger\n\nOPENAI_RETRIES = 5\n\n\nclass LangChainOpenAIHandler(BaseAiHandler):\n    def __init__(self):\n        if not _LANGCHAIN_INSTALLED:\n            error_msg = \"LangChain is not installed. Please install it with `pip install langchain`.\"\n            get_logger().error(error_msg)\n            raise ImportError(error_msg)\n        \n        super().__init__()\n        self.azure = get_settings().get(\"OPENAI.API_TYPE\", \"\").lower() == \"azure\"\n\n    @property\n    def deployment_id(self):\n        \"\"\"\n        Returns the deployment ID for the OpenAI API.\n        \"\"\"\n        return get_settings().get(\"OPENAI.DEPLOYMENT_ID\", None)\n\n    async def _create_chat_async(self, deployment_id=None):\n        try:\n            if self.azure:\n                # Using Azure OpenAI service\n                return AzureChatOpenAI(\n                    openai_api_key=get_settings().openai.key,\n                    openai_api_version=get_settings().openai.api_version,\n                    azure_deployment=deployment_id,\n                    azure_endpoint=get_settings().openai.api_base,\n                )\n            else:\n                # Using standard OpenAI or other LLM services\n                openai_api_base = get_settings().get(\"OPENAI.API_BASE\", None)\n                if openai_api_base is None or 
len(openai_api_base) == 0:\n                    return ChatOpenAI(openai_api_key=get_settings().openai.key)\n                else:\n                    return ChatOpenAI(\n                        openai_api_key=get_settings().openai.key, \n                        openai_api_base=openai_api_base\n                    )\n        except AttributeError as e:\n            # Handle configuration errors\n            error_msg = f\"OpenAI {e.name} is required\" if getattr(e, \"name\") else str(e)\n            get_logger().error(error_msg)\n            raise ValueError(error_msg) from e\n\n    @retry(\n        retry=retry_if_exception_type(openai.APIError) & retry_if_not_exception_type(openai.RateLimitError),\n        stop=stop_after_attempt(OPENAI_RETRIES),\n    )\n    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):\n        if img_path:\n            get_logger().warning(f\"Image path is not supported for LangChainOpenAIHandler. Ignoring image path: {img_path}\")\n        try:\n            messages = [SystemMessage(content=system), HumanMessage(content=user)]\n            llm = await self._create_chat_async(deployment_id=self.deployment_id)\n            \n            if not isinstance(llm, Runnable):\n                error_message = (\n                    f\"The Langchain LLM object ({type(llm)}) does not implement the Runnable interface. \"\n                    f\"Please update your Langchain library to the latest version or \"\n                    f\"check your LLM configuration to support async calls. 
\"\n                    f\"PR-Agent is designed to utilize Langchain's async capabilities.\"\n                )\n                get_logger().error(error_message)\n                raise NotImplementedError(error_message)\n\n            # Handle parameters based on LLM type\n            if isinstance(llm, (ChatOpenAI, AzureChatOpenAI)):\n                # OpenAI models support all parameters\n                resp = await llm.ainvoke(\n                    input=messages,\n                    model=model,\n                    temperature=temperature\n                )\n            else:\n                # Other LLMs (like Gemini) only support input parameter\n                get_logger().info(f\"Using simplified ainvoke for {type(llm)}\")\n                resp = await llm.ainvoke(input=messages)\n\n            finish_reason = \"completed\"\n            return resp.content, finish_reason\n\n        except openai.RateLimitError as e:\n            get_logger().error(f\"Rate limit error during LLM inference: {e}\")\n            raise\n        except openai.APIError as e:\n            get_logger().warning(f\"Error during LLM inference: {e}\")\n            raise\n        except Exception as e:\n            get_logger().warning(f\"Unknown error during LLM inference: {e}\")\n            raise openai.APIError from e\n"
  },
  {
    "path": "pr_agent/algo/ai_handlers/litellm_ai_handler.py",
    "content": "import os\nimport litellm\nimport openai\nimport requests\nfrom litellm import acompletion\nfrom tenacity import retry, retry_if_exception_type, retry_if_not_exception_type, stop_after_attempt\n\nfrom pr_agent.algo import CLAUDE_EXTENDED_THINKING_MODELS, NO_SUPPORT_TEMPERATURE_MODELS, SUPPORT_REASONING_EFFORT_MODELS, USER_MESSAGE_ONLY_MODELS, STREAMING_REQUIRED_MODELS\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_helpers import _handle_streaming_response, MockResponse, _get_azure_ad_token, \\\n    _process_litellm_extra_body\nfrom pr_agent.algo.utils import ReasoningEffort, get_version\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger\nimport json\n\nMODEL_RETRIES = 2\n\n\nclass LiteLLMAIHandler(BaseAiHandler):\n    \"\"\"\n    This class handles interactions with the OpenAI API for chat completions.\n    It initializes the API key and other settings from a configuration file,\n    and provides a method for performing chat completions using the OpenAI ChatCompletion API.\n    \"\"\"\n\n    def __init__(self):\n        \"\"\"\n        Initializes the OpenAI API key and other settings from a configuration file.\n        Raises a ValueError if the OpenAI key is missing.\n        \"\"\"\n        self.azure = False\n        self.api_base = None\n        self.repetition_penalty = None\n\n        if get_settings().get(\"LITELLM.DISABLE_AIOHTTP\", False):\n            litellm.disable_aiohttp_transport = True\n        if get_settings().get(\"OPENAI.KEY\", None):\n            openai.api_key = get_settings().openai.key\n            litellm.openai_key = get_settings().openai.key\n        elif 'OPENAI_API_KEY' not in os.environ:\n            litellm.api_key = \"dummy_key\"\n        if get_settings().get(\"aws.AWS_ACCESS_KEY_ID\"):\n            assert get_settings().aws.AWS_SECRET_ACCESS_KEY and get_settings().aws.AWS_REGION_NAME, \"AWS credentials are 
incomplete\"\n            os.environ[\"AWS_ACCESS_KEY_ID\"] = get_settings().aws.AWS_ACCESS_KEY_ID\n            os.environ[\"AWS_SECRET_ACCESS_KEY\"] = get_settings().aws.AWS_SECRET_ACCESS_KEY\n            os.environ[\"AWS_REGION_NAME\"] = get_settings().aws.AWS_REGION_NAME\n        if get_settings().get(\"LITELLM.DROP_PARAMS\", None):\n            litellm.drop_params = get_settings().litellm.drop_params\n        if get_settings().get(\"LITELLM.SUCCESS_CALLBACK\", None):\n            litellm.success_callback = get_settings().litellm.success_callback\n        if get_settings().get(\"LITELLM.FAILURE_CALLBACK\", None):\n            litellm.failure_callback = get_settings().litellm.failure_callback\n        if get_settings().get(\"LITELLM.SERVICE_CALLBACK\", None):\n            litellm.service_callback = get_settings().litellm.service_callback\n        if get_settings().get(\"OPENAI.ORG\", None):\n            litellm.organization = get_settings().openai.org\n        if get_settings().get(\"OPENAI.API_TYPE\", None):\n            if get_settings().openai.api_type == \"azure\":\n                self.azure = True\n                litellm.azure_key = get_settings().openai.key\n        if get_settings().get(\"OPENAI.API_VERSION\", None):\n            litellm.api_version = get_settings().openai.api_version\n        if get_settings().get(\"OPENAI.API_BASE\", None):\n            litellm.api_base = get_settings().openai.api_base\n            self.api_base = get_settings().openai.api_base\n        if get_settings().get(\"ANTHROPIC.KEY\", None):\n            litellm.anthropic_key = get_settings().anthropic.key\n        if get_settings().get(\"COHERE.KEY\", None):\n            litellm.cohere_key = get_settings().cohere.key\n        if get_settings().get(\"GROQ.KEY\", None):\n            litellm.api_key = get_settings().groq.key\n        if get_settings().get(\"REPLICATE.KEY\", None):\n            litellm.replicate_key = get_settings().replicate.key\n        if 
get_settings().get(\"XAI.KEY\", None):\n            litellm.api_key = get_settings().xai.key\n        if get_settings().get(\"HUGGINGFACE.KEY\", None):\n            litellm.huggingface_key = get_settings().huggingface.key\n        if get_settings().get(\"HUGGINGFACE.API_BASE\", None) and 'huggingface' in get_settings().config.model:\n            litellm.api_base = get_settings().huggingface.api_base\n            self.api_base = get_settings().huggingface.api_base\n        if get_settings().get(\"OLLAMA.API_BASE\", None):\n            litellm.api_base = get_settings().ollama.api_base\n            self.api_base = get_settings().ollama.api_base\n        if get_settings().get(\"HUGGINGFACE.REPETITION_PENALTY\", None):\n            self.repetition_penalty = float(get_settings().huggingface.repetition_penalty)\n        if get_settings().get(\"VERTEXAI.VERTEX_PROJECT\", None):\n            litellm.vertex_project = get_settings().vertexai.vertex_project\n            litellm.vertex_location = get_settings().get(\n                \"VERTEXAI.VERTEX_LOCATION\", None\n            )\n        # Google AI Studio\n        # SEE https://docs.litellm.ai/docs/providers/gemini\n        if get_settings().get(\"GOOGLE_AI_STUDIO.GEMINI_API_KEY\", None):\n          os.environ[\"GEMINI_API_KEY\"] = get_settings().google_ai_studio.gemini_api_key\n\n        # Support deepseek models\n        if get_settings().get(\"DEEPSEEK.KEY\", None):\n            os.environ['DEEPSEEK_API_KEY'] = get_settings().get(\"DEEPSEEK.KEY\")\n\n        # Support deepinfra models\n        if get_settings().get(\"DEEPINFRA.KEY\", None):\n            os.environ['DEEPINFRA_API_KEY'] = get_settings().get(\"DEEPINFRA.KEY\")\n\n        # Support mistral models\n        if get_settings().get(\"MISTRAL.KEY\", None):\n            os.environ[\"MISTRAL_API_KEY\"] = get_settings().get(\"MISTRAL.KEY\")\n        \n        # Support codestral models\n        if get_settings().get(\"CODESTRAL.KEY\", None):\n            
os.environ[\"CODESTRAL_API_KEY\"] = get_settings().get(\"CODESTRAL.KEY\")\n\n        # Check for Azure AD configuration\n        if get_settings().get(\"AZURE_AD.CLIENT_ID\", None):\n            self.azure = True\n            # Generate access token using Azure AD credentials from settings\n            access_token = _get_azure_ad_token()\n            litellm.api_key = access_token\n            openai.api_key = access_token\n            \n            # Set API base from settings\n            self.api_base = get_settings().azure_ad.api_base\n            litellm.api_base = self.api_base\n            openai.api_base = self.api_base\n\n        # Support for Openrouter models\n        if get_settings().get(\"OPENROUTER.KEY\", None):\n            openrouter_api_key = get_settings().get(\"OPENROUTER.KEY\", None)\n            os.environ[\"OPENROUTER_API_KEY\"] = openrouter_api_key\n            litellm.api_key = openrouter_api_key\n            openai.api_key = openrouter_api_key\n\n            openrouter_api_base = get_settings().get(\"OPENROUTER.API_BASE\", \"https://openrouter.ai/api/v1\")\n            os.environ[\"OPENROUTER_API_BASE\"] = openrouter_api_base\n            self.api_base = openrouter_api_base\n            litellm.api_base = openrouter_api_base\n\n        # Models that only use user message\n        self.user_message_only_models = USER_MESSAGE_ONLY_MODELS\n\n        # Model that doesn't support temperature argument\n        self.no_support_temperature_models = NO_SUPPORT_TEMPERATURE_MODELS\n\n        # Models that support reasoning effort\n        self.support_reasoning_models = SUPPORT_REASONING_EFFORT_MODELS\n\n        # Models that support extended thinking\n        self.claude_extended_thinking_models = CLAUDE_EXTENDED_THINKING_MODELS\n\n        # Models that require streaming\n        self.streaming_required_models = STREAMING_REQUIRED_MODELS\n\n    def prepare_logs(self, response, system, user, resp, finish_reason):\n        response_log = 
response.dict().copy()\n        response_log['system'] = system\n        response_log['user'] = user\n        response_log['output'] = resp\n        response_log['finish_reason'] = finish_reason\n        if hasattr(self, 'main_pr_language'):\n            response_log['main_pr_language'] = self.main_pr_language\n        else:\n            response_log['main_pr_language'] = 'unknown'\n        return response_log\n\n    def _configure_claude_extended_thinking(self, model: str, kwargs: dict) -> dict:\n        \"\"\"\n        Configure Claude extended thinking parameters if applicable.\n\n        Args:\n            model (str): The AI model being used\n            kwargs (dict): The keyword arguments for the model call\n\n        Returns:\n            dict: Updated kwargs with extended thinking configuration\n        \"\"\"\n        extended_thinking_budget_tokens = get_settings().config.get(\"extended_thinking_budget_tokens\", 2048)\n        extended_thinking_max_output_tokens = get_settings().config.get(\"extended_thinking_max_output_tokens\", 4096)\n\n        # Validate extended thinking parameters\n        if not isinstance(extended_thinking_budget_tokens, int) or extended_thinking_budget_tokens <= 0:\n            raise ValueError(f\"extended_thinking_budget_tokens must be a positive integer, got {extended_thinking_budget_tokens}\")\n        if not isinstance(extended_thinking_max_output_tokens, int) or extended_thinking_max_output_tokens <= 0:\n            raise ValueError(f\"extended_thinking_max_output_tokens must be a positive integer, got {extended_thinking_max_output_tokens}\")\n        if extended_thinking_max_output_tokens < extended_thinking_budget_tokens:\n            raise ValueError(f\"extended_thinking_max_output_tokens ({extended_thinking_max_output_tokens}) must be greater than or equal to extended_thinking_budget_tokens ({extended_thinking_budget_tokens})\")\n\n        kwargs[\"thinking\"] = {\n            \"type\": \"enabled\",\n            
\"budget_tokens\": extended_thinking_budget_tokens\n        }\n        if get_settings().config.verbosity_level >= 2:\n            get_logger().info(f\"Adding max output tokens {extended_thinking_max_output_tokens} to model {model}, extended thinking budget tokens: {extended_thinking_budget_tokens}\")\n        kwargs[\"max_tokens\"] = extended_thinking_max_output_tokens\n\n        # temperature may only be set to 1 when thinking is enabled\n        if get_settings().config.verbosity_level >= 2:\n            get_logger().info(\"Temperature may only be set to 1 when thinking is enabled with claude models.\")\n        kwargs[\"temperature\"] = 1\n\n        return kwargs\n\n    def add_litellm_callbacks(self, kwargs) -> dict:\n        captured_extra = []\n\n        def capture_logs(message):\n            # Parsing the log message and context\n            record = message.record\n            log_entry = {}\n            if record.get('extra', None).get('command', None) is not None:\n                log_entry.update({\"command\": record['extra'][\"command\"]})\n            if record.get('extra', {}).get('pr_url', None) is not None:\n                log_entry.update({\"pr_url\": record['extra'][\"pr_url\"]})\n\n            # Append the log entry to the captured_logs list\n            captured_extra.append(log_entry)\n\n        # Adding the custom sink to Loguru\n        handler_id = get_logger().add(capture_logs)\n        get_logger().debug(\"Capturing logs for litellm callbacks\")\n        get_logger().remove(handler_id)\n\n        context = captured_extra[0] if len(captured_extra) > 0 else None\n\n        command = context.get(\"command\", \"unknown\")\n        pr_url = context.get(\"pr_url\", \"unknown\")\n        git_provider = get_settings().config.git_provider\n\n        metadata = dict()\n        callbacks = litellm.success_callback + litellm.failure_callback + litellm.service_callback\n        if \"langfuse\" in callbacks:\n            metadata.update({\n           
     \"trace_name\": command,\n                \"tags\": [git_provider, command, f'version:{get_version()}'],\n                \"trace_metadata\": {\n                    \"command\": command,\n                    \"pr_url\": pr_url,\n                },\n            })\n        if \"langsmith\" in callbacks:\n            metadata.update({\n                \"run_name\": command,\n                \"tags\": [git_provider, command, f'version:{get_version()}'],\n                \"extra\": {\n                    \"metadata\": {\n                        \"command\": command,\n                        \"pr_url\": pr_url,\n                    }\n                },\n            })\n\n        # Adding the captured logs to the kwargs\n        kwargs[\"metadata\"] = metadata\n\n        return kwargs\n\n    @property\n    def deployment_id(self):\n        \"\"\"\n        Returns the deployment ID for the OpenAI API.\n        \"\"\"\n        return get_settings().get(\"OPENAI.DEPLOYMENT_ID\", None)\n\n    @retry(\n        retry=retry_if_exception_type(openai.APIError) & retry_if_not_exception_type(openai.RateLimitError),\n        stop=stop_after_attempt(MODEL_RETRIES),\n    )\n    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):\n        try:\n            resp, finish_reason = None, None\n            deployment_id = self.deployment_id\n            if self.azure:\n                model = 'azure/' + model\n            if 'claude' in model and not system:\n                system = \"No system prompt provided\"\n                get_logger().warning(\n                    \"Empty system prompt for claude model. 
Adding a newline character to prevent OpenAI API error.\")\n            messages = [{\"role\": \"system\", \"content\": system}, {\"role\": \"user\", \"content\": user}]\n\n            if img_path:\n                try:\n                    # check if the image link is alive\n                    r = requests.head(img_path, allow_redirects=True)\n                    if r.status_code == 404:\n                        error_msg = f\"The image link is not [alive](img_path).\\nPlease repost the original image as a comment, and send the question again with 'quote reply' (see [instructions](https://pr-agent-docs.codium.ai/tools/ask/#ask-on-images-using-the-pr-code-as-context)).\"\n                        get_logger().error(error_msg)\n                        return f\"{error_msg}\", \"error\"\n                except Exception as e:\n                    get_logger().error(f\"Error fetching image: {img_path}\", e)\n                    return f\"Error fetching image: {img_path}\", \"error\"\n                messages[1][\"content\"] = [{\"type\": \"text\", \"text\": messages[1][\"content\"]},\n                                          {\"type\": \"image_url\", \"image_url\": {\"url\": img_path}}]\n\n            thinking_kwargs_gpt5 = None\n            if model.startswith('gpt-5'):\n                # Use configured reasoning_effort or default to MEDIUM\n                config_effort = get_settings().config.reasoning_effort\n                try:\n                    ReasoningEffort(config_effort)\n                    effort = config_effort\n                except (ValueError, TypeError):\n                    effort = ReasoningEffort.MEDIUM.value\n                    if config_effort is not None:\n                        get_logger().warning(\n                            f\"Invalid reasoning_effort '{config_effort}' in config. \"\n                            f\"Using default '{effort}'. 
Valid values: {[e.value for e in ReasoningEffort]}\"\n                        )\n\n                thinking_kwargs_gpt5 = {\n                    \"reasoning_effort\": effort,\n                    \"allowed_openai_params\": [\"reasoning_effort\"],\n                }\n                get_logger().info(f\"Using reasoning_effort='{effort}' for GPT-5 model\")\n                model = 'openai/'+model.replace('_thinking', '')  # remove _thinking suffix\n\n\n            # Currently, some models do not support a separate system and user prompts\n            if model in self.user_message_only_models or get_settings().config.custom_reasoning_model:\n                user = f\"{system}\\n\\n\\n{user}\"\n                system = \"\"\n                get_logger().info(f\"Using model {model}, combining system and user prompts\")\n                messages = [{\"role\": \"user\", \"content\": user}]\n                kwargs = {\n                    \"model\": model,\n                    \"deployment_id\": deployment_id,\n                    \"messages\": messages,\n                    \"timeout\": get_settings().config.ai_timeout,\n                    \"api_base\": self.api_base,\n                }\n            else:\n                kwargs = {\n                    \"model\": model,\n                    \"deployment_id\": deployment_id,\n                    \"messages\": messages,\n                    \"timeout\": get_settings().config.ai_timeout,\n                    \"api_base\": self.api_base,\n                }\n\n            # Add temperature only if model supports it\n            if model not in self.no_support_temperature_models and not get_settings().config.custom_reasoning_model:\n                # get_logger().info(f\"Adding temperature with value {temperature} to model {model}.\")\n                kwargs[\"temperature\"] = temperature\n\n            if thinking_kwargs_gpt5:\n                kwargs.update(thinking_kwargs_gpt5)\n                if 'temperature' in kwargs:\n 
                   del kwargs['temperature']\n\n            # Add reasoning_effort if model supports it\n            if model in self.support_reasoning_models:\n                config_effort = get_settings().config.reasoning_effort\n                try:\n                    ReasoningEffort(config_effort)\n                    reasoning_effort = config_effort\n                except (ValueError, TypeError):\n                    reasoning_effort = ReasoningEffort.MEDIUM.value\n                    if config_effort is not None:\n                        get_logger().warning(\n                            f\"Invalid reasoning_effort '{config_effort}' in config. \"\n                            f\"Using default '{reasoning_effort}'. Valid values: {[e.value for e in ReasoningEffort]}\"\n                        )\n\n                get_logger().info(f\"Adding reasoning_effort with value {reasoning_effort} to model {model}.\")\n                kwargs[\"reasoning_effort\"] = reasoning_effort\n\n            # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking\n            if (model in self.claude_extended_thinking_models) and get_settings().config.get(\"enable_claude_extended_thinking\", False):\n                kwargs = self._configure_claude_extended_thinking(model, kwargs)\n\n            if get_settings().litellm.get(\"enable_callbacks\", False):\n                kwargs = self.add_litellm_callbacks(kwargs)\n\n            seed = get_settings().config.get(\"seed\", -1)\n            if temperature > 0 and seed >= 0:\n                raise ValueError(f\"Seed ({seed}) is not supported with temperature ({temperature}) > 0\")\n            elif seed >= 0:\n                get_logger().info(f\"Using fixed seed of {seed}\")\n                kwargs[\"seed\"] = seed\n\n            if self.repetition_penalty:\n                kwargs[\"repetition_penalty\"] = self.repetition_penalty\n\n            #Added support for extra_headers while using litellm to call underlying 
model, via a api management gateway, would allow for passing custom headers for security and authorization\n            if get_settings().get(\"LITELLM.EXTRA_HEADERS\", None):\n                try:\n                    litellm_extra_headers = json.loads(get_settings().litellm.extra_headers)\n                    if not isinstance(litellm_extra_headers, dict):\n                        raise ValueError(\"LITELLM.EXTRA_HEADERS must be a JSON object\")\n                except json.JSONDecodeError as e:\n                    raise ValueError(f\"LITELLM.EXTRA_HEADERS contains invalid JSON: {str(e)}\")\n                kwargs[\"extra_headers\"] = litellm_extra_headers\n\n            # Support for custom OpenAI body fields (e.g., Flex Processing)\n            kwargs = _process_litellm_extra_body(kwargs)\n\n            # Support for Bedrock custom inference profile via model_id\n            model_id = get_settings().get(\"litellm.model_id\")\n            if model_id and 'bedrock/' in model:\n                kwargs[\"model_id\"] = model_id\n                get_logger().info(f\"Using Bedrock custom inference profile: {model_id}\")\n\n            get_logger().debug(\"Prompts\", artifact={\"system\": system, \"user\": user})\n\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"\\nSystem prompt:\\n{system}\")\n                get_logger().info(f\"\\nUser prompt:\\n{user}\")\n\n            # Get completion with automatic streaming detection\n            resp, finish_reason, response_obj = await self._get_completion(**kwargs)\n\n        except openai.RateLimitError as e:\n            get_logger().error(f\"Rate limit error during LLM inference: {e}\")\n            raise\n        except openai.APIError as e:\n            get_logger().warning(f\"Error during LLM inference: {e}\")\n            raise\n        except Exception as e:\n            get_logger().warning(f\"Unknown error during LLM inference: {e}\")\n            raise 
openai.APIError from e\n\n        get_logger().debug(f\"\\nAI response:\\n{resp}\")\n\n        # log the full response for debugging\n        response_log = self.prepare_logs(response_obj, system, user, resp, finish_reason)\n        get_logger().debug(\"Full_response\", artifact=response_log)\n\n        # for CLI debugging\n        if get_settings().config.verbosity_level >= 2:\n            get_logger().info(f\"\\nAI response:\\n{resp}\")\n\n        return resp, finish_reason\n\n    async def _get_completion(self, **kwargs):\n        \"\"\"\n        Wrapper that automatically handles streaming for required models.\n        \"\"\"\n        model = kwargs[\"model\"]\n        if model in self.streaming_required_models:\n            kwargs[\"stream\"] = True\n            get_logger().info(f\"Using streaming mode for model {model}\")\n            response = await acompletion(**kwargs)\n            resp, finish_reason = await _handle_streaming_response(response)\n            # Create MockResponse for streaming since we don't have the full response object\n            mock_response = MockResponse(resp, finish_reason)\n            return resp, finish_reason, mock_response\n        else:\n            response = await acompletion(**kwargs)\n            if response is None or len(response[\"choices\"]) == 0:\n                raise openai.APIError\n            return (response[\"choices\"][0]['message']['content'],\n                    response[\"choices\"][0][\"finish_reason\"],\n                    response)\n"
  },
  {
    "path": "pr_agent/algo/ai_handlers/litellm_helpers.py",
    "content": "import json\n\nimport openai\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger\n\n\nasync def _handle_streaming_response(response):\n    \"\"\"\n    Handle streaming response from acompletion and collect the full response.\n\n    Args:\n        response: The streaming response object from acompletion\n\n    Returns:\n        tuple: (full_response_content, finish_reason)\n    \"\"\"\n    full_response = \"\"\n    finish_reason = None\n\n    try:\n        async for chunk in response:\n            if chunk.choices and len(chunk.choices) > 0:\n                choice = chunk.choices[0]\n                delta = choice.delta\n                content = getattr(delta, 'content', None)\n                if content:\n                    full_response += content\n                if choice.finish_reason:\n                    finish_reason = choice.finish_reason\n    except Exception as e:\n        get_logger().error(f\"Error handling streaming response: {e}\")\n        raise\n\n    if not full_response and finish_reason is None:\n        get_logger().warning(\"Streaming response resulted in empty content with no finish reason\")\n        raise openai.APIError(\"Empty streaming response received without proper completion\")\n    elif not full_response and finish_reason:\n        get_logger().debug(f\"Streaming response resulted in empty content but completed with finish_reason: {finish_reason}\")\n        raise openai.APIError(f\"Streaming response completed with finish_reason '{finish_reason}' but no content received\")\n    return full_response, finish_reason\n\n\nclass MockResponse:\n    \"\"\"Mock response object for streaming models to enable consistent logging.\"\"\"\n\n    def __init__(self, resp, finish_reason):\n        self._data = {\n            \"choices\": [\n                {\n                    \"message\": {\"content\": resp},\n                    \"finish_reason\": finish_reason\n                }\n           
 ]\n        }\n\n    def dict(self):\n        return self._data\n\n\ndef _get_azure_ad_token():\n    \"\"\"\n    Generates an access token using Azure AD credentials from settings.\n    Returns:\n        str: The access token\n    \"\"\"\n    from azure.identity import ClientSecretCredential\n    try:\n        credential = ClientSecretCredential(\n            tenant_id=get_settings().azure_ad.tenant_id,\n            client_id=get_settings().azure_ad.client_id,\n            client_secret=get_settings().azure_ad.client_secret\n        )\n        # Get token for Azure OpenAI service\n        token = credential.get_token(\"https://cognitiveservices.azure.com/.default\")\n        return token.token\n    except Exception as e:\n        get_logger().error(f\"Failed to get Azure AD token: {e}\")\n        raise\n\n\ndef _process_litellm_extra_body(kwargs: dict) -> dict:\n    \"\"\"\n    Process LITELLM.EXTRA_BODY configuration and update kwargs accordingly.\n\n    Args:\n        kwargs: The current kwargs dictionary to update\n\n    Returns:\n        Updated kwargs dictionary\n\n    Raises:\n        ValueError: If extra_body contains invalid JSON, unsupported keys, or colliding keys\n    \"\"\"\n    allowed_extra_body_keys = {\"processing_mode\", \"service_tier\"}\n    extra_body = getattr(getattr(get_settings(), \"litellm\", None), \"extra_body\", None)\n    if extra_body:\n        try:\n            litellm_extra_body = json.loads(extra_body)\n            if not isinstance(litellm_extra_body, dict):\n                raise ValueError(\"LITELLM.EXTRA_BODY must be a JSON object\")\n            unsupported_keys = set(litellm_extra_body.keys()) - allowed_extra_body_keys\n            if unsupported_keys:\n                raise ValueError(f\"LITELLM.EXTRA_BODY contains unsupported keys: {', '.join(unsupported_keys)}. 
Allowed keys: {', '.join(allowed_extra_body_keys)}\")\n            colliding_keys = kwargs.keys() & litellm_extra_body.keys()\n            if colliding_keys:\n                raise ValueError(f\"LITELLM.EXTRA_BODY cannot override existing parameters: {', '.join(colliding_keys)}\")\n            kwargs.update(litellm_extra_body)\n        except json.JSONDecodeError as e:\n            raise ValueError(f\"LITELLM.EXTRA_BODY contains invalid JSON: {str(e)}\")\n    return kwargs"
  },
  {
    "path": "pr_agent/algo/ai_handlers/openai_ai_handler.py",
    "content": "from os import environ\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nimport openai\nfrom openai import AsyncOpenAI\nfrom tenacity import retry, retry_if_exception_type, retry_if_not_exception_type, stop_after_attempt\n\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger\n\nOPENAI_RETRIES = 5\n\n\nclass OpenAIHandler(BaseAiHandler):\n    def __init__(self):\n        # Initialize OpenAIHandler specific attributes here\n        try:\n            super().__init__()\n            environ[\"OPENAI_API_KEY\"] = get_settings().openai.key\n            if get_settings().get(\"OPENAI.ORG\", None):\n                openai.organization = get_settings().openai.org\n            if get_settings().get(\"OPENAI.API_TYPE\", None):\n                if get_settings().openai.api_type == \"azure\":\n                    self.azure = True\n                    openai.azure_key = get_settings().openai.key\n            if get_settings().get(\"OPENAI.API_VERSION\", None):\n                openai.api_version = get_settings().openai.api_version\n            if get_settings().get(\"OPENAI.API_BASE\", None):\n                environ[\"OPENAI_BASE_URL\"] = get_settings().openai.api_base\n\n        except AttributeError as e:\n            raise ValueError(\"OpenAI key is required\") from e\n\n    @property\n    def deployment_id(self):\n        \"\"\"\n        Returns the deployment ID for the OpenAI API.\n        \"\"\"\n        return get_settings().get(\"OPENAI.DEPLOYMENT_ID\", None)\n\n    @retry(\n        retry=retry_if_exception_type(openai.APIError) & retry_if_not_exception_type(openai.RateLimitError),\n        stop=stop_after_attempt(OPENAI_RETRIES),\n    )\n    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):\n        try:\n            if img_path:\n                
get_logger().warning(f\"Image path is not supported for OpenAIHandler. Ignoring image path: {img_path}\")\n            get_logger().info(\"System: \", system)\n            get_logger().info(\"User: \", user)\n            messages = [{\"role\": \"system\", \"content\": system}, {\"role\": \"user\", \"content\": user}]\n            client = AsyncOpenAI()\n            chat_completion = await client.chat.completions.create(\n                model=model,\n                messages=messages,\n                temperature=temperature,\n            )\n            resp = chat_completion.choices[0].message.content\n            finish_reason = chat_completion.choices[0].finish_reason\n            usage = chat_completion.usage\n            get_logger().info(\"AI response\", response=resp, messages=messages, finish_reason=finish_reason,\n                              model=model, usage=usage)\n            return resp, finish_reason\n        except openai.RateLimitError as e:\n            get_logger().error(f\"Rate limit error during LLM inference: {e}\")\n            raise\n        except openai.APIError as e:\n            get_logger().warning(f\"Error during LLM inference: {e}\")\n            raise\n        except Exception as e:\n            get_logger().warning(f\"Unknown error during LLM inference: {e}\")\n            raise openai.APIError from e\n"
  },
  {
    "path": "pr_agent/algo/cli_args.py",
    "content": "from base64 import b64decode, encode, b64encode\nimport hashlib\n\nclass CliArgs:\n    @staticmethod\n    def validate_user_args(args: list) -> (bool, str):\n        try:\n            if not args:\n                return True, \"\"\n\n            # decode forbidden args\n            # b64encode('word'.encode()).decode()\n            _encoded_args = 'c2hhcmVkX3NlY3JldA==:dXNlcg==:c3lzdGVt:ZW5hYmxlX2NvbW1lbnRfYXBwcm92YWw=:ZW5hYmxlX21hbnVhbF9hcHByb3ZhbA==:ZW5hYmxlX2F1dG9fYXBwcm92YWw=:YXBwcm92ZV9wcl9vbl9zZWxmX3Jldmlldw==:YmFzZV91cmw=:dXJs:YXBwX25hbWU=:c2VjcmV0X3Byb3ZpZGVy:Z2l0X3Byb3ZpZGVy:c2tpcF9rZXlz:b3BlbmFpLmtleQ==:QU5BTFlUSUNTX0ZPTERFUg==:dXJp:YXBwX2lk:d2ViaG9va19zZWNyZXQ=:YmVhcmVyX3Rva2Vu:UEVSU09OQUxfQUNDRVNTX1RPS0VO:b3ZlcnJpZGVfZGVwbG95bWVudF90eXBl:cHJpdmF0ZV9rZXk=:bG9jYWxfY2FjaGVfcGF0aA==:ZW5hYmxlX2xvY2FsX2NhY2hl:amlyYV9iYXNlX3VybA==:YXBpX2Jhc2U=:YXBpX3R5cGU=:YXBpX3ZlcnNpb24=:c2tpcF9rZXlz'\n\n            forbidden_cli_args = []\n            for e in _encoded_args.split(':'):\n                forbidden_cli_args.append(b64decode(e).decode())\n\n            # lowercase all forbidden args\n            for i, _ in enumerate(forbidden_cli_args):\n                forbidden_cli_args[i] = forbidden_cli_args[i].lower()\n                if '.' not in forbidden_cli_args[i]:\n                    forbidden_cli_args[i] = '.' + forbidden_cli_args[i]\n\n            for arg in args:\n                if arg.startswith('--'):\n                    arg_word = arg.lower()\n                    arg_word = arg_word.replace('__', '.')  # replace double underscore with dot, e.g. --openai__key -> --openai.key\n                    for forbidden_arg_word in forbidden_cli_args:\n                        if forbidden_arg_word in arg_word:\n                            return False, forbidden_arg_word\n            return True, \"\"\n        except Exception as e:\n            return False, str(e)\n\n\n"
  },
  {
    "path": "pr_agent/algo/file_filter.py",
    "content": "import fnmatch\nimport re\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger\n\n\ndef filter_ignored(files, platform = 'github'):\n    \"\"\"\n    Filter out files that match the ignore patterns.\n    \"\"\"\n\n    try:\n        # load regex patterns, and translate glob patterns to regex\n        patterns = get_settings().ignore.regex\n        if isinstance(patterns, str):\n            patterns = [patterns]\n        glob_setting = get_settings().ignore.glob\n        if isinstance(glob_setting, str):  # --ignore.glob=[.*utils.py], --ignore.glob=.*utils.py\n            glob_setting = glob_setting.strip('[]').split(\",\")\n        patterns += translate_globs_to_regexes(glob_setting)\n\n        code_generators = get_settings().config.get('ignore_language_framework', [])\n        if isinstance(code_generators, str):\n            get_logger().warning(\"'ignore_language_framework' should be a list. Skipping language framework filtering.\")\n            code_generators = []\n        for cg in code_generators:\n            glob_patterns = get_settings().generated_code.get(cg, [])\n            if isinstance(glob_patterns, str):\n                glob_patterns = [glob_patterns]\n            patterns += translate_globs_to_regexes(glob_patterns)\n\n        # compile all valid patterns\n        compiled_patterns = []\n        for r in patterns:\n            try:\n                compiled_patterns.append(re.compile(r))\n            except re.error:\n                pass\n\n        # keep filenames that _don't_ match the ignore regex\n        if files and isinstance(files, list):\n            for r in compiled_patterns:\n                if platform == 'github':\n                    files = [f for f in files if (f.filename and not r.match(f.filename))]\n                elif platform == 'bitbucket':\n                    # files = [f for f in files if (f.new.path and not r.match(f.new.path))]\n                    files_o = []\n      
              for f in files:\n                        if hasattr(f, 'new'):\n                            if f.new and f.new.path and not r.match(f.new.path):\n                                files_o.append(f)\n                                continue\n                        if hasattr(f, 'old'):\n                            if f.old and f.old.path and not r.match(f.old.path):\n                                files_o.append(f)\n                                continue\n                    files = files_o\n                elif platform == 'bitbucket_server':\n                    files = [f for f in files if f.get('path', {}).get('toString') and not r.match(f['path']['toString'])]\n                elif platform == 'gitlab':\n                    # files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))]\n                    files_o = []\n                    for f in files:\n                        if 'new_path' in f and f['new_path'] and not r.match(f['new_path']):\n                            files_o.append(f)\n                            continue\n                        if 'old_path' in f and f['old_path'] and not r.match(f['old_path']):\n                            files_o.append(f)\n                            continue\n                    files = files_o\n                elif platform == 'azure':\n                    files = [f for f in files if not r.match(f)]\n                elif platform == 'gitea':\n                    files = [f for f in files if not r.match(f.get(\"filename\", \"\"))]\n\n\n    except Exception as e:\n        print(f\"Could not filter file list: {e}\")\n\n    return files\n\ndef translate_globs_to_regexes(globs: list):\n    regexes = []\n    for pattern in globs:\n        regexes.append(fnmatch.translate(pattern))\n        if pattern.startswith(\"**/\"): # cover root-level files\n            regexes.append(fnmatch.translate(pattern[3:]))\n    return regexes\n"
  },
  {
    "path": "pr_agent/algo/git_patch_processing.py",
    "content": "from __future__ import annotations\n\nimport re\nimport traceback\n\nfrom pr_agent.algo.types import EDIT_TYPE, FilePatchInfo\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger\n\n\ndef extend_patch(original_file_str, patch_str, patch_extra_lines_before=0,\n                 patch_extra_lines_after=0, filename: str = \"\", new_file_str=\"\") -> str:\n    if not patch_str or (patch_extra_lines_before == 0 and patch_extra_lines_after == 0) or not original_file_str:\n        return patch_str\n\n    original_file_str = decode_if_bytes(original_file_str)\n    new_file_str = decode_if_bytes(new_file_str)\n    if not original_file_str:\n        return patch_str\n\n    if should_skip_patch(filename):\n        return patch_str\n\n    try:\n        extended_patch_str = process_patch_lines(patch_str, original_file_str,\n                                                 patch_extra_lines_before, patch_extra_lines_after, new_file_str)\n    except Exception as e:\n        get_logger().warning(f\"Failed to extend patch: {e}\", artifact={\"traceback\": traceback.format_exc()})\n        return patch_str\n\n    return extended_patch_str\n\n\ndef decode_if_bytes(original_file_str):\n    if isinstance(original_file_str, (bytes, bytearray)):\n        try:\n            return original_file_str.decode('utf-8')\n        except UnicodeDecodeError:\n            encodings_to_try = ['iso-8859-1', 'latin-1', 'ascii', 'utf-16']\n            for encoding in encodings_to_try:\n                try:\n                    return original_file_str.decode(encoding)\n                except UnicodeDecodeError:\n                    continue\n            return \"\"\n    return original_file_str\n\n\ndef should_skip_patch(filename):\n    patch_extension_skip_types = get_settings().config.patch_extension_skip_types\n    if patch_extension_skip_types and filename:\n        return any(filename.endswith(skip_type) for skip_type in patch_extension_skip_types)\n   
 return False\n\n\ndef process_patch_lines(patch_str, original_file_str, patch_extra_lines_before, patch_extra_lines_after, new_file_str=\"\"):\n    allow_dynamic_context = get_settings().config.allow_dynamic_context\n    patch_extra_lines_before_dynamic = get_settings().config.max_extra_lines_before_dynamic_context\n\n    file_original_lines = original_file_str.splitlines()\n    file_new_lines = new_file_str.splitlines() if new_file_str else []\n    len_original_lines = len(file_original_lines)\n    patch_lines = patch_str.splitlines()\n    extended_patch_lines = []\n\n    is_valid_hunk = True\n    start1, size1, start2, size2 = -1, -1, -1, -1\n    RE_HUNK_HEADER = re.compile(\n        r\"^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@[ ]?(.*)\")\n    try:\n        for i,line in enumerate(patch_lines):\n            if line.startswith('@@'):\n                match = RE_HUNK_HEADER.match(line)\n                # identify hunk header\n                if match:\n                    # finish processing previous hunk\n                    if is_valid_hunk and (start1 != -1 and patch_extra_lines_after > 0):\n                        delta_lines_original = [f' {line}' for line in file_original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]]\n                        extended_patch_lines.extend(delta_lines_original)\n\n                    section_header, size1, size2, start1, start2 = extract_hunk_headers(match)\n\n                    is_valid_hunk = check_if_hunk_lines_matches_to_file(i, file_original_lines, patch_lines, start1)\n\n                    if is_valid_hunk and (patch_extra_lines_before > 0 or patch_extra_lines_after > 0):\n                        def _calc_context_limits(patch_lines_before):\n                            extended_start1 = max(1, start1 - patch_lines_before)\n                            extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after\n                            extended_start2 = max(1, start2 
- patch_lines_before)\n                            extended_size2 = size2 + (start2 - extended_start2) + patch_extra_lines_after\n                            if extended_start1 - 1 + extended_size1 > len_original_lines:\n                                # we cannot extend beyond the original file\n                                delta_cap = extended_start1 - 1 + extended_size1 - len_original_lines\n                                extended_size1 = max(extended_size1 - delta_cap, size1)\n                                extended_size2 = max(extended_size2 - delta_cap, size2)\n                            return extended_start1, extended_size1, extended_start2, extended_size2\n\n                        if allow_dynamic_context and file_new_lines:\n                            extended_start1, extended_size1, extended_start2, extended_size2 = \\\n                                _calc_context_limits(patch_extra_lines_before_dynamic)\n\n                            lines_before_original = file_original_lines[extended_start1 - 1:start1 - 1]\n                            lines_before_new = file_new_lines[extended_start2 - 1:start2 - 1]\n                            found_header = False\n                            for i, line in enumerate(lines_before_original):\n                                if section_header in line:\n                                    # Update start and size in one line each\n                                    extended_start1, extended_start2 = extended_start1 + i, extended_start2 + i\n                                    extended_size1, extended_size2 = extended_size1 - i, extended_size2 - i\n                                    lines_before_original_dynamic_context = lines_before_original[i:]\n                                    lines_before_new_dynamic_context = lines_before_new[i:]\n                                    if lines_before_original_dynamic_context == lines_before_new_dynamic_context:\n                                        # 
get_logger().debug(f\"found dynamic context match for section header: {section_header}\")\n                                        found_header = True\n                                        section_header = ''\n                                    else:\n                                        pass  # its ok to be here. We cant apply dynamic context if the lines are different if 'old' and 'new' hunks\n                                    break\n\n                            if not found_header:\n                                # get_logger().debug(f\"Section header not found in the extra lines before the hunk\")\n                                extended_start1, extended_size1, extended_start2, extended_size2 = \\\n                                    _calc_context_limits(patch_extra_lines_before)\n                        else:\n                            extended_start1, extended_size1, extended_start2, extended_size2 = \\\n                                _calc_context_limits(patch_extra_lines_before)\n\n                        # check if extra lines before hunk are different in original and new file\n                        delta_lines_original = [f' {line}' for line in file_original_lines[extended_start1 - 1:start1 - 1]]\n                        if file_new_lines:\n                            delta_lines_new = [f' {line}' for line in file_new_lines[extended_start2 - 1:start2 - 1]]\n                            if delta_lines_original != delta_lines_new:\n                                found_mini_match = False\n                                for i in range(len(delta_lines_original)):\n                                    if delta_lines_original[i:] == delta_lines_new[i:]:\n                                        delta_lines_original = delta_lines_original[i:]\n                                        delta_lines_new = delta_lines_new[i:]\n                                        extended_start1 += i\n                                        extended_size1 -= i\n      
                                  extended_start2 += i\n                                        extended_size2 -= i\n                                        found_mini_match = True\n                                        break\n                                if not found_mini_match:\n                                    extended_start1 = start1\n                                    extended_size1 = size1\n                                    extended_start2 = start2\n                                    extended_size2 = size2\n                                    delta_lines_original = []\n                                    # get_logger().debug(f\"Extra lines before hunk are different in original and new file\",\n                                    #                    artifact={\"delta_lines_original\": delta_lines_original,\n                                    #                              \"delta_lines_new\": delta_lines_new})\n\n                        #  logic to remove section header if its in the extra delta lines (in dynamic context, this is also done)\n                        if section_header and not allow_dynamic_context:\n                            for line in delta_lines_original:\n                                if section_header in line:\n                                    section_header = ''  # remove section header if it is in the extra delta lines\n                                    break\n                    else:\n                        extended_start1 = start1\n                        extended_size1 = size1\n                        extended_start2 = start2\n                        extended_size2 = size2\n                        delta_lines_original = []\n                    extended_patch_lines.append('')\n                    extended_patch_lines.append(\n                        f'@@ -{extended_start1},{extended_size1} '\n                        f'+{extended_start2},{extended_size2} @@ {section_header}')\n                    
extended_patch_lines.extend(delta_lines_original)  # one to zero based\n                    continue\n            extended_patch_lines.append(line)\n    except Exception as e:\n        get_logger().warning(f\"Failed to extend patch: {e}\", artifact={\"traceback\": traceback.format_exc()})\n        return patch_str\n\n    # finish processing last hunk\n    if start1 != -1 and patch_extra_lines_after > 0 and is_valid_hunk:\n        delta_lines_original = file_original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]\n        # add space at the beginning of each extra line\n        delta_lines_original = [f' {line}' for line in delta_lines_original]\n        extended_patch_lines.extend(delta_lines_original)\n\n    extended_patch_str = '\\n'.join(extended_patch_lines)\n    return extended_patch_str\n\ndef check_if_hunk_lines_matches_to_file(i, original_lines, patch_lines, start1):\n    \"\"\"\n    Check if the hunk lines match the original file content. We saw cases where the hunk header line doesn't match the original file content, and then\n    extending the hunk with extra lines before the hunk header can cause the hunk to be invalid.\n    \"\"\"\n    is_valid_hunk = True\n    try:\n        if i + 1 < len(patch_lines) and patch_lines[i + 1][0] == ' ': # an existing line in the file\n            if patch_lines[i + 1].strip() != original_lines[start1 - 1].strip():\n                # check if different encoding is needed\n                original_line = original_lines[start1 - 1].strip()\n                for encoding in ['iso-8859-1', 'latin-1', 'ascii', 'utf-16']:\n                    try:\n                        if original_line.encode(encoding).decode().strip() == patch_lines[i + 1].strip():\n                            get_logger().info(f\"Detected different encoding in hunk header line {start1}, needed encoding: {encoding}\")\n                            return False # we still want to avoid extending the hunk. 
But we don't want to log an error\n                    except:\n                        pass\n\n                is_valid_hunk = False\n                get_logger().info(\n                    f\"Invalid hunk in PR, line {start1} in hunk header doesn't match the original file content\")\n    except:\n        pass\n    return is_valid_hunk\n\n\ndef extract_hunk_headers(match):\n    res = list(match.groups())\n    for i in range(len(res)):\n        if res[i] is None:\n            res[i] = 0\n    try:\n        start1, size1, start2, size2 = map(int, res[:4])\n    except:  # '@@ -0,0 +1 @@' case\n        start1, size1, size2 = map(int, res[:3])\n        start2 = 0\n    section_header = res[4]\n    return section_header, size1, size2, start1, start2\n\n\ndef omit_deletion_hunks(patch_lines) -> str:\n    \"\"\"\n    Omit deletion hunks from the patch and return the modified patch.\n    Args:\n    - patch_lines: a list of strings representing the lines of the patch\n    Returns:\n    - A string representing the modified patch with deletion hunks omitted\n    \"\"\"\n\n    temp_hunk = []\n    added_patched = []\n    add_hunk = False\n    inside_hunk = False\n    RE_HUNK_HEADER = re.compile(\n        r\"^@@ -(\\d+)(?:,(\\d+))? 
\\+(\\d+)(?:,(\\d+))?\\ @@[ ]?(.*)\")\n\n    for line in patch_lines:\n        if line.startswith('@@'):\n            match = RE_HUNK_HEADER.match(line)\n            if match:\n                # finish previous hunk\n                if inside_hunk and add_hunk:\n                    added_patched.extend(temp_hunk)\n                    temp_hunk = []\n                    add_hunk = False\n                temp_hunk.append(line)\n                inside_hunk = True\n        else:\n            temp_hunk.append(line)\n            if line:\n                edit_type = line[0]\n                if edit_type == '+':\n                    add_hunk = True\n    if inside_hunk and add_hunk:\n        added_patched.extend(temp_hunk)\n\n    return '\\n'.join(added_patched)\n\n\ndef handle_patch_deletions(patch: str, original_file_content_str: str,\n                           new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN) -> str:\n    \"\"\"\n    Handle entire file or deletion patches.\n\n    This function takes a patch, original file content, new file content, and file name as input.\n    It handles entire file or deletion patches and returns the modified patch with deletion hunks omitted.\n\n    Args:\n        patch (str): The patch to be handled.\n        original_file_content_str (str): The original content of the file.\n        new_file_content_str (str): The new content of the file.\n        file_name (str): The name of the file.\n\n    Returns:\n        str: The modified patch with deletion hunks omitted.\n\n    \"\"\"\n    if not new_file_content_str and (edit_type == EDIT_TYPE.DELETED or edit_type == EDIT_TYPE.UNKNOWN):\n        # logic for handling deleted files - don't show patch, just show that the file was deleted\n        if get_settings().config.verbosity_level > 0:\n            get_logger().info(f\"Processing file: {file_name}, minimizing deletion file\")\n        patch = None # file was deleted\n    else:\n        patch_lines = 
patch.splitlines()\n        patch_new = omit_deletion_hunks(patch_lines)\n        if patch != patch_new:\n            if get_settings().config.verbosity_level > 0:\n                get_logger().info(f\"Processing file: {file_name}, hunks were deleted\")\n            patch = patch_new\n    return patch\n\n\ndef decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:\n    \"\"\"\n    Convert a given patch string into a string with line numbers for each hunk, indicating the new and old content of\n    the file.\n\n    Args:\n        patch (str): The patch string to be converted.\n        file: An object containing the filename of the file being patched.\n\n    Returns:\n        str: A string with line numbers for each hunk, indicating the new and old content of the file.\n\n    example output:\n## src/file.ts\n__new hunk__\n881        line1\n882        line2\n883        line3\n887 +      line4\n888 +      line5\n889        line6\n890        line7\n...\n__old hunk__\n        line1\n        line2\n-       line3\n-       line4\n        line5\n        line6\n           ...\n    \"\"\"\n\n    # Add a header for the file\n    if file:\n        # if the file was deleted, return a message indicating that the file was deleted\n        if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:\n            return f\"\\n\\n## File '{file.filename.strip()}' was deleted\\n\"\n\n        patch_with_lines_str = f\"\\n\\n## File: '{file.filename.strip()}'\\n\"\n    else:\n        patch_with_lines_str = \"\"\n\n    patch_lines = patch.splitlines()\n    RE_HUNK_HEADER = re.compile(\n        r\"^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? 
@@[ ]?(.*)\")\n    new_content_lines = []\n    old_content_lines = []\n    match = None\n    start1, size1, start2, size2 = -1, -1, -1, -1\n    prev_header_line = []\n    header_line = []\n    for line_i, line in enumerate(patch_lines):\n        if 'no newline at end of file' in line.lower():\n            continue\n\n        if line.startswith('@@'):\n            header_line = line\n            match = RE_HUNK_HEADER.match(line)\n            if match and (new_content_lines or old_content_lines):  # found a new hunk, split the previous lines\n                if prev_header_line:\n                    patch_with_lines_str += f'\\n{prev_header_line}\\n'\n                is_plus_lines = is_minus_lines = False\n                if new_content_lines:\n                    is_plus_lines = any([line.startswith('+') for line in new_content_lines])\n                if old_content_lines:\n                    is_minus_lines = any([line.startswith('-') for line in old_content_lines])\n                if is_plus_lines or is_minus_lines: # notice 'True' here - we always present __new hunk__ for section, otherwise LLM gets confused\n                    patch_with_lines_str = patch_with_lines_str.rstrip() + '\\n__new hunk__\\n'\n                    for i, line_new in enumerate(new_content_lines):\n                        patch_with_lines_str += f\"{start2 + i} {line_new}\\n\"\n                if is_minus_lines:\n                    patch_with_lines_str = patch_with_lines_str.rstrip() + '\\n__old hunk__\\n'\n                    for line_old in old_content_lines:\n                        patch_with_lines_str += f\"{line_old}\\n\"\n                new_content_lines = []\n                old_content_lines = []\n            if match:\n                prev_header_line = header_line\n\n            section_header, size1, size2, start1, start2 = extract_hunk_headers(match)\n\n        elif line.startswith('+'):\n            new_content_lines.append(line)\n        elif line.startswith('-'):\n    
        old_content_lines.append(line)\n        else:\n            if not line and line_i: # if this line is empty and the next line is a hunk header, skip it\n                if line_i + 1 < len(patch_lines) and patch_lines[line_i + 1].startswith('@@'):\n                    continue\n                elif line_i + 1 == len(patch_lines):\n                    continue\n            new_content_lines.append(line)\n            old_content_lines.append(line)\n\n    # finishing last hunk\n    if match and new_content_lines:\n        patch_with_lines_str += f'\\n{header_line}\\n'\n        is_plus_lines = is_minus_lines = False\n        if new_content_lines:\n            is_plus_lines = any([line.startswith('+') for line in new_content_lines])\n        if old_content_lines:\n            is_minus_lines = any([line.startswith('-') for line in old_content_lines])\n        if is_plus_lines or is_minus_lines:  # notice 'True' here - we always present __new hunk__ for section, otherwise LLM gets confused\n            patch_with_lines_str = patch_with_lines_str.rstrip() + '\\n__new hunk__\\n'\n            for i, line_new in enumerate(new_content_lines):\n                patch_with_lines_str += f\"{start2 + i} {line_new}\\n\"\n        if is_minus_lines:\n            patch_with_lines_str = patch_with_lines_str.rstrip() + '\\n__old hunk__\\n'\n            for line_old in old_content_lines:\n                patch_with_lines_str += f\"{line_old}\\n\"\n\n    return patch_with_lines_str.rstrip()\n\n\ndef extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side, remove_trailing_chars: bool = True) -> tuple[str, str]:\n    try:\n        patch_with_lines_str = f\"\\n\\n## File: '{file_name.strip()}'\\n\\n\"\n        selected_lines = \"\"\n        patch_lines = patch.splitlines()\n        RE_HUNK_HEADER = re.compile(\n            r\"^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? 
@@[ ]?(.*)\")\n        match = None\n        start1, size1, start2, size2 = -1, -1, -1, -1\n        skip_hunk = False\n        selected_lines_num = 0\n        for line in patch_lines:\n            if 'no newline at end of file' in line.lower():\n                continue\n\n            if line.startswith('@@'):\n                skip_hunk = False\n                selected_lines_num = 0\n                header_line = line\n\n                match = RE_HUNK_HEADER.match(line)\n\n                section_header, size1, size2, start1, start2 = extract_hunk_headers(match)\n\n                # check if line range is in this hunk\n                if side.lower() == 'left':\n                    # check if line range is in this hunk\n                    if not (start1 <= line_start <= start1 + size1):\n                        skip_hunk = True\n                        continue\n                elif side.lower() == 'right':\n                    if not (start2 <= line_start <= start2 + size2):\n                        skip_hunk = True\n                        continue\n                patch_with_lines_str += f'\\n{header_line}\\n'\n\n            elif not skip_hunk:\n                if side.lower() == 'right' and line_start <= start2 + selected_lines_num <= line_end:\n                    selected_lines += line + '\\n'\n                if side.lower() == 'left' and start1 <= selected_lines_num + start1 <= line_end:\n                    selected_lines += line + '\\n'\n                patch_with_lines_str += line + '\\n'\n                if not line.startswith('-'): # currently we don't support /ask line for deleted lines\n                    selected_lines_num += 1\n    except Exception as e:\n        get_logger().error(f\"Failed to extract hunk lines from patch: {e}\", artifact={\"traceback\": traceback.format_exc()})\n        return \"\", \"\"\n\n    if remove_trailing_chars:\n        patch_with_lines_str = patch_with_lines_str.rstrip()\n        selected_lines = 
selected_lines.rstrip()\n\n    return patch_with_lines_str, selected_lines\n"
  },
  {
    "path": "pr_agent/algo/language_handler.py",
    "content": "# Language Selection, source: https://github.com/bigcode-project/bigcode-dataset/blob/main/language_selection/programming-languages-to-file-extensions.json  # noqa E501\nfrom typing import Dict\n\nfrom pr_agent.config_loader import get_settings\n\n\ndef filter_bad_extensions(files):\n    # Bad Extensions, source: https://github.com/EleutherAI/github-downloader/blob/345e7c4cbb9e0dc8a0615fd995a08bf9d73b3fe6/download_repo_text.py  # noqa: E501\n    bad_extensions = get_settings().bad_extensions.default\n    if get_settings().config.use_extra_bad_extensions:\n        bad_extensions += get_settings().bad_extensions.extra\n    return [f for f in files if f.filename is not None and is_valid_file(f.filename, bad_extensions)]\n\n\ndef is_valid_file(filename:str, bad_extensions=None) -> bool:\n    if not filename:\n        return False\n    if not bad_extensions:\n        bad_extensions = get_settings().bad_extensions.default\n        if get_settings().config.use_extra_bad_extensions:\n            bad_extensions += get_settings().bad_extensions.extra\n\n    auto_generated_files = ['package-lock.json', 'yarn.lock', 'composer.lock', 'Gemfile.lock', 'poetry.lock']\n    for forbidden_file in auto_generated_files:\n        if filename.endswith(forbidden_file):\n            return False\n\n    return filename.split('.')[-1] not in bad_extensions\n\n\ndef sort_files_by_main_languages(languages: Dict, files: list):\n    \"\"\"\n    Sort files by their main language, put the files that are in the main language first and the rest files after\n    \"\"\"\n    # sort languages by their size\n    languages_sorted_list = [k for k, v in sorted(languages.items(), key=lambda item: item[1], reverse=True)]\n    # languages_sorted = sorted(languages, key=lambda x: x[1], reverse=True)\n    # get all extensions for the languages\n    main_extensions = []\n    language_extension_map_org = get_settings().language_extension_map_org\n    language_extension_map = {k.lower(): v for k, v 
in language_extension_map_org.items()}\n    for language in languages_sorted_list:\n        if language.lower() in language_extension_map:\n            main_extensions.append(language_extension_map[language.lower()])\n        else:\n            main_extensions.append([])\n\n    # filter out files bad extensions\n    files_filtered = filter_bad_extensions(files)\n\n    # sort files by their extension, put the files that are in the main extension first\n    # and the rest files after, map languages_sorted to their respective files\n    files_sorted = []\n    rest_files = {}\n\n    # if no languages detected, put all files in the \"Other\" category\n    if not languages:\n        files_sorted = [({\"language\": \"Other\", \"files\": list(files_filtered)})]\n        return files_sorted\n\n    main_extensions_flat = []\n    for ext in main_extensions:\n        main_extensions_flat.extend(ext)\n\n    for extensions, lang in zip(main_extensions, languages_sorted_list):  # noqa: B905\n        tmp = []\n        for file in files_filtered:\n            extension_str = f\".{file.filename.split('.')[-1]}\"\n            if extension_str in extensions:\n                tmp.append(file)\n            else:\n                if (file.filename not in rest_files) and (extension_str not in main_extensions_flat):\n                    rest_files[file.filename] = file\n        if len(tmp) > 0:\n            files_sorted.append({\"language\": lang, \"files\": tmp})\n    files_sorted.append({\"language\": \"Other\", \"files\": list(rest_files.values())})\n    return files_sorted\n"
  },
  {
    "path": "pr_agent/algo/pr_processing.py",
    "content": "from __future__ import annotations\n\nimport traceback\nfrom typing import Callable, List, Tuple\n\nfrom github import RateLimitExceededException\n\nfrom pr_agent.algo.file_filter import filter_ignored\nfrom pr_agent.algo.git_patch_processing import (\n    extend_patch, handle_patch_deletions,\n    decouple_and_convert_to_hunks_with_lines_numbers)\nfrom pr_agent.algo.language_handler import sort_files_by_main_languages\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.types import EDIT_TYPE, FilePatchInfo\nfrom pr_agent.algo.utils import ModelType, clip_tokens, get_max_tokens, get_model\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers.git_provider import GitProvider\nfrom pr_agent.log import get_logger\n\nDELETED_FILES_ = \"Deleted files:\\n\"\n\nMORE_MODIFIED_FILES_ = \"Additional modified files (insufficient token budget to process):\\n\"\n\nADDED_FILES_ = \"Additional added files (insufficient token budget to process):\\n\"\n\nOUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1500\nOUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 1000\nMAX_EXTRA_LINES = 10\n\n\ndef cap_and_log_extra_lines(value, direction) -> int:\n    if value > MAX_EXTRA_LINES:\n        get_logger().warning(f\"patch_extra_lines_{direction} was {value}, capping to {MAX_EXTRA_LINES}\")\n        return MAX_EXTRA_LINES\n    return value\n\n\ndef get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler,\n                model: str,\n                add_line_numbers_to_hunks: bool = False,\n                disable_extra_lines: bool = False,\n                large_pr_handling=False,\n                return_remaining_files=False):\n    if disable_extra_lines:\n        PATCH_EXTRA_LINES_BEFORE = 0\n        PATCH_EXTRA_LINES_AFTER = 0\n    else:\n        PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before\n        PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after\n        PATCH_EXTRA_LINES_BEFORE = 
cap_and_log_extra_lines(PATCH_EXTRA_LINES_BEFORE, \"before\")\n        PATCH_EXTRA_LINES_AFTER = cap_and_log_extra_lines(PATCH_EXTRA_LINES_AFTER, \"after\")\n\n    try:\n        diff_files = git_provider.get_diff_files()\n    except RateLimitExceededException as e:\n        get_logger().error(f\"Rate limit exceeded for git provider API. original message {e}\")\n        raise\n\n    # get pr languages\n    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)\n    if pr_languages:\n        try:\n            get_logger().info(f\"PR main language: {pr_languages[0]['language']}\")\n        except Exception as e:\n            pass\n\n    # generate a standard diff string, with patch extension\n    patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(\n        pr_languages, token_handler, add_line_numbers_to_hunks,\n        patch_extra_lines_before=PATCH_EXTRA_LINES_BEFORE, patch_extra_lines_after=PATCH_EXTRA_LINES_AFTER)\n\n    # if we are under the limit, return the full diff\n    if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):\n        get_logger().info(f\"Tokens: {total_tokens}, total tokens under limit: {get_max_tokens(model)}, \"\n                          f\"returning full diff.\")\n        return \"\\n\".join(patches_extended)\n\n    # if we are over the limit, start pruning (If we got here, we will not extend the patches with extra lines)\n    get_logger().info(f\"Tokens: {total_tokens}, total tokens over limit: {get_max_tokens(model)}, \"\n                      f\"pruning diff.\")\n    patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list = \\\n        pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks, large_pr_handling)\n\n    if large_pr_handling and len(patches_compressed_list) > 1:\n        get_logger().info(f\"Large PR handling mode, and found 
{len(patches_compressed_list)} patches with original diff.\")\n        return \"\" # return empty string, as we want to generate multiple patches with a different prompt\n\n    # return the first patch\n    patches_compressed = patches_compressed_list[0]\n    total_tokens_new = total_tokens_list[0]\n    files_in_patch = files_in_patches_list[0]\n\n    # Insert additional information about added, modified, and deleted files if there is enough space\n    max_tokens = get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD\n    curr_token = total_tokens_new  # == token_handler.count_tokens(final_diff)+token_handler.prompt_tokens\n    final_diff = \"\\n\".join(patches_compressed)\n    delta_tokens = 10\n    added_list_str = modified_list_str = deleted_list_str = \"\"\n    unprocessed_files = []\n    # generate the added, modified, and deleted files lists\n    if (max_tokens - curr_token) > delta_tokens:\n        for filename, file_values in file_dict.items():\n            if filename in files_in_patch:\n                continue\n            if file_values['edit_type'] == EDIT_TYPE.ADDED:\n                unprocessed_files.append(filename)\n                if not added_list_str:\n                    added_list_str = ADDED_FILES_ + f\"\\n{filename}\"\n                else:\n                    added_list_str = added_list_str + f\"\\n{filename}\"\n            elif file_values['edit_type'] in [EDIT_TYPE.MODIFIED, EDIT_TYPE.RENAMED]:\n                unprocessed_files.append(filename)\n                if not modified_list_str:\n                    modified_list_str = MORE_MODIFIED_FILES_ + f\"\\n{filename}\"\n                else:\n                    modified_list_str = modified_list_str + f\"\\n{filename}\"\n            elif file_values['edit_type'] == EDIT_TYPE.DELETED:\n                # unprocessed_files.append(filename) # not needed here, because the file was deleted, so no need to process it\n                if not deleted_list_str:\n                    
deleted_list_str = DELETED_FILES_ + f\"\\n{filename}\"\n                else:\n                    deleted_list_str = deleted_list_str + f\"\\n{filename}\"\n\n    # prune the added, modified, and deleted files lists, and add them to the final diff\n    added_list_str = clip_tokens(added_list_str, max_tokens - curr_token)\n    if added_list_str:\n        final_diff = final_diff + \"\\n\\n\" + added_list_str\n        curr_token += token_handler.count_tokens(added_list_str) + 2\n    modified_list_str = clip_tokens(modified_list_str, max_tokens - curr_token)\n    if modified_list_str:\n        final_diff = final_diff + \"\\n\\n\" + modified_list_str\n        curr_token += token_handler.count_tokens(modified_list_str) + 2\n    deleted_list_str = clip_tokens(deleted_list_str, max_tokens - curr_token)\n    if deleted_list_str:\n        final_diff = final_diff + \"\\n\\n\" + deleted_list_str\n\n    get_logger().debug(f\"After pruning, added_list_str: {added_list_str}, modified_list_str: {modified_list_str}, \"\n                       f\"deleted_list_str: {deleted_list_str}\")\n    if not return_remaining_files:\n        return final_diff\n    else:\n        return final_diff, remaining_files_list\n\n\ndef get_pr_diff_multiple_patchs(git_provider: GitProvider, token_handler: TokenHandler, model: str,\n                add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False):\n    try:\n        diff_files = git_provider.get_diff_files()\n    except RateLimitExceededException as e:\n        get_logger().error(f\"Rate limit exceeded for git provider API. 
original message {e}\")\n        raise\n\n    # get pr languages\n    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)\n    if pr_languages:\n        try:\n            get_logger().info(f\"PR main language: {pr_languages[0]['language']}\")\n        except Exception as e:\n            pass\n\n    patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list = \\\n        pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks, large_pr_handling=True)\n\n    return patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list\n\n\ndef pr_generate_extended_diff(pr_languages: list,\n                              token_handler: TokenHandler,\n                              add_line_numbers_to_hunks: bool,\n                              patch_extra_lines_before: int = 0,\n                              patch_extra_lines_after: int = 0) -> Tuple[list, int, list]:\n    total_tokens = token_handler.prompt_tokens  # initial tokens\n    patches_extended = []\n    patches_extended_tokens = []\n    for lang in pr_languages:\n        for file in lang['files']:\n            original_file_content_str = file.base_file\n            new_file_content_str = file.head_file\n            patch = file.patch\n            if not patch:\n                continue\n\n            # extend each patch with extra lines of context\n            extended_patch = extend_patch(original_file_content_str, patch,\n                                          patch_extra_lines_before, patch_extra_lines_after, file.filename,\n                                          new_file_str=new_file_content_str)\n            if not extended_patch:\n                get_logger().warning(f\"Failed to extend patch for file: {file.filename}\")\n                continue\n\n            if add_line_numbers_to_hunks:\n                
full_extended_patch = decouple_and_convert_to_hunks_with_lines_numbers(extended_patch, file)\n            else:\n                extended_patch = extended_patch.replace('\\n@@ ', '\\n\\n@@ ') # add extra line before each hunk\n                full_extended_patch = f\"\\n\\n## File: '{file.filename.strip()}'\\n\\n{extended_patch.strip()}\\n\"\n\n            # add AI-summary metadata to the patch\n            if file.ai_file_summary and get_settings().get(\"config.enable_ai_metadata\", False):\n                full_extended_patch = add_ai_summary_top_patch(file, full_extended_patch)\n\n            patch_tokens = token_handler.count_tokens(full_extended_patch)\n            file.tokens = patch_tokens\n            total_tokens += patch_tokens\n            patches_extended_tokens.append(patch_tokens)\n            patches_extended.append(full_extended_patch)\n\n    return patches_extended, total_tokens, patches_extended_tokens\n\n\ndef pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, model: str,\n                                convert_hunks_to_line_numbers: bool,\n                                large_pr_handling: bool) -> Tuple[list, list, list, list, dict, list]:\n    deleted_files_list = []\n\n    # sort each one of the languages in top_langs by the number of tokens in the diff\n    sorted_files = []\n    for lang in top_langs:\n        sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True))\n\n    # generate patches for each file, and count tokens\n    file_dict = {}\n    for file in sorted_files:\n        original_file_content_str = file.base_file\n        new_file_content_str = file.head_file\n        patch = file.patch\n        if not patch:\n            continue\n\n        # removing delete-only hunks\n        patch = handle_patch_deletions(patch, original_file_content_str,\n                                       new_file_content_str, file.filename, file.edit_type)\n        if patch is None:\n            if 
file.filename not in deleted_files_list:\n                deleted_files_list.append(file.filename)\n            continue\n\n        if convert_hunks_to_line_numbers:\n            patch = decouple_and_convert_to_hunks_with_lines_numbers(patch, file)\n\n        ## add AI-summary metadata to the patch (disabled, since we are in the compressed diff)\n        # if file.ai_file_summary and get_settings().config.get('config.is_auto_command', False):\n        #     patch = add_ai_summary_top_patch(file, patch)\n\n        new_patch_tokens = token_handler.count_tokens(patch)\n        file_dict[file.filename] = {'patch': patch, 'tokens': new_patch_tokens, 'edit_type': file.edit_type}\n\n    max_tokens_model = get_max_tokens(model)\n\n    # first iteration\n    files_in_patches_list = []\n    remaining_files_list =  [file.filename for file in sorted_files]\n    patches_list =[]\n    total_tokens_list = []\n    total_tokens, patches, remaining_files_list, files_in_patch_list = generate_full_patch(convert_hunks_to_line_numbers, file_dict,\n                                       max_tokens_model, remaining_files_list, token_handler)\n    patches_list.append(patches)\n    total_tokens_list.append(total_tokens)\n    files_in_patches_list.append(files_in_patch_list)\n\n    # additional iterations (if needed)\n    if large_pr_handling:\n        NUMBER_OF_ALLOWED_ITERATIONS = get_settings().pr_description.max_ai_calls - 1 # one more call is to summarize\n        for i in range(NUMBER_OF_ALLOWED_ITERATIONS-1):\n            if remaining_files_list:\n                total_tokens, patches, remaining_files_list, files_in_patch_list = generate_full_patch(convert_hunks_to_line_numbers,\n                                                                                 file_dict,\n                                                                                  max_tokens_model,\n                                                                                  remaining_files_list, 
token_handler)\n                if patches:\n                    patches_list.append(patches)\n                    total_tokens_list.append(total_tokens)\n                    files_in_patches_list.append(files_in_patch_list)\n            else:\n                break\n\n    return patches_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list\n\n\ndef generate_full_patch(convert_hunks_to_line_numbers, file_dict, max_tokens_model,remaining_files_list_prev, token_handler):\n    total_tokens = token_handler.prompt_tokens # initial tokens\n    patches = []\n    remaining_files_list_new = []\n    files_in_patch_list = []\n    for filename, data in file_dict.items():\n        if filename not in remaining_files_list_prev:\n            continue\n\n        patch = data['patch']\n        new_patch_tokens = data['tokens']\n        edit_type = data['edit_type']\n\n        # Hard Stop, no more tokens\n        if total_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:\n            get_logger().warning(f\"File was fully skipped, no more tokens: {filename}.\")\n            continue\n\n        # If the patch is too large, just show the file name\n        if total_tokens + new_patch_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:\n            # Current logic is to skip the patch if it's too large\n            # TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens\n            #  until we meet the requirements\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().warning(f\"Patch too large, skipping it: '{filename}'\")\n            remaining_files_list_new.append(filename)\n            continue\n\n        if patch:\n            if not convert_hunks_to_line_numbers:\n                patch_final = f\"\\n\\n## File: '{filename.strip()}'\\n\\n{patch.strip()}\\n\"\n            else:\n                patch_final = \"\\n\\n\" + 
patch.strip()\n            patches.append(patch_final)\n            total_tokens += token_handler.count_tokens(patch_final)\n            files_in_patch_list.append(filename)\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Tokens: {total_tokens}, last filename: {filename}\")\n    return total_tokens, patches, remaining_files_list_new, files_in_patch_list\n\n\nasync def retry_with_fallback_models(f: Callable, model_type: ModelType = ModelType.REGULAR):\n    all_models = _get_all_models(model_type)\n    all_deployments = _get_all_deployments(all_models)\n    # try each (model, deployment_id) pair until one is successful, otherwise raise exception\n    for i, (model, deployment_id) in enumerate(zip(all_models, all_deployments)):\n        try:\n            get_logger().debug(\n                f\"Generating prediction with {model}\"\n                f\"{(' from deployment ' + deployment_id) if deployment_id else ''}\"\n            )\n            get_settings().set(\"openai.deployment_id\", deployment_id)\n            return await f(model)\n        except Exception as e:\n            get_logger().warning(\n                f\"Failed to generate prediction with {model}\",\n                artifact={\"error\": e},\n            )\n            if i == len(all_models) - 1:  # If it's the last iteration\n                raise Exception(f\"Failed to generate prediction with any model of {all_models}\") from e\n\n\ndef _get_all_models(model_type: ModelType = ModelType.REGULAR) -> List[str]:\n    if model_type == ModelType.WEAK:\n        model = get_model('model_weak')\n    elif model_type == ModelType.REASONING:\n        model = get_model('model_reasoning')\n    elif model_type == ModelType.REGULAR:\n        model = get_settings().config.model\n    else:\n        model = get_settings().config.model\n    fallback_models = get_settings().config.fallback_models\n    if not isinstance(fallback_models, list):\n        fallback_models = 
[m.strip() for m in fallback_models.split(\",\")]\n    all_models = [model] + fallback_models\n    return all_models\n\n\ndef _get_all_deployments(all_models: List[str]) -> List[str]:\n    deployment_id = get_settings().get(\"openai.deployment_id\", None)\n    fallback_deployments = get_settings().get(\"openai.fallback_deployments\", [])\n    if not isinstance(fallback_deployments, list) and fallback_deployments:\n        fallback_deployments = [d.strip() for d in fallback_deployments.split(\",\")]\n    if fallback_deployments:\n        all_deployments = [deployment_id] + fallback_deployments\n        if len(all_deployments) < len(all_models):\n            raise ValueError(f\"The number of deployments ({len(all_deployments)}) \"\n                             f\"is less than the number of models ({len(all_models)})\")\n    else:\n        all_deployments = [deployment_id] * len(all_models)\n    return all_deployments\n\n\ndef get_pr_multi_diffs(git_provider: GitProvider,\n                       token_handler: TokenHandler,\n                       model: str,\n                       max_calls: int = 5,\n                       add_line_numbers: bool = True) -> List[str]:\n    \"\"\"\n    Retrieves the diff files from a Git provider, sorts them by main language, and generates patches for each file.\n    The patches are split into multiple groups based on the maximum number of tokens allowed for the given model.\n\n    Args:\n        git_provider (GitProvider): An object that provides access to Git provider APIs.\n        token_handler (TokenHandler): An object that handles tokens in the context of a pull request.\n        model (str): The name of the model.\n        max_calls (int, optional): The maximum number of calls to retrieve diff files. 
Defaults to 5.\n\n    Returns:\n        List[str]: A list of final diff strings, split into multiple groups based on the maximum number of tokens allowed for the given model.\n\n    Raises:\n        RateLimitExceededException: If the rate limit for the Git provider API is exceeded.\n    \"\"\"\n    try:\n        diff_files = git_provider.get_diff_files()\n    except RateLimitExceededException as e:\n        get_logger().error(f\"Rate limit exceeded for git provider API. original message {e}\")\n        raise\n\n    # Sort files by main language\n    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)\n\n    # Get the maximum number of extra lines before and after the patch\n    PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before\n    PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after\n    PATCH_EXTRA_LINES_BEFORE = cap_and_log_extra_lines(PATCH_EXTRA_LINES_BEFORE, \"before\")\n    PATCH_EXTRA_LINES_AFTER = cap_and_log_extra_lines(PATCH_EXTRA_LINES_AFTER, \"after\")\n\n    # try first a single run with standard diff string, with patch extension, and no deletions\n    patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(\n        pr_languages, token_handler,\n        add_line_numbers_to_hunks=add_line_numbers,\n        patch_extra_lines_before=PATCH_EXTRA_LINES_BEFORE,\n        patch_extra_lines_after=PATCH_EXTRA_LINES_AFTER)\n\n    # if we are under the limit, return the full diff\n    if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):\n        return [\"\\n\".join(patches_extended)] if patches_extended else []\n\n    # Sort files within each language group by tokens in descending order\n    sorted_files = []\n    for lang in pr_languages:\n        sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True))\n\n    patches = []\n    final_diff_list = []\n    total_tokens = token_handler.prompt_tokens\n    
call_number = 1\n    for file in sorted_files:\n        if call_number > max_calls:\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Reached max calls ({max_calls})\")\n            break\n\n        original_file_content_str = file.base_file\n        new_file_content_str = file.head_file\n        patch = file.patch\n        if not patch:\n            continue\n\n        # Remove delete-only hunks\n        patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename, file.edit_type)\n        if patch is None:\n            continue\n\n        # Add line numbers and metadata to the patch\n        if add_line_numbers:\n            patch = decouple_and_convert_to_hunks_with_lines_numbers(patch, file)\n        else:\n            patch = f\"\\n\\n## File: '{file.filename.strip()}'\\n\\n{patch.strip()}\\n\"\n\n        # add AI-summary metadata to the patch\n        if file.ai_file_summary and get_settings().get(\"config.enable_ai_metadata\", False):\n            patch = add_ai_summary_top_patch(file, patch)\n        new_patch_tokens = token_handler.count_tokens(patch)\n\n        if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(\n                model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:\n            if get_settings().config.get('large_patch_policy', 'skip') == 'skip':\n                get_logger().warning(f\"Patch too large, skipping: {file.filename}\")\n                continue\n            elif get_settings().config.get('large_patch_policy') == 'clip':\n                delta_tokens = get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD - token_handler.prompt_tokens\n                patch_clipped = clip_tokens(patch, delta_tokens, delete_last_line=True, num_input_tokens=new_patch_tokens)\n                new_patch_tokens = token_handler.count_tokens(patch_clipped)\n                if patch_clipped and (token_handler.prompt_tokens + 
new_patch_tokens) > get_max_tokens(\n                        model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:\n                    get_logger().warning(f\"Patch too large, skipping: {file.filename}\")\n                    continue\n                else:\n                    get_logger().info(f\"Clipped large patch for file: {file.filename}\")\n                    patch = patch_clipped\n            else:\n                get_logger().warning(f\"Patch too large, skipping: {file.filename}\")\n                continue\n\n        if patch and (total_tokens + new_patch_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD):\n            final_diff = \"\\n\".join(patches)\n            final_diff_list.append(final_diff)\n            patches = []\n            total_tokens = token_handler.prompt_tokens\n            call_number += 1\n            if call_number > max_calls: # avoid creating new patches\n                if get_settings().config.verbosity_level >= 2:\n                    get_logger().info(f\"Reached max calls ({max_calls})\")\n                break\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Call number: {call_number}\")\n\n        if patch:\n            patches.append(patch)\n            total_tokens += new_patch_tokens\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Tokens: {total_tokens}, last filename: {file.filename}\")\n\n    # Add the last chunk\n    if patches:\n        final_diff = \"\\n\".join(patches)\n        final_diff_list.append(final_diff.strip())\n\n    return final_diff_list\n\n\ndef add_ai_metadata_to_diff_files(git_provider, pr_description_files):\n    \"\"\"\n    Adds AI metadata to the diff files based on the PR description files (FilePatchInfo.ai_file_summary).\n    \"\"\"\n    try:\n        if not pr_description_files:\n            get_logger().warning(f\"PR description files are empty.\")\n            return\n        
available_files = {pr_file['full_file_name'].strip(): pr_file for pr_file in pr_description_files}\n        diff_files = git_provider.get_diff_files()\n        found_any_match = False\n        for file in diff_files:\n            filename = file.filename.strip()\n            if filename in available_files:\n                file.ai_file_summary = available_files[filename]\n                found_any_match = True\n        if not found_any_match:\n            get_logger().error(f\"Failed to find any matching files between PR description and diff files.\",\n                               artifact={\"pr_description_files\": pr_description_files})\n    except Exception as e:\n        get_logger().error(f\"Failed to add AI metadata to diff files: {e}\",\n                           artifact={\"traceback\": traceback.format_exc()})\n\n\ndef add_ai_summary_top_patch(file, full_extended_patch):\n    try:\n        # below every instance of '## File: ...' in the patch, add the ai-summary metadata\n        full_extended_patch_lines = full_extended_patch.split(\"\\n\")\n        for i, line in enumerate(full_extended_patch_lines):\n            if line.startswith(\"## File:\") or line.startswith(\"## file:\"):\n                full_extended_patch_lines.insert(i + 1,\n                                                 f\"### AI-generated changes summary:\\n{file.ai_file_summary['long_summary']}\")\n                full_extended_patch = \"\\n\".join(full_extended_patch_lines)\n                return full_extended_patch\n\n        # if no '## File: ...' was found\n        return full_extended_patch\n    except Exception as e:\n        get_logger().error(f\"Failed to add AI summary to the top of the patch: {e}\",\n                           artifact={\"traceback\": traceback.format_exc()})\n        return full_extended_patch\n"
  },
  {
    "path": "pr_agent/algo/token_handler.py",
    "content": "from threading import Lock\nfrom math import ceil\nimport re\n\nfrom jinja2 import Environment, StrictUndefined\nfrom tiktoken import encoding_for_model, get_encoding\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger\n\n\nclass ModelTypeValidator:\n    @staticmethod\n    def is_openai_model(model_name: str) -> bool:\n        return 'gpt' in model_name or re.match(r\"^o[1-9](-mini|-preview)?$\", model_name)\n    \n    @staticmethod\n    def is_anthropic_model(model_name: str) -> bool:\n        return 'claude' in model_name\n\n\nclass TokenEncoder:\n    _encoder_instance = None\n    _model = None\n    _lock = Lock()  # Create a lock object\n\n    @classmethod\n    def get_token_encoder(cls):\n        model = get_settings().config.model\n        if cls._encoder_instance is None or model != cls._model:  # Check without acquiring the lock for performance\n            with cls._lock:  # Lock acquisition to ensure thread safety\n                if cls._encoder_instance is None or model != cls._model:\n                    cls._model = model\n                    try:\n                        cls._encoder_instance = encoding_for_model(cls._model) if \"gpt\" in cls._model else get_encoding(\n                            \"o200k_base\")\n                    except:\n                        cls._encoder_instance = get_encoding(\"o200k_base\")\n        return cls._encoder_instance\n\n\nclass TokenHandler:\n    \"\"\"\n    A class for handling tokens in the context of a pull request.\n\n    Attributes:\n    - encoder: An object of the encoding_for_model class from the tiktoken module. 
Used to encode strings and count the\n      number of tokens in them.\n    - limit: The maximum number of tokens allowed for the given model, as defined in the MAX_TOKENS dictionary in the\n      pr_agent.algo module.\n    - prompt_tokens: The number of tokens in the system and user strings, as calculated by the _get_system_user_tokens\n      method.\n    \"\"\"\n\n    # Constants\n    CLAUDE_MODEL = \"claude-3-7-sonnet-20250219\"\n    CLAUDE_MAX_CONTENT_SIZE = 9_000_000 # Maximum allowed content size (9MB) for Claude API\n\n    def __init__(self, pr=None, vars: dict = {}, system=\"\", user=\"\"):\n        \"\"\"\n        Initializes the TokenHandler object.\n\n        Args:\n        - pr: The pull request object.\n        - vars: A dictionary of variables.\n        - system: The system string.\n        - user: The user string.\n        \"\"\"\n        self.encoder = TokenEncoder.get_token_encoder()\n        \n        if pr is not None:\n            self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user)\n\n    def _get_system_user_tokens(self, pr, encoder, vars: dict, system, user):\n        \"\"\"\n        Calculates the number of tokens in the system and user strings.\n\n        Args:\n        - pr: The pull request object.\n        - encoder: An object of the encoding_for_model class from the tiktoken module.\n        - vars: A dictionary of variables.\n        - system: The system string.\n        - user: The user string.\n\n        Returns:\n        The sum of the number of tokens in the system and user strings.\n        \"\"\"\n        try:\n            environment = Environment(undefined=StrictUndefined)\n            system_prompt = environment.from_string(system).render(vars)\n            user_prompt = environment.from_string(user).render(vars)\n            system_prompt_tokens = len(encoder.encode(system_prompt))\n            user_prompt_tokens = len(encoder.encode(user_prompt))\n            return system_prompt_tokens + 
user_prompt_tokens\n        except Exception as e:\n            get_logger().error(f\"Error in _get_system_user_tokens: {e}\")\n            return 0\n\n    def _calc_claude_tokens(self, patch: str) -> int:\n        try:\n            import anthropic\n            from pr_agent.algo import MAX_TOKENS\n            \n            client = anthropic.Anthropic(api_key=get_settings(use_context=False).get('anthropic.key'))\n            max_tokens = MAX_TOKENS[get_settings().config.model]\n\n            if len(patch.encode('utf-8')) > self.CLAUDE_MAX_CONTENT_SIZE:\n                get_logger().warning(\n                    \"Content too large for Anthropic token counting API, falling back to local tokenizer\"\n                )\n                return max_tokens\n\n            response = client.messages.count_tokens(\n                model=self.CLAUDE_MODEL,\n                system=\"system\",\n                messages=[{\n                    \"role\": \"user\",\n                    \"content\": patch\n                }],\n            )\n            return response.input_tokens\n\n        except Exception as e:\n            get_logger().error(f\"Error in Anthropic token counting: {e}\")\n            return max_tokens\n\n    def _apply_estimation_factor(self, model_name: str, default_estimate: int) -> int:\n        factor = 1 + get_settings().get('config.model_token_count_estimate_factor', 0)\n        get_logger().warning(f\"{model_name}'s token count cannot be accurately estimated. 
Using factor of {factor}\")\n        \n        return ceil(factor * default_estimate)\n\n    def _get_token_count_by_model_type(self, patch: str, default_estimate: int) -> int:\n        \"\"\"\n        Get token count based on model type.\n\n        Args:\n            patch: The text to count tokens for.\n            default_estimate: The default token count estimate.\n\n        Returns:\n            int: The calculated token count.\n        \"\"\"\n        model_name = get_settings().config.model.lower()\n        \n        if ModelTypeValidator.is_openai_model(model_name) and get_settings(use_context=False).get('openai.key'):\n            return default_estimate\n\n        if ModelTypeValidator.is_anthropic_model(model_name) and get_settings(use_context=False).get('anthropic.key'):\n            return self._calc_claude_tokens(patch)\n        \n        return self._apply_estimation_factor(model_name, default_estimate)\n    \n    def count_tokens(self, patch: str, force_accurate: bool = False) -> int:\n        \"\"\"\n        Counts the number of tokens in a given patch string.\n\n        Args:\n        - patch: The patch string.\n        - force_accurate: If True, uses a more precise calculation method.\n\n        Returns:\n        The number of tokens in the patch string.\n        \"\"\"\n        encoder_estimate = len(self.encoder.encode(patch, disallowed_special=()))\n\n        # If an estimate is enough (for example, in cases where the maximal allowed tokens is way below the known limits), return it.\n        if not force_accurate:\n            return encoder_estimate\n\n        return self._get_token_count_by_model_type(patch, encoder_estimate)\n"
  },
  {
    "path": "pr_agent/algo/types.py",
    "content": "from dataclasses import dataclass\nfrom enum import Enum\nfrom typing import Optional\n\n\nclass EDIT_TYPE(Enum):\n    ADDED = 1\n    DELETED = 2\n    MODIFIED = 3\n    RENAMED = 4\n    UNKNOWN = 5\n\n\n@dataclass\nclass FilePatchInfo:\n    base_file: str\n    head_file: str\n    patch: str\n    filename: str\n    tokens: int = -1\n    edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN\n    old_filename: str = None\n    num_plus_lines: int = -1\n    num_minus_lines: int = -1\n    language: Optional[str] = None\n    ai_file_summary: str = None\n"
  },
  {
    "path": "pr_agent/algo/utils.py",
    "content": "from __future__ import annotations\n\nimport ast\nimport copy\nimport difflib\nimport hashlib\nimport html\nimport json\nimport os\nimport re\nimport sys\nimport textwrap\nimport time\nimport traceback\nfrom datetime import datetime\nfrom enum import Enum\nfrom importlib.metadata import PackageNotFoundError, version\nfrom typing import Any, List, Tuple, TypedDict\n\nimport html2text\nimport requests\nimport yaml\nfrom pydantic import BaseModel\nfrom starlette_context import context\n\nfrom pr_agent.algo import MAX_TOKENS\nfrom pr_agent.algo.git_patch_processing import extract_hunk_lines_from_patch\nfrom pr_agent.algo.token_handler import TokenEncoder\nfrom pr_agent.algo.types import FilePatchInfo\nfrom pr_agent.config_loader import get_settings, global_settings\nfrom pr_agent.log import get_logger\n\n\ndef get_model(model_type: str = \"model_weak\") -> str:\n    if model_type == \"model_weak\" and get_settings().get(\"config.model_weak\"):\n        return get_settings().config.model_weak\n    elif model_type == \"model_reasoning\" and get_settings().get(\"config.model_reasoning\"):\n        return get_settings().config.model_reasoning\n    return get_settings().config.model\n\n\nclass Range(BaseModel):\n    line_start: int  # should be 0-indexed\n    line_end: int\n    column_start: int = -1\n    column_end: int = -1\n\n\nclass ModelType(str, Enum):\n    REGULAR = \"regular\"\n    WEAK = \"weak\"\n    REASONING = \"reasoning\"\n\n\nclass TodoItem(TypedDict):\n    relevant_file: str\n    line_range: Tuple[int, int]\n    content: str\n\n\nclass PRReviewHeader(str, Enum):\n    REGULAR = \"## PR Reviewer Guide\"\n    INCREMENTAL = \"## Incremental PR Reviewer Guide\"\n\n\nclass ReasoningEffort(str, Enum):\n    XHIGH = \"xhigh\"\n    HIGH = \"high\"\n    MEDIUM = \"medium\"\n    LOW = \"low\"\n    MINIMAL = \"minimal\"\n    NONE = \"none\"\n\n\nclass PRDescriptionHeader(str, Enum):\n    DIAGRAM_WALKTHROUGH = \"Diagram Walkthrough\"\n    FILE_WALKTHROUGH 
= \"File Walkthrough\"\n\n\ndef get_setting(key: str) -> Any:\n    try:\n        key = key.upper()\n        return context.get(\"settings\", global_settings).get(key, global_settings.get(key, None))\n    except Exception:\n        return global_settings.get(key, None)\n\n\ndef emphasize_header(text: str, only_markdown=False, reference_link=None) -> str:\n    try:\n        # Finding the position of the first occurrence of \": \"\n        colon_position = text.find(\": \")\n\n        # Splitting the string and wrapping the first part in <strong> tags\n        if colon_position != -1:\n            # Everything before the colon (inclusive) is wrapped in <strong> tags\n            if only_markdown:\n                if reference_link:\n                    transformed_string = f\"[**{text[:colon_position + 1]}**]({reference_link})\\n\" + text[colon_position + 1:]\n                else:\n                    transformed_string = f\"**{text[:colon_position + 1]}**\\n\" + text[colon_position + 1:]\n            else:\n                if reference_link:\n                    transformed_string = f\"<strong><a href='{reference_link}'>{text[:colon_position + 1]}</a></strong><br>\" + text[colon_position + 1:]\n                else:\n                    transformed_string = \"<strong>\" + text[:colon_position + 1] + \"</strong>\" +'<br>' + text[colon_position + 1:]\n        else:\n            # If there's no \": \", return the original string\n            transformed_string = text\n\n        return transformed_string\n    except Exception as e:\n        get_logger().exception(f\"Failed to emphasize header: {e}\")\n        return text\n\n\ndef unique_strings(input_list: List[str]) -> List[str]:\n    if not input_list or not isinstance(input_list, list):\n        return input_list\n    seen = set()\n    unique_list = []\n    for item in input_list:\n        if item not in seen:\n            unique_list.append(item)\n            seen.add(item)\n    return unique_list\n\n\ndef 
convert_to_markdown_v2(output_data: dict,\n                           gfm_supported: bool = True,\n                           incremental_review=None,\n                           git_provider=None,\n                           files=None) -> str:\n    \"\"\"\n    Convert a dictionary of data into markdown format.\n    Args:\n        output_data (dict): A dictionary containing data to be converted to markdown format.\n    Returns:\n        str: The markdown formatted text generated from the input dictionary.\n    \"\"\"\n\n    emojis = {\n        \"Can be split\": \"🔀\",\n        \"Key issues to review\": \"⚡\",\n        \"Recommended focus areas for review\": \"⚡\",\n        \"Score\": \"🏅\",\n        \"Relevant tests\": \"🧪\",\n        \"Focused PR\": \"✨\",\n        \"Relevant ticket\": \"🎫\",\n        \"Security concerns\": \"🔒\",\n        \"Todo sections\": \"📝\",\n        \"Insights from user's answers\": \"📝\",\n        \"Code feedback\": \"🤖\",\n        \"Estimated effort to review [1-5]\": \"⏱️\",\n        \"Contribution time cost estimate\": \"⏳\",\n        \"Ticket compliance check\": \"🎫\",\n    }\n    markdown_text = \"\"\n    if not incremental_review:\n        markdown_text += f\"{PRReviewHeader.REGULAR.value} 🔍\\n\\n\"\n    else:\n        markdown_text += f\"{PRReviewHeader.INCREMENTAL.value} 🔍\\n\\n\"\n        markdown_text += f\"⏮️ Review for commits since previous PR-Agent review {incremental_review}.\\n\\n\"\n    if not output_data or not output_data.get('review', {}):\n        return \"\"\n\n    if get_settings().get(\"pr_reviewer.enable_intro_text\", False):\n        markdown_text += f\"Here are some key observations to aid the review process:\\n\\n\"\n\n    if gfm_supported:\n        markdown_text += \"<table>\\n\"\n\n    todo_summary = output_data['review'].pop('todo_summary', '')\n    for key, value in output_data['review'].items():\n        if value is None or value == '' or value == {} or value == []:\n            if key.lower() not in 
['can_be_split', 'key_issues_to_review']:\n                continue\n        key_nice = key.replace('_', ' ').capitalize()\n        emoji = emojis.get(key_nice, \"\")\n        if 'Estimated effort to review' in key_nice:\n            key_nice = 'Estimated effort to review'\n            value = str(value).strip()\n            if value.isnumeric():\n                value_int = int(value)\n            else:\n                try:\n                    value_int = int(value.split(',')[0])\n                except ValueError:\n                    continue\n            blue_bars = '🔵' * value_int\n            white_bars = '⚪' * (5 - value_int)\n            value = f\"{value_int} {blue_bars}{white_bars}\"\n            if gfm_supported:\n                markdown_text += f\"<tr><td>\"\n                markdown_text += f\"{emoji}&nbsp;<strong>{key_nice}</strong>: {value}\"\n                markdown_text += f\"</td></tr>\\n\"\n            else:\n                markdown_text += f\"### {emoji} {key_nice}: {value}\\n\\n\"\n        elif 'relevant tests' in key_nice.lower():\n            value = str(value).strip().lower()\n            if gfm_supported:\n                markdown_text += f\"<tr><td>\"\n                if is_value_no(value):\n                    markdown_text += f\"{emoji}&nbsp;<strong>No relevant tests</strong>\"\n                else:\n                    markdown_text += f\"{emoji}&nbsp;<strong>PR contains tests</strong>\"\n                markdown_text += f\"</td></tr>\\n\"\n            else:\n                if is_value_no(value):\n                    markdown_text += f'### {emoji} No relevant tests\\n\\n'\n                else:\n                    markdown_text += f\"### {emoji} PR contains tests\\n\\n\"\n        elif 'ticket compliance check' in key_nice.lower():\n            markdown_text = ticket_markdown_logic(emoji, markdown_text, value, gfm_supported)\n        elif 'contribution time cost estimate' in key_nice.lower():\n            if gfm_supported:\n      
          markdown_text += f\"<tr><td>{emoji}&nbsp;<strong>Contribution time estimate</strong> (best, average, worst case): \"\n                markdown_text += f\"{value['best_case'].replace('m', ' minutes')} | {value['average_case'].replace('m', ' minutes')} | {value['worst_case'].replace('m', ' minutes')}\"\n                markdown_text += f\"</td></tr>\\n\"\n            else:\n                markdown_text += f\"### {emoji} Contribution time estimate (best, average, worst case): \"\n                markdown_text += f\"{value['best_case'].replace('m', ' minutes')} | {value['average_case'].replace('m', ' minutes')} | {value['worst_case'].replace('m', ' minutes')}\\n\\n\"\n        elif 'security concerns' in key_nice.lower():\n            if gfm_supported:\n                markdown_text += f\"<tr><td>\"\n                if is_value_no(value):\n                    markdown_text += f\"{emoji}&nbsp;<strong>No security concerns identified</strong>\"\n                else:\n                    markdown_text += f\"{emoji}&nbsp;<strong>Security concerns</strong><br><br>\\n\\n\"\n                    value = emphasize_header(value.strip())\n                    markdown_text += f\"{value}\"\n                markdown_text += f\"</td></tr>\\n\"\n            else:\n                if is_value_no(value):\n                    markdown_text += f'### {emoji} No security concerns identified\\n\\n'\n                else:\n                    markdown_text += f\"### {emoji} Security concerns\\n\\n\"\n                    value = emphasize_header(value.strip(), only_markdown=True)\n                    markdown_text += f\"{value}\\n\\n\"\n        elif 'todo sections' in key_nice.lower():\n            if gfm_supported:\n                markdown_text += \"<tr><td>\"\n                if is_value_no(value):\n                    markdown_text += f\"✅&nbsp;<strong>No TODO sections</strong>\"\n                else:\n                    markdown_todo_items = format_todo_items(value, 
git_provider, gfm_supported)\n                    markdown_text += f\"{emoji}&nbsp;<strong>TODO sections</strong>\\n<br><br>\\n\"\n                    markdown_text += markdown_todo_items\n                markdown_text += \"</td></tr>\\n\"\n            else:\n                if is_value_no(value):\n                    markdown_text += f\"### ✅ No TODO sections\\n\\n\"\n                else:\n                    markdown_todo_items = format_todo_items(value, git_provider, gfm_supported)\n                    markdown_text += f\"### {emoji} TODO sections\\n\\n\"\n                    markdown_text += markdown_todo_items\n        elif 'can be split' in key_nice.lower():\n            if gfm_supported:\n                markdown_text += f\"<tr><td>\"\n                markdown_text += process_can_be_split(emoji, value)\n                markdown_text += f\"</td></tr>\\n\"\n        elif 'key issues to review' in key_nice.lower():\n            # value is a list of issues\n            if is_value_no(value):\n                if gfm_supported:\n                    markdown_text += f\"<tr><td>\"\n                    markdown_text += f\"{emoji}&nbsp;<strong>No major issues detected</strong>\"\n                    markdown_text += f\"</td></tr>\\n\"\n                else:\n                    markdown_text += f\"### {emoji} No major issues detected\\n\\n\"\n            else:\n                issues = value\n                if gfm_supported:\n                    markdown_text += f\"<tr><td>\"\n                    # markdown_text += f\"{emoji}&nbsp;<strong>{key_nice}</strong><br><br>\\n\\n\"\n                    markdown_text += f\"{emoji}&nbsp;<strong>Recommended focus areas for review</strong><br><br>\\n\\n\"\n                else:\n                    markdown_text += f\"### {emoji} Recommended focus areas for review\\n\\n#### \\n\"\n                for i, issue in enumerate(issues):\n                    try:\n                        if not issue or not isinstance(issue, dict):\n   
                         continue\n                        relevant_file = issue.get('relevant_file', '').strip()\n                        issue_header = issue.get('issue_header', '').strip()\n                        if issue_header.lower() == 'possible bug':\n                            issue_header = 'Possible Issue'  # Make the header less frightening\n                        issue_content = issue.get('issue_content', '').strip()\n                        start_line = int(str(issue.get('start_line', 0)).strip())\n                        end_line = int(str(issue.get('end_line', 0)).strip())\n\n                        relevant_lines_str = extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=True)\n                        if git_provider:\n                            reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)\n                        else:\n                            reference_link = None\n\n                        if gfm_supported:\n                            if reference_link is not None and len(reference_link) > 0:\n                                if relevant_lines_str:\n                                    issue_str = f\"<details><summary><a href='{reference_link}'><strong>{issue_header}</strong></a>\\n\\n{issue_content}\\n</summary>\\n\\n{relevant_lines_str}\\n\\n</details>\"\n                                else:\n                                    issue_str = f\"<a href='{reference_link}'><strong>{issue_header}</strong></a><br>{issue_content}\"\n                            else:\n                                issue_str = f\"<strong>{issue_header}</strong><br>{issue_content}\"\n                        else:\n                            if reference_link is not None and len(reference_link) > 0:\n                                issue_str = f\"[**{issue_header}**]({reference_link})\\n\\n{issue_content}\\n\\n\"\n                            else:\n                                issue_str = 
f\"**{issue_header}**\\n\\n{issue_content}\\n\\n\"\n                        markdown_text += f\"{issue_str}\\n\\n\"\n                    except Exception as e:\n                        get_logger().exception(f\"Failed to process 'Recommended focus areas for review': {e}\")\n                if gfm_supported:\n                    markdown_text += f\"</td></tr>\\n\"\n        else:\n            if gfm_supported:\n                markdown_text += f\"<tr><td>\"\n                markdown_text += f\"{emoji}&nbsp;<strong>{key_nice}</strong>: {value}\"\n                markdown_text += f\"</td></tr>\\n\"\n            else:\n                markdown_text += f\"### {emoji} {key_nice}: {value}\\n\\n\"\n\n    if gfm_supported:\n        markdown_text += \"</table>\\n\"\n\n    return markdown_text\n\n\ndef extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str:\n    \"\"\"\n    Finds 'relevant_file' in 'files', and extracts the lines from 'start_line' to 'end_line' string from the file content.\n    \"\"\"\n    try:\n        relevant_lines_str = \"\"\n        if files:\n            files = set_file_languages(files)\n            for file in files:\n                if file.filename.strip() == relevant_file:\n                    if not file.head_file:\n                        # as a fallback, extract relevant lines directly from patch\n                        patch = file.patch\n                        get_logger().info(f\"No content found in file: '{file.filename}' for 'extract_relevant_lines_str'. 
Using patch instead\")\n                        _, selected_lines = extract_hunk_lines_from_patch(patch, file.filename, start_line, end_line,side='right')\n                        if not selected_lines:\n                            get_logger().error(f\"Failed to extract relevant lines from patch: {file.filename}\")\n                            return \"\"\n                        # filter out '-' lines\n                        relevant_lines_str = \"\"\n                        for line in selected_lines.splitlines():\n                            if line.startswith('-'):\n                                continue\n                            relevant_lines_str += line[1:] + '\\n'\n                    else:\n                        relevant_file_lines = file.head_file.splitlines()\n                        relevant_lines_str = \"\\n\".join(relevant_file_lines[start_line - 1:end_line])\n\n                    if dedent and relevant_lines_str:\n                        # Remove the longest leading string of spaces and tabs common to all lines.\n                        relevant_lines_str = textwrap.dedent(relevant_lines_str)\n                    relevant_lines_str = f\"```{file.language}\\n{relevant_lines_str}\\n```\"\n                    break\n\n        return relevant_lines_str\n    except Exception as e:\n        get_logger().exception(f\"Failed to extract relevant lines: {e}\")\n        return \"\"\n\n\ndef ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) -> str:\n    ticket_compliance_str = \"\"\n    compliance_emoji = ''\n    # Track compliance levels across all tickets\n    all_compliance_levels = []\n\n    if isinstance(value, list):\n        for ticket_analysis in value:\n            try:\n                ticket_url = ticket_analysis.get('ticket_url', '').strip()\n                explanation = ''\n                ticket_compliance_level = ''  # Individual ticket compliance\n                fully_compliant_str = 
ticket_analysis.get('fully_compliant_requirements', '').strip()\n                not_compliant_str = ticket_analysis.get('not_compliant_requirements', '').strip()\n                requires_further_human_verification = ticket_analysis.get('requires_further_human_verification',\n                                                                          '').strip()\n\n                if not fully_compliant_str and not not_compliant_str:\n                    get_logger().debug(f\"Ticket compliance has no requirements\",\n                                       artifact={'ticket_url': ticket_url})\n                    continue\n\n                # Calculate individual ticket compliance level\n                if fully_compliant_str:\n                    if not_compliant_str:\n                        ticket_compliance_level = 'Partially compliant'\n                    else:\n                        if not requires_further_human_verification:\n                            ticket_compliance_level = 'Fully compliant'\n                        else:\n                            ticket_compliance_level = 'PR Code Verified'\n                elif not_compliant_str:\n                    ticket_compliance_level = 'Not compliant'\n\n                # Store the compliance level for aggregation\n                if ticket_compliance_level:\n                    all_compliance_levels.append(ticket_compliance_level)\n\n                # build compliance string\n                if fully_compliant_str:\n                    explanation += f\"Compliant requirements:\\n\\n{fully_compliant_str}\\n\\n\"\n                if not_compliant_str:\n                    explanation += f\"Non-compliant requirements:\\n\\n{not_compliant_str}\\n\\n\"\n                if requires_further_human_verification:\n                    explanation += f\"Requires further human verification:\\n\\n{requires_further_human_verification}\\n\\n\"\n                ticket_compliance_str += 
f\"\\n\\n**[{ticket_url.split('/')[-1]}]({ticket_url}) - {ticket_compliance_level}**\\n\\n{explanation}\\n\\n\"\n\n                # for debugging\n                if requires_further_human_verification:\n                    get_logger().debug(f\"Ticket compliance requires further human verification\",\n                                       artifact={'ticket_url': ticket_url,\n                                                 'requires_further_human_verification': requires_further_human_verification,\n                                                 'compliance_level': ticket_compliance_level})\n\n            except Exception as e:\n                get_logger().exception(f\"Failed to process ticket compliance: {e}\")\n                continue\n\n        # Calculate overall compliance level and emoji\n        if all_compliance_levels:\n            if all(level == 'Fully compliant' for level in all_compliance_levels):\n                compliance_level = 'Fully compliant'\n                compliance_emoji = '✅'\n            elif all(level == 'PR Code Verified' for level in all_compliance_levels):\n                compliance_level = 'PR Code Verified'\n                compliance_emoji = '✅'\n            elif any(level == 'Not compliant' for level in all_compliance_levels):\n                # If there's a mix of compliant and non-compliant tickets\n                if any(level in ['Fully compliant', 'PR Code Verified'] for level in all_compliance_levels):\n                    compliance_level = 'Partially compliant'\n                    compliance_emoji = '🔶'\n                else:\n                    compliance_level = 'Not compliant'\n                    compliance_emoji = '❌'\n            elif any(level == 'Partially compliant' for level in all_compliance_levels):\n                compliance_level = 'Partially compliant'\n                compliance_emoji = '🔶'\n            else:\n                compliance_level = 'PR Code Verified'\n                compliance_emoji 
= '✅'\n\n            # Set extra statistics outside the ticket loop\n            get_settings().set('config.extra_statistics', {'compliance_level': compliance_level})\n\n        # editing table row for ticket compliance analysis\n        if gfm_supported:\n            markdown_text += f\"<tr><td>\\n\\n\"\n            markdown_text += f\"**{emoji} Ticket compliance analysis {compliance_emoji}**\\n\\n\"\n            markdown_text += ticket_compliance_str\n            markdown_text += f\"</td></tr>\\n\"\n        else:\n            markdown_text += f\"### {emoji} Ticket compliance analysis {compliance_emoji}\\n\\n\"\n            markdown_text += ticket_compliance_str + \"\\n\\n\"\n\n    return markdown_text\n\n\ndef process_can_be_split(emoji, value):\n    try:\n        # key_nice = \"Can this PR be split?\"\n        key_nice = \"Multiple PR themes\"\n        markdown_text = \"\"\n        if not value or isinstance(value, list) and len(value) == 1:\n            value = \"No\"\n            # markdown_text += f\"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\\n\\n{value}\\n\\n</td></tr>\\n\"\n            # markdown_text += f\"### {emoji} No multiple PR themes\\n\\n\"\n            markdown_text += f\"{emoji} <strong>No multiple PR themes</strong>\\n\\n\"\n        else:\n            markdown_text += f\"{emoji} <strong>{key_nice}</strong><br><br>\\n\\n\"\n            for i, split in enumerate(value):\n                title = split.get('title', '')\n                relevant_files = split.get('relevant_files', [])\n                markdown_text += f\"<details><summary>\\nSub-PR theme: <b>{title}</b></summary>\\n\\n\"\n                markdown_text += f\"___\\n\\nRelevant files:\\n\\n\"\n                for file in relevant_files:\n                    markdown_text += f\"- {file}\\n\"\n                markdown_text += f\"___\\n\\n\"\n                markdown_text += f\"</details>\\n\\n\"\n\n                # markdown_text += f\"#### Sub-PR theme: {title}\\n\\n\"\n  
              # markdown_text += f\"Relevant files:\\n\\n\"\n                # for file in relevant_files:\n                #     markdown_text += f\"- {file}\\n\"\n                # markdown_text += \"\\n\"\n            # number_of_splits = len(value)\n            # markdown_text += f\"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\\n\"\n            # for i, split in enumerate(value):\n            #     title = split.get('title', '')\n            #     relevant_files = split.get('relevant_files', [])\n            #     if i == 0:\n            #         markdown_text += f\"<td><details><summary>\\nSub-PR theme:<br><strong>{title}</strong></summary>\\n\\n\"\n            #         markdown_text += f\"<hr>\\n\"\n            #         markdown_text += f\"Relevant files:\\n\"\n            #         markdown_text += f\"<ul>\\n\"\n            #         for file in relevant_files:\n            #             markdown_text += f\"<li>{file}</li>\\n\"\n            #         markdown_text += f\"</ul>\\n\\n</details></td></tr>\\n\"\n            #     else:\n            #         markdown_text += f\"<tr>\\n<td><details><summary>\\nSub-PR theme:<br><strong>{title}</strong></summary>\\n\\n\"\n            #         markdown_text += f\"<hr>\\n\"\n            #         markdown_text += f\"Relevant files:\\n\"\n            #         markdown_text += f\"<ul>\\n\"\n            #         for file in relevant_files:\n            #             markdown_text += f\"<li>{file}</li>\\n\"\n            #         markdown_text += f\"</ul>\\n\\n</details></td></tr>\\n\"\n    except Exception as e:\n        get_logger().exception(f\"Failed to process can be split: {e}\")\n        return \"\"\n    return markdown_text\n\n\ndef parse_code_suggestion(code_suggestion: dict, i: int = 0, gfm_supported: bool = True) -> str:\n    \"\"\"\n    Convert a dictionary of data into markdown format.\n\n    Args:\n        code_suggestion (dict): A dictionary containing data to be 
converted to markdown format.\n\n    Returns:\n        str: A string containing the markdown formatted text generated from the input dictionary.\n    \"\"\"\n    markdown_text = \"\"\n    if gfm_supported and 'relevant_line' in code_suggestion:\n        markdown_text += '<table>'\n        for sub_key, sub_value in code_suggestion.items():\n            try:\n                if sub_key.lower() == 'relevant_file':\n                    relevant_file = sub_value.strip('`').strip('\"').strip(\"'\")\n                    markdown_text += f\"<tr><td>relevant file</td><td>{relevant_file}</td></tr>\"\n                    # continue\n                elif sub_key.lower() == 'suggestion':\n                    markdown_text += (f\"<tr><td>{sub_key} &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</td>\"\n                                      f\"<td>\\n\\n<strong>\\n\\n{sub_value.strip()}\\n\\n</strong>\\n</td></tr>\")\n                elif sub_key.lower() == 'relevant_line':\n                    markdown_text += f\"<tr><td>relevant line</td>\"\n                    sub_value_list = sub_value.split('](')\n                    relevant_line = sub_value_list[0].lstrip('`').lstrip('[')\n                    if len(sub_value_list) > 1:\n                        link = sub_value_list[1].rstrip(')').strip('`')\n                        markdown_text += f\"<td><a href='{link}'>{relevant_line}</a></td>\"\n                    else:\n                        markdown_text += f\"<td>{relevant_line}</td>\"\n                    markdown_text += \"</tr>\"\n            except Exception as e:\n                get_logger().exception(f\"Failed to parse code suggestion: {e}\")\n                pass\n        markdown_text += '</table>'\n        markdown_text += \"<hr>\"\n    else:\n        for sub_key, sub_value in code_suggestion.items():\n            if isinstance(sub_key, str):\n                sub_key = sub_key.rstrip()\n            if isinstance(sub_value,str):\n                sub_value = sub_value.rstrip()\n          
  if isinstance(sub_value, dict):  # \"code example\"\n                markdown_text += f\"  - **{sub_key}:**\\n\"\n                for code_key, code_value in sub_value.items():  # 'before' and 'after' code\n                    code_str = f\"```\\n{code_value}\\n```\"\n                    code_str_indented = textwrap.indent(code_str, '        ')\n                    markdown_text += f\"    - **{code_key}:**\\n{code_str_indented}\\n\"\n            else:\n                if \"relevant_file\" in sub_key.lower():\n                    markdown_text += f\"\\n  - **{sub_key}:** {sub_value}  \\n\"\n                else:\n                    markdown_text += f\"   **{sub_key}:** {sub_value}  \\n\"\n                if \"relevant_line\" not in sub_key.lower():  # nicer presentation\n                    # markdown_text = markdown_text.rstrip('\\n') + \"\\\\\\n\" # works for gitlab\n                    markdown_text = markdown_text.rstrip('\\n') + \"   \\n\"  # works for gitlab and bitbucker\n\n        markdown_text += \"\\n\"\n    return markdown_text\n\n\ndef try_fix_json(review, max_iter=10, code_suggestions=False):\n    \"\"\"\n    Fix broken or incomplete JSON messages and return the parsed JSON data.\n\n    Args:\n    - review: A string containing the JSON message to be fixed.\n    - max_iter: An integer representing the maximum number of iterations to try and fix the JSON message.\n    - code_suggestions: A boolean indicating whether to try and fix JSON messages with code feedback.\n\n    Returns:\n    - data: A dictionary containing the parsed JSON data.\n\n    The function attempts to fix broken or incomplete JSON messages by parsing until the last valid code suggestion.\n    If the JSON message ends with a closing bracket, the function calls the fix_json_escape_char function to fix the\n    message.\n    If code_suggestions is True and the JSON message contains code feedback, the function tries to fix the JSON\n    message by parsing until the last valid code 
suggestion.\n    The function uses regular expressions to find the last occurrence of \"},\" with any number of whitespaces or\n    newlines.\n    It tries to parse the JSON message with the closing bracket and checks if it is valid.\n    If the JSON message is valid, the parsed JSON data is returned.\n    If the JSON message is not valid, the last code suggestion is removed and the process is repeated until a valid JSON\n    message is obtained or the maximum number of iterations is reached.\n    If a valid JSON message is not obtained, an error is logged and an empty dictionary is returned.\n    \"\"\"\n\n    if review.endswith(\"}\"):\n        return fix_json_escape_char(review)\n\n    data = {}\n    if code_suggestions:\n        closing_bracket = \"]}\"\n    else:\n        closing_bracket = \"]}}\"\n\n    if (review.rfind(\"'Code feedback': [\") > 0 or review.rfind('\"Code feedback\": [') > 0) or \\\n            (review.rfind(\"'Code suggestions': [\") > 0 or review.rfind('\"Code suggestions\": [') > 0) :\n        last_code_suggestion_ind = [m.end() for m in re.finditer(r\"\\}\\s*,\", review)][-1] - 1\n        valid_json = False\n        iter_count = 0\n\n        while last_code_suggestion_ind > 0 and not valid_json and iter_count < max_iter:\n            try:\n                data = json.loads(review[:last_code_suggestion_ind] + closing_bracket)\n                valid_json = True\n                review = review[:last_code_suggestion_ind].strip() + closing_bracket\n            except json.decoder.JSONDecodeError:\n                review = review[:last_code_suggestion_ind]\n                last_code_suggestion_ind = [m.end() for m in re.finditer(r\"\\}\\s*,\", review)][-1] - 1\n                iter_count += 1\n\n        if not valid_json:\n            get_logger().error(\"Unable to decode JSON response from AI\")\n            data = {}\n\n    return data\n\n\ndef fix_json_escape_char(json_message=None):\n    \"\"\"\n    Fix broken or incomplete JSON messages 
and return the parsed JSON data.\n\n    Args:\n        json_message (str): A string containing the JSON message to be fixed.\n\n    Returns:\n        dict: A dictionary containing the parsed JSON data.\n\n    Raises:\n        None\n\n    \"\"\"\n    try:\n        result = json.loads(json_message)\n    except Exception as e:\n        # Find the offending character index:\n        idx_to_replace = int(str(e).split(' ')[-1].replace(')', ''))\n        # Remove the offending character:\n        json_message = list(json_message)\n        json_message[idx_to_replace] = ' '\n        new_message = ''.join(json_message)\n        return fix_json_escape_char(json_message=new_message)\n    return result\n\n\ndef convert_str_to_datetime(date_str):\n    \"\"\"\n    Convert a string representation of a date and time into a datetime object.\n\n    Args:\n        date_str (str): A string representation of a date and time in the format '%a, %d %b %Y %H:%M:%S %Z'\n\n    Returns:\n        datetime: A datetime object representing the input date and time.\n\n    Example:\n        >>> convert_str_to_datetime('Mon, 01 Jan 2022 12:00:00 UTC')\n        datetime.datetime(2022, 1, 1, 12, 0, 0)\n    \"\"\"\n    datetime_format = '%a, %d %b %Y %H:%M:%S %Z'\n    return datetime.strptime(date_str, datetime_format)\n\n\ndef load_large_diff(filename, new_file_content_str: str, original_file_content_str: str, show_warning: bool = True) -> str:\n    \"\"\"\n    Generate a patch for a modified file by comparing the original content of the file with the new content provided as\n    input.\n    \"\"\"\n    if not original_file_content_str and not new_file_content_str:\n        return \"\"\n\n    try:\n        original_file_content_str = (original_file_content_str or \"\").rstrip() + \"\\n\"\n        new_file_content_str = (new_file_content_str or \"\").rstrip() + \"\\n\"\n        diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True),\n                                    
new_file_content_str.splitlines(keepends=True))\n        if get_settings().config.verbosity_level >= 2 and show_warning:\n            get_logger().info(f\"File was modified, but no patch was found. Manually creating patch: {filename}.\")\n        patch = ''.join(diff)\n        return patch\n    except Exception as e:\n        get_logger().exception(f\"Failed to generate patch for file: {filename}\")\n        return \"\"\n\n\ndef update_settings_from_args(args: List[str]) -> List[str]:\n    \"\"\"\n    Update the settings of the Dynaconf object based on the arguments passed to the function.\n\n    Args:\n        args: A list of arguments passed to the function.\n        Example args: ['--pr_code_suggestions.extra_instructions=\"be funny',\n                  '--pr_code_suggestions.num_code_suggestions=3']\n\n    Returns:\n        None\n\n    Raises:\n        ValueError: If the argument is not in the correct format.\n\n    \"\"\"\n    other_args = []\n    if args:\n        for arg in args:\n            arg = arg.strip()\n            if arg.startswith('--'):\n                arg = arg.strip('-').strip()\n                vals = arg.split('=', 1)\n                if len(vals) != 2:\n                    if len(vals) > 2:  # --extended is a valid argument\n                        get_logger().error(f'Invalid argument format: {arg}')\n                    other_args.append(arg)\n                    continue\n                key, value = _fix_key_value(*vals)\n                get_settings().set(key, value)\n                get_logger().info(f'Updated setting {key} to: \"{value}\"')\n            else:\n                other_args.append(arg)\n    return other_args\n\n\ndef _fix_key_value(key: str, value: str):\n    key = key.strip().upper()\n    value = value.strip()\n    try:\n        value = yaml.safe_load(value)\n    except Exception as e:\n        get_logger().debug(f\"Failed to parse YAML for config override {key}={value}\", exc_info=e)\n    return key, value\n\n\ndef 
load_yaml(response_text: str, keys_fix_yaml: List[str] = [], first_key=\"\", last_key=\"\") -> dict:\n    response_text_original = copy.deepcopy(response_text)\n    response_text = response_text.strip('\\n').removeprefix('yaml').removeprefix('```yaml').rstrip().removesuffix('```')\n    try:\n        data = yaml.safe_load(response_text)\n    except Exception as e:\n        get_logger().warning(f\"Initial failure to parse AI prediction: {e}\")\n        data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml, first_key=first_key, last_key=last_key,\n                            response_text_original=response_text_original)\n        if not data:\n            get_logger().error(f\"Failed to parse AI prediction after fallbacks\",\n                               artifact={'response_text': response_text})\n        else:\n            get_logger().info(f\"Successfully parsed AI prediction after fallbacks\",\n                              artifact={'response_text': response_text})\n    return data\n\n\n\ndef try_fix_yaml(response_text: str,\n                 keys_fix_yaml: List[str] = [],\n                 first_key=\"\",\n                 last_key=\"\",\n                 response_text_original=\"\") -> dict:\n    response_text_lines = response_text.split('\\n')\n\n    keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:',\n                 'improved code:', 'label:', 'why:', 'suggestion_summary:']\n    keys_yaml = keys_yaml + keys_fix_yaml\n\n    # first fallback - try to convert 'relevant line: ...' 
to relevant line: |-\\n        ...'\n    response_text_lines_copy = response_text_lines.copy()\n    for i in range(0, len(response_text_lines_copy)):\n        for key in keys_yaml:\n            if key in response_text_lines_copy[i] and not '|' in response_text_lines_copy[i]:\n                response_text_lines_copy[i] = response_text_lines_copy[i].replace(f'{key}',\n                                                                                  f'{key} |\\n        ')\n    try:\n        data = yaml.safe_load('\\n'.join(response_text_lines_copy))\n        get_logger().info(f\"Successfully parsed AI prediction after adding |-\\n\")\n        return data\n    except:\n        pass\n\n    # 1.5 fallback - try to convert '|' to '|2'. Will solve cases of indent decreasing during the code\n    response_text_copy = copy.deepcopy(response_text)\n    response_text_copy = response_text_copy.replace('|\\n', '|2\\n')\n    try:\n        data = yaml.safe_load(response_text_copy)\n        get_logger().info(f\"Successfully parsed AI prediction after replacing | with |2\")\n        return data\n    except:\n        # if it fails, we can try to add spaces to the lines that are not indented properly, and contain '}'.\n        response_text_lines_copy = response_text_copy.split('\\n')\n        for i in range(0, len(response_text_lines_copy)):\n            initial_space = len(response_text_lines_copy[i]) - len(response_text_lines_copy[i].lstrip())\n            if initial_space == 2 and '|2' not in response_text_lines_copy[i] and '}' in response_text_lines_copy[i]:\n                response_text_lines_copy[i] = '    ' + response_text_lines_copy[i].lstrip()\n        try:\n            data = yaml.safe_load('\\n'.join(response_text_lines_copy))\n            get_logger().info(f\"Successfully parsed AI prediction after replacing | with |2 and adding spaces\")\n            return data\n        except:\n            pass\n\n    # second fallback - try to extract only range from first ```yaml to 
the last ```\n    snippet_pattern = r'```yaml([\\s\\S]*?)```(?=\\s*$|\")'\n    snippet = re.search(snippet_pattern, '\\n'.join(response_text_lines_copy))\n    if not snippet:\n        snippet = re.search(snippet_pattern, response_text_original) # before we removed the \"```\"\n    if snippet:\n        snippet_text = snippet.group()\n        try:\n            data = yaml.safe_load(snippet_text.removeprefix('```yaml').rstrip('`'))\n            get_logger().info(f\"Successfully parsed AI prediction after extracting yaml snippet\")\n            return data\n        except:\n            pass\n\n\n    # third fallback - try to remove leading and trailing curly brackets\n    response_text_copy = response_text.strip().rstrip().removeprefix('{').removesuffix('}').rstrip(':\\n')\n    try:\n        data = yaml.safe_load(response_text_copy)\n        get_logger().info(f\"Successfully parsed AI prediction after removing curly brackets\")\n        return data\n    except:\n        pass\n\n\n    # forth fallback - try to extract yaml snippet by 'first_key' and 'last_key'\n    # note that 'last_key' can be in practice a key that is not the last key in the yaml snippet.\n    # it just needs to be some inner key, so we can look for newlines after it\n    if first_key and last_key:\n        index_start = response_text.find(f\"\\n{first_key}:\")\n        if index_start == -1:\n            index_start = response_text.find(f\"{first_key}:\")\n        index_last_code = response_text.rfind(f\"{last_key}:\")\n        index_end = response_text.find(\"\\n\\n\", index_last_code) # look for newlines after last_key\n        if index_end == -1:\n            index_end = len(response_text)\n        response_text_copy = response_text[index_start:index_end].strip().strip('```yaml').strip('`').strip()\n        if response_text_copy:\n            try:\n                data = yaml.safe_load(response_text_copy)\n                get_logger().info(f\"Successfully parsed AI prediction after extracting yaml 
snippet\")\n                return data\n            except:\n                pass\n\n    # fifth fallback - try to remove leading '+' (sometimes added by AI for 'existing code' and 'improved code')\n    response_text_lines_copy = response_text_lines.copy()\n    for i in range(0, len(response_text_lines_copy)):\n        if response_text_lines_copy[i].startswith('+'):\n            response_text_lines_copy[i] = ' ' + response_text_lines_copy[i][1:]\n    try:\n        data = yaml.safe_load('\\n'.join(response_text_lines_copy))\n        get_logger().info(f\"Successfully parsed AI prediction after removing leading '+'\")\n        return data\n    except:\n        pass\n\n    # sixth fallback - replace tabs with spaces\n    if '\\t' in response_text:\n        response_text_copy = copy.deepcopy(response_text)\n        response_text_copy = response_text_copy.replace('\\t', '    ')\n        try:\n            data = yaml.safe_load(response_text_copy)\n            get_logger().info(f\"Successfully parsed AI prediction after replacing tabs with spaces\")\n            return data\n        except:\n            pass\n\n    # seventh fallback - add indent for sections of code blocks\n    response_text_copy = copy.deepcopy(response_text)\n    response_text_copy_lines = response_text_copy.split('\\n')\n    start_line = -1\n    improve_sections = ['existing_code:', 'improved_code:', 'response:', 'why:']\n    describe_sections = ['description:', 'title:', 'changes_diagram:', 'pr_files:', 'pr_ticket:']\n    for i, line in enumerate(response_text_copy_lines):\n        line_stripped = line.rstrip()\n        if any(key in line_stripped for key in (improve_sections+describe_sections)):\n            start_line = i\n        elif line_stripped.endswith(': |') or line_stripped.endswith(': |-') or line_stripped.endswith(': |2') or any(line_stripped.endswith(key) for key in keys_yaml):\n            start_line = -1\n        elif start_line != -1:\n            response_text_copy_lines[i] = '    ' 
+ line\n    response_text_copy = '\\n'.join(response_text_copy_lines)\n    response_text_copy = response_text_copy.replace(' |\\n', ' |2\\n')\n    try:\n        data = yaml.safe_load(response_text_copy)\n        get_logger().info(f\"Successfully parsed AI prediction after adding indent for sections of code blocks\")\n        return data\n    except:\n        pass\n\n    # eighth fallback - try to remove pipe chars at the root-level dicts\n    response_text_copy = copy.deepcopy(response_text)\n    response_text_copy = response_text_copy.lstrip('|\\n')\n    try:\n        data = yaml.safe_load(response_text_copy)\n        get_logger().info(f\"Successfully parsed AI prediction after removing pipe chars\")\n        return data\n    except:\n        pass\n\n    # ninth fallback - try to decode the response text with different encodings. GPT-5 can return text that is not utf-8 encoded.\n    encodings_to_try = ['latin-1', 'utf-16']\n    for encoding in encodings_to_try:\n        try:\n            data = yaml.safe_load(response_text.encode(encoding).decode(\"utf-8\"))\n            if data:\n                get_logger().info(f\"Successfully parsed AI prediction after decoding with {encoding} encoding\")\n                return data\n        except:\n            pass\n\n    # # sixth fallback - try to remove last lines\n    # for i in range(1, len(response_text_lines)):\n    #     response_text_lines_tmp = '\\n'.join(response_text_lines[:-i])\n    #     try:\n    #         data = yaml.safe_load(response_text_lines_tmp)\n    #         get_logger().info(f\"Successfully parsed AI prediction after removing {i} lines\")\n    #         return data\n    #     except:\n    #         pass\n\n\n\ndef set_custom_labels(variables, git_provider=None):\n    if not get_settings().config.enable_custom_labels:\n        return\n\n    labels = get_settings().get('custom_labels', {})\n    if not labels:\n        # set default labels\n        labels = ['Bug fix', 'Tests', 'Bug fix with tests', 
'Enhancement', 'Documentation', 'Other']\n        labels_list = \"\\n      - \".join(labels) if labels else \"\"\n        labels_list = f\"      - {labels_list}\" if labels_list else \"\"\n        variables[\"custom_labels\"] = labels_list\n        return\n\n    # Set custom labels\n    variables[\"custom_labels_class\"] = \"class Label(str, Enum):\"\n    counter = 0\n    labels_minimal_to_labels_dict = {}\n    for k, v in labels.items():\n        description = \"'\" + v['description'].strip('\\n').replace('\\n', '\\\\n') + \"'\"\n        # variables[\"custom_labels_class\"] += f\"\\n    {k.lower().replace(' ', '_')} = '{k}' # {description}\"\n        variables[\"custom_labels_class\"] += f\"\\n    {k.lower().replace(' ', '_')} = {description}\"\n        labels_minimal_to_labels_dict[k.lower().replace(' ', '_')] = k\n        counter += 1\n    variables[\"labels_minimal_to_labels_dict\"] = labels_minimal_to_labels_dict\n\ndef get_user_labels(current_labels: List[str] = None):\n    \"\"\"\n    Only keep labels that has been added by the user\n    \"\"\"\n    try:\n        enable_custom_labels = get_settings().config.get('enable_custom_labels', False)\n        custom_labels = get_settings().get('custom_labels', [])\n        if current_labels is None:\n            current_labels = []\n        user_labels = []\n        for label in current_labels:\n            if label.lower() in ['bug fix', 'tests', 'enhancement', 'documentation', 'other']:\n                continue\n            if enable_custom_labels:\n                if label in custom_labels:\n                    continue\n            user_labels.append(label)\n        if user_labels:\n            get_logger().debug(f\"Keeping user labels: {user_labels}\")\n    except Exception as e:\n        get_logger().exception(f\"Failed to get user labels: {e}\")\n        return current_labels\n    return user_labels\n\n\ndef get_max_tokens(model):\n    \"\"\"\n    Get the maximum number of tokens allowed for a model.\n    
logic:\n    (1) If the model is in './pr_agent/algo/__init__.py', use the value from there.\n    (2) else, the user needs to define explicitly 'config.custom_model_max_tokens'\n\n    For both cases, we further limit the number of tokens to 'config.max_model_tokens' if it is set.\n    This aims to improve the algorithmic quality, as the AI model degrades in performance when the input is too long.\n    \"\"\"\n    settings = get_settings()\n    if model in MAX_TOKENS:\n        max_tokens_model = MAX_TOKENS[model]\n    elif settings.config.custom_model_max_tokens > 0:\n        max_tokens_model = settings.config.custom_model_max_tokens\n    else:\n        get_logger().error(f\"Model {model} is not defined in MAX_TOKENS in ./pr_agent/algo/__init__.py and no custom_model_max_tokens is set\")\n        raise Exception(f\"Ensure {model} is defined in MAX_TOKENS in ./pr_agent/algo/__init__.py or set a positive value for it in config.custom_model_max_tokens\")\n\n    if settings.config.max_model_tokens and settings.config.max_model_tokens > 0:\n        max_tokens_model = min(settings.config.max_model_tokens, max_tokens_model)\n    return max_tokens_model\n\n\ndef clip_tokens(text: str, max_tokens: int, add_three_dots=True, num_input_tokens=None, delete_last_line=False) -> str:\n    \"\"\"\n    Clip the number of tokens in a string to a maximum number of tokens.\n\n    This function limits text to a specified token count by calculating the approximate\n    character-to-token ratio and truncating the text accordingly. A safety factor of 0.9\n    (10% reduction) is applied to ensure the result stays within the token limit.\n\n    Args:\n        text (str): The string to clip. 
If empty or None, returns the input unchanged.\n        max_tokens (int): The maximum number of tokens allowed in the string.\n                         If negative, returns an empty string.\n        add_three_dots (bool, optional): Whether to add \"\\\\n...(truncated)\" at the end\n                                       of the clipped text to indicate truncation.\n                                       Defaults to True.\n        num_input_tokens (int, optional): Pre-computed number of tokens in the input text.\n                                        If provided, skips token encoding step for efficiency.\n                                        If None, tokens will be counted using TokenEncoder.\n                                        Defaults to None.\n        delete_last_line (bool, optional): Whether to remove the last line from the\n                                         clipped content before adding truncation indicator.\n                                         Useful for ensuring clean breaks at line boundaries.\n                                         Defaults to False.\n\n    Returns:\n        str: The clipped string. 
Returns original text if:\n             - Text is empty/None\n             - Token count is within limit\n             - An error occurs during processing\n\n             Returns empty string if max_tokens <= 0.\n\n    Examples:\n        Basic usage:\n        >>> text = \"This is a sample text that might be too long\"\n        >>> result = clip_tokens(text, max_tokens=10)\n        >>> print(result)\n        This is a sample...\n        (truncated)\n\n        Without truncation indicator:\n        >>> result = clip_tokens(text, max_tokens=10, add_three_dots=False)\n        >>> print(result)\n        This is a sample\n\n        With pre-computed token count:\n        >>> result = clip_tokens(text, max_tokens=5, num_input_tokens=15)\n        >>> print(result)\n        This...\n        (truncated)\n\n        With line deletion:\n        >>> multiline_text = \"Line 1\\\\nLine 2\\\\nLine 3\"\n        >>> result = clip_tokens(multiline_text, max_tokens=3, delete_last_line=True)\n        >>> print(result)\n        Line 1\n        Line 2\n        ...\n        (truncated)\n\n    Notes:\n        The function uses a safety factor of 0.9 (10% reduction) to ensure the\n        result stays within the token limit, as character-to-token ratios can vary.\n        If token encoding fails, the original text is returned with a warning logged.\n    \"\"\"\n    if not text:\n        return text\n\n    try:\n        if num_input_tokens is None:\n            encoder = TokenEncoder.get_token_encoder()\n            num_input_tokens = len(encoder.encode(text))\n        if num_input_tokens <= max_tokens:\n            return text\n        if max_tokens < 0:\n            return \"\"\n\n        # calculate the number of characters to keep\n        num_chars = len(text)\n        chars_per_token = num_chars / num_input_tokens\n        factor = 0.9  # reduce by 10% to be safe\n        num_output_chars = int(factor * chars_per_token * max_tokens)\n\n        # clip the text\n        if 
num_output_chars > 0:\n            clipped_text = text[:num_output_chars]\n            if delete_last_line:\n                clipped_text = clipped_text.rsplit('\\n', 1)[0]\n            if add_three_dots:\n                clipped_text += \"\\n...(truncated)\"\n        else: # if the text is empty\n            clipped_text =  \"\"\n\n        return clipped_text\n    except Exception as e:\n        get_logger().warning(f\"Failed to clip tokens: {e}\")\n        return text\n\ndef replace_code_tags(text):\n    \"\"\"\n    Replace odd instances of ` with <code> and even instances of ` with </code>\n    \"\"\"\n    text = html.escape(text)\n    parts = text.split('`')\n    for i in range(1, len(parts), 2):\n        parts[i] = '<code>' + parts[i] + '</code>'\n    return ''.join(parts)\n\n\ndef find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],\n                                              relevant_file: str,\n                                              relevant_line_in_file: str,\n                                              absolute_position: int = None) -> Tuple[int, int]:\n    position = -1\n    if absolute_position is None:\n        absolute_position = -1\n    re_hunk_header = re.compile(\n        r\"^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? 
@@[ ]?(.*)\")\n\n    if not diff_files:\n        return position, absolute_position\n\n    for file in diff_files:\n        if file.filename and (file.filename.strip() == relevant_file):\n            patch = file.patch\n            patch_lines = patch.splitlines()\n            delta = 0\n            start1, size1, start2, size2 = 0, 0, 0, 0\n            if absolute_position != -1: # matching absolute to relative\n                for i, line in enumerate(patch_lines):\n                    # new hunk\n                    if line.startswith('@@'):\n                        delta = 0\n                        match = re_hunk_header.match(line)\n                        start1, size1, start2, size2 = map(int, match.groups()[:4])\n                    elif not line.startswith('-'):\n                        delta += 1\n\n                    #\n                    absolute_position_curr = start2 + delta - 1\n\n                    if absolute_position_curr == absolute_position:\n                        position = i\n                        break\n            else:\n                # try to find the line in the patch using difflib, with some margin of error\n                matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file,\n                                                                             patch_lines, n=3, cutoff=0.93)\n                if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'):\n                    relevant_line_in_file = matches_difflib[0]\n\n\n                for i, line in enumerate(patch_lines):\n                    if line.startswith('@@'):\n                        delta = 0\n                        match = re_hunk_header.match(line)\n                        start1, size1, start2, size2 = map(int, match.groups()[:4])\n                    elif not line.startswith('-'):\n                        delta += 1\n\n                    if relevant_line_in_file in line and line[0] != '-':\n                        
position = i\n                        absolute_position = start2 + delta - 1\n                        break\n\n                if position == -1 and relevant_line_in_file[0] == '+':\n                    no_plus_line = relevant_line_in_file[1:].lstrip()\n                    for i, line in enumerate(patch_lines):\n                        if line.startswith('@@'):\n                            delta = 0\n                            match = re_hunk_header.match(line)\n                            start1, size1, start2, size2 = map(int, match.groups()[:4])\n                        elif not line.startswith('-'):\n                            delta += 1\n\n                        if no_plus_line in line and line[0] != '-':\n                            # The model might add a '+' to the beginning of the relevant_line_in_file even if originally\n                            # it's a context line\n                            position = i\n                            absolute_position = start2 + delta - 1\n                            break\n    return position, absolute_position\n\ndef get_rate_limit_status(github_token) -> dict:\n    GITHUB_API_URL = get_settings(use_context=False).get(\"GITHUB.BASE_URL\", \"https://api.github.com\").rstrip(\"/\")  # \"https://api.github.com\"\n    # GITHUB_API_URL = \"https://api.github.com\"\n    RATE_LIMIT_URL = f\"{GITHUB_API_URL}/rate_limit\"\n    HEADERS = {\n        \"Accept\": \"application/vnd.github.v3+json\",\n        \"Authorization\": f\"token {github_token}\"\n    }\n\n    response = requests.get(RATE_LIMIT_URL, headers=HEADERS)\n    try:\n        rate_limit_info = response.json()\n        if rate_limit_info.get('message') == 'Rate limiting is not enabled.':  # for github enterprise\n            return {'resources': {}}\n        response.raise_for_status()  # Check for HTTP errors\n    except:  # retry\n        time.sleep(0.1)\n        response = requests.get(RATE_LIMIT_URL, headers=HEADERS)\n        return response.json()\n    
return rate_limit_info\n\n\ndef validate_rate_limit_github(github_token, installation_id=None, threshold=0.1) -> bool:\n    try:\n        rate_limit_status = get_rate_limit_status(github_token)\n        if installation_id:\n            get_logger().debug(f\"installation_id: {installation_id}, Rate limit status: {rate_limit_status['rate']}\")\n    # validate that the rate limit is not exceeded\n        # validate that the rate limit is not exceeded\n        for key, value in rate_limit_status['resources'].items():\n            if value['remaining'] < value['limit'] * threshold:\n                get_logger().error(f\"key: {key}, value: {value}\")\n                return False\n        return True\n    except Exception as e:\n        get_logger().error(f\"Error in rate limit {e}\",\n                           artifact={\"traceback\": traceback.format_exc()})\n        return True\n\n\ndef validate_and_await_rate_limit(github_token):\n    try:\n        rate_limit_status = get_rate_limit_status(github_token)\n        # validate that the rate limit is not exceeded\n        for key, value in rate_limit_status['resources'].items():\n            if value['remaining'] < value['limit'] // 80:\n                get_logger().error(f\"key: {key}, value: {value}\")\n                sleep_time_sec = value['reset'] - datetime.now().timestamp()\n                sleep_time_hour = sleep_time_sec / 3600.0\n                get_logger().error(f\"Rate limit exceeded. 
Sleeping for {sleep_time_hour} hours\")\n                if sleep_time_sec > 0:\n                    time.sleep(sleep_time_sec + 1)\n                rate_limit_status = get_rate_limit_status(github_token)\n        return rate_limit_status\n    except:\n        get_logger().error(\"Error in rate limit\")\n        return None\n\n\ndef github_action_output(output_data: dict, key_name: str):\n    try:\n        if not get_settings().get('github_action_config.enable_output', False):\n            return\n\n        key_data = output_data.get(key_name, {})\n        with open(os.environ['GITHUB_OUTPUT'], 'a') as fh:\n            print(f\"{key_name}={json.dumps(key_data, indent=None, ensure_ascii=False)}\", file=fh)\n    except Exception as e:\n        get_logger().error(f\"Failed to write to GitHub Action output: {e}\")\n    return\n\n\ndef show_relevant_configurations(relevant_section: str) -> str:\n    skip_keys = ['ai_disclaimer', 'ai_disclaimer_title', 'ANALYTICS_FOLDER', 'secret_provider', \"skip_keys\", \"app_id\", \"redirect\",\n                      'trial_prefix_message', 'no_eligible_message', 'identity_provider', 'ALLOWED_REPOS','APP_NAME']\n    extra_skip_keys = get_settings().config.get('config.skip_keys', [])\n    if extra_skip_keys:\n        skip_keys.extend(extra_skip_keys)\n\n    markdown_text = \"\"\n    markdown_text += \"\\n<hr>\\n<details> <summary><strong>🛠️ Relevant configurations:</strong></summary> \\n\\n\"\n    markdown_text +=\"<br>These are the relevant [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) for this tool:\\n\\n\"\n    markdown_text += f\"**[config**]\\n```yaml\\n\\n\"\n    for key, value in get_settings().config.items():\n        if key in skip_keys:\n            continue\n        markdown_text += f\"{key}: {value}\\n\"\n    markdown_text += \"\\n```\\n\"\n    markdown_text += f\"\\n**[{relevant_section}]**\\n```yaml\\n\\n\"\n    for key, value in get_settings().get(relevant_section, 
{}).items():\n        if key in skip_keys:\n            continue\n        markdown_text += f\"{key}: {value}\\n\"\n    markdown_text += \"\\n```\"\n    markdown_text += \"\\n</details>\\n\"\n    return markdown_text\n\ndef is_value_no(value):\n    if not value:\n        return True\n    value_str = str(value).strip().lower()\n    if value_str == 'no' or value_str == 'none' or value_str == 'false':\n        return True\n    return False\n\n\ndef set_pr_string(repo_name, pr_number):\n    return f\"{repo_name}#{pr_number}\"\n\n\ndef string_to_uniform_number(s: str) -> float:\n    \"\"\"\n    Convert a string to a uniform number in the range [0, 1].\n    The uniform distribution is achieved by the nature of the SHA-256 hash function, which produces a uniformly distributed hash value over its output space.\n    \"\"\"\n    # Generate a hash of the string\n    hash_object = hashlib.sha256(s.encode())\n    # Convert the hash to an integer\n    hash_int = int(hash_object.hexdigest(), 16)\n    # Normalize the integer to the range [0, 1]\n    max_hash_int = 2 ** 256 - 1\n    uniform_number = float(hash_int) / max_hash_int\n    return uniform_number\n\n\ndef process_description(description_full: str) -> Tuple[str, List]:\n    if not description_full:\n        return \"\", []\n\n    # description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value)\n    if PRDescriptionHeader.FILE_WALKTHROUGH.value in description_full:\n        try:\n            # FILE_WALKTHROUGH are presented in a collapsible section in the description\n            regex_pattern = r'<details.*?>\\s*<summary>\\s*<h3>\\s*' + re.escape(PRDescriptionHeader.FILE_WALKTHROUGH.value) + r'\\s*</h3>\\s*</summary>'\n            description_split = re.split(regex_pattern, description_full, maxsplit=1, flags=re.DOTALL)\n\n            # If the regex pattern is not found, fallback to the previous method\n            if len(description_split) == 1:\n                get_logger().debug(\"Could not find 
regex pattern for file walkthrough, falling back to simple split\")\n                description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value, 1)\n        except Exception as e:\n            get_logger().warning(f\"Failed to split description using regex, falling back to simple split: {e}\")\n            description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value, 1)\n\n        if len(description_split) < 2:\n            get_logger().error(\"Failed to split description into base and changes walkthrough\", artifact={'description': description_full})\n            return description_full.strip(), []\n\n        base_description_str = description_split[0].strip()\n        changes_walkthrough_str = \"\"\n        files = []\n        if len(description_split) > 1:\n            changes_walkthrough_str = description_split[1]\n        else:\n            get_logger().debug(\"No changes walkthrough found\")\n    else:\n        base_description_str = description_full.strip()\n        return base_description_str, []\n\n    try:\n        if changes_walkthrough_str:\n            # get the end of the table\n            if '</table>\\n\\n___' in changes_walkthrough_str:\n                end = changes_walkthrough_str.index(\"</table>\\n\\n___\")\n            elif '\\n___' in changes_walkthrough_str:\n                end = changes_walkthrough_str.index(\"\\n___\")\n            else:\n                end = len(changes_walkthrough_str)\n            changes_walkthrough_str = changes_walkthrough_str[:end]\n\n            h = html2text.HTML2Text()\n            h.body_width = 0  # Disable line wrapping\n\n            # find all the files\n            pattern = r'<tr>\\s*<td>\\s*(<details>\\s*<summary>(.*?)</summary>(.*?)</details>)\\s*</td>'\n            files_found = re.findall(pattern, changes_walkthrough_str, re.DOTALL)\n            for file_data in files_found:\n                try:\n                    if isinstance(file_data, 
tuple):\n                        file_data = file_data[0]\n                    pattern = r'<details>\\s*<summary><strong>(.*?)</strong>\\s*<dd><code>(.*?)</code>.*?</summary>\\s*<hr>\\s*(.*?)\\s*(?:<li>|•)(.*?)</details>'\n                    res = re.search(pattern, file_data, re.DOTALL)\n                    if not res or res.lastindex != 4:\n                        pattern_back = r'<details>\\s*<summary><strong>(.*?)</strong><dd><code>(.*?)</code>.*?</summary>\\s*<hr>\\s*(.*?)\\n\\n\\s*(.*?)</details>'\n                        res = re.search(pattern_back, file_data, re.DOTALL)\n                    if not res or res.lastindex != 4:\n                        pattern_back = r'<details>\\s*<summary><strong>(.*?)</strong>\\s*<dd><code>(.*?)</code>.*?</summary>\\s*<hr>\\s*(.*?)\\s*-\\s*(.*?)\\s*</details>' # looking for hypen ('- ')\n                        res = re.search(pattern_back, file_data, re.DOTALL)\n                    if res and res.lastindex == 4:\n                        short_filename = res.group(1).strip()\n                        short_summary = res.group(2).strip()\n                        long_filename = res.group(3).strip()\n                        if long_filename.endswith('<ul>'):\n                            long_filename = long_filename[:-4].strip()\n                        long_summary =  res.group(4).strip()\n                        long_summary = long_summary.replace('<br> *', '\\n*').replace('<br>','').replace('\\n','<br>')\n                        long_summary = h.handle(long_summary).strip()\n                        if long_summary.startswith('\\\\-'):\n                            long_summary = \"* \" + long_summary[2:]\n                        elif not long_summary.startswith('*'):\n                            long_summary = f\"* {long_summary}\"\n\n                        files.append({\n                            'short_file_name': short_filename,\n                            'full_file_name': long_filename,\n                           
 'short_summary': short_summary,\n                            'long_summary': long_summary\n                        })\n                    else:\n                        if '<code>...</code>' in file_data:\n                            pass # PR with many files. some did not get analyzed\n                        else:\n                            get_logger().warning(f\"Failed to parse description\", artifact={'description': file_data})\n                except Exception as e:\n                    get_logger().exception(f\"Failed to process description: {e}\", artifact={'description': file_data})\n\n\n    except Exception as e:\n        get_logger().exception(f\"Failed to process description: {e}\")\n\n    return base_description_str, files\n\ndef get_version() -> str:\n    # First check pyproject.toml if running directly out of repository\n    if os.path.exists(\"pyproject.toml\"):\n        if sys.version_info >= (3, 11):\n            import tomllib\n            with open(\"pyproject.toml\", \"rb\") as f:\n                data = tomllib.load(f)\n                if \"project\" in data and \"version\" in data[\"project\"]:\n                    return data[\"project\"][\"version\"]\n                else:\n                    get_logger().warning(\"Version not found in pyproject.toml\")\n        else:\n            get_logger().warning(\"Unable to determine local version from pyproject.toml\")\n\n    # Otherwise get the installed pip package version\n    try:\n        return version('pr-agent')\n    except PackageNotFoundError:\n        get_logger().warning(\"Unable to find package named 'pr-agent'\")\n        return \"unknown\"\n\n\ndef set_file_languages(diff_files) -> List[FilePatchInfo]:\n    try:\n        # if the language is already set, do not change it\n        if hasattr(diff_files[0], 'language') and diff_files[0].language:\n            return diff_files\n\n        # map file extensions to programming languages\n        language_extension_map_org = 
get_settings().language_extension_map_org\n        extension_to_language = {}\n        for language, extensions in language_extension_map_org.items():\n            for ext in extensions:\n                extension_to_language[ext] = language\n        for file in diff_files:\n            extension_s = '.' + file.filename.rsplit('.')[-1]\n            language_name = \"txt\"\n            if extension_s and (extension_s in extension_to_language):\n                language_name = extension_to_language[extension_s]\n            file.language = language_name.lower()\n    except Exception as e:\n        get_logger().exception(f\"Failed to set file languages: {e}\")\n\n    return diff_files\n\ndef format_todo_item(todo_item: TodoItem, git_provider, gfm_supported) -> str:\n    relevant_file = todo_item.get('relevant_file', '').strip()\n    line_number = todo_item.get('line_number', '')\n    content = todo_item.get('content', '')\n    reference_link = git_provider.get_line_link(relevant_file, line_number, line_number)\n    file_ref = f\"{relevant_file} [{line_number}]\"\n    if reference_link:\n        if gfm_supported:\n            file_ref = f\"<a href='{reference_link}'>{file_ref}</a>\"\n        else:\n            file_ref = f\"[{file_ref}]({reference_link})\"\n\n    if content:\n        return f\"{file_ref}: {content.strip()}\"\n    else:\n        # if content is empty, return only the file reference\n        return file_ref\n\n\ndef format_todo_items(value: list[TodoItem] | TodoItem, git_provider, gfm_supported) -> str:\n    markdown_text = \"\"\n    MAX_ITEMS = 5 # limit the number of items to display\n    if gfm_supported:\n        if isinstance(value, list):\n            markdown_text += \"<ul>\\n\"\n            if len(value) > MAX_ITEMS:\n                get_logger().debug(f\"Truncating todo items to {MAX_ITEMS} items\")\n                value = value[:MAX_ITEMS]\n            for todo_item in value:\n                markdown_text += 
f\"<li>{format_todo_item(todo_item, git_provider, gfm_supported)}</li>\\n\"\n            markdown_text += \"</ul>\\n\"\n        else:\n            markdown_text += f\"<p>{format_todo_item(value, git_provider, gfm_supported)}</p>\\n\"\n    else:\n        if isinstance(value, list):\n            if len(value) > MAX_ITEMS:\n                get_logger().debug(f\"Truncating todo items to {MAX_ITEMS} items\")\n                value = value[:MAX_ITEMS]\n            for todo_item in value:\n                markdown_text += f\"- {format_todo_item(todo_item, git_provider, gfm_supported)}\\n\"\n        else:\n            markdown_text += f\"- {format_todo_item(value, git_provider, gfm_supported)}\\n\"\n    return markdown_text\n"
  },
  {
    "path": "pr_agent/cli.py",
    "content": "import argparse\nimport asyncio\nimport os\n\nfrom pr_agent.agent.pr_agent import PRAgent, commands\nfrom pr_agent.algo.utils import get_version\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger, setup_logger\n\nlog_level = os.environ.get(\"LOG_LEVEL\", \"INFO\")\nsetup_logger(log_level)\n\n\ndef set_parser():\n    parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage=\n    \"\"\"\\\n    Usage: cli.py --pr_url=<URL on supported git hosting service> <command> [<args>].\n    For example:\n    - cli.py --pr_url=... review\n    - cli.py --pr_url=... describe\n    - cli.py --pr_url=... improve\n    - cli.py --pr_url=... ask \"write me a poem about this PR\"\n    - cli.py --pr_url=... reflect\n    - cli.py --issue_url=... similar_issue\n    - cli.py --pr_url/--issue_url= help_docs [<asked question>]\n\n    Supported commands:\n    - review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.\n\n    - ask / ask_question [question] - Ask a question about the PR.\n\n    - describe / describe_pr - Modify the PR title and description based on the PR's contents.\n\n    - improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit.\n    Extended mode ('improve --extended') employs several calls, and provides a more thorough feedback\n\n    - reflect - Ask the PR author questions about the PR.\n\n    - update_changelog - Update the changelog based on the PR's contents.\n\n    - add_docs\n\n    - generate_labels\n    \n    - help_docs - Ask a question, from either an issue or PR context, on a given repo (current context or a different one)\n\n\n    Configuration:\n    To edit any configuration parameter from 'configuration.toml', just add -config_path=<value>.\n    For example: 'python cli.py --pr_url=... 
review --pr_reviewer.extra_instructions=\"focus on the file: ...\"'\n    \"\"\")\n    parser.add_argument('--version', action='version', version=f'pr-agent {get_version()}')\n    parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', default=None)\n    parser.add_argument('--issue_url', type=str, help='The URL of the Issue to review', default=None)\n    parser.add_argument('command', type=str, help='The', choices=commands, default='review')\n    parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])\n    return parser\n\n\ndef run_command(pr_url, command):\n    # Preparing the command\n    run_command_str = f\"--pr_url={pr_url} {command.lstrip('/')}\"\n    args = set_parser().parse_args(run_command_str.split())\n\n    # Run the command. Feedback will appear in GitHub PR comments\n    run(args=args)\n\n\ndef run(inargs=None, args=None):\n    parser = set_parser()\n    if not args:\n        args = parser.parse_args(inargs)\n    if not args.pr_url and not args.issue_url:\n        parser.print_help()\n        return\n\n    command = args.command.lower()\n    get_settings().set(\"CONFIG.CLI_MODE\", True)\n\n    async def inner():\n        if args.issue_url:\n            result = await asyncio.create_task(PRAgent().handle_request(args.issue_url, [command] + args.rest))\n        else:\n            result = await asyncio.create_task(PRAgent().handle_request(args.pr_url, [command] + args.rest))\n\n        if get_settings().litellm.get(\"enable_callbacks\", False):\n            # There may be additional events on the event queue from the run above. 
If there are give them time to complete.\n            get_logger().debug(\"Waiting for event queue to complete\")\n            tasks = [task for task in asyncio.all_tasks() if task is not asyncio.current_task()]\n            if tasks:\n                _, pending = await asyncio.wait(tasks, timeout=30)\n                if pending:\n                    get_logger().warning(\n                        f\"{len(pending)} callback tasks({[task.get_coro() for task in pending]}) did not complete within timeout\"\n                    )\n\n        return result\n\n    result = asyncio.run(inner())\n    if not result:\n        parser.print_help()\n\n\nif __name__ == '__main__':\n    run()\n"
  },
  {
    "path": "pr_agent/cli_pip.py",
    "content": "from pr_agent import cli\nfrom pr_agent.config_loader import get_settings\n\n\ndef main():\n    # Fill in the following values\n    provider = \"github\"  # GitHub provider\n    user_token = \"...\"  # GitHub user token\n    openai_key = \"...\"  # OpenAI key\n    pr_url = \"...\"  # PR URL, for example 'https://github.com/Codium-ai/pr-agent/pull/809'\n    command = \"/review\"  # Command to run (e.g. '/review', '/describe', '/ask=\"What is the purpose of this PR?\"')\n\n    # Setting the configurations\n    get_settings().set(\"CONFIG.git_provider\", provider)\n    get_settings().set(\"openai.key\", openai_key)\n    get_settings().set(\"github.user_token\", user_token)\n\n    # Run the command. Feedback will appear in GitHub PR comments\n    cli.run_command(pr_url, command)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pr_agent/config_loader.py",
    "content": "from os.path import abspath, dirname, join\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom dynaconf import Dynaconf\nfrom starlette_context import context\n\nPR_AGENT_TOML_KEY = 'pr-agent'\n\ncurrent_dir = dirname(abspath(__file__))\n\ndynconf_kwargs = {'core_loaders': [], # DISABLE default loaders, otherwise will load toml files more than once.\n                           'loaders': ['pr_agent.custom_merge_loader', 'dynaconf.loaders.env_loader'], # Use a custom loader to merge sections, but overwrite their overlapping values. Also support ENV variables to take precedence.\n                           'root_path': join(current_dir, \"settings\"), #Used for Dynaconf.find_file() - So that root path points to settings folder, since we disabled all core loaders.\n                           'merge_enabled': True  # In case more than one file is sent, merge them. Must be set to True, otherwise, a .toml file with section [XYZ] overwrites the entire section of a previous .toml file's [XYZ] and we want it to only overwrite the overlapping fields under such section\n                           }\nglobal_settings = Dynaconf(\n    envvar_prefix=False,\n    load_dotenv=False,  # Security: Don't load .env files\n    settings_files=[join(current_dir, f) for f in [\n        \"settings/configuration.toml\",\n        \"settings/ignore.toml\",\n        \"settings/generated_code_ignore.toml\",\n        \"settings/language_extensions.toml\",\n        \"settings/pr_reviewer_prompts.toml\",\n        \"settings/pr_questions_prompts.toml\",\n        \"settings/pr_line_questions_prompts.toml\",\n        \"settings/pr_description_prompts.toml\",\n        \"settings/code_suggestions/pr_code_suggestions_prompts.toml\",\n        \"settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml\",\n        \"settings/code_suggestions/pr_code_suggestions_reflect_prompts.toml\",\n        \"settings/pr_information_from_user_prompts.toml\",\n        
\"settings/pr_update_changelog_prompts.toml\",\n        \"settings/pr_custom_labels.toml\",\n        \"settings/pr_add_docs.toml\",\n        \"settings/custom_labels.toml\",\n        \"settings/pr_help_prompts.toml\",\n        \"settings/pr_help_docs_prompts.toml\",\n        \"settings/pr_help_docs_headings_prompts.toml\",\n        \"settings/.secrets.toml\",\n        \"settings_prod/.secrets.toml\",\n    ]],\n    **dynconf_kwargs\n)\n\n\ndef get_settings(use_context=False):\n    \"\"\"\n    Retrieves the current settings.\n\n    This function attempts to fetch the settings from the starlette_context's context object. If it fails,\n    it defaults to the global settings defined outside of this function.\n\n    Returns:\n        Dynaconf: The current settings object, either from the context or the global default.\n    \"\"\"\n    try:\n        return context[\"settings\"]\n    except Exception:\n        return global_settings\n\n\n# Add local configuration from pyproject.toml of the project being reviewed\ndef _find_repository_root() -> Optional[Path]:\n    \"\"\"\n    Identify project root directory by recursively searching for the .git directory in the parent directories.\n    \"\"\"\n    cwd = Path.cwd().resolve()\n    no_way_up = False\n    while not no_way_up:\n        no_way_up = cwd == cwd.parent\n        if (cwd / \".git\").is_dir():\n            return cwd\n        cwd = cwd.parent\n    return None\n\n\ndef _find_pyproject() -> Optional[Path]:\n    \"\"\"\n    Search for file pyproject.toml in the repository root.\n    \"\"\"\n    repo_root = _find_repository_root()\n    if repo_root:\n        pyproject = repo_root / \"pyproject.toml\"\n        return pyproject if pyproject.is_file() else None\n    return None\n\n\npyproject_path = _find_pyproject()\nif pyproject_path is not None:\n    get_settings().load_file(pyproject_path, env=f'tool.{PR_AGENT_TOML_KEY}')\n\n\ndef apply_secrets_manager_config():\n    \"\"\"\n    Retrieve configuration from AWS Secrets 
Manager and override existing settings\n    \"\"\"\n    try:\n        # Dynamic imports to avoid circular dependency (secret_providers imports config_loader)\n        from pr_agent.secret_providers import get_secret_provider\n        from pr_agent.log import get_logger\n\n        secret_provider = get_secret_provider()\n        if not secret_provider:\n            return\n\n        if (hasattr(secret_provider, 'get_all_secrets') and\n            get_settings().get(\"CONFIG.SECRET_PROVIDER\") == 'aws_secrets_manager'):\n            try:\n                secrets = secret_provider.get_all_secrets()\n                if secrets:\n                    apply_secrets_to_config(secrets)\n                    get_logger().info(\"Applied AWS Secrets Manager configuration\")\n            except Exception as e:\n                get_logger().error(f\"Failed to apply AWS Secrets Manager config: {e}\")\n    except Exception as e:\n        try:\n            from pr_agent.log import get_logger\n            get_logger().debug(f\"Secret provider not configured: {e}\")\n        except:\n            # Fail completely silently if log module is not available\n            pass\n\n\ndef apply_secrets_to_config(secrets: dict):\n    \"\"\"\n    Apply secret dictionary to configuration\n    \"\"\"\n    try:\n        # Dynamic import to avoid potential circular dependency\n        from pr_agent.log import get_logger\n    except:\n        def get_logger():\n            class DummyLogger:\n                def debug(self, msg): pass\n            return DummyLogger()\n\n    for key, value in secrets.items():\n        if '.' 
in key:  # nested key like \"openai.key\"\n            parts = key.split('.')\n            if len(parts) == 2:\n                section, setting = parts\n                section_upper = section.upper()\n                setting_upper = setting.upper()\n\n                # Set only when no existing value (prioritize environment variables)\n                current_value = get_settings().get(f\"{section_upper}.{setting_upper}\")\n                if current_value is None or current_value == \"\":\n                    get_settings().set(f\"{section_upper}.{setting_upper}\", value)\n                    get_logger().debug(f\"Set {section}.{setting} from AWS Secrets Manager\")\n"
  },
  {
    "path": "pr_agent/custom_merge_loader.py",
    "content": "from pathlib import Path\nimport tomllib #tomllib should be used instead of Py toml for Python 3.11+\n\nfrom jinja2.exceptions import SecurityError\n\nfrom pr_agent.log import get_logger\n\ndef load(obj, env=None, silent=True, key=None, filename=None):\n    \"\"\"\n    Load and merge TOML configuration files into a Dynaconf settings object using a secure, in-house loader.\n    This loader:\n    - Replaces list and dict fields instead of appending/updating (non-default Dynaconf behavior).\n    - Enforces several security checks (e.g., disallows includes/preloads and enforces .toml files).\n    - Supports optional single-key loading.\n    - Supports Dynaconf's fresh_vars feature for dynamic reloading.\n    Args:\n        obj: The Dynaconf settings instance to update.\n        env: The current environment name (upper case). Defaults to 'DEVELOPMENT'. Note: currently unused.\n        silent (bool): If True, suppress exceptions and log warnings/errors instead.\n        key (str | None): Load only this top-level key (section) if provided; otherwise, load all keys from the files.\n        filename (str | None): Custom filename for tests (not used when settings_files are provided).\n    Returns:\n        None\n    \"\"\"\n\n    MAX_TOML_SIZE_IN_BYTES = 100 * 1024 * 1024 # Prevent out of mem. exceptions by limiting to 100 MBs which is sufficient for upto 1M lines\n\n    # Get the list of files to load\n    # TODO: hasattr(obj, 'settings_files') for some reason returns False. Need to use 'settings_file'\n    settings_files = obj.settings_files if hasattr(obj, 'settings_files') else (\n        obj.settings_file) if hasattr(obj, 'settings_file') else []\n    if not settings_files or not isinstance(settings_files, list):\n        get_logger().warning(\"No settings files specified, or missing keys \"\n                             \"(tried looking for 'settings_files' or 'settings_file'), or not a list. 
Skipping loading.\",\n                             artifact={'toml_obj_attributes_names': dir(obj)})\n        return\n\n    # Storage for all loaded data\n    accumulated_data = {}\n\n    # Security: Check for forbidden configuration options\n    if hasattr(obj, 'includes') and obj.includes:\n        if not silent:\n            raise SecurityError(\"Configuration includes forbidden option: 'includes'. Skipping loading.\")\n        get_logger().error(\"Configuration includes forbidden option: 'includes'. Skipping loading.\")\n        return\n    if hasattr(obj, 'preload') and obj.preload:\n        if not silent:\n            raise SecurityError(\"Configuration includes forbidden option: 'preload'. Skipping loading.\")\n        get_logger().error(\"Configuration includes forbidden option: 'preload'. Skipping loading.\")\n        return\n\n    for settings_file in settings_files:\n        try:\n            # Load the TOML file\n            file_path = Path(settings_file)\n            # Security: Only allow .toml files\n            if file_path.suffix.lower() != '.toml':\n                get_logger().warning(f\"Only .toml files are allowed. Skipping: {settings_file}\")\n                continue\n\n            if not file_path.exists():\n                get_logger().warning(f\"Settings file not found: {settings_file}. Skipping it.\")\n                continue\n\n            if file_path.stat().st_size > MAX_TOML_SIZE_IN_BYTES:\n                get_logger().warning(f\"Settings file too large (> {MAX_TOML_SIZE_IN_BYTES} bytes): {settings_file}. Skipping it.\")\n                continue\n\n            with open(file_path, 'rb') as f:\n                file_data = tomllib.load(f)\n\n            # Handle sections (like [config], [default], etc.)\n            if not isinstance(file_data, dict):\n                get_logger().warning(f\"TOML root is not a table in '{settings_file}'. 
Skipping.\")\n                continue\n\n            # Security: Check file contents for forbidden directives\n            validate_file_security(file_data, settings_file)\n\n            for section_name, section_data in file_data.items():\n                if not isinstance(section_data, dict):\n                    get_logger().warning(f\"Section '{section_name}' in '{settings_file}' is not a table. Skipping.\")\n                    continue\n                for field, field_value in section_data.items():\n                    if section_name not in accumulated_data:\n                        accumulated_data[section_name] = {}\n                    accumulated_data[section_name][field] = field_value\n\n        except Exception as e:\n            if not silent:\n                raise e\n            get_logger().exception(f\"Exception loading settings file: {settings_file}. Skipping.\")\n\n    # Update the settings object\n    for k, v in accumulated_data.items():\n        # For fresh_vars support: key parameter is uppercase, but accumulated_data keys are lowercase\n        if key is None or key.upper() == k.upper():\n            obj.set(k, v)\n\ndef validate_file_security(file_data, filename):\n    \"\"\"\n    Validate that the config file does not contain security-sensitive directives.\n\n    Args:\n        file_data: Parsed TOML data representing the configuration contents.\n        filename: The name or path of the file being validated (used for error messages).\n\n    Raises:\n        SecurityError: If forbidden directives are found within the configuration, or if data too nested.\n    \"\"\"\n    MAX_DEPTH = 50\n\n    # Check for forbidden keys\n    # Comprehensive list of forbidden keys with explanations\n    forbidden_keys_to_reasons = {\n        # Include mechanisms - allow loading arbitrary files\n        'dynaconf_include': 'allows including other config files dynamically',\n        'dynaconf_includes': 'allows including other config files dynamically',\n   
     'includes': 'allows including other config files dynamically',\n\n        # Preload mechanisms - allow loading files before main config\n        'preload': 'allows preloading files with potential code execution',\n        'preload_for_dynaconf': 'allows preloading files with potential code execution',\n        'preloads': 'allows preloading files with potential code execution',\n\n        # Merge controls - could be used to manipulate config behavior\n        'dynaconf_merge': 'allows manipulating merge behavior',\n        'dynaconf_merge_enabled': 'allows manipulating merge behavior',\n        'merge_enabled': 'allows manipulating merge behavior',\n\n        # Loader controls - allow changing how configs are loaded\n        'loaders_for_dynaconf': 'allows overriding loaders to execute arbitrary code',\n        'loaders': 'allows overriding loaders to execute arbitrary code',\n        'core_loaders': 'allows overriding core loaders',\n        'core_loaders_for_dynaconf': 'allows overriding core loaders',\n\n        # Settings module - allows loading Python modules\n        'settings_module': 'allows loading Python modules with code execution',\n        'settings_file_for_dynaconf': 'could override settings file location',\n        'settings_files_for_dynaconf': 'could override settings file location',\n\n        # Environment variable prefix manipulation\n        'envvar_prefix': 'allows changing environment variable prefix',\n        'envvar_prefix_for_dynaconf': 'allows changing environment variable prefix',\n    }\n\n    # Check at the top level and in all sections\n    def check_dict(data, path=\"\", max_depth=MAX_DEPTH):\n        if max_depth <= 0:\n            raise SecurityError(\n                f\"Maximum nesting depth exceeded at {path}. 
\"\n                f\"Possible attempt to cause stack overflow.\"\n            )\n\n        for key, value in data.items():\n            full_path = f\"{path}.{key}\" if path else key\n\n            if key.lower() in forbidden_keys_to_reasons:\n                raise SecurityError(\n                    f\"Security error in {filename}: \"\n                    f\"Forbidden directive '{key}' found at {full_path}. Reason: {forbidden_keys_to_reasons[key.lower()]}\"\n                )\n\n            # Recursively check nested dicts\n            if isinstance(value, dict):\n                check_dict(value, path=full_path, max_depth=(max_depth - 1))\n\n    check_dict(file_data, max_depth=MAX_DEPTH)\n"
  },
  {
    "path": "pr_agent/git_providers/__init__.py",
    "content": "from starlette_context import context\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers.azuredevops_provider import AzureDevopsProvider\nfrom pr_agent.git_providers.bitbucket_provider import BitbucketProvider\nfrom pr_agent.git_providers.bitbucket_server_provider import \\\n    BitbucketServerProvider\nfrom pr_agent.git_providers.codecommit_provider import CodeCommitProvider\nfrom pr_agent.git_providers.gerrit_provider import GerritProvider\nfrom pr_agent.git_providers.git_provider import GitProvider\nfrom pr_agent.git_providers.gitea_provider import GiteaProvider\nfrom pr_agent.git_providers.github_provider import GithubProvider\nfrom pr_agent.git_providers.gitlab_provider import GitLabProvider\nfrom pr_agent.git_providers.local_git_provider import LocalGitProvider\nfrom pr_agent.git_providers.gitea_provider import GiteaProvider\n\n_GIT_PROVIDERS = {\n    'github': GithubProvider,\n    'gitlab': GitLabProvider,\n    'bitbucket': BitbucketProvider,\n    'bitbucket_server': BitbucketServerProvider,\n    'azure': AzureDevopsProvider,\n    'codecommit': CodeCommitProvider,\n    'local': LocalGitProvider,\n    'gerrit': GerritProvider,\n    'gitea': GiteaProvider\n}\n\n\ndef get_git_provider():\n    try:\n        provider_id = get_settings().config.git_provider\n    except AttributeError as e:\n        raise ValueError(\"git_provider is a required attribute in the configuration file\") from e\n    if provider_id not in _GIT_PROVIDERS:\n        raise ValueError(f\"Unknown git provider: {provider_id}\")\n    return _GIT_PROVIDERS[provider_id]\n\n\ndef get_git_provider_with_context(pr_url) -> GitProvider:\n    \"\"\"\n    Get a GitProvider instance for the given PR URL. 
If the GitProvider instance is already in the context, return it.\n    \"\"\"\n\n    is_context_env = None\n    try:\n        is_context_env = context.get(\"settings\", None)\n    except Exception:\n        pass  # we are not in a context environment (CLI)\n\n    # check if context[\"git_provider\"][\"pr_url\"] exists\n    if is_context_env and context.get(\"git_provider\", {}).get(\"pr_url\", {}):\n        git_provider = context[\"git_provider\"][\"pr_url\"]\n        # possibly check if the git_provider is still valid, or if some reset is needed\n        # ...\n        return git_provider\n    else:\n        try:\n            provider_id = get_settings().config.git_provider\n            if provider_id not in _GIT_PROVIDERS:\n                raise ValueError(f\"Unknown git provider: {provider_id}\")\n            git_provider = _GIT_PROVIDERS[provider_id](pr_url)\n            if is_context_env:\n                context[\"git_provider\"] = {pr_url: git_provider}\n            return git_provider\n        except Exception as e:\n            raise ValueError(f\"Failed to get git provider for {pr_url}\") from e\n"
  },
  {
    "path": "pr_agent/git_providers/azuredevops_provider.py",
    "content": "from __future__ import annotations\n\nimport os\nfrom typing import Optional, Tuple\nfrom urllib.parse import urlparse\n\nfrom pr_agent.algo.types import EDIT_TYPE, FilePatchInfo\n\nfrom ..algo.file_filter import filter_ignored\nfrom ..algo.language_handler import is_valid_file\nfrom ..algo.utils import (PRDescriptionHeader, clip_tokens,\n                          find_line_number_of_relevant_line_in_file,\n                          load_large_diff)\nfrom ..config_loader import get_settings\nfrom ..log import get_logger\nfrom .git_provider import GitProvider\n\nAZURE_DEVOPS_AVAILABLE = True\nADO_APP_CLIENT_DEFAULT_ID = \"499b84ac-1321-427f-aa17-267ca6975798/.default\"\nMAX_PR_DESCRIPTION_AZURE_LENGTH = 4000-1\n\ntry:\n    # noinspection PyUnresolvedReferences\n    from azure.devops.connection import Connection\n    # noinspection PyUnresolvedReferences\n    from azure.devops.released.git import (Comment, CommentThread, GitPullRequest, GitVersionDescriptor, GitClient, CommentThreadContext, CommentPosition)\n    from azure.devops.released.work_item_tracking import WorkItemTrackingClient\n    # noinspection PyUnresolvedReferences\n    from azure.identity import DefaultAzureCredential\n    from msrest.authentication import BasicAuthentication\nexcept ImportError:\n    AZURE_DEVOPS_AVAILABLE = False\n\n\nclass AzureDevopsProvider(GitProvider):\n\n    def __init__(\n            self, pr_url: Optional[str] = None, incremental: Optional[bool] = False\n    ):\n        if not AZURE_DEVOPS_AVAILABLE:\n            raise ImportError(\n                \"Azure DevOps provider is not available. 
Please install the required dependencies.\"\n            )\n\n        self.azure_devops_client, self.azure_devops_board_client = self._get_azure_devops_client()\n        self.diff_files = None\n        self.workspace_slug = None\n        self.repo_slug = None\n        self.repo = None\n        self.pr_num = None\n        self.pr = None\n        self.temp_comments = []\n        self.incremental = incremental\n        if pr_url:\n            self.set_pr(pr_url)\n\n    def publish_code_suggestions(self, code_suggestions: list) -> bool:\n        \"\"\"\n        Publishes code suggestions as comments on the PR.\n        \"\"\"\n        post_parameters_list = []\n        status = get_settings().azure_devops.get(\"default_comment_status\", \"closed\")\n        for suggestion in code_suggestions:\n            body = suggestion['body']\n            relevant_file = suggestion['relevant_file']\n            relevant_lines_start = suggestion['relevant_lines_start']\n            relevant_lines_end = suggestion['relevant_lines_end']\n\n            if not relevant_lines_start or relevant_lines_start == -1:\n                get_logger().warning(\n                    f\"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}\")\n                continue\n\n            if relevant_lines_end < relevant_lines_start:\n                get_logger().warning(f\"Failed to publish code suggestion, \"\n                                       f\"relevant_lines_end is {relevant_lines_end} and \"\n                                       f\"relevant_lines_start is {relevant_lines_start}\")\n                continue\n\n            thread_context = CommentThreadContext(\n                file_path=relevant_file,\n                right_file_start=CommentPosition(offset=1, line=relevant_lines_start),\n                right_file_end=CommentPosition(offset=1, line=relevant_lines_end))\n            comment = Comment(content=body, comment_type=1)\n            thread = 
CommentThread(comments=[comment], thread_context=thread_context, status=status)\n            try:\n                self.azure_devops_client.create_thread(\n                    comment_thread=thread,\n                    project=self.workspace_slug,\n                    repository_id=self.repo_slug,\n                    pull_request_id=self.pr_num\n                )\n            except Exception as e:\n                get_logger().error(f\"Azure failed to publish code suggestion, error: {e}\", suggestion=suggestion)\n        return True\n\n    def reply_to_comment_from_comment_id(self, comment_id: int, body: str, is_temporary: bool = False) -> Comment:\n        # comment_id is actually thread_id\n        return self.reply_to_thread(comment_id, body, is_temporary)\n\n    def get_pr_description_full(self) -> str:\n        return self.pr.description\n\n    def edit_comment(self, comment: Comment, body: str):\n        try:\n            self.azure_devops_client.update_comment(\n                repository_id=self.repo_slug,\n                pull_request_id=self.pr_num,\n                thread_id=comment.thread_id,\n                comment_id=comment.id,\n                comment=Comment(content=body),\n                project=self.workspace_slug,\n            )\n        except Exception as e:\n            get_logger().exception(f\"Failed to edit comment, error: {e}\")\n\n    def remove_comment(self, comment: Comment):\n        try:\n            self.azure_devops_client.delete_comment(\n                repository_id=self.repo_slug,\n                pull_request_id=self.pr_num,\n                thread_id=comment.thread_id,\n                comment_id=comment.id,\n                project=self.workspace_slug,\n            )\n        except Exception as e:\n            get_logger().exception(f\"Failed to remove comment, error: {e}\")\n\n    def publish_labels(self, pr_types):\n        try:\n            for pr_type in pr_types:\n                
self.azure_devops_client.create_pull_request_label(\n                    label={\"name\": pr_type},\n                    project=self.workspace_slug,\n                    repository_id=self.repo_slug,\n                    pull_request_id=self.pr_num,\n                )\n        except Exception as e:\n            get_logger().warning(f\"Failed to publish labels, error: {e}\")\n\n    def get_pr_labels(self, update=False):\n        try:\n            labels = self.azure_devops_client.get_pull_request_labels(\n                project=self.workspace_slug,\n                repository_id=self.repo_slug,\n                pull_request_id=self.pr_num,\n            )\n            return [label.name for label in labels]\n        except Exception as e:\n            get_logger().exception(f\"Failed to get labels, error: {e}\")\n            return []\n\n    def is_supported(self, capability: str) -> bool:\n        return True\n\n    def set_pr(self, pr_url: str):\n        self.pr_url = pr_url\n        self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url)\n        self.pr = self._get_pr()\n\n    def get_repo_settings(self):\n        try:\n            contents = self.azure_devops_client.get_item_content(\n                repository_id=self.repo_slug,\n                project=self.workspace_slug,\n                download=False,\n                include_content_metadata=False,\n                include_content=True,\n                path=\".pr_agent.toml\",\n            )\n            return list(contents)[0]\n        except Exception as e:\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().error(f\"Failed to get repo settings, error: {e}\")\n            return \"\"\n\n    def get_files(self):\n        files = []\n        for i in self.azure_devops_client.get_pull_request_commits(\n                project=self.workspace_slug,\n                repository_id=self.repo_slug,\n                pull_request_id=self.pr_num,\n    
    ):\n            changes_obj = self.azure_devops_client.get_changes(\n                project=self.workspace_slug,\n                repository_id=self.repo_slug,\n                commit_id=i.commit_id,\n            )\n\n            for c in changes_obj.changes:\n                files.append(c[\"item\"][\"path\"])\n        return list(set(files))\n\n    def get_diff_files(self) -> list[FilePatchInfo]:\n        try:\n\n            if self.diff_files:\n                return self.diff_files\n\n            base_sha = self.pr.last_merge_target_commit\n            head_sha = self.pr.last_merge_commit\n\n            # Get PR iterations\n            iterations = self.azure_devops_client.get_pull_request_iterations(\n                repository_id=self.repo_slug,\n                pull_request_id=self.pr_num,\n                project=self.workspace_slug\n            )\n            changes = None\n            if iterations:\n                iteration_id = iterations[-1].id  # Get the last iteration (most recent changes)\n\n                # Get changes for the iteration\n                changes = self.azure_devops_client.get_pull_request_iteration_changes(\n                    repository_id=self.repo_slug,\n                    pull_request_id=self.pr_num,\n                    iteration_id=iteration_id,\n                    project=self.workspace_slug\n                )\n            diff_files = []\n            diffs = []\n            diff_types = {}\n            if changes:\n                for change in changes.change_entries:\n                    item = change.additional_properties.get('item', {})\n                    path = item.get('path', None)\n                    if path:\n                        diffs.append(path)\n                        diff_types[path] = change.additional_properties.get('changeType', 'Unknown')\n\n            # wrong implementation - gets all the files that were changed in any commit in the PR\n            # commits = 
self.azure_devops_client.get_pull_request_commits(\n            #     project=self.workspace_slug,\n            #     repository_id=self.repo_slug,\n            #     pull_request_id=self.pr_num,\n            # )\n            #\n            # diff_files = []\n            # diffs = []\n            # diff_types = {}\n\n            # for c in commits:\n            #     changes_obj = self.azure_devops_client.get_changes(\n            #         project=self.workspace_slug,\n            #         repository_id=self.repo_slug,\n            #         commit_id=c.commit_id,\n            #     )\n            #     for i in changes_obj.changes:\n            #         if i[\"item\"][\"gitObjectType\"] == \"tree\":\n            #             continue\n            #         diffs.append(i[\"item\"][\"path\"])\n            #         diff_types[i[\"item\"][\"path\"]] = i[\"changeType\"]\n            #\n            # diffs = list(set(diffs))\n\n            diffs_original = diffs\n            diffs = filter_ignored(diffs_original, 'azure')\n            if diffs_original != diffs:\n                try:\n                    get_logger().info(f\"Filtered out [ignore] files for pull request:\", extra=\n                    {\"files\": diffs_original,  # diffs is just a list of names\n                     \"filtered_files\": diffs})\n                except Exception:\n                    pass\n\n            invalid_files_names = []\n            for file in diffs:\n                if not is_valid_file(file):\n                    invalid_files_names.append(file)\n                    continue\n\n                version = GitVersionDescriptor(\n                    version=head_sha.commit_id, version_type=\"commit\"\n                )\n                try:\n                    new_file_content_str = self.azure_devops_client.get_item(\n                        repository_id=self.repo_slug,\n                        path=file,\n                        project=self.workspace_slug,\n                
        version_descriptor=version,\n                        download=False,\n                        include_content=True,\n                    )\n\n                    new_file_content_str = new_file_content_str.content\n                except Exception as error:\n                    get_logger().error(f\"Failed to retrieve new file content of {file} at version {version}\", error=error)\n                    # get_logger().error(\n                    #     \"Failed to retrieve new file content of %s at version %s. Error: %s\",\n                    #     file,\n                    #     version,\n                    #     str(error),\n                    # )\n                    new_file_content_str = \"\"\n\n                edit_type = EDIT_TYPE.MODIFIED\n                if diff_types[file] == \"add\":\n                    edit_type = EDIT_TYPE.ADDED\n                elif diff_types[file] == \"delete\":\n                    edit_type = EDIT_TYPE.DELETED\n                elif \"rename\" in diff_types[file]: # diff_type can be `rename` | `edit, rename`\n                    edit_type = EDIT_TYPE.RENAMED\n\n                version = GitVersionDescriptor(\n                    version=base_sha.commit_id, version_type=\"commit\"\n                )\n                if edit_type == EDIT_TYPE.ADDED or edit_type == EDIT_TYPE.RENAMED:\n                    original_file_content_str = \"\"\n                else:\n                    try:\n                        original_file_content_str = self.azure_devops_client.get_item(\n                            repository_id=self.repo_slug,\n                            path=file,\n                            project=self.workspace_slug,\n                            version_descriptor=version,\n                            download=False,\n                            include_content=True,\n                        )\n                        original_file_content_str = original_file_content_str.content\n                    except Exception 
as error:\n                        get_logger().error(f\"Failed to retrieve original file content of {file} at version {version}\", error=error)\n                        original_file_content_str = \"\"\n\n                patch = load_large_diff(\n                    file, new_file_content_str, original_file_content_str, show_warning=False\n                ).rstrip()\n\n                # count number of lines added and removed\n                patch_lines = patch.splitlines(keepends=True)\n                num_plus_lines = len([line for line in patch_lines if line.startswith('+')])\n                num_minus_lines = len([line for line in patch_lines if line.startswith('-')])\n\n                diff_files.append(\n                    FilePatchInfo(\n                        original_file_content_str,\n                        new_file_content_str,\n                        patch=patch,\n                        filename=file,\n                        edit_type=edit_type,\n                        num_plus_lines=num_plus_lines,\n                        num_minus_lines=num_minus_lines,\n                    )\n                )\n            get_logger().info(f\"Invalid files: {invalid_files_names}\")\n\n            self.diff_files = diff_files\n            return diff_files\n        except Exception as e:\n            get_logger().exception(f\"Failed to get diff files, error: {e}\")\n            return []\n\n    def publish_comment(self, pr_comment: str, is_temporary: bool = False, thread_context=None) -> Comment:\n        if is_temporary and not get_settings().config.publish_output_progress:\n            get_logger().debug(f\"Skipping publish_comment for temporary comment: {pr_comment}\")\n            return None\n        comment = Comment(content=pr_comment)\n\n        status = get_settings().azure_devops.get(\"default_comment_status\", \"closed\")\n        thread = CommentThread(comments=[comment], thread_context=thread_context, status=status)\n        thread_response = 
self.azure_devops_client.create_thread(\n            comment_thread=thread,\n            project=self.workspace_slug,\n            repository_id=self.repo_slug,\n            pull_request_id=self.pr_num,\n        )\n        created_comment = thread_response.comments[0]\n        created_comment.thread_id = thread_response.id\n        if is_temporary:\n            self.temp_comments.append(created_comment)\n        return created_comment\n\n    def publish_persistent_comment(self, pr_comment: str,\n                                   initial_header: str,\n                                   update_header: bool = True,\n                                   name='review',\n                                   final_update_message=True):\n        return self.publish_persistent_comment_full(pr_comment, initial_header, update_header, name, final_update_message)\n\n    def publish_description(self, pr_title: str, pr_body: str):\n        if len(pr_body) > MAX_PR_DESCRIPTION_AZURE_LENGTH:\n\n            usage_guide_text='<details> <summary><strong>✨ Describe tool usage guide:</strong></summary><hr>'\n            ind = pr_body.find(usage_guide_text)\n            if ind != -1:\n                pr_body = pr_body[:ind]\n\n            if len(pr_body) > MAX_PR_DESCRIPTION_AZURE_LENGTH:\n                changes_walkthrough_text = PRDescriptionHeader.FILE_WALKTHROUGH.value\n                ind = pr_body.find(changes_walkthrough_text)\n                if ind != -1:\n                    pr_body = pr_body[:ind]\n\n            if len(pr_body) > MAX_PR_DESCRIPTION_AZURE_LENGTH:\n                trunction_message = \" ... 
(description truncated due to length limit)\"\n                pr_body = pr_body[:MAX_PR_DESCRIPTION_AZURE_LENGTH - len(trunction_message)] + trunction_message\n                get_logger().warning(\"PR description was truncated due to length limit\")\n        try:\n            updated_pr = GitPullRequest()\n            updated_pr.title = pr_title\n            updated_pr.description = pr_body\n            self.azure_devops_client.update_pull_request(\n                project=self.workspace_slug,\n                repository_id=self.repo_slug,\n                pull_request_id=self.pr_num,\n                git_pull_request_to_update=updated_pr,\n            )\n        except Exception as e:\n            get_logger().exception(\n                f\"Could not update pull request {self.pr_num} description: {e}\"\n            )\n\n    def remove_initial_comment(self):\n        try:\n            for comment in self.temp_comments:\n                self.remove_comment(comment)\n        except Exception as e:\n            get_logger().exception(f\"Failed to remove temp comments, error: {e}\")\n\n    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):\n        self.publish_inline_comments([self.create_inline_comment(body, relevant_file, relevant_line_in_file)])\n\n    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,\n                              absolute_position: int = None):\n        position, absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(),\n                                                                                relevant_file.strip('`'),\n                                                                                relevant_line_in_file,\n                                                                                absolute_position)\n        if position == -1:\n            if get_settings().config.verbosity_level >= 
2:\n                get_logger().info(f\"Could not find position for {relevant_file} {relevant_line_in_file}\")\n            subject_type = \"FILE\"\n        else:\n            subject_type = \"LINE\"\n        path = relevant_file.strip()\n        return dict(body=body, path=path, position=position, absolute_position=absolute_position) if subject_type == \"LINE\" else {}\n\n    def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False):\n            overall_success = True\n            for comment in comments:\n                try:\n                    self.publish_comment(comment[\"body\"],\n                                        thread_context={\n                                            \"filePath\": comment[\"path\"],\n                                            \"rightFileStart\": {\n                                                \"line\": comment[\"absolute_position\"],\n                                                \"offset\": comment[\"position\"],\n                                            },\n                                            \"rightFileEnd\": {\n                                                \"line\": comment[\"absolute_position\"],\n                                                \"offset\": comment[\"position\"],\n                                            },\n                                        })\n                    if get_settings().config.verbosity_level >= 2:\n                        get_logger().info(\n                            f\"Published code suggestion on {self.pr_num} at {comment['path']}\"\n                        )\n                except Exception as e:\n                    if get_settings().config.verbosity_level >= 2:\n                        get_logger().error(f\"Failed to publish code suggestion, error: {e}\")\n                    overall_success = False\n            return overall_success\n\n    def get_title(self):\n        return self.pr.title\n\n    def get_languages(self):\n  
      languages = []\n        files = self.azure_devops_client.get_items(\n            project=self.workspace_slug,\n            repository_id=self.repo_slug,\n            recursion_level=\"Full\",\n            include_content_metadata=True,\n            include_links=False,\n            download=False,\n        )\n        for f in files:\n            if f.git_object_type == \"blob\":\n                file_name, file_extension = os.path.splitext(f.path)\n                languages.append(file_extension[1:])\n\n        extension_counts = {}\n        for ext in languages:\n            if ext != \"\":\n                extension_counts[ext] = extension_counts.get(ext, 0) + 1\n\n        total_extensions = sum(extension_counts.values())\n\n        extension_percentages = {\n            ext: (count / total_extensions) * 100\n            for ext, count in extension_counts.items()\n        }\n\n        return extension_percentages\n\n    def get_pr_branch(self):\n        pr_info = self.azure_devops_client.get_pull_request_by_id(\n            project=self.workspace_slug, pull_request_id=self.pr_num\n        )\n        source_branch = pr_info.source_ref_name.split(\"/\")[-1]\n        return source_branch\n\n    def get_user_id(self):\n        return 0\n\n    def get_issue_comments(self) -> list[Comment]:\n        threads = self.azure_devops_client.get_threads(repository_id=self.repo_slug, pull_request_id=self.pr_num, project=self.workspace_slug)\n        threads.reverse()\n        comment_list = []\n        for thread in threads:\n            for comment in thread.comments:\n                if comment.content and comment not in comment_list:\n                    comment.body = comment.content\n                    comment.thread_id = thread.id\n                    comment_list.append(comment)\n        return comment_list\n\n    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:\n        return True\n\n    def remove_reaction(self, 
issue_comment_id: int, reaction_id: int) -> bool:\n        return True\n\n    def set_like(self, thread_id: int, comment_id: int, create: bool = True):\n        if create:\n            self.azure_devops_client.create_like(self.repo_slug, self.pr_num, thread_id, comment_id, project=self.workspace_slug)\n        else:\n            self.azure_devops_client.delete_like(self.repo_slug, self.pr_num, thread_id, comment_id, project=self.workspace_slug)\n            \n    def set_thread_status(self, thread_id: int, status: str):\n        try:\n            self.azure_devops_client.update_thread(CommentThread(status=status), self.repo_slug, self.pr_num, thread_id, self.workspace_slug)\n        except Exception as e:\n            get_logger().exception(f\"Failed to set thread status, error: {e}\")\n            \n    def reply_to_thread(self, thread_id: int, body: str, is_temporary: bool = False) -> Comment:\n        try:\n            comment = Comment(content=body)\n            response = self.azure_devops_client.create_comment(comment, self.repo_slug, self.pr_num, thread_id, self.workspace_slug)\n            response.thread_id = thread_id\n            if is_temporary:\n                self.temp_comments.append(response)\n            return response\n        except Exception as e:\n            get_logger().exception(f\"Failed to reply to thread, error: {e}\")\n    \n    def get_thread_context(self, thread_id: int) -> CommentThreadContext:\n        try:\n            thread = self.azure_devops_client.get_pull_request_thread(self.repo_slug, self.pr_num, thread_id, self.workspace_slug)\n            return thread.thread_context\n        except Exception as e:\n            get_logger().exception(f\"Failed to set thread status, error: {e}\")\n    \n    @staticmethod\n    def _parse_pr_url(pr_url: str) -> Tuple[str, str, int]:\n        parsed_url = urlparse(pr_url)\n        path_parts = parsed_url.path.strip(\"/\").split(\"/\")\n        num_parts = len(path_parts)\n        if 
num_parts < 5:\n            raise ValueError(\"The provided URL has insufficient path components for an Azure DevOps PR URL\")\n        \n        # Verify that the second-to-last path component is \"pullrequest\"\n        if path_parts[num_parts - 2] != \"pullrequest\":\n            raise ValueError(\"The provided URL does not follow the expected Azure DevOps PR URL format\")\n\n        workspace_slug = path_parts[num_parts - 5]\n        repo_slug = path_parts[num_parts - 3]\n        try:\n            pr_number = int(path_parts[num_parts - 1])\n        except ValueError as e:\n            raise ValueError(\"Cannot parse PR number in the provided URL\") from e\n\n        return workspace_slug, repo_slug, pr_number\n\n    @staticmethod\n    def _get_azure_devops_client() -> Tuple[GitClient, WorkItemTrackingClient]:\n        org = get_settings().azure_devops.get(\"org\", None)\n        pat = get_settings().azure_devops.get(\"pat\", None)\n\n        if not org:\n            raise ValueError(\"Azure DevOps organization is required\")\n\n        if pat:\n            auth_token = pat\n        else:\n            try:\n                # try to use azure default credentials\n                # see https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python\n                # for usage and env var configuration of user-assigned managed identity, local machine auth etc.\n                get_logger().info(\"No PAT found in settings, trying to use Azure Default Credentials.\")\n                credentials = DefaultAzureCredential()\n                accessToken = credentials.get_token(ADO_APP_CLIENT_DEFAULT_ID)\n                auth_token = accessToken.token\n            except Exception as e:\n                get_logger().error(f\"No PAT found in settings, and Azure Default Authentication failed, error: {e}\")\n                raise\n\n        credentials = BasicAuthentication(\"\", auth_token)\n        azure_devops_connection = 
Connection(base_url=org, creds=credentials)\n        azure_devops_client = azure_devops_connection.clients.get_git_client()\n        azure_devops_board_client = azure_devops_connection.clients.get_work_item_tracking_client()\n\n        return azure_devops_client, azure_devops_board_client\n\n    def _get_repo(self):\n        if self.repo is None:\n            self.repo = self.azure_devops_client.get_repository(\n                project=self.workspace_slug, repository_id=self.repo_slug\n            )\n        return self.repo\n\n    def _get_pr(self):\n        self.pr = self.azure_devops_client.get_pull_request_by_id(\n            pull_request_id=self.pr_num, project=self.workspace_slug\n        )\n        return self.pr\n\n    def get_commit_messages(self):\n        return \"\"  # not implemented yet\n\n    def get_pr_id(self):\n        try:\n            pr_id = f\"{self.workspace_slug}/{self.repo_slug}/{self.pr_num}\"\n            return pr_id\n        except Exception as e:\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Failed to get PR id, error: {e}\")\n            return \"\"\n\n    def publish_file_comments(self, file_comments: list) -> bool:\n        pass\n\n    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:\n        return self.pr_url+f\"?_a=files&path={relevant_file}\"\n\n    def get_comment_url(self, comment) -> str:\n        return self.pr_url + \"?discussionId=\" + str(comment.thread_id)\n\n    def get_latest_commit_url(self) -> str:\n        commits = self.azure_devops_client.get_pull_request_commits(self.repo_slug, self.pr_num, self.workspace_slug)\n        last = commits[0]\n        url = self.azure_devops_client.normalized_url + \"/\" + self.workspace_slug + \"/_git/\" + self.repo_slug + \"/commit/\" + last.commit_id\n        return url\n\n    def get_linked_work_items(self) -> list:\n        \"\"\"\n        Get linked work items from the 
PR.\n        \"\"\"\n        try:\n            work_items = self.azure_devops_client.get_pull_request_work_item_refs(\n                project=self.workspace_slug,\n                repository_id=self.repo_slug,\n                pull_request_id=self.pr_num,\n            )\n            ids = [work_item.id for work_item in work_items]\n            if not work_items:\n                return []\n            items = self.get_work_items(ids)\n            return items\n        except Exception as e:\n            get_logger().exception(f\"Failed to get linked work items, error: {e}\")\n            return []\n\n    def get_work_items(self, work_item_ids: list) -> list:\n        \"\"\"\n        Get work items by their IDs.\n        \"\"\"\n        try:\n            raw_work_items = self.azure_devops_board_client.get_work_items(\n                project=self.workspace_slug,\n                ids=work_item_ids,\n            )\n            work_items = []\n            for item in raw_work_items:\n                work_items.append(\n                    {\n                        \"id\": item.id,\n                        \"title\": item.fields.get(\"System.Title\", \"\"),\n                        \"url\": item.url,\n                        \"body\": item.fields.get(\"System.Description\", \"\"),\n                        \"acceptance_criteria\": item.fields.get(\n                            \"Microsoft.VSTS.Common.AcceptanceCriteria\", \"\"\n                        ),\n                        \"tags\": item.fields.get(\"System.Tags\", \"\").split(\"; \") if item.fields.get(\"System.Tags\") else [],\n                    }\n                )\n            return work_items\n        except Exception as e:\n            get_logger().exception(f\"Failed to get work items, error: {e}\")\n            return []\n"
  },
  {
    "path": "pr_agent/git_providers/bitbucket_provider.py",
    "content": "import difflib\nimport json\nimport re\nfrom typing import Optional, Tuple\nfrom urllib.parse import urlparse\n\nimport requests\nfrom atlassian.bitbucket import Cloud\nfrom starlette_context import context\n\nfrom pr_agent.algo.types import EDIT_TYPE, FilePatchInfo\n\nfrom ..algo.file_filter import filter_ignored\nfrom ..algo.language_handler import is_valid_file\nfrom ..algo.utils import find_line_number_of_relevant_line_in_file\nfrom ..config_loader import get_settings\nfrom ..log import get_logger\nfrom .git_provider import MAX_FILES_ALLOWED_FULL, GitProvider\n\n\ndef _gef_filename(diff):\n    if diff.new.path:\n        return diff.new.path\n    return diff.old.path\n\n\nclass BitbucketProvider(GitProvider):\n    def __init__(\n        self, pr_url: Optional[str] = None, incremental: Optional[bool] = False\n    ):\n        s = requests.Session()\n        s.headers[\"Content-Type\"] = \"application/json\"\n\n        self.auth_type = get_settings().get(\"BITBUCKET.AUTH_TYPE\", \"bearer\")\n\n        try:\n            def get_token(token_name, auth_type_name):\n                token = get_settings().get(f\"BITBUCKET.{token_name.upper()}\", None)\n                if not token:\n                    raise ValueError(f\"{auth_type_name} auth requires a token\")\n                return token\n\n            if self.auth_type == \"basic\":\n                self.basic_token = get_token(\"basic_token\", \"Basic\")\n                s.headers[\"Authorization\"] = f\"Basic {self.basic_token}\"\n            elif self.auth_type == \"bearer\":\n                try:\n                    self.bearer_token = context.get(\"bitbucket_bearer_token\", None)\n                except:\n                    self.bearer_token = None\n\n                if not self.bearer_token:\n                    self.bearer_token = get_token(\"bearer_token\", \"Bearer\")\n                s.headers[\"Authorization\"] = f\"Bearer {self.bearer_token}\"\n            else:\n                 
raise ValueError(f\"Unsupported auth_type: {self.auth_type}\")\n\n        except Exception as e:\n            get_logger().exception(f\"Failed to initialize Bitbucket authentication: {e}\")\n            raise\n\n        self.headers = s.headers\n        self.bitbucket_client = Cloud(session=s)\n        self.max_comment_length = 31000\n        self.workspace_slug = None\n        self.repo_slug = None\n        self.repo = None\n        self.pr_num = None\n        self.pr = None\n        self.pr_url = pr_url\n        self.temp_comments = []\n        self.incremental = incremental\n        self.diff_files = None\n        self.git_files = None\n        if pr_url:\n            self.set_pr(pr_url)\n        self.bitbucket_comment_api_url = self.pr._BitbucketBase__data[\"links\"][\"comments\"][\"href\"]\n        self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data[\"links\"]['self']['href']\n\n    def get_repo_settings(self):\n        try:\n            url = (f\"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/\"\n                   f\"{self.pr.destination_branch}/.pr_agent.toml\")\n            response = requests.request(\"GET\", url, headers=self.headers)\n            if response.status_code == 404:  # not found\n                return \"\"\n            contents = response.text.encode('utf-8')\n            return contents\n        except Exception:\n            return \"\"\n\n    def get_git_repo_url(self, pr_url: str=None) -> str: #bitbucket does not support issue url, so ignore param\n        try:\n            parsed_url = urlparse(self.pr_url)\n            return f\"{parsed_url.scheme}://{parsed_url.netloc}/{self.workspace_slug}/{self.repo_slug}.git\"\n        except Exception as e:\n            get_logger().exception(f\"url is not a valid merge requests url: {self.pr_url}\")\n            return \"\"\n\n    # Given a git repo url, return prefix and suffix of the provider in order to view a given file belonging to that 
repo.\n    # Example: git clone git clone https://bitbucket.org/codiumai/pr-agent.git and branch: main -> prefix: \"https://bitbucket.org/codiumai/pr-agent/src/main\", suffix: \"\"\n    # In case git url is not provided, provider will use PR context (which includes branch) to determine the prefix and suffix.\n    def get_canonical_url_parts(self, repo_git_url:str=None, desired_branch:str=None) -> Tuple[str, str]:\n        scheme_and_netloc = None\n        if repo_git_url:\n            parsed_git_url = urlparse(repo_git_url)\n            scheme_and_netloc = parsed_git_url.scheme + \"://\" + parsed_git_url.netloc\n            repo_path = parsed_git_url.path.split('.git')[0][1:] #/<workspace>/<repo>.git -> <workspace>/<repo>\n            if repo_path.count('/') != 1:\n                get_logger().error(f\"repo_git_url is not a valid git repo url: {repo_git_url}\")\n                return (\"\", \"\")\n            workspace_name, project_name = repo_path.split('/')\n        else:\n            desired_branch = self.get_repo_default_branch()\n            parsed_pr_url = urlparse(self.pr_url)\n            scheme_and_netloc = parsed_pr_url.scheme + \"://\" + parsed_pr_url.netloc\n            workspace_name, project_name = (self.workspace_slug, self.repo_slug)\n        prefix = f\"{scheme_and_netloc}/{workspace_name}/{project_name}/src/{desired_branch}\"\n        suffix = \"\" #None\n        return (prefix, suffix)\n\n\n    def publish_code_suggestions(self, code_suggestions: list) -> bool:\n        \"\"\"\n        Publishes code suggestions as comments on the PR.\n        \"\"\"\n        post_parameters_list = []\n        for suggestion in code_suggestions:\n            body = suggestion[\"body\"]\n            original_suggestion = suggestion.get('original_suggestion', None)  # needed for diff code\n            if original_suggestion:\n                try:\n                    existing_code = original_suggestion['existing_code'].rstrip() + \"\\n\"\n                    
improved_code = original_suggestion['improved_code'].rstrip() + \"\\n\"\n                    diff = difflib.unified_diff(existing_code.split('\\n'),\n                                                improved_code.split('\\n'), n=999)\n                    patch_orig = \"\\n\".join(diff)\n                    patch = \"\\n\".join(patch_orig.splitlines()[5:]).strip('\\n')\n                    diff_code = f\"\\n\\n```diff\\n{patch.rstrip()}\\n```\"\n                    # replace ```suggestion ... ``` with diff_code, using regex:\n                    body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)\n                except Exception as e:\n                    get_logger().exception(f\"Bitbucket failed to get diff code for publishing, error: {e}\")\n                    continue\n\n            relevant_file = suggestion[\"relevant_file\"]\n            relevant_lines_start = suggestion[\"relevant_lines_start\"]\n            relevant_lines_end = suggestion[\"relevant_lines_end\"]\n\n            if not relevant_lines_start or relevant_lines_start == -1:\n                get_logger().exception(\n                    f\"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}\"\n                )\n                continue\n\n            if relevant_lines_end < relevant_lines_start:\n                get_logger().exception(\n                    f\"Failed to publish code suggestion, \"\n                    f\"relevant_lines_end is {relevant_lines_end} and \"\n                    f\"relevant_lines_start is {relevant_lines_start}\"\n                )\n                continue\n\n            if relevant_lines_end > relevant_lines_start:\n                post_parameters = {\n                    \"body\": body,\n                    \"path\": relevant_file,\n                    \"line\": relevant_lines_end,\n                    \"start_line\": relevant_lines_start,\n                    \"start_side\": \"RIGHT\",\n                }\n           
 else:  # API is different for single line comments\n                post_parameters = {\n                    \"body\": body,\n                    \"path\": relevant_file,\n                    \"line\": relevant_lines_start,\n                    \"side\": \"RIGHT\",\n                }\n            post_parameters_list.append(post_parameters)\n\n        try:\n            self.publish_inline_comments(post_parameters_list)\n            return True\n        except Exception as e:\n            get_logger().error(f\"Bitbucket failed to publish code suggestion, error: {e}\")\n            return False\n\n    def publish_file_comments(self, file_comments: list) -> bool:\n        pass\n\n    def is_supported(self, capability: str) -> bool:\n        if capability in ['get_issue_comments', 'publish_inline_comments', 'get_labels', 'gfm_markdown',\n                            'publish_file_comments']:\n            return False\n        return True\n\n    def set_pr(self, pr_url: str):\n        self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url)\n        self.pr = self._get_pr()\n\n    def get_files(self):\n        try:\n            git_files = context.get(\"git_files\", None)\n            if git_files:\n                return git_files\n            self.git_files = [_gef_filename(diff) for diff in self.pr.diffstat()]\n            context[\"git_files\"] = self.git_files\n            return self.git_files\n        except Exception:\n            if not self.git_files:\n                self.git_files = [_gef_filename(diff) for diff in self.pr.diffstat()]\n            return self.git_files\n\n    def get_diff_files(self) -> list[FilePatchInfo]:\n        if self.diff_files:\n            return self.diff_files\n\n        diffs_original = list(self.pr.diffstat())\n        diffs = filter_ignored(diffs_original, 'bitbucket')\n        if diffs != diffs_original:\n            try:\n                names_original = [d.new.path for d in diffs_original]\n              
  names_kept = [d.new.path for d in diffs]\n                names_filtered = list(set(names_original) - set(names_kept))\n                get_logger().info(f\"Filtered out [ignore] files for PR\", extra={\n                    'original_files': names_original,\n                    'names_kept': names_kept,\n                    'names_filtered': names_filtered\n\n                })\n            except Exception as e:\n                pass\n\n        # get the pr patches\n        try:\n            pr_patches = self.pr.diff()\n        except Exception as e:\n            # Try different encodings if UTF-8 fails\n            get_logger().warning(f\"Failed to decode PR patch with utf-8, error: {e}\")\n            encodings_to_try = ['iso-8859-1', 'latin-1', 'ascii', 'utf-16']\n            pr_patches = None\n            for encoding in encodings_to_try:\n                try:\n                    pr_patches = self.pr.diff(encoding=encoding)\n                    get_logger().info(f\"Successfully decoded PR patch with encoding {encoding}\")\n                    break\n                except UnicodeDecodeError:\n                    continue\n\n            if pr_patches is None:\n                raise ValueError(f\"Failed to decode PR patch with encodings {encodings_to_try}\")\n\n        diff_split = [\"diff --git\" + x for x in pr_patches.split(\"diff --git\") if x.strip()]\n        # filter all elements of 'diff_split' that are of indices in 'diffs_original' that are not in 'diffs'\n        if len(diff_split) > len(diffs) and len(diffs_original) == len(diff_split):\n            diff_split = [diff_split[i] for i in range(len(diff_split)) if diffs_original[i] in diffs]\n        if len(diff_split) != len(diffs):\n            get_logger().error(f\"Error - failed to split the diff into {len(diffs)} parts\")\n            return []\n        # bitbucket diff has a header for each file, we need to remove it:\n        # \"diff --git filename\n        # new file mode 100644 (optional)\n 
       #  index caa56f0..61528d7 100644\n        #   --- a/pr_agent/cli_pip.py\n        #  +++ b/pr_agent/cli_pip.py\n        #   @@ -... @@\"\n        for i, _ in enumerate(diff_split):\n            diff_split_lines = diff_split[i].splitlines()\n            if (len(diff_split_lines) >= 6) and \\\n                    ((diff_split_lines[2].startswith(\"---\") and\n                      diff_split_lines[3].startswith(\"+++\") and\n                      diff_split_lines[4].startswith(\"@@\")) or\n                     (diff_split_lines[3].startswith(\"---\") and  # new or deleted file\n                      diff_split_lines[4].startswith(\"+++\") and\n                      diff_split_lines[5].startswith(\"@@\"))):\n                diff_split[i] = \"\\n\".join(diff_split_lines[4:])\n            else:\n                if diffs[i].data.get('lines_added', 0) == 0 and diffs[i].data.get('lines_removed', 0) == 0:\n                    diff_split[i] = \"\"\n                elif len(diff_split_lines) <= 3:\n                    diff_split[i] = \"\"\n                    get_logger().info(f\"Disregarding empty diff for file {_gef_filename(diffs[i])}\")\n                else:\n                    get_logger().warning(f\"Bitbucket failed to get diff for file {_gef_filename(diffs[i])}\")\n                    diff_split[i] = \"\"\n\n        invalid_files_names = []\n        diff_files = []\n        counter_valid = 0\n        # get full files\n        for index, diff in enumerate(diffs):\n            file_path = _gef_filename(diff)\n            if not is_valid_file(file_path):\n                invalid_files_names.append(file_path)\n                continue\n\n            try:\n                counter_valid += 1\n                if get_settings().get(\"bitbucket_app.avoid_full_files\", False):\n                    original_file_content_str = \"\"\n                    new_file_content_str = \"\"\n                elif counter_valid < MAX_FILES_ALLOWED_FULL // 2:  # factor 2 because 
bitbucket has limited API calls\n                    if diff.old.get_data(\"links\"):\n                        original_file_content_str = self._get_pr_file_content(\n                            diff.old.get_data(\"links\")['self']['href'])\n                    else:\n                        original_file_content_str = \"\"\n                    if diff.new.get_data(\"links\"):\n                        new_file_content_str = self._get_pr_file_content(diff.new.get_data(\"links\")['self']['href'])\n                    else:\n                        new_file_content_str = \"\"\n                else:\n                    if counter_valid == MAX_FILES_ALLOWED_FULL // 2:\n                        get_logger().info(\n                            f\"Bitbucket too many files in PR, will avoid loading full content for rest of files\")\n                    original_file_content_str = \"\"\n                    new_file_content_str = \"\"\n            except Exception as e:\n                get_logger().exception(f\"Error - bitbucket failed to get file content, error: {e}\")\n                original_file_content_str = \"\"\n                new_file_content_str = \"\"\n\n            file_patch_canonic_structure = FilePatchInfo(\n                original_file_content_str,\n                new_file_content_str,\n                diff_split[index],\n                file_path,\n            )\n\n            if diff.data['status'] == 'added':\n                file_patch_canonic_structure.edit_type = EDIT_TYPE.ADDED\n            elif diff.data['status'] == 'removed':\n                file_patch_canonic_structure.edit_type = EDIT_TYPE.DELETED\n            elif diff.data['status'] == 'modified':\n                file_patch_canonic_structure.edit_type = EDIT_TYPE.MODIFIED\n            elif diff.data['status'] == 'renamed':\n                file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED\n            diff_files.append(file_patch_canonic_structure)\n\n        if 
invalid_files_names:\n            get_logger().info(f\"Disregarding files with invalid extensions:\\n{invalid_files_names}\")\n\n        self.diff_files = diff_files\n        return diff_files\n\n    def get_latest_commit_url(self):\n        return self.pr.data['source']['commit']['links']['html']['href']\n\n    def get_comment_url(self, comment):\n        return comment.data['links']['html']['href']\n\n    def publish_persistent_comment(self, pr_comment: str,\n                                   initial_header: str,\n                                   update_header: bool = True,\n                                   name='review',\n                                   final_update_message=True):\n        try:\n            for comment in self.pr.comments():\n                body = comment.raw\n                if initial_header in body:\n                    latest_commit_url = self.get_latest_commit_url()\n                    comment_url = self.get_comment_url(comment)\n                    if update_header:\n                        updated_header = f\"{initial_header}\\n\\n#### ({name.capitalize()} updated until commit {latest_commit_url})\\n\"\n                        pr_comment_updated = pr_comment.replace(initial_header, updated_header)\n                    else:\n                        pr_comment_updated = pr_comment\n                    get_logger().info(f\"Persistent mode - updating comment {comment_url} to latest {name} message\")\n                    d = {\"content\": {\"raw\": pr_comment_updated}}\n                    response = comment._update_data(comment.put(None, data=d))\n                    if final_update_message:\n                        self.publish_comment(\n                            f\"**[Persistent {name}]({comment_url})** updated to latest commit {latest_commit_url}\")\n                    return\n        except Exception as e:\n            get_logger().exception(f\"Failed to update persistent review, error: {e}\")\n            pass\n        
self.publish_comment(pr_comment)\n\n    def publish_comment(self, pr_comment: str, is_temporary: bool = False):\n        if is_temporary and not get_settings().config.publish_output_progress:\n            get_logger().debug(f\"Skipping publish_comment for temporary comment: {pr_comment}\")\n            return None\n        pr_comment = self.limit_output_characters(pr_comment, self.max_comment_length)\n        comment = self.pr.comment(pr_comment)\n        if is_temporary:\n            self.temp_comments.append(comment[\"id\"])\n        return comment\n\n    def edit_comment(self, comment, body: str):\n        try:\n            body = self.limit_output_characters(body, self.max_comment_length)\n            comment.update(body)\n        except Exception as e:\n            get_logger().exception(f\"Failed to update comment, error: {e}\")\n\n    def remove_initial_comment(self):\n        try:\n            for comment in self.temp_comments:\n                self.remove_comment(comment)\n        except Exception as e:\n            get_logger().exception(f\"Failed to remove temp comments, error: {e}\")\n\n    def remove_comment(self, comment):\n        try:\n            self.pr.delete(f\"comments/{comment}\")\n        except Exception as e:\n            get_logger().exception(f\"Failed to remove comment, error: {e}\")\n\n    # function to create_inline_comment\n    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,\n                              absolute_position: int = None):\n        body = self.limit_output_characters(body, self.max_comment_length)\n        position, absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(),\n                                                                                relevant_file.strip('`'),\n                                                                                relevant_line_in_file,\n                                                                         
       absolute_position)\n        if position == -1:\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Could not find position for {relevant_file} {relevant_line_in_file}\")\n            subject_type = \"FILE\"\n        else:\n            subject_type = \"LINE\"\n        path = relevant_file.strip()\n        return dict(body=body, path=path, position=absolute_position) if subject_type == \"LINE\" else {}\n\n    def publish_inline_comment(self, comment: str, from_line: int, file: str, original_suggestion=None):\n        comment = self.limit_output_characters(comment, self.max_comment_length)\n        payload = json.dumps({\n            \"content\": {\n                \"raw\": comment,\n            },\n            \"inline\": {\n                \"to\": from_line,\n                \"path\": file\n            },\n        })\n        response = requests.request(\n            \"POST\", self.bitbucket_comment_api_url, data=payload, headers=self.headers\n        )\n        return response\n\n    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:\n        if relevant_line_start == -1:\n            link = f\"{self.pr_url}/#L{relevant_file}\"\n        else:\n            link = f\"{self.pr_url}/#L{relevant_file}T{relevant_line_start}\"\n        return link\n\n    def generate_link_to_relevant_line_number(self, suggestion) -> str:\n        try:\n            relevant_file = suggestion['relevant_file'].strip('`').strip(\"'\").rstrip()\n            relevant_line_str = suggestion['relevant_line'].rstrip()\n            if not relevant_line_str:\n                return \"\"\n\n            diff_files = self.get_diff_files()\n            position, absolute_position = find_line_number_of_relevant_line_in_file \\\n                (diff_files, relevant_file, relevant_line_str)\n\n            if absolute_position != -1 and self.pr_url:\n                link = 
f\"{self.pr_url}/#L{relevant_file}T{absolute_position}\"\n                return link\n        except Exception as e:\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Failed adding line link, error: {e}\")\n\n        return \"\"\n\n    def publish_inline_comments(self, comments: list[dict]):\n        for comment in comments:\n            if 'position' in comment:\n                self.publish_inline_comment(comment['body'], comment['position'], comment['path'])\n            elif 'start_line' in comment:  # multi-line comment\n                # note that bitbucket does not seem to support range - only a comment on a single line - https://community.developer.atlassian.com/t/api-post-endpoint-for-inline-pull-request-comments/60452\n                self.publish_inline_comment(comment['body'], comment['start_line'], comment['path'])\n            elif 'line' in comment:  # single-line comment\n                self.publish_inline_comment(comment['body'], comment['line'], comment['path'])\n            else:\n                get_logger().error(f\"Could not publish inline comment {comment}\")\n\n    def get_title(self):\n        return self.pr.title\n\n    def get_languages(self):\n        languages = {self._get_repo().get_data(\"language\"): 0}\n        return languages\n\n    def get_pr_branch(self):\n        return self.pr.source_branch\n\n    # This function attempts to get the default branch of the repository. 
As a fallback, uses the PR destination branch.\n    # Note: Must be running from a PR context.\n    def get_repo_default_branch(self):\n        try:\n            url_repo = f\"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/\"\n            response_repo = requests.request(\"GET\", url_repo, headers=self.headers).json()\n            return response_repo['mainbranch']['name']\n        except:\n            return self.pr.destination_branch\n\n    def get_pr_owner_id(self) -> str | None:\n        return self.workspace_slug\n\n    def get_pr_description_full(self):\n        return self.pr.description\n\n    def get_user_id(self):\n        return 0\n\n    def get_issue_comments(self):\n        raise NotImplementedError(\n            \"Bitbucket provider does not support issue comments yet\"\n        )\n\n    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:\n        return True\n\n    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:\n        return True\n\n    @staticmethod\n    def _parse_pr_url(pr_url: str) -> Tuple[str, int, int]:\n        parsed_url = urlparse(pr_url)\n\n        if \"bitbucket.org\" not in parsed_url.netloc:\n            raise ValueError(\"The provided URL is not a valid Bitbucket URL\")\n\n        path_parts = parsed_url.path.strip(\"/\").split(\"/\")\n\n        if len(path_parts) < 4 or path_parts[2] != \"pull-requests\":\n            raise ValueError(\n                \"The provided URL does not appear to be a Bitbucket PR URL\"\n            )\n\n        workspace_slug = path_parts[0]\n        repo_slug = path_parts[1]\n        try:\n            pr_number = int(path_parts[3])\n        except ValueError as e:\n            raise ValueError(\"Unable to convert PR number to integer\") from e\n\n        return workspace_slug, repo_slug, pr_number\n\n    def _get_repo(self):\n        if self.repo is None:\n            self.repo = 
self.bitbucket_client.workspaces.get(\n                self.workspace_slug\n            ).repositories.get(self.repo_slug)\n        return self.repo\n\n    def _get_pr(self):\n        return self._get_repo().pullrequests.get(self.pr_num)\n\n    def get_pr_file_content(self, file_path: str, branch: str) -> str:\n        try:\n            if branch == self.pr.source_branch:\n                branch = self.pr.data[\"source\"][\"commit\"][\"hash\"]\n            elif branch == self.pr.destination_branch:\n                branch = self.pr.data[\"destination\"][\"commit\"][\"hash\"]\n            url = (f\"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/\"\n                   f\"{branch}/{file_path}\")\n            response = requests.request(\"GET\", url, headers=self.headers)\n            if response.status_code == 404:  # not found\n                return \"\"\n            contents = response.text\n            return contents\n        except Exception:\n            return \"\"\n\n    def create_or_update_pr_file(self, file_path: str, branch: str, contents=\"\", message=\"\") -> None:\n        url = (f\"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/\")\n        if not message:\n            if contents:\n                message = f\"Update {file_path}\"\n            else:\n                message = f\"Create {file_path}\"\n        files = {file_path: contents}\n        data = {\n            \"message\": message,\n            \"branch\": branch\n        }\n        headers = {'Authorization': self.headers['Authorization']} if 'Authorization' in self.headers else {}\n        try:\n            requests.request(\"POST\", url, headers=headers, data=data, files=files)\n        except Exception:\n            get_logger().exception(f\"Failed to create empty file {file_path} in branch {branch}\")\n\n    def _get_pr_file_content(self, remote_link: str):\n        try:\n            response = 
requests.request(\"GET\", remote_link, headers=self.headers)\n            if response.status_code == 404:  # not found\n                return \"\"\n            contents = response.text\n            return contents\n        except Exception:\n            return \"\"\n\n    def get_commit_messages(self):\n        return \"\"  # not implemented yet\n\n    # bitbucket does not support labels\n    def publish_description(self, pr_title: str, description: str):\n        payload = json.dumps({\n            \"description\": description,\n            \"title\": pr_title\n\n        })\n\n        response = requests.request(\"PUT\", self.bitbucket_pull_request_api_url, headers=self.headers, data=payload)\n        try:\n            if response.status_code != 200:\n                get_logger().info(f\"Failed to update description, error code: {response.status_code}\")\n        except:\n            pass\n        return response\n\n    # bitbucket does not support labels\n    def publish_labels(self, pr_types: list):\n        pass\n\n    # bitbucket does not support labels\n    def get_pr_labels(self, update=False):\n        pass\n    #Clone related\n    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:\n        if \"bitbucket.org\" not in repo_url_to_clone:\n            get_logger().error(\"Repo URL is not a valid bitbucket URL.\")\n            return None\n\n        (scheme, base_url) = repo_url_to_clone.split(\"bitbucket.org\")\n        if not all([scheme, base_url]):\n            get_logger().error(f\"repo_url_to_clone: {repo_url_to_clone} is not a valid bitbucket URL.\")\n            return None\n\n        if self.auth_type == \"basic\":\n            # Basic auth with token\n            clone_url = f\"{scheme}x-token-auth:{self.basic_token}@bitbucket.org{base_url}\"\n        elif self.auth_type == \"bearer\":\n            # Bearer token\n            clone_url = f\"{scheme}x-token-auth:{self.bearer_token}@bitbucket.org{base_url}\"\n        
else:\n            # This case should ideally not be reached if __init__ validates auth_type\n            get_logger().error(f\"Unsupported or uninitialized auth_type: {getattr(self, 'auth_type', 'N/A')}. Returning None\")\n            return None\n\n        return clone_url\n"
  },
  {
    "path": "pr_agent/git_providers/bitbucket_server_provider.py",
    "content": "import difflib\nimport re\n\nfrom packaging.version import parse as parse_version\nfrom typing import Optional, Tuple\nfrom urllib.parse import quote_plus, urlparse\n\nfrom atlassian.bitbucket import Bitbucket\nfrom requests.exceptions import HTTPError\nimport shlex\nimport subprocess\n\nfrom ..algo.file_filter import filter_ignored\nfrom ..algo.git_patch_processing import decode_if_bytes\nfrom ..algo.language_handler import is_valid_file\nfrom ..algo.types import EDIT_TYPE, FilePatchInfo\nfrom ..algo.utils import (find_line_number_of_relevant_line_in_file,\n                          load_large_diff)\nfrom ..config_loader import get_settings\nfrom ..log import get_logger\nfrom .git_provider import GitProvider, get_git_ssl_env\n\n\nclass BitbucketServerProvider(GitProvider):\n    def __init__(\n            self, pr_url: Optional[str] = None, incremental: Optional[bool] = False,\n            bitbucket_client: Optional[Bitbucket] = None,\n    ):\n        self.bitbucket_server_url = None\n        self.workspace_slug = None\n        self.repo_slug = None\n        self.repo = None\n        self.pr_num = None\n        self.pr = None\n        self.pr_url = pr_url\n        self.temp_comments = []\n        self.incremental = incremental\n        self.diff_files = None\n        self.bitbucket_pull_request_api_url = pr_url\n        self.bearer_token = get_settings().get(\"BITBUCKET_SERVER.BEARER_TOKEN\", None)\n        # Get username and password from settings\n        username = get_settings().get(\"BITBUCKET_SERVER.USERNAME\", None)\n        password = get_settings().get(\"BITBUCKET_SERVER.PASSWORD\", None)\n        if bitbucket_client: # if Bitbucket client is provided, use it\n            self.bitbucket_client = bitbucket_client\n            self.bitbucket_server_url = getattr(bitbucket_client, 'url', None) or self._parse_bitbucket_server(pr_url)\n        else:\n            self.bitbucket_server_url = self._parse_bitbucket_server(pr_url)\n            if not 
self.bitbucket_server_url:\n                raise ValueError(\"Invalid or missing Bitbucket Server URL parsed from PR URL.\")\n            \n            if self.bearer_token:  # if bearer token is provided, use it\n                self.bitbucket_client = Bitbucket(\n                    url=self.bitbucket_server_url,\n                    token=self.bearer_token\n                )\n            else:  # otherwise use username and password\n                self.bitbucket_client = Bitbucket(\n                    url=self.bitbucket_server_url,\n                    username=username,\n                    password=password\n                )\n        try:\n            self.bitbucket_api_version = parse_version(self.bitbucket_client.get(\"rest/api/1.0/application-properties\").get('version'))\n        except Exception:\n            self.bitbucket_api_version = None\n\n        if pr_url:\n            self.set_pr(pr_url)\n\n    def get_git_repo_url(self, pr_url: str=None) -> str: #bitbucket server does not support issue url, so ignore param\n        try:\n            parsed_url = urlparse(self.pr_url)\n            return f\"{parsed_url.scheme}://{parsed_url.netloc}/scm/{self.workspace_slug.lower()}/{self.repo_slug.lower()}.git\"\n        except Exception as e:\n            get_logger().exception(f\"url is not a valid merge requests url: {self.pr_url}\")\n            return \"\"\n\n    # Given a git repo url, return prefix and suffix of the provider in order to view a given file belonging to that repo.\n    # Example: https://bitbucket.dev.my_inc.com/scm/my_work/my_repo.git and branch: my_branch -> prefix: \"https://bitbucket.dev.my_inc.com/projects/MY_WORK/repos/my_repo/browse/src\", suffix: \"?at=refs%2Fheads%2Fmy_branch\"\n    # In case git url is not provided, provider will use PR context (which includes branch) to determine the prefix and suffix.\n    def get_canonical_url_parts(self, repo_git_url:str=None, desired_branch:str=None) -> Tuple[str, str]:\n        
workspace_name = None\n        project_name = None\n        if not repo_git_url:\n            workspace_name = self.workspace_slug\n            project_name = self.repo_slug\n            default_branch_dict = self.bitbucket_client.get_default_branch(workspace_name, project_name)\n            if 'displayId' in default_branch_dict:\n                desired_branch = default_branch_dict['displayId']\n            else:\n                get_logger().error(f\"Cannot obtain default branch for workspace_name={workspace_name}, \"\n                                   f\"project_name={project_name}, default_branch_dict={default_branch_dict}\")\n                return (\"\", \"\")\n        elif '.git' in repo_git_url and 'scm/' in repo_git_url:\n            repo_path = repo_git_url.split('.git')[0].split('scm/')[-1]\n            if repo_path.count('/') == 1:  # Has to have the form <workspace>/<repo>\n                workspace_name, project_name = repo_path.split('/')\n        if not workspace_name or not project_name:\n            get_logger().error(f\"workspace_name or project_name not found in context, either git url: {repo_git_url} or uninitialized workspace/project.\")\n            return (\"\", \"\")\n        prefix = f\"{self.bitbucket_server_url}/projects/{workspace_name}/repos/{project_name}/browse\"\n        suffix = f\"?at=refs%2Fheads%2F{desired_branch}\"\n        return (prefix, suffix)\n\n    def get_repo_settings(self):\n        try:\n            content = self.bitbucket_client.get_content_of_file(self.workspace_slug, self.repo_slug, \".pr_agent.toml\")\n\n            return content\n        except Exception as e:\n            if isinstance(e, HTTPError):\n                if e.response.status_code == 404:  # not found\n                    return \"\"\n\n            get_logger().error(f\"Failed to load .pr_agent.toml file, error: {e}\")\n            return \"\"\n\n    def get_pr_id(self):\n        return self.pr_num\n\n    def publish_code_suggestions(self, 
code_suggestions: list) -> bool:\n        \"\"\"\n        Publishes code suggestions as comments on the PR.\n        \"\"\"\n        post_parameters_list = []\n        for suggestion in code_suggestions:\n            body = suggestion[\"body\"]\n            original_suggestion = suggestion.get('original_suggestion', None)  # needed for diff code\n            if original_suggestion:\n                try:\n                    existing_code = original_suggestion['existing_code'].rstrip() + \"\\n\"\n                    improved_code = original_suggestion['improved_code'].rstrip() + \"\\n\"\n                    diff = difflib.unified_diff(existing_code.split('\\n'),\n                                                improved_code.split('\\n'), n=999)\n                    patch_orig = \"\\n\".join(diff)\n                    patch = \"\\n\".join(patch_orig.splitlines()[5:]).strip('\\n')\n                    diff_code = f\"\\n\\n```diff\\n{patch.rstrip()}\\n```\"\n                    # replace ```suggestion ... 
``` with diff_code, using regex:\n                    body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)\n                except Exception as e:\n                    get_logger().exception(f\"Bitbucket failed to get diff code for publishing, error: {e}\")\n                    continue\n            relevant_file = suggestion[\"relevant_file\"]\n            relevant_lines_start = suggestion[\"relevant_lines_start\"]\n            relevant_lines_end = suggestion[\"relevant_lines_end\"]\n\n            if not relevant_lines_start or relevant_lines_start == -1:\n                get_logger().warning(\n                    f\"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}\"\n                )\n                continue\n\n            if relevant_lines_end < relevant_lines_start:\n                get_logger().warning(\n                    f\"Failed to publish code suggestion, \"\n                    f\"relevant_lines_end is {relevant_lines_end} and \"\n                    f\"relevant_lines_start is {relevant_lines_start}\"\n                )\n                continue\n\n            if relevant_lines_end > relevant_lines_start:\n                # Bitbucket does not support multi-line suggestions so use a code block instead - https://jira.atlassian.com/browse/BSERV-4553\n                body = body.replace(\"```suggestion\", \"```\")\n                post_parameters = {\n                    \"body\": body,\n                    \"path\": relevant_file,\n                    \"line\": relevant_lines_end,\n                    \"start_line\": relevant_lines_start,\n                    \"start_side\": \"RIGHT\",\n                }\n            else:  # API is different for single line comments\n                post_parameters = {\n                    \"body\": body,\n                    \"path\": relevant_file,\n                    \"line\": relevant_lines_start,\n                    \"side\": \"RIGHT\",\n                }\n      
      post_parameters_list.append(post_parameters)\n\n        try:\n            self.publish_inline_comments(post_parameters_list)\n            return True\n        except Exception as e:\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().error(f\"Failed to publish code suggestion, error: {e}\")\n            return False\n\n    def publish_file_comments(self, file_comments: list) -> bool:\n        pass\n\n    def is_supported(self, capability: str) -> bool:\n        if capability in ['get_issue_comments', 'get_labels', 'gfm_markdown', 'publish_file_comments']:\n            return False\n        return True\n\n    def set_pr(self, pr_url: str):\n        self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url)\n        self.pr = self._get_pr()\n\n    def get_file(self, path: str, commit_id: str):\n        file_content = \"\"\n        try:\n            file_content = self.bitbucket_client.get_content_of_file(self.workspace_slug,\n                                                                     self.repo_slug,\n                                                                     path,\n                                                                     commit_id)\n        except HTTPError as e:\n            get_logger().debug(f\"File {path} not found at commit id: {commit_id}\")\n        return file_content\n\n    def get_files(self):\n        changes = self.bitbucket_client.get_pull_requests_changes(self.workspace_slug, self.repo_slug, self.pr_num)\n        diffstat = [change[\"path\"]['toString'] for change in changes]\n        return diffstat\n\n    #gets the best common ancestor: https://git-scm.com/docs/git-merge-base\n    @staticmethod\n    def get_best_common_ancestor(source_commits_list, destination_commits_list, guaranteed_common_ancestor) -> str:\n        destination_commit_hashes = {commit['id'] for commit in destination_commits_list} | {guaranteed_common_ancestor}\n\n        for commit in 
source_commits_list:\n            for parent_commit in commit['parents']:\n                if parent_commit['id'] in destination_commit_hashes:\n                    return parent_commit['id']\n\n        return guaranteed_common_ancestor\n\n    def get_diff_files(self) -> list[FilePatchInfo]:\n        if self.diff_files:\n            return self.diff_files\n\n        head_sha = self.pr.fromRef['latestCommit']\n\n        # if Bitbucket api version is >= 8.16 then use the merge-base api for 2-way diff calculation\n        if self.bitbucket_api_version is not None and self.bitbucket_api_version >= parse_version(\"8.16\"):\n            try:\n                base_sha = self.bitbucket_client.get(self._get_merge_base())['id']\n            except Exception as e:\n                get_logger().error(f\"Failed to get the best common ancestor for PR: {self.pr_url}, \\nerror: {e}\")\n                raise e\n        else:\n            source_commits_list = list(self.bitbucket_client.get_pull_requests_commits(\n                self.workspace_slug,\n                self.repo_slug,\n                self.pr_num\n            ))\n            # if Bitbucket api version is None or < 7.0 then do a simple diff with a guaranteed common ancestor\n            base_sha = source_commits_list[-1]['parents'][0]['id']\n            # if Bitbucket api version is 7.0-8.15 then use 2-way diff functionality for the base_sha\n            if self.bitbucket_api_version is not None and self.bitbucket_api_version >= parse_version(\"7.0\"):\n                try:\n                    destination_commits = list(\n                        self.bitbucket_client.get_commits(self.workspace_slug, self.repo_slug, base_sha,\n                                                          self.pr.toRef['latestCommit']))\n                    base_sha = self.get_best_common_ancestor(source_commits_list, destination_commits, base_sha)\n                except Exception as e:\n                    get_logger().error(\n            
            f\"Failed to get the commit list for calculating best common ancestor for PR: {self.pr_url}, \\nerror: {e}\")\n                    raise e\n\n        diff_files = []\n        original_file_content_str = \"\"\n        new_file_content_str = \"\"\n\n        changes_original = list(self.bitbucket_client.get_pull_requests_changes(self.workspace_slug, self.repo_slug, self.pr_num))\n        changes = filter_ignored(changes_original, 'bitbucket_server')\n        for change in changes:\n            file_path = change['path']['toString']\n            if not is_valid_file(file_path.split(\"/\")[-1]):\n                get_logger().info(f\"Skipping a non-code file: {file_path}\")\n                continue\n\n            match change['type']:\n                case 'ADD':\n                    edit_type = EDIT_TYPE.ADDED\n                    new_file_content_str = self.get_file(file_path, head_sha)\n                    new_file_content_str = decode_if_bytes(new_file_content_str)\n                    original_file_content_str = \"\"\n                case 'DELETE':\n                    edit_type = EDIT_TYPE.DELETED\n                    new_file_content_str = \"\"\n                    original_file_content_str = self.get_file(file_path, base_sha)\n                    original_file_content_str = decode_if_bytes(original_file_content_str)\n                case 'RENAME':\n                    edit_type = EDIT_TYPE.RENAMED\n                case _:\n                    edit_type = EDIT_TYPE.MODIFIED\n                    original_file_content_str = self.get_file(file_path, base_sha)\n                    original_file_content_str = decode_if_bytes(original_file_content_str)\n                    new_file_content_str = self.get_file(file_path, head_sha)\n                    new_file_content_str = decode_if_bytes(new_file_content_str)\n\n            patch = load_large_diff(file_path, new_file_content_str, original_file_content_str, show_warning=False)\n\n            
diff_files.append(\n                FilePatchInfo(\n                    original_file_content_str,\n                    new_file_content_str,\n                    patch,\n                    file_path,\n                    edit_type=edit_type,\n                )\n            )\n\n        self.diff_files = diff_files\n        return diff_files\n\n    def publish_comment(self, pr_comment: str, is_temporary: bool = False):\n        if not is_temporary:\n            self.bitbucket_client.add_pull_request_comment(self.workspace_slug, self.repo_slug, self.pr_num, pr_comment)\n\n    def remove_initial_comment(self):\n        try:\n            for comment in self.temp_comments:\n                self.remove_comment(comment)\n        except ValueError as e:\n            get_logger().exception(f\"Failed to remove temp comments, error: {e}\")\n\n    def remove_comment(self, comment):\n        pass\n\n    # function to create_inline_comment\n    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,\n                              absolute_position: int = None):\n\n        position, absolute_position = find_line_number_of_relevant_line_in_file(\n            self.get_diff_files(),\n            relevant_file.strip('`'),\n            relevant_line_in_file,\n            absolute_position\n        )\n        if position == -1:\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Could not find position for {relevant_file} {relevant_line_in_file}\")\n            subject_type = \"FILE\"\n        else:\n            subject_type = \"LINE\"\n        path = relevant_file.strip()\n        return dict(body=body, path=path, position=absolute_position) if subject_type == \"LINE\" else {}\n\n    def publish_inline_comment(self, comment: str, from_line: int, file: str, original_suggestion=None):\n        payload = {\n            \"text\": comment,\n            \"severity\": \"NORMAL\",\n            \"anchor\": {\n   
             \"diffType\": \"EFFECTIVE\",\n                \"path\": file,\n                \"lineType\": \"ADDED\",\n                \"line\": from_line,\n                \"fileType\": \"TO\"\n            }\n        }\n\n        try:\n            self.bitbucket_client.post(self._get_pr_comments_path(), data=payload)\n        except Exception as e:\n            get_logger().error(f\"Failed to publish inline comment to '{file}' at line {from_line}, error: {e}\")\n            raise e\n\n    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:\n        if relevant_line_start == -1:\n            link = f\"{self.pr_url}/diff#{quote_plus(relevant_file)}\"\n        else:\n            link = f\"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={relevant_line_start}\"\n        return link\n\n    def generate_link_to_relevant_line_number(self, suggestion) -> str:\n        try:\n            relevant_file = suggestion['relevant_file'].strip('`').strip(\"'\").rstrip()\n            relevant_line_str = suggestion['relevant_line'].rstrip()\n            if not relevant_line_str:\n                return \"\"\n\n            diff_files = self.get_diff_files()\n            position, absolute_position = find_line_number_of_relevant_line_in_file \\\n                (diff_files, relevant_file, relevant_line_str)\n\n            if absolute_position != -1:\n                if self.pr:\n                    link = f\"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={absolute_position}\"\n                    return link\n                else:\n                    if get_settings().config.verbosity_level >= 2:\n                        get_logger().info(f\"Failed adding line link to '{relevant_file}' since PR not set\")\n            else:\n                if get_settings().config.verbosity_level >= 2:\n                    get_logger().info(f\"Failed adding line link to '{relevant_file}' since position not found\")\n\n            if 
absolute_position != -1 and self.pr_url:\n                link = f\"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={absolute_position}\"\n                return link\n        except Exception as e:\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Failed adding line link to '{relevant_file}', error: {e}\")\n\n        return \"\"\n\n    def publish_inline_comments(self, comments: list[dict]):\n        for comment in comments:\n            if 'position' in comment:\n                self.publish_inline_comment(comment['body'], comment['position'], comment['path'])\n            elif 'start_line' in comment: # multi-line comment\n                # note that bitbucket does not seem to support range - only a comment on a single line - https://community.developer.atlassian.com/t/api-post-endpoint-for-inline-pull-request-comments/60452\n                self.publish_inline_comment(comment['body'], comment['start_line'], comment['path'])\n            elif 'line' in comment: # single-line comment\n                self.publish_inline_comment(comment['body'], comment['line'], comment['path'])\n            else:\n                get_logger().error(f\"Could not publish inline comment: {comment}\")\n\n    def get_title(self):\n        return self.pr.title\n\n    def get_languages(self):\n        return {\"yaml\": 0}  # devops LOL\n\n    def get_pr_branch(self):\n        return self.pr.fromRef['displayId']\n\n    def get_pr_owner_id(self) -> str | None:\n        return self.workspace_slug\n\n    def get_pr_description_full(self):\n        if hasattr(self.pr, \"description\"):\n            return self.pr.description\n        else:\n            return None\n\n    def get_user_id(self):\n        return 0\n\n    def get_issue_comments(self):\n        raise NotImplementedError(\n            \"Bitbucket provider does not support issue comments yet\"\n        )\n\n    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = 
False) -> Optional[int]:\n        return True\n\n    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:\n        return True\n\n    @staticmethod\n    def _parse_bitbucket_server(url: str) -> str:\n        # pr url format: f\"{bitbucket_server}/projects/{project_name}/repos/{repository_name}/pull-requests/{pr_id}\"\n        parsed_url = urlparse(url)\n        server_path = parsed_url.path.split(\"/projects/\")\n        if len(server_path) > 1:\n            server_path = server_path[0].strip(\"/\")\n            return f\"{parsed_url.scheme}://{parsed_url.netloc}/{server_path}\".strip(\"/\")\n        return f\"{parsed_url.scheme}://{parsed_url.netloc}\"\n\n    @staticmethod\n    def _parse_pr_url(pr_url: str) -> Tuple[str, str, int]:\n        # pr url format: f\"{bitbucket_server}/projects/{project_name}/repos/{repository_name}/pull-requests/{pr_id}\"\n        parsed_url = urlparse(pr_url)\n\n        path_parts = parsed_url.path.strip(\"/\").split(\"/\")\n\n        try:\n            projects_index = path_parts.index(\"projects\")\n        except ValueError:\n            projects_index = -1\n\n        try:\n            users_index = path_parts.index(\"users\")\n        except ValueError:\n            users_index = -1\n\n        if projects_index == -1 and users_index == -1:\n            raise ValueError(f\"The provided URL '{pr_url}' does not appear to be a Bitbucket PR URL\")\n\n        if projects_index != -1:\n            path_parts = path_parts[projects_index:]\n        else:\n            path_parts = path_parts[users_index:]\n\n        if len(path_parts) < 6 or path_parts[2] != \"repos\" or path_parts[4] != \"pull-requests\":\n            raise ValueError(\n                f\"The provided URL '{pr_url}' does not appear to be a Bitbucket PR URL\"\n            )\n\n        workspace_slug = path_parts[1]\n        if users_index != -1:\n            workspace_slug = f\"~{workspace_slug}\"\n        repo_slug = path_parts[3]\n        try:\n      
      pr_number = int(path_parts[5])\n        except ValueError as e:\n            raise ValueError(f\"Unable to convert PR number '{path_parts[5]}' to integer\") from e\n\n        return workspace_slug, repo_slug, pr_number\n\n    def _get_repo(self):\n        if self.repo is None:\n            self.repo = self.bitbucket_client.get_repo(self.workspace_slug, self.repo_slug)\n        return self.repo\n\n    def _get_pr(self):\n        try:\n            pr = self.bitbucket_client.get_pull_request(self.workspace_slug, self.repo_slug,\n                                                        pull_request_id=self.pr_num)\n            return type('new_dict', (object,), pr)\n        except Exception as e:\n            get_logger().error(f\"Failed to get pull request, error: {e}\")\n            raise e\n\n    def _get_pr_file_content(self, remote_link: str):\n        return \"\"\n\n    def get_commit_messages(self):\n        return \"\"\n\n    # bitbucket does not support labels\n    def publish_description(self, pr_title: str, description: str):\n        payload = {\n            \"version\": self.pr.version,\n            \"description\": description,\n            \"title\": pr_title,\n            \"reviewers\": self.pr.reviewers  # needs to be sent otherwise gets wiped\n        }\n        try:\n            self.bitbucket_client.update_pull_request(self.workspace_slug, self.repo_slug, str(self.pr_num), payload)\n        except Exception as e:\n            get_logger().error(f\"Failed to update pull request, error: {e}\")\n            raise e\n\n    # bitbucket does not support labels\n    def publish_labels(self, pr_types: list):\n        pass\n\n    # bitbucket does not support labels\n    def get_pr_labels(self, update=False):\n        pass\n\n    def _get_pr_comments_path(self):\n        return f\"rest/api/latest/projects/{self.workspace_slug}/repos/{self.repo_slug}/pull-requests/{self.pr_num}/comments\"\n\n    def _get_merge_base(self):\n        return 
f\"rest/api/latest/projects/{self.workspace_slug}/repos/{self.repo_slug}/pull-requests/{self.pr_num}/merge-base\"\n    # Clone related\n    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:\n        if 'bitbucket.' not in repo_url_to_clone:\n            get_logger().error(\"Repo URL is not a valid bitbucket URL.\")\n            return None\n        bearer_token = self.bearer_token\n        if not bearer_token:\n            get_logger().error(\"No bearer token provided. Returning None\")\n            return None\n        # Return unmodified URL as the token is passed via HTTP headers in _clone_inner, as seen below.\n        return repo_url_to_clone\n\n    #Overriding the shell command, since for some reason usage of x-token-auth doesn't work, as mentioned here:\n    # https://stackoverflow.com/questions/56760396/cloning-bitbucket-server-repo-with-access-tokens\n    def _clone_inner(self, repo_url: str, dest_folder: str, operation_timeout_in_seconds: int=None):\n        bearer_token = self.bearer_token\n        if not bearer_token:\n            #Shouldn't happen since this is checked in _prepare_clone, therefore - throwing an exception.\n            raise RuntimeError(f\"Bearer token is required!\")\n\n        cli_args = shlex.split(f\"git clone -c http.extraHeader='Authorization: Bearer {bearer_token}' \"\n                               f\"--filter=blob:none --depth 1 {repo_url} {dest_folder}\")\n\n        ssl_env = get_git_ssl_env()\n\n        subprocess.run(cli_args, env=ssl_env, check=True,  # check=True will raise an exception if the command fails\n            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=operation_timeout_in_seconds)\n"
  },
  {
    "path": "pr_agent/git_providers/codecommit_client.py",
    "content": "import boto3\nimport botocore\n\n\nclass CodeCommitDifferencesResponse:\n    \"\"\"\n    CodeCommitDifferencesResponse is the response object returned from our get_differences() function.\n    It maps the JSON response to member variables of this class.\n    \"\"\"\n\n    def __init__(self, json: dict):\n        before_blob = json.get(\"beforeBlob\", {})\n        after_blob = json.get(\"afterBlob\", {})\n\n        self.before_blob_id = before_blob.get(\"blobId\", \"\")\n        self.before_blob_path = before_blob.get(\"path\", \"\")\n        self.after_blob_id = after_blob.get(\"blobId\", \"\")\n        self.after_blob_path = after_blob.get(\"path\", \"\")\n        self.change_type = json.get(\"changeType\", \"\")\n\n\nclass CodeCommitPullRequestResponse:\n    \"\"\"\n    CodeCommitPullRequestResponse is the response object returned from our get_pr() function.\n    It maps the JSON response to member variables of this class.\n    \"\"\"\n\n    def __init__(self, json: dict):\n        self.title = json.get(\"title\", \"\")\n        self.description = json.get(\"description\", \"\")\n\n        self.targets = []\n        for target in json.get(\"pullRequestTargets\", []):\n            self.targets.append(CodeCommitPullRequestResponse.CodeCommitPullRequestTarget(target))\n\n    class CodeCommitPullRequestTarget:\n        \"\"\"\n        CodeCommitPullRequestTarget is a subclass of CodeCommitPullRequestResponse that\n        holds details about an individual target commit.\n        \"\"\"\n\n        def __init__(self, json: dict):\n            self.source_commit = json.get(\"sourceCommit\", \"\")\n            self.source_branch = json.get(\"sourceReference\", \"\")\n            self.destination_commit = json.get(\"destinationCommit\", \"\")\n            self.destination_branch = json.get(\"destinationReference\", \"\")\n\n\nclass CodeCommitClient:\n    \"\"\"\n    CodeCommitClient is a wrapper around the AWS boto3 SDK for the CodeCommit client\n    
\"\"\"\n\n    def __init__(self):\n        self.boto_client = None\n\n    def is_supported(self, capability: str) -> bool:\n        if capability in [\"gfm_markdown\"]:\n            return False\n        return True\n\n    def _connect_boto_client(self):\n        try:\n            self.boto_client = boto3.client(\"codecommit\")\n        except Exception as e:\n            raise ValueError(f\"Failed to connect to AWS CodeCommit: {e}\") from e\n\n    def get_differences(self, repo_name: int, destination_commit: str, source_commit: str):\n        \"\"\"\n        Get the differences between two commits in CodeCommit.\n\n        Args:\n        - repo_name: Name of the repository\n        - destination_commit: Commit hash you want to merge into (the \"before\" hash) (usually on the main or master branch)\n        - source_commit: Commit hash of the code you are adding (the \"after\" branch)\n\n        Returns:\n        - List of CodeCommitDifferencesResponse objects\n\n        Boto3 Documentation:\n        - aws codecommit get-differences\n        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/get_differences.html\n        \"\"\"\n        if self.boto_client is None:\n            self._connect_boto_client()\n\n        # The differences response from AWS is paginated, so we need to iterate through the pages to get all the differences.\n        differences = []\n        try:\n            paginator = self.boto_client.get_paginator(\"get_differences\")\n            for page in paginator.paginate(\n                repositoryName=repo_name,\n                beforeCommitSpecifier=destination_commit,\n                afterCommitSpecifier=source_commit,\n            ):\n                differences.extend(page.get(\"differences\", []))\n        except botocore.exceptions.ClientError as e:\n            if e.response[\"Error\"][\"Code\"] == 'RepositoryDoesNotExistException':\n                raise ValueError(f\"CodeCommit cannot 
retrieve differences: Repository does not exist: {repo_name}\") from e\n            raise ValueError(f\"CodeCommit cannot retrieve differences for {source_commit}..{destination_commit}\") from e\n        except Exception as e:\n            raise ValueError(f\"CodeCommit cannot retrieve differences for {source_commit}..{destination_commit}\") from e\n\n        output = []\n        for json in differences:\n            output.append(CodeCommitDifferencesResponse(json))\n        return output\n\n    def get_file(self, repo_name: str, file_path: str, sha_hash: str, optional: bool = False):\n        \"\"\"\n        Retrieve a file from CodeCommit.\n\n        Args:\n        - repo_name: Name of the repository\n        - file_path: Path to the file you are retrieving\n        - sha_hash: Commit hash of the file you are retrieving\n\n        Returns:\n        - File contents\n\n        Boto3 Documentation:\n        - aws codecommit get_file\n        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/get_file.html\n        \"\"\"\n        if not file_path:\n            return \"\"\n\n        if self.boto_client is None:\n            self._connect_boto_client()\n\n        try:\n            response = self.boto_client.get_file(repositoryName=repo_name, commitSpecifier=sha_hash, filePath=file_path)\n        except botocore.exceptions.ClientError as e:\n            if e.response[\"Error\"][\"Code\"] == 'RepositoryDoesNotExistException':\n                raise ValueError(f\"CodeCommit cannot retrieve PR: Repository does not exist: {repo_name}\") from e\n            # if the file does not exist, but is flagged as optional, then return an empty string\n            if optional and e.response[\"Error\"][\"Code\"] == 'FileDoesNotExistException':\n                return \"\"\n            raise ValueError(f\"CodeCommit cannot retrieve file '{file_path}' from repository '{repo_name}'\") from e\n        except Exception as e:\n            raise 
ValueError(f\"CodeCommit cannot retrieve file '{file_path}' from repository '{repo_name}'\") from e\n        if \"fileContent\" not in response:\n            raise ValueError(f\"File content is empty for file: {file_path}\")\n\n        return response.get(\"fileContent\", \"\")\n\n    def get_pr(self, repo_name: str, pr_number: int):\n        \"\"\"\n        Get information about a CodeCommit PR.\n\n        Args:\n        - repo_name: Name of the repository\n        - pr_number: The PR number you are requesting\n\n        Returns:\n        - CodeCommitPullRequestResponse object\n\n        Boto3 Documentation:\n        - aws codecommit get_pull_request\n        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/get_pull_request.html\n        \"\"\"\n        if self.boto_client is None:\n            self._connect_boto_client()\n\n        try:\n            response = self.boto_client.get_pull_request(pullRequestId=str(pr_number))\n        except botocore.exceptions.ClientError as e:\n            if e.response[\"Error\"][\"Code\"] == 'PullRequestDoesNotExistException':\n                raise ValueError(f\"CodeCommit cannot retrieve PR: PR number does not exist: {pr_number}\") from e\n            if e.response[\"Error\"][\"Code\"] == 'RepositoryDoesNotExistException':\n                raise ValueError(f\"CodeCommit cannot retrieve PR: Repository does not exist: {repo_name}\") from e\n            raise ValueError(f\"CodeCommit cannot retrieve PR: {pr_number}: boto client error\") from e\n        except Exception as e:\n            raise ValueError(f\"CodeCommit cannot retrieve PR: {pr_number}\") from e\n\n        if \"pullRequest\" not in response:\n            raise ValueError(\"CodeCommit PR number not found: {pr_number}\")\n\n        return CodeCommitPullRequestResponse(response.get(\"pullRequest\", {}))\n\n    def publish_description(self, pr_number: int, pr_title: str, pr_body: str):\n        \"\"\"\n        Set the 
title and description on a pull request\n\n        Args:\n        - pr_number: the AWS CodeCommit pull request number\n        - pr_title: title of the pull request\n        - pr_body: body of the pull request\n\n        Returns:\n        - None\n\n        Boto3 Documentation:\n        - aws codecommit update_pull_request_title\n        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/update_pull_request_title.html\n        - aws codecommit update_pull_request_description\n        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/update_pull_request_description.html\n        \"\"\"\n        if self.boto_client is None:\n            self._connect_boto_client()\n\n        try:\n            self.boto_client.update_pull_request_title(pullRequestId=str(pr_number), title=pr_title)\n            self.boto_client.update_pull_request_description(pullRequestId=str(pr_number), description=pr_body)\n        except botocore.exceptions.ClientError as e:\n            if e.response[\"Error\"][\"Code\"] == 'PullRequestDoesNotExistException':\n                raise ValueError(f\"PR number does not exist: {pr_number}\") from e\n            if e.response[\"Error\"][\"Code\"] == 'InvalidTitleException':\n                raise ValueError(f\"Invalid title for PR number: {pr_number}\") from e\n            if e.response[\"Error\"][\"Code\"] == 'InvalidDescriptionException':\n                raise ValueError(f\"Invalid description for PR number: {pr_number}\") from e\n            if e.response[\"Error\"][\"Code\"] == 'PullRequestAlreadyClosedException':\n                raise ValueError(f\"PR is already closed: PR number: {pr_number}\") from e\n            raise ValueError(f\"Boto3 client error calling publish_description\") from e\n        except Exception as e:\n            raise ValueError(f\"Error calling publish_description\") from e\n\n    def publish_comment(self, repo_name: str, pr_number: int, 
destination_commit: str, source_commit: str, comment: str, annotation_file: str = None, annotation_line: int = None):\n        \"\"\"\n        Publish a comment to a pull request\n\n        Args:\n        - repo_name: name of the repository\n        - pr_number: number of the pull request\n        - destination_commit: The commit hash you want to merge into (the \"before\" hash) (usually on the main or master branch)\n        - source_commit: The commit hash of the code you are adding (the \"after\" branch)\n        - comment: The comment you want to publish\n        - annotation_file: The file you want to annotate (optional)\n        - annotation_line: The line number you want to annotate (optional)\n\n        Comment annotations for CodeCommit are different than GitHub.\n        CodeCommit only designates the starting line number for the comment.\n        It does not support the ending line number to highlight a range of lines.\n\n        Returns:\n        - None\n\n        Boto3 Documentation:\n        - aws codecommit post_comment_for_pull_request\n        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_pull_request.html\n        \"\"\"\n        if self.boto_client is None:\n            self._connect_boto_client()\n\n        try:\n            # If the comment has code annotations,\n            # then set the file path and line number in the location dictionary\n            if annotation_file and annotation_line:\n                self.boto_client.post_comment_for_pull_request(\n                    pullRequestId=str(pr_number),\n                    repositoryName=repo_name,\n                    beforeCommitId=destination_commit,\n                    afterCommitId=source_commit,\n                    content=comment,\n                    location={\n                        \"filePath\": annotation_file,\n                        \"filePosition\": annotation_line,\n                        
\"relativeFileVersion\": \"AFTER\",\n                    },\n                )\n            else:\n                # The comment does not have code annotations\n                self.boto_client.post_comment_for_pull_request(\n                    pullRequestId=str(pr_number),\n                    repositoryName=repo_name,\n                    beforeCommitId=destination_commit,\n                    afterCommitId=source_commit,\n                    content=comment,\n                )\n        except botocore.exceptions.ClientError as e:\n            if e.response[\"Error\"][\"Code\"] == 'RepositoryDoesNotExistException':\n                raise ValueError(f\"Repository does not exist: {repo_name}\") from e\n            if e.response[\"Error\"][\"Code\"] == 'PullRequestDoesNotExistException':\n                raise ValueError(f\"PR number does not exist: {pr_number}\") from e\n            raise ValueError(f\"Boto3 client error calling post_comment_for_pull_request\") from e\n        except Exception as e:\n            raise ValueError(f\"Error calling post_comment_for_pull_request\") from e\n"
  },
  {
    "path": "pr_agent/git_providers/codecommit_provider.py",
    "content": "import os\nimport re\nfrom collections import Counter\nfrom typing import List, Optional, Tuple\nfrom urllib.parse import urlparse\n\nfrom pr_agent.algo.language_handler import is_valid_file\nfrom pr_agent.algo.types import EDIT_TYPE, FilePatchInfo\nfrom pr_agent.git_providers.codecommit_client import CodeCommitClient\n\nfrom ..algo.utils import load_large_diff\nfrom ..config_loader import get_settings\nfrom ..log import get_logger\nfrom .git_provider import GitProvider\n\n\nclass PullRequestCCMimic:\n    \"\"\"\n    This class mimics the PullRequest class from the PyGithub library for the CodeCommitProvider.\n    \"\"\"\n\n    def __init__(self, title: str, diff_files: List[FilePatchInfo]):\n        self.title = title\n        self.diff_files = diff_files\n        self.description = None\n        self.source_commit = None\n        self.source_branch = None  # the branch containing your new code changes\n        self.destination_commit = None\n        self.destination_branch = None  # the branch you are going to merge into\n\n\nclass CodeCommitFile:\n    \"\"\"\n    This class represents a file in a pull request in CodeCommit.\n    \"\"\"\n\n    def __init__(\n        self,\n        a_path: str,\n        a_blob_id: str,\n        b_path: str,\n        b_blob_id: str,\n        edit_type: EDIT_TYPE,\n    ):\n        self.a_path = a_path\n        self.a_blob_id = a_blob_id\n        self.b_path = b_path\n        self.b_blob_id = b_blob_id\n        self.edit_type: EDIT_TYPE = edit_type\n        self.filename = b_path if b_path else a_path\n\n\nclass CodeCommitProvider(GitProvider):\n    \"\"\"\n    This class implements the GitProvider interface for AWS CodeCommit repositories.\n    \"\"\"\n\n    def __init__(self, pr_url: Optional[str] = None, incremental: Optional[bool] = False):\n        self.codecommit_client = CodeCommitClient()\n        self.aws_client = None\n        self.repo_name = None\n        self.pr_num = None\n        self.pr = None\n        
self.diff_files = None\n        self.git_files = None\n        self.pr_url = pr_url\n        if pr_url:\n            self.set_pr(pr_url)\n\n    def provider_name(self):\n        return \"CodeCommit\"\n\n    def is_supported(self, capability: str) -> bool:\n        if capability in [\n            \"get_issue_comments\",\n            \"create_inline_comment\",\n            \"publish_inline_comments\",\n            \"get_labels\",\n            \"gfm_markdown\"\n        ]:\n            return False\n        return True\n\n    def set_pr(self, pr_url: str):\n        self.repo_name, self.pr_num = self._parse_pr_url(pr_url)\n        self.pr = self._get_pr()\n\n    def get_files(self) -> list[CodeCommitFile]:\n        # bring files from CodeCommit only once\n        if self.git_files:\n            return self.git_files\n\n        self.git_files = []\n        differences = self.codecommit_client.get_differences(self.repo_name, self.pr.destination_commit, self.pr.source_commit)\n        for item in differences:\n            self.git_files.append(CodeCommitFile(item.before_blob_path,\n                                                 item.before_blob_id,\n                                                 item.after_blob_path,\n                                                 item.after_blob_id,\n                                                 CodeCommitProvider._get_edit_type(item.change_type)))\n        return self.git_files\n\n    def get_diff_files(self) -> list[FilePatchInfo]:\n        \"\"\"\n        Retrieves the list of files that have been modified, added, deleted, or renamed in a pull request in CodeCommit,\n        along with their content and patch information.\n\n        Returns:\n            diff_files (List[FilePatchInfo]): List of FilePatchInfo objects representing the modified, added, deleted,\n            or renamed files in the merge request.\n        \"\"\"\n        # bring files from CodeCommit only once\n        if self.diff_files:\n            return 
self.diff_files\n\n        self.diff_files = []\n\n        files = self.get_files()\n        for diff_item in files:\n            patch_filename = \"\"\n            if diff_item.a_blob_id is not None:\n                patch_filename = diff_item.a_path\n                original_file_content_str = self.codecommit_client.get_file(\n                    self.repo_name, diff_item.a_path, self.pr.destination_commit)\n                if isinstance(original_file_content_str, (bytes, bytearray)):\n                    original_file_content_str = original_file_content_str.decode(\"utf-8\")\n            else:\n                original_file_content_str = \"\"\n\n            if diff_item.b_blob_id is not None:\n                patch_filename = diff_item.b_path\n                new_file_content_str = self.codecommit_client.get_file(self.repo_name, diff_item.b_path, self.pr.source_commit)\n                if isinstance(new_file_content_str, (bytes, bytearray)):\n                    new_file_content_str = new_file_content_str.decode(\"utf-8\")\n            else:\n                new_file_content_str = \"\"\n\n            patch = load_large_diff(patch_filename, new_file_content_str, original_file_content_str)\n\n            # Store the diffs as a list of FilePatchInfo objects\n            info = FilePatchInfo(\n                original_file_content_str,\n                new_file_content_str,\n                patch,\n                diff_item.b_path,\n                edit_type=diff_item.edit_type,\n                old_filename=None\n                if diff_item.a_path == diff_item.b_path\n                else diff_item.a_path,\n            )\n            # Only add valid files to the diff list\n            # \"bad extensions\" are set in the language_extensions.toml file\n            # a \"valid file\" is one that is not in the \"bad extensions\" list\n            if is_valid_file(info.filename):\n                self.diff_files.append(info)\n\n        return self.diff_files\n\n    
def publish_description(self, pr_title: str, pr_body: str):\n        try:\n            self.codecommit_client.publish_description(\n                pr_number=self.pr_num,\n                pr_title=pr_title,\n                pr_body=CodeCommitProvider._add_additional_newlines(pr_body),\n            )\n        except Exception as e:\n            raise ValueError(f\"CodeCommit Cannot publish description for PR: {self.pr_num}\") from e\n\n    def publish_comment(self, pr_comment: str, is_temporary: bool = False):\n        if is_temporary:\n            get_logger().info(pr_comment)\n            return\n\n        pr_comment = CodeCommitProvider._remove_markdown_html(pr_comment)\n        pr_comment = CodeCommitProvider._add_additional_newlines(pr_comment)\n\n        try:\n            self.codecommit_client.publish_comment(\n                repo_name=self.repo_name,\n                pr_number=self.pr_num,\n                destination_commit=self.pr.destination_commit,\n                source_commit=self.pr.source_commit,\n                comment=pr_comment,\n            )\n        except Exception as e:\n            raise ValueError(f\"CodeCommit Cannot publish comment for PR: {self.pr_num}\") from e\n\n    def publish_code_suggestions(self, code_suggestions: list) -> bool:\n        counter = 1\n        for suggestion in code_suggestions:\n            # Verify that each suggestion has the required keys\n            if not all(key in suggestion for key in [\"body\", \"relevant_file\", \"relevant_lines_start\"]):\n                get_logger().warning(f\"Skipping code suggestion #{counter}: Each suggestion must have 'body', 'relevant_file', 'relevant_lines_start' keys\")\n                continue\n\n            # Publish the code suggestion to CodeCommit\n            try:\n                get_logger().debug(f\"Code Suggestion #{counter} in file: {suggestion['relevant_file']}: {suggestion['relevant_lines_start']}\")\n                self.codecommit_client.publish_comment(\n    
                repo_name=self.repo_name,\n                    pr_number=self.pr_num,\n                    destination_commit=self.pr.destination_commit,\n                    source_commit=self.pr.source_commit,\n                    comment=suggestion[\"body\"],\n                    annotation_file=suggestion[\"relevant_file\"],\n                    annotation_line=suggestion[\"relevant_lines_start\"],\n                )\n            except Exception as e:\n                raise ValueError(f\"CodeCommit Cannot publish code suggestions for PR: {self.pr_num}\") from e\n\n            counter += 1\n\n        # The calling function passes in a list of code suggestions, and this function publishes each suggestion one at a time.\n        # If we were to return False here, the calling function will attempt to publish the same list of code suggestions again, one at a time.\n        # Since this function publishes the suggestions one at a time anyway, we always return True here to avoid the retry.\n        return True\n\n    def publish_labels(self, labels):\n        return [\"\"]  # not implemented yet\n\n    def get_pr_labels(self, update=False):\n        return [\"\"]  # not implemented yet\n\n    def remove_initial_comment(self):\n        return \"\"  # not implemented yet\n\n    def remove_comment(self, comment):\n        return \"\"  # not implemented yet\n\n    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):\n        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_compared_commit.html\n        raise NotImplementedError(\"CodeCommit provider does not support publishing inline comments yet\")\n\n    def publish_inline_comments(self, comments: list[dict]):\n        raise NotImplementedError(\"CodeCommit provider does not support publishing inline comments yet\")\n\n    def get_title(self):\n        return self.pr.title\n\n    def 
get_pr_id(self):\n        \"\"\"\n        Returns the PR ID in the format: \"repo_name/pr_number\".\n        Note: This is an internal identifier for PR-Agent,\n        and is not the same as the CodeCommit PR identifier.\n        \"\"\"\n        try:\n            pr_id = f\"{self.repo_name}/{self.pr_num}\"\n            return pr_id\n        except:\n            return \"\"\n\n    def get_languages(self):\n        \"\"\"\n        Returns a dictionary of languages, containing the percentage of each language used in the PR.\n\n        Returns:\n        - dict: A dictionary where each key is a language name and the corresponding value is the percentage of that language in the PR.\n        \"\"\"\n        commit_files = self.get_files()\n        filenames = [ item.filename for item in commit_files ]\n        extensions = CodeCommitProvider._get_file_extensions(filenames)\n\n        # Calculate the percentage of each file extension in the PR\n        percentages = CodeCommitProvider._get_language_percentages(extensions)\n\n        # The global language_extension_map is a dictionary of languages,\n        # where each dictionary item is a BoxList of extensions.\n        # We want a dictionary of extensions,\n        # where each dictionary item is a language name.\n        # We build that language->extension dictionary here in main_extensions_flat.\n        main_extensions_flat = {}\n        language_extension_map_org = get_settings().language_extension_map_org\n        language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()}\n        for language, extensions in language_extension_map.items():\n            for ext in extensions:\n                main_extensions_flat[ext] = language\n\n        # Map the file extension/languages to percentages\n        languages = {}\n        for ext, pct in percentages.items():\n            languages[main_extensions_flat.get(ext, \"\")] = pct\n\n        return languages\n\n    def get_pr_branch(self):\n       
 return self.pr.source_branch\n\n    def get_pr_description_full(self) -> str:\n        return self.pr.description\n\n    def get_user_id(self):\n        return -1  # not implemented yet\n\n    def get_issue_comments(self):\n        raise NotImplementedError(\"CodeCommit provider does not support issue comments yet\")\n\n    def get_repo_settings(self):\n        # a local \".pr_agent.toml\" settings file is optional\n        settings_filename = \".pr_agent.toml\"\n        return self.codecommit_client.get_file(self.repo_name, settings_filename, self.pr.source_commit, optional=True)\n\n    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:\n        get_logger().info(\"CodeCommit provider does not support eyes reaction yet\")\n        return True\n\n    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:\n        get_logger().info(\"CodeCommit provider does not support removing reactions yet\")\n        return True\n\n    @staticmethod\n    def _parse_pr_url(pr_url: str) -> Tuple[str, int]:\n        \"\"\"\n        Parse the CodeCommit PR URL and return the repository name and PR number.\n\n        Args:\n        - pr_url: the full AWS CodeCommit pull request URL\n\n        Returns:\n        - Tuple[str, int]: A tuple containing the repository name and PR number.\n        \"\"\"\n        # Example PR URL:\n        # https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/__MY_REPO__/pull-requests/123456\"\n        parsed_url = urlparse(pr_url)\n\n        if not CodeCommitProvider._is_valid_codecommit_hostname(parsed_url.netloc):\n            raise ValueError(f\"The provided URL is not a valid CodeCommit URL: {pr_url}\")\n\n        path_parts = parsed_url.path.strip(\"/\").split(\"/\")\n\n        if (\n            len(path_parts) < 6\n            or path_parts[0] != \"codesuite\"\n            or path_parts[1] != \"codecommit\"\n            or path_parts[2] != \"repositories\"\n    
        or path_parts[4] != \"pull-requests\"\n        ):\n            raise ValueError(f\"The provided URL does not appear to be a CodeCommit PR URL: {pr_url}\")\n\n        repo_name = path_parts[3]\n\n        try:\n            pr_number = int(path_parts[5])\n        except ValueError as e:\n            raise ValueError(f\"Unable to convert PR number to integer: '{path_parts[5]}'\") from e\n\n        return repo_name, pr_number\n\n    @staticmethod\n    def _is_valid_codecommit_hostname(hostname: str) -> bool:\n        \"\"\"\n        Check if the provided hostname is a valid AWS CodeCommit hostname.\n\n        This is not an exhaustive check of AWS region names,\n        but instead uses a regex to check for matching AWS region patterns.\n\n        Args:\n        - hostname: the hostname to check\n\n        Returns:\n        - bool: True if the hostname is valid, False otherwise.\n        \"\"\"\n        return re.match(r\"^[a-z]{2}-(gov-)?[a-z]+-\\d\\.console\\.aws\\.amazon\\.com$\", hostname) is not None\n\n    def _get_pr(self):\n        response = self.codecommit_client.get_pr(self.repo_name, self.pr_num)\n\n        if len(response.targets) == 0:\n            raise ValueError(f\"No files found in CodeCommit PR: {self.pr_num}\")\n\n        # TODO: implement support for multiple targets in one CodeCommit PR\n        #       for now, we are only using the first target in the PR\n        if len(response.targets) > 1:\n            get_logger().warning(\n                \"Multiple targets in one PR is not supported for CodeCommit yet. 
Continuing, using the first target only...\"\n            )\n\n        # Return our object that mimics PullRequest class from the PyGithub library\n        # (This strategy was copied from the LocalGitProvider)\n        mimic = PullRequestCCMimic(response.title, self.diff_files)\n        mimic.description = response.description\n        mimic.source_commit = response.targets[0].source_commit\n        mimic.source_branch = response.targets[0].source_branch\n        mimic.destination_commit = response.targets[0].destination_commit\n        mimic.destination_branch = response.targets[0].destination_branch\n\n        return mimic\n\n    def get_commit_messages(self):\n        return \"\"  # not implemented yet\n\n    @staticmethod\n    def _add_additional_newlines(body: str) -> str:\n        \"\"\"\n        Replace single newlines in a PR body with double newlines.\n\n        CodeCommit Markdown does not seem to render as well as GitHub Markdown,\n        so we add additional newlines to the PR body to make it more readable in CodeCommit.\n\n        Args:\n        - body: the PR body\n\n        Returns:\n        - str: the PR body with the double newlines added\n        \"\"\"\n        return re.sub(r'(?<!\\n)\\n(?!\\n)', '\\n\\n', body)\n\n    @staticmethod\n    def _remove_markdown_html(comment: str) -> str:\n        \"\"\"\n        Remove the HTML tags from a PR comment.\n\n        CodeCommit Markdown does not seem to render as well as GitHub Markdown,\n        so we remove the HTML tags from the PR comment to make it more readable in CodeCommit.\n\n        Args:\n        - comment: the PR comment\n\n        Returns:\n        - str: the PR comment with the HTML tags removed\n        \"\"\"\n        comment = comment.replace(\"<details>\", \"\")\n        comment = comment.replace(\"</details>\", \"\")\n        comment = comment.replace(\"<summary>\", \"\")\n        comment = comment.replace(\"</summary>\", \"\")\n        return comment\n\n    @staticmethod\n    def 
_get_edit_type(codecommit_change_type: str):\n        \"\"\"\n        Convert the CodeCommit change type string to the EDIT_TYPE enum.\n        The CodeCommit change type string is returned from the get_differences SDK method.\n\n        Args:\n        - codecommit_change_type: the CodeCommit change type string\n\n        Returns:\n        - An EDIT_TYPE enum representing the modified, added, deleted, or renamed file in the PR diff.\n        \"\"\"\n        t = codecommit_change_type.upper()\n        edit_type = None\n        if t == \"A\":\n            edit_type = EDIT_TYPE.ADDED\n        elif t == \"D\":\n            edit_type = EDIT_TYPE.DELETED\n        elif t == \"M\":\n            edit_type = EDIT_TYPE.MODIFIED\n        elif t == \"R\":\n            edit_type = EDIT_TYPE.RENAMED\n        return edit_type\n\n    @staticmethod\n    def _get_file_extensions(filenames):\n        \"\"\"\n        Return a list of file extensions from a list of filenames.\n        The returned extensions will include the dot \".\" prefix,\n        to accommodate for the dots in the existing language_extension_map settings.\n        Filenames with no extension will return an empty string for the extension.\n\n        Args:\n        - filenames: a list of filenames\n\n        Returns:\n        - list: A list of file extensions, including the dot \".\" prefix.\n        \"\"\"\n        extensions = []\n        for filename in filenames:\n            filename, ext = os.path.splitext(filename)\n            if ext:\n                extensions.append(ext.lower())\n            else:\n                extensions.append(\"\")\n        return extensions\n\n    @staticmethod\n    def _get_language_percentages(extensions):\n        \"\"\"\n        Return a dictionary containing the programming language name (as the key),\n        and the percentage that language is used (as the value),\n        given a list of file extensions.\n\n        Args:\n        - extensions: a list of file extensions\n\n   
     Returns:\n        - dict: A dictionary where each key is a file extension (standing in for a language) and the corresponding value is the percentage of files in the PR with that extension.\n        \"\"\"\n        total_files = len(extensions)\n        if total_files == 0:\n            return {}\n\n        # Identify language by file extension and count\n        lang_count = Counter(extensions)\n        # Convert counts to percentages\n        lang_percentage = {\n            lang: round(count / total_files * 100) for lang, count in lang_count.items()\n        }\n        return lang_percentage\n"
  },
  {
    "path": "pr_agent/git_providers/gerrit_provider.py",
    "content": "import json\nimport os\nimport pathlib\nimport shutil\nimport subprocess\nimport uuid\nfrom collections import Counter, namedtuple\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile, mkdtemp\n\nimport requests\nimport urllib3.util\nfrom git import Repo\n\nfrom pr_agent.algo.types import EDIT_TYPE, FilePatchInfo\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers.git_provider import GitProvider\nfrom pr_agent.git_providers.local_git_provider import PullRequestMimic\nfrom pr_agent.log import get_logger\n\n\ndef _call(*command, **kwargs) -> (int, str, str):\n    res = subprocess.run(\n        command,\n        stdout=subprocess.PIPE,\n        stderr=subprocess.PIPE,\n        check=True,\n        **kwargs,\n    )\n    return res.stdout.decode()\n\n\ndef clone(url, directory):\n    get_logger().info(\"Cloning %s to %s\", url, directory)\n    stdout = _call('git', 'clone', \"--depth\", \"1\", url, directory)\n    get_logger().info(stdout)\n\n\ndef fetch(url, refspec, cwd):\n    get_logger().info(\"Fetching %s %s\", url, refspec)\n    stdout = _call(\n        'git', 'fetch', '--depth', '2', url, refspec,\n        cwd=cwd\n    )\n    get_logger().info(stdout)\n\n\ndef checkout(cwd):\n    get_logger().info(\"Checking out\")\n    stdout = _call('git', 'checkout', \"FETCH_HEAD\", cwd=cwd)\n    get_logger().info(stdout)\n\n\ndef show(*args, cwd=None):\n    get_logger().info(\"Show\")\n    return _call('git', 'show', *args, cwd=cwd)\n\n\ndef diff(*args, cwd=None):\n    get_logger().info(\"Diff\")\n    patch = _call('git', 'diff', *args, cwd=cwd)\n    if not patch:\n        get_logger().warning(\"No changes found\")\n        return\n    return patch\n\n\ndef reset_local_changes(cwd):\n    get_logger().info(\"Reset local changes\")\n    _call('git', 'checkout', \"--force\", cwd=cwd)\n\n\ndef add_comment(url: urllib3.util.Url, refspec, message):\n    *_, patchset, changenum = refspec.rsplit(\"/\")\n    message = \"'\" + 
message.replace(\"'\", \"'\\\"'\\\"'\") + \"'\"\n    return _call(\n        \"ssh\",\n        \"-p\", str(url.port),\n        f\"{url.auth}@{url.host}\",\n        \"gerrit\", \"review\",\n        \"--message\", message,\n        # \"--code-review\", score,\n        f\"{patchset},{changenum}\",\n    )\n\n\ndef list_comments(url: urllib3.util.Url, refspec):\n    *_, patchset, _ = refspec.rsplit(\"/\")\n    stdout = _call(\n        \"ssh\",\n        \"-p\", str(url.port),\n        f\"{url.auth}@{url.host}\",\n        \"gerrit\", \"query\",\n        \"--comments\",\n        \"--current-patch-set\", patchset,\n        \"--format\", \"JSON\",\n    )\n    change_set, *_ = stdout.splitlines()\n    return json.loads(change_set)[\"currentPatchSet\"][\"comments\"]\n\n\ndef prepare_repo(url: urllib3.util.Url, project, refspec):\n    repo_url = (f\"{url.scheme}://{url.auth}@{url.host}:{url.port}/{project}\")\n\n    directory = pathlib.Path(mkdtemp())\n    clone(repo_url, directory)\n    fetch(repo_url, refspec, cwd=directory)\n    checkout(cwd=directory)\n    return directory\n\n\ndef adopt_to_gerrit_message(message):\n    lines = message.splitlines()\n    buf = []\n    for line in lines:\n        # remove markdown formatting\n        line = (line.replace(\"*\", \"\")\n                .replace(\"``\", \"`\")\n                .replace(\"<details>\", \"\")\n                .replace(\"</details>\", \"\")\n                .replace(\"<summary>\", \"\")\n                .replace(\"</summary>\", \"\"))\n\n        line = line.strip()\n        if line.startswith('#'):\n            buf.append(\"\\n\" +\n                       line.replace('#', '').removesuffix(\":\").strip() +\n                       \":\")\n            continue\n        elif line.startswith('-'):\n            buf.append(line.removeprefix('-').strip())\n            continue\n        else:\n            buf.append(line)\n    return \"\\n\".join(buf).strip()\n\n\ndef add_suggestion(src_filename, context: str, start, end: 
int):\n    with (\n        NamedTemporaryFile(\"w\", delete=False) as tmp,\n        open(src_filename, \"r\") as src\n    ):\n        lines = src.readlines()\n        tmp.writelines(lines[:start - 1])\n        if context:\n            tmp.write(context)\n        tmp.writelines(lines[end:])\n\n    shutil.copy(tmp.name, src_filename)\n    os.remove(tmp.name)\n\n\ndef upload_patch(patch, path):\n    patch_server_endpoint = get_settings().get(\n        'gerrit.patch_server_endpoint')\n    patch_server_token = get_settings().get(\n        'gerrit.patch_server_token')\n\n    response = requests.post(\n        patch_server_endpoint,\n        json={\n            \"content\": patch,\n            \"path\": path,\n        },\n        headers={\n            \"Content-Type\": \"application/json\",\n            \"Authorization\": f\"Bearer {patch_server_token}\",\n        }\n    )\n    response.raise_for_status()\n    patch_server_endpoint = patch_server_endpoint.rstrip(\"/\")\n    return patch_server_endpoint + \"/\" + path\n\n\nclass GerritProvider(GitProvider):\n\n    def __init__(self, key: str, incremental=False):\n        self.project, self.refspec = key.split(':')\n        assert self.project, \"Project name is required\"\n        assert self.refspec, \"Refspec is required\"\n        base_url = get_settings().get('gerrit.url')\n        assert base_url, \"Gerrit URL is required\"\n        user = get_settings().get('gerrit.user')\n        assert user, \"Gerrit user is required\"\n\n        parsed = urllib3.util.parse_url(base_url)\n        self.parsed_url = urllib3.util.parse_url(\n            f\"{parsed.scheme}://{user}@{parsed.host}:{parsed.port}\"\n        )\n\n        self.repo_path = prepare_repo(\n            self.parsed_url, self.project, self.refspec\n        )\n        self.repo = Repo(self.repo_path)\n        assert self.repo\n        self.pr_url = base_url\n        self.pr = PullRequestMimic(self.get_pr_title(), self.get_diff_files())\n\n    def 
get_pr_title(self):\n        \"\"\"\n        Substitutes the branch-name as the PR-mimic title.\n        \"\"\"\n        return self.repo.branches[0].name\n\n    def get_issue_comments(self):\n        comments = list_comments(self.parsed_url, self.refspec)\n        Comments = namedtuple('Comments', ['reversed'])\n        Comment = namedtuple('Comment', ['body'])\n        return Comments([Comment(c['message']) for c in reversed(comments)])\n\n    def get_pr_labels(self, update=False):\n        raise NotImplementedError(\n            'Getting labels is not implemented for the gerrit provider')\n\n    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False):\n        raise NotImplementedError(\n            'Adding reactions is not implemented for the gerrit provider')\n\n    def remove_reaction(self, issue_comment_id: int, reaction_id: int):\n        raise NotImplementedError(\n            'Removing reactions is not implemented for the gerrit provider')\n\n    def get_commit_messages(self):\n        return [self.repo.head.commit.message]\n\n    def get_repo_settings(self):\n        try:\n            with open(self.repo_path / \".pr_agent.toml\", 'rb') as f:\n                contents = f.read()\n            return contents\n        except OSError:\n            return b\"\"\n\n    def get_diff_files(self) -> list[FilePatchInfo]:\n        diffs = self.repo.head.commit.diff(\n            self.repo.head.commit.parents[0],  # previous commit\n            create_patch=True,\n            R=True\n        )\n\n        diff_files = []\n        for diff_item in diffs:\n            if diff_item.a_blob is not None:\n                original_file_content_str = (\n                    diff_item.a_blob.data_stream.read().decode('utf-8')\n                )\n            else:\n                original_file_content_str = \"\"  # empty file\n            if diff_item.b_blob is not None:\n                new_file_content_str = diff_item.b_blob.data_stream.read(). 
\\\n                    decode('utf-8')\n            else:\n                new_file_content_str = \"\"  # empty file\n            edit_type = EDIT_TYPE.MODIFIED\n            if diff_item.new_file:\n                edit_type = EDIT_TYPE.ADDED\n            elif diff_item.deleted_file:\n                edit_type = EDIT_TYPE.DELETED\n            elif diff_item.renamed_file:\n                edit_type = EDIT_TYPE.RENAMED\n            diff_files.append(\n                FilePatchInfo(\n                    original_file_content_str,\n                    new_file_content_str,\n                    diff_item.diff.decode('utf-8'),\n                    diff_item.b_path,\n                    edit_type=edit_type,\n                    old_filename=None\n                    if diff_item.a_path == diff_item.b_path\n                    else diff_item.a_path\n                )\n            )\n        self.diff_files = diff_files\n        return diff_files\n\n    def get_files(self):\n        diff_index = self.repo.head.commit.diff(\n            self.repo.head.commit.parents[0],  # previous commit\n            R=True\n        )\n        # Get the list of changed files\n        diff_files = [item.a_path for item in diff_index]\n        return diff_files\n\n    def get_languages(self):\n        \"\"\"\n        Calculate percentage of languages in repository. 
Used for hunk\n        prioritisation.\n        \"\"\"\n        # Get all files in repository\n        filepaths = [Path(item.path) for item in\n                     self.repo.tree().traverse() if item.type == 'blob']\n        # Identify language by file extension and count\n        lang_count = Counter(\n            ext.lstrip('.') for filepath in filepaths for ext in\n            [filepath.suffix.lower()])\n        # Convert counts to percentages\n        total_files = len(filepaths)\n        lang_percentage = {lang: count / total_files * 100 for lang, count\n                           in lang_count.items()}\n        return lang_percentage\n\n    def get_pr_description_full(self):\n        return self.repo.head.commit.message\n\n    def get_user_id(self):\n        return self.repo.head.commit.author.email\n\n    def is_supported(self, capability: str) -> bool:\n        if capability in [\n            # 'get_issue_comments',\n            'create_inline_comment',\n            'publish_inline_comments',\n            'get_labels',\n            'gfm_markdown'\n        ]:\n            return False\n        return True\n\n    def split_suggestion(self, msg) -> tuple[str, str]:\n        is_code_context = False\n        description = []\n        context = []\n        for line in msg.splitlines():\n            if line.startswith('```suggestion'):\n                is_code_context = True\n                continue\n            if line.startswith('```'):\n                is_code_context = False\n                continue\n            if is_code_context:\n                context.append(line)\n            else:\n                description.append(\n                    line.replace('*', '')\n                )\n\n        return (\n            '\\n'.join(description),\n            '\\n'.join(context) + '\\n' if context else ''\n        )\n\n    def publish_code_suggestions(self, code_suggestions: list):\n        msg = []\n        for suggestion in code_suggestions:\n            
description, code = self.split_suggestion(suggestion['body'])\n            add_suggestion(\n                pathlib.Path(self.repo_path) / suggestion[\"relevant_file\"],\n                code,\n                suggestion[\"relevant_lines_start\"],\n                suggestion[\"relevant_lines_end\"],\n            )\n            patch = diff(cwd=self.repo_path)\n            patch_id = uuid.uuid4().hex[0:4]\n            path = \"/\".join([\"codium-ai\", self.refspec, patch_id])\n            full_path = upload_patch(patch, path)\n            reset_local_changes(self.repo_path)\n            msg.append(f'* {description}\\n{full_path}')\n\n        if msg:\n            add_comment(self.parsed_url, self.refspec, \"\\n\".join(msg))\n            return True\n\n    def publish_comment(self, pr_comment: str, is_temporary: bool = False):\n        if not is_temporary:\n            msg = adopt_to_gerrit_message(pr_comment)\n            add_comment(self.parsed_url, self.refspec, msg)\n\n    def publish_description(self, pr_title: str, pr_body: str):\n        msg = adopt_to_gerrit_message(pr_body)\n        add_comment(self.parsed_url, self.refspec, pr_title + '\\n' + msg)\n\n    def publish_inline_comments(self, comments: list[dict]):\n        raise NotImplementedError(\n            'Publishing inline comments is not implemented for the gerrit '\n            'provider')\n\n    def publish_inline_comment(self, body: str, relevant_file: str,\n                               relevant_line_in_file: str, original_suggestion=None):\n        raise NotImplementedError(\n            'Publishing inline comments is not implemented for the gerrit '\n            'provider')\n\n\n    def publish_labels(self, labels):\n        # Not applicable to the local git provider,\n        # but required by the interface\n        pass\n\n    def remove_initial_comment(self):\n        # remove repo, cloned in previous steps\n        # shutil.rmtree(self.repo_path)\n        pass\n\n    def remove_comment(self, 
comment):\n        pass\n\n    def get_pr_branch(self):\n        return self.repo.head\n"
  },
  {
    "path": "pr_agent/git_providers/git_provider.py",
    "content": "from abc import ABC, abstractmethod\n# enum EDIT_TYPE (ADDED, DELETED, MODIFIED, RENAMED)\nimport os\nimport shutil\nimport subprocess\nfrom typing import Optional, Tuple\n\nfrom pr_agent.algo.types import FilePatchInfo\nfrom pr_agent.algo.utils import Range, process_description\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger\n\nMAX_FILES_ALLOWED_FULL = 50\n\ndef get_git_ssl_env() -> dict[str, str]:\n    \"\"\"\n    Get git SSL configuration arguments for per-command use.\n    This fixes SSL certificate issues when cloning repos with self-signed certificates.\n    Returns the current environment with the addition of SSL config changes if any such SSL certificates exist.\n    \"\"\"\n    ssl_cert_file = os.environ.get('SSL_CERT_FILE')\n    requests_ca_bundle = os.environ.get('REQUESTS_CA_BUNDLE')\n    git_ssl_ca_info = os.environ.get('GIT_SSL_CAINFO')\n\n    chosen_cert_file = \"\"\n\n    # Try SSL_CERT_FILE first\n    if ssl_cert_file:\n        if os.path.exists(ssl_cert_file):\n            if ((requests_ca_bundle and requests_ca_bundle != ssl_cert_file)\n                    or (git_ssl_ca_info and git_ssl_ca_info != ssl_cert_file)):\n                get_logger().warning(f\"Found mismatch among: SSL_CERT_FILE, REQUESTS_CA_BUNDLE, GIT_SSL_CAINFO. 
\"\n                                     f\"Using the SSL_CERT_FILE to resolve ambiguity.\",\n                                  artifact={\"ssl_cert_file\": ssl_cert_file, \"requests_ca_bundle\": requests_ca_bundle,\n                                            'git_ssl_ca_info': git_ssl_ca_info})\n            else:\n                get_logger().info(f\"Using SSL certificate bundle for git operations\", artifact={\"ssl_cert_file\": ssl_cert_file})\n            chosen_cert_file = ssl_cert_file\n        else:\n            get_logger().warning(\"SSL certificate bundle not found for git operations\", artifact={\"ssl_cert_file\": ssl_cert_file})\n\n    # Fallback to REQUESTS_CA_BUNDLE\n    elif requests_ca_bundle:\n        if os.path.exists(requests_ca_bundle):\n            if (git_ssl_ca_info and git_ssl_ca_info != requests_ca_bundle):\n                get_logger().warning(f\"Found mismatch between: REQUESTS_CA_BUNDLE, GIT_SSL_CAINFO. \"\n                                     f\"Using the REQUESTS_CA_BUNDLE to resolve ambiguity.\",\n                artifact = {\"requests_ca_bundle\": requests_ca_bundle, 'git_ssl_ca_info': git_ssl_ca_info})\n            else:\n                get_logger().info(\"Using SSL certificate bundle from REQUESTS_CA_BUNDLE for git operations\",\n                                  artifact={\"requests_ca_bundle\": requests_ca_bundle})\n            chosen_cert_file = requests_ca_bundle\n        else:\n            get_logger().warning(\"requests CA bundle not found for git operations\", artifact={\"requests_ca_bundle\": requests_ca_bundle})\n\n    #Fallback to GIT CA:\n    elif git_ssl_ca_info:\n        if os.path.exists(git_ssl_ca_info):\n            get_logger().info(\"Using git SSL CA info from GIT_SSL_CAINFO for git operations\",\n                              artifact={\"git_ssl_ca_info\": git_ssl_ca_info})\n            chosen_cert_file = git_ssl_ca_info\n        else:\n            get_logger().warning(\"git SSL CA info not found for git 
operations\", artifact={\"git_ssl_ca_info\": git_ssl_ca_info})\n\n    else:\n        get_logger().warning(\"Neither SSL_CERT_FILE nor REQUESTS_CA_BUNDLE nor GIT_SSL_CAINFO are defined, or they are defined but not found. Returning environment without SSL configuration\")\n\n    returned_env = os.environ.copy()\n    if chosen_cert_file:\n        returned_env.update({\"GIT_SSL_CAINFO\": chosen_cert_file, \"REQUESTS_CA_BUNDLE\": chosen_cert_file})\n    return returned_env\n\n\nclass GitProvider(ABC):\n    @abstractmethod\n    def is_supported(self, capability: str) -> bool:\n        pass\n\n    #Given a url (issues or PR/MR) - get the .git repo url to which they belong. Needs to be implemented by the provider.\n    def get_git_repo_url(self, issues_or_pr_url: str) -> str:\n        get_logger().warning(\"Not implemented! Returning empty url\")\n        return \"\"\n\n    # Given a git repo url, return prefix and suffix of the provider in order to view a given file belonging to that repo. Needs to be implemented by the provider.\n    # For example: For a git: https://git_provider.com/MY_PROJECT/MY_REPO.git and desired branch: <MY_BRANCH> then it should return ('https://git_provider.com/projects/MY_PROJECT/repos/MY_REPO/.../<MY_BRANCH>', '?=<SOME HEADER>')\n    # so that to properly view the file: docs/readme.md -> <PREFIX>/docs/readme.md<SUFFIX> -> https://git_provider.com/projects/MY_PROJECT/repos/MY_REPO/<MY_BRANCH>/docs/readme.md?=<SOME HEADER>)\n    def get_canonical_url_parts(self, repo_git_url:str, desired_branch:str) -> Tuple[str, str]:\n        get_logger().warning(\"Not implemented! 
Returning empty prefix and suffix\")\n        return (\"\", \"\")\n\n\n    #Clone related API\n    #An object which ensures deletion of a cloned repo, once it becomes out of scope.\n    # Example usage:\n    #    with TemporaryDirectory() as tmp_dir:\n    #            returned_obj: GitProvider.ScopedClonedRepo = self.git_provider.clone(self.repo_url, tmp_dir, remove_dest_folder=False)\n    #            print(returned_obj.path) #Use returned_obj.path.\n    #    #From this point, returned_obj.path may be deleted at any point and therefore must not be used.\n    class ScopedClonedRepo(object):\n        def __init__(self, dest_folder):\n            self.path = dest_folder\n\n        def __del__(self):\n            if self.path and os.path.exists(self.path):\n                shutil.rmtree(self.path, ignore_errors=True)\n\n    #Method to allow implementors to manipulate the repo url to clone (such as embedding tokens in the url string). Needs to be implemented by the provider.\n    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:\n        get_logger().warning(\"Not implemented! 
Returning None\")\n        return None\n\n    # Does a shallow clone, using a forked process to support a timeout guard.\n    # In case operation has failed, it is expected to throw an exception as this method does not return a value.\n    def _clone_inner(self, repo_url: str, dest_folder: str, operation_timeout_in_seconds: int=None) -> None:\n        #The following ought to be equivalent to:\n        # #Repo.clone_from(repo_url, dest_folder)\n        # , but with throwing an exception upon timeout.\n        # Note: This can only be used in context that supports using pipes.\n        try:\n            ssl_env = get_git_ssl_env()\n        except Exception as e:\n            get_logger().exception(\n                \"Failed to prepare SSL environment for git operations, falling back to default env\",\n                artifact={\"error\": e}\n            )\n            ssl_env = os.environ.copy()\n\n        subprocess.run([\n            \"git\", \"clone\",\n            \"--filter=blob:none\",\n            \"--depth\", \"1\",\n            repo_url, dest_folder\n        ], env=ssl_env, check=True,  # check=True will raise an exception if the command fails\n            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=operation_timeout_in_seconds)\n\n    CLONE_TIMEOUT_SEC = 20\n    # Clone a given url to a destination folder. If successful, returns an object that wraps the destination folder,\n    # deleting it once it is garbage collected. 
See: GitProvider.ScopedClonedRepo for more details.\n    def clone(self, repo_url_to_clone: str, dest_folder: str, remove_dest_folder: bool = True,\n              operation_timeout_in_seconds: int=CLONE_TIMEOUT_SEC) -> ScopedClonedRepo|None:\n        returned_obj = None\n        clone_url = self._prepare_clone_url_with_token(repo_url_to_clone)\n        if not clone_url:\n            get_logger().error(\"Clone failed: Unable to obtain url to clone.\")\n            return returned_obj\n        try:\n            if remove_dest_folder and os.path.exists(dest_folder) and os.path.isdir(dest_folder):\n                shutil.rmtree(dest_folder)\n            self._clone_inner(clone_url, dest_folder, operation_timeout_in_seconds)\n            returned_obj = GitProvider.ScopedClonedRepo(dest_folder)\n        except Exception as e:\n            get_logger().exception(f\"Clone failed: Could not clone url.\",\n                artifact={\"error\": str(e), \"url\": clone_url, \"dest_folder\": dest_folder})\n        finally:\n            return returned_obj\n\n    @abstractmethod\n    def get_files(self) -> list:\n        pass\n\n    @abstractmethod\n    def get_diff_files(self) -> list[FilePatchInfo]:\n        pass\n\n    def get_incremental_commits(self, is_incremental):\n        pass\n\n    @abstractmethod\n    def publish_description(self, pr_title: str, pr_body: str):\n        pass\n\n    @abstractmethod\n    def publish_code_suggestions(self, code_suggestions: list) -> bool:\n        pass\n\n    @abstractmethod\n    def get_languages(self):\n        pass\n\n    @abstractmethod\n    def get_pr_branch(self):\n        pass\n\n    @abstractmethod\n    def get_user_id(self):\n        pass\n\n    @abstractmethod\n    def get_pr_description_full(self) -> str:\n        pass\n\n    def edit_comment(self, comment, body: str):\n        pass\n\n    def edit_comment_from_comment_id(self, comment_id: int, body: str):\n        pass\n\n    def get_comment_body_from_comment_id(self, 
comment_id: int) -> str:\n        pass\n\n    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):\n        pass\n\n    def get_pr_description(self, full: bool = True, split_changes_walkthrough=False) -> str | tuple:\n        from pr_agent.algo.utils import clip_tokens\n        from pr_agent.config_loader import get_settings\n        max_tokens_description = get_settings().get(\"CONFIG.MAX_DESCRIPTION_TOKENS\", None)\n        description = self.get_pr_description_full() if full else self.get_user_description()\n        if split_changes_walkthrough:\n            description, files = process_description(description)\n            if max_tokens_description:\n                description = clip_tokens(description, max_tokens_description)\n            return description, files\n        else:\n            if max_tokens_description:\n                description = clip_tokens(description, max_tokens_description)\n            return description\n\n    def get_user_description(self) -> str:\n        if hasattr(self, 'user_description') and not (self.user_description is None):\n            return self.user_description\n\n        description = (self.get_pr_description_full() or \"\").strip()\n        description_lowercase = description.lower()\n        get_logger().debug(f\"Existing description\", description=description_lowercase)\n\n        # if the existing description wasn't generated by the pr-agent, just return it as-is\n        if not self._is_generated_by_pr_agent(description_lowercase):\n            get_logger().info(f\"Existing description was not generated by the pr-agent\")\n            self.user_description = description\n            return description\n\n        # if the existing description was generated by the pr-agent, but it doesn't contain a user description,\n        # return nothing (empty string) because it means there is no user description\n        user_description_header = \"### **user description**\"\n        if 
user_description_header not in description_lowercase:\n            get_logger().info(f\"Existing description was generated by the pr-agent, but it doesn't contain a user description\")\n            return \"\"\n\n        # otherwise, extract the original user description from the existing pr-agent description and return it\n        # user_description_start_position = description_lowercase.find(user_description_header) + len(user_description_header)\n        # return description[user_description_start_position:].split(\"\\n\", 1)[-1].strip()\n\n        # the 'user description' is in the beginning. extract and return it\n        possible_headers = self._possible_headers()\n        start_position = description_lowercase.find(user_description_header) + len(user_description_header)\n        end_position = len(description)\n        for header in possible_headers: # try to clip at the next header\n            if header != user_description_header and header in description_lowercase:\n                end_position = min(end_position, description_lowercase.find(header))\n        if end_position != len(description) and end_position > start_position:\n            original_user_description = description[start_position:end_position].strip()\n            if original_user_description.endswith(\"___\"):\n                original_user_description = original_user_description[:-3].strip()\n        else:\n            original_user_description = description.split(\"___\")[0].strip()\n            if original_user_description.lower().startswith(user_description_header):\n                original_user_description = original_user_description[len(user_description_header):].strip()\n\n        get_logger().info(f\"Extracted user description from existing description\",\n                          description=original_user_description)\n        self.user_description = original_user_description\n        return original_user_description\n\n    def _possible_headers(self):\n        return (\"### 
**user description**\", \"### **pr type**\", \"### **pr description**\", \"### **pr labels**\", \"### **type**\", \"### **description**\",\n                \"### **labels**\", \"### 🤖 generated by pr agent\")\n\n    def _is_generated_by_pr_agent(self, description_lowercase: str) -> bool:\n        possible_headers = self._possible_headers()\n        return any(description_lowercase.startswith(header) for header in possible_headers)\n\n    @abstractmethod\n    def get_repo_settings(self):\n        pass\n\n    def get_workspace_name(self):\n        return \"\"\n\n    def get_pr_id(self):\n        return \"\"\n\n    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:\n        return \"\"\n\n    def get_lines_link_original_file(self, filepath:str, component_range: Range) -> str:\n        return \"\"\n\n    #### comments operations ####\n    @abstractmethod\n    def publish_comment(self, pr_comment: str, is_temporary: bool = False):\n        pass\n\n    def publish_persistent_comment(self, pr_comment: str,\n                                   initial_header: str,\n                                   update_header: bool = True,\n                                   name='review',\n                                   final_update_message=True):\n        return self.publish_comment(pr_comment)\n\n    def publish_persistent_comment_full(self, pr_comment: str,\n                                   initial_header: str,\n                                   update_header: bool = True,\n                                   name='review',\n                                   final_update_message=True):\n        try:\n            prev_comments = list(self.get_issue_comments())\n            for comment in prev_comments:\n                if comment.body.startswith(initial_header):\n                    latest_commit_url = self.get_latest_commit_url()\n                    comment_url = self.get_comment_url(comment)\n                    if 
update_header:\n                        updated_header = f\"{initial_header}\\n\\n#### ({name.capitalize()} updated until commit {latest_commit_url})\\n\"\n                        pr_comment_updated = pr_comment.replace(initial_header, updated_header)\n                    else:\n                        pr_comment_updated = pr_comment\n                    get_logger().info(f\"Persistent mode - updating comment {comment_url} to latest {name} message\")\n                    # response = self.mr.notes.update(comment.id, {'body': pr_comment_updated})\n                    self.edit_comment(comment, pr_comment_updated)\n                    if final_update_message:\n                        return self.publish_comment(\n                            f\"**[Persistent {name}]({comment_url})** updated to latest commit {latest_commit_url}\")\n                    return comment\n        except Exception as e:\n            get_logger().exception(f\"Failed to update persistent review, error: {e}\")\n            pass\n        return self.publish_comment(pr_comment)\n\n    @abstractmethod\n    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):\n        pass\n\n    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,\n                              absolute_position: int = None):\n        raise NotImplementedError(\"This git provider does not support creating inline comments yet\")\n\n    @abstractmethod\n    def publish_inline_comments(self, comments: list[dict]):\n        pass\n\n    @abstractmethod\n    def remove_initial_comment(self):\n        pass\n\n    @abstractmethod\n    def remove_comment(self, comment):\n        pass\n\n    @abstractmethod\n    def get_issue_comments(self):\n        pass\n\n    def get_comment_url(self, comment) -> str:\n        return \"\"\n\n    def get_review_thread_comments(self, comment_id: int) -> list[dict]:\n        pass\n\n    #### labels 
operations ####\n    @abstractmethod\n    def publish_labels(self, labels):\n        pass\n\n    @abstractmethod\n    def get_pr_labels(self, update=False):\n        pass\n\n    def get_repo_labels(self):\n        pass\n\n    @abstractmethod\n    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:\n        pass\n\n    @abstractmethod\n    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:\n        pass\n\n    #### commits operations ####\n    @abstractmethod\n    def get_commit_messages(self):\n        pass\n\n    def get_pr_url(self) -> str:\n        if hasattr(self, 'pr_url'):\n            return self.pr_url\n        return \"\"\n\n    def get_latest_commit_url(self) -> str:\n        return \"\"\n\n    def auto_approve(self) -> bool:\n        return False\n\n    def calc_pr_statistics(self, pull_request_data: dict):\n        return {}\n\n    def get_num_of_files(self):\n        try:\n            return len(self.get_diff_files())\n        except Exception as e:\n            return -1\n\n    def limit_output_characters(self, output: str, max_chars: int):\n        return output[:max_chars] + '...' if len(output) > max_chars else output\n\n\ndef get_main_pr_language(languages, files) -> str:\n    \"\"\"\n    Get the main language of the commit. 
Return an empty string if cannot determine.\n    \"\"\"\n    main_language_str = \"\"\n    if not languages:\n        get_logger().info(\"No languages detected\")\n        return main_language_str\n    if not files:\n        get_logger().info(\"No files in diff\")\n        return main_language_str\n\n    try:\n        top_language = max(languages, key=languages.get).lower()\n\n        # validate that the specific commit uses the main language\n        extension_list = []\n        for file in files:\n            if not file:\n                continue\n            if isinstance(file, str):\n                file = FilePatchInfo(base_file=None, head_file=None, patch=None, filename=file)\n            extension_list.append(file.filename.rsplit('.')[-1])\n\n        # get the most common extension\n        most_common_extension = '.' + max(set(extension_list), key=extension_list.count)\n        try:\n            language_extension_map_org = get_settings().language_extension_map_org\n            language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()}\n\n            if top_language in language_extension_map and most_common_extension in language_extension_map[top_language]:\n                main_language_str = top_language\n            else:\n                for language, extensions in language_extension_map.items():\n                    if most_common_extension in extensions:\n                        main_language_str = language\n                        break\n        except Exception as e:\n            get_logger().exception(f\"Failed to get main language: {e}\")\n\n        ## old approach:\n        # most_common_extension = max(set(extension_list), key=extension_list.count)\n        # if most_common_extension == 'py' and top_language == 'python' or \\\n        #         most_common_extension == 'js' and top_language == 'javascript' or \\\n        #         most_common_extension == 'ts' and top_language == 'typescript' or \\\n        #         
most_common_extension == 'tsx' and top_language == 'typescript' or \\\n        #         most_common_extension == 'go' and top_language == 'go' or \\\n        #         most_common_extension == 'java' and top_language == 'java' or \\\n        #         most_common_extension == 'c' and top_language == 'c' or \\\n        #         most_common_extension == 'cpp' and top_language == 'c++' or \\\n        #         most_common_extension == 'cs' and top_language == 'c#' or \\\n        #         most_common_extension == 'swift' and top_language == 'swift' or \\\n        #         most_common_extension == 'php' and top_language == 'php' or \\\n        #         most_common_extension == 'rb' and top_language == 'ruby' or \\\n        #         most_common_extension == 'rs' and top_language == 'rust' or \\\n        #         most_common_extension == 'scala' and top_language == 'scala' or \\\n        #         most_common_extension == 'kt' and top_language == 'kotlin' or \\\n        #         most_common_extension == 'pl' and top_language == 'perl' or \\\n        #         most_common_extension == top_language:\n        #     main_language_str = top_language\n\n    except Exception as e:\n        get_logger().exception(e)\n\n    return main_language_str\n\n\n\n\nclass IncrementalPR:\n    def __init__(self, is_incremental: bool = False):\n        self.is_incremental = is_incremental\n        self.commits_range = None\n        self.first_new_commit = None\n        self.last_seen_commit = None\n\n    @property\n    def first_new_commit_sha(self):\n        return None if self.first_new_commit is None else self.first_new_commit.sha\n\n    @property\n    def last_seen_commit_sha(self):\n        return None if self.last_seen_commit is None else self.last_seen_commit.sha\n"
  },
  {
    "path": "pr_agent/git_providers/gitea_provider.py",
    "content": "import json\nfrom typing import Any, Dict, List, Optional, Set, Tuple\nfrom urllib.parse import urlparse\n\nimport giteapy\nfrom giteapy.rest import ApiException\n\nfrom pr_agent.algo.file_filter import filter_ignored\nfrom pr_agent.algo.language_handler import is_valid_file\nfrom pr_agent.algo.types import EDIT_TYPE\nfrom pr_agent.algo.utils import (clip_tokens,\n                                 find_line_number_of_relevant_line_in_file)\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers.git_provider import (MAX_FILES_ALLOWED_FULL,\n                                                 FilePatchInfo, GitProvider,\n                                                 IncrementalPR)\nfrom pr_agent.log import get_logger\n\n\nclass GiteaProvider(GitProvider):\n    def __init__(self, url: Optional[str] = None):\n        super().__init__()\n        self.logger = get_logger()\n\n        if not url:\n            self.logger.error(\"PR URL not provided.\")\n            raise ValueError(\"PR URL not provided.\")\n\n        self.base_url = get_settings().get(\"GITEA.URL\", \"https://gitea.com\").rstrip(\"/\")\n        self.pr_url = \"\"\n        self.issue_url = \"\"\n\n        self.gitea_access_token = get_settings().get(\"GITEA.PERSONAL_ACCESS_TOKEN\", None)\n        if not self.gitea_access_token:\n            self.logger.error(\"Gitea access token not found in settings.\")\n            raise ValueError(\"Gitea access token not found in settings.\")\n\n        self.repo_settings = get_settings().get(\"GITEA.REPO_SETTING\", None)\n        configuration = giteapy.Configuration()\n        configuration.host = \"{}/api/v1\".format(self.base_url)\n        configuration.api_key['Authorization'] = f'token {self.gitea_access_token}'\n\n        if get_settings().get(\"GITEA.SKIP_SSL_VERIFICATION\", False):\n            configuration.verify_ssl = False\n\n        # Use custom cert (self-signed)\n        configuration.ssl_ca_cert = 
get_settings().get(\"GITEA.SSL_CA_CERT\", None)\n\n        client = giteapy.ApiClient(configuration)\n        self.repo_api = RepoApi(client)\n        self.owner = None\n        self.repo = None\n        self.pr_number = None\n        self.issue_number = None\n        self.max_comment_chars = 65000\n        self.enabled_pr = False\n        self.enabled_issue = False\n        self.temp_comments = []\n        self.pr = None\n        self.git_files = []\n        self.file_contents = {}\n        self.file_diffs = {}\n        self.sha = None\n        self.diff_files = []\n        self.incremental = IncrementalPR(False)\n        self.comments_list = []\n        self.unreviewed_files_set = dict()\n\n        if \"pulls\" in url:\n            self.pr_url = url\n            self.__set_repo_and_owner_from_pr()\n            self.enabled_pr = True\n            self.pr = self.repo_api.get_pull_request(\n                owner=self.owner,\n                repo=self.repo,\n                pr_number=self.pr_number\n            )\n            self.git_files = self.repo_api.get_change_file_pull_request(\n                owner=self.owner,\n                repo=self.repo,\n                pr_number=self.pr_number\n            )\n            # Optional ignore with user custom\n            self.git_files = filter_ignored(self.git_files, platform=\"gitea\")\n\n            self.sha = self.pr.head.sha if self.pr.head.sha else \"\"\n            self.__add_file_content()\n            self.__add_file_diff()\n            self.pr_commits = self.repo_api.list_all_commits(\n                owner=self.owner,\n                repo=self.repo\n            )\n            self.last_commit = self.pr_commits[-1]\n            self.last_commit_id = self.last_commit\n            self.base_sha = self.pr.base.sha if self.pr.base.sha else \"\"\n            self.base_ref = self.pr.base.ref if self.pr.base.ref else \"\"\n        elif \"issues\" in url:\n            self.issue_url = url\n            
self.__set_repo_and_owner_from_issue()\n            self.enabled_issue = True\n        else:\n            self.pr_commits = None\n\n    def __add_file_content(self):\n        for file in self.git_files:\n            file_path = file.get(\"filename\")\n            # Ignore file from default settings\n            if not is_valid_file(file_path):\n                continue\n\n            if file_path and self.sha:\n                try:\n                    content = self.repo_api.get_file_content(\n                        owner=self.owner,\n                        repo=self.repo,\n                        commit_sha=self.sha,\n                        filepath=file_path\n                    )\n                    self.file_contents[file_path] = content\n                except ApiException as e:\n                    self.logger.error(f\"Error getting file content for {file_path}: {str(e)}\")\n                    self.file_contents[file_path] = \"\"\n\n    def __add_file_diff(self):\n        try:\n            diff_contents = self.repo_api.get_pull_request_diff(\n                    owner=self.owner,\n                    repo=self.repo,\n                    pr_number=self.pr_number\n            )\n\n            lines = diff_contents.splitlines()\n            current_file = None\n            current_patch = []\n            file_patches = {}\n            for line in lines:\n                if line.startswith('diff --git'):\n                    if current_file and current_patch:\n                        file_patches[current_file] = '\\n'.join(current_patch)\n                        current_patch = []\n                    current_file = line.split(' b/')[-1]\n                elif line.startswith('@@'):\n                    current_patch = [line]\n                elif current_patch:\n                    current_patch.append(line)\n\n            if current_file and current_patch:\n                file_patches[current_file] = '\\n'.join(current_patch)\n\n            
self.file_diffs = file_patches\n        except Exception as e:\n            self.logger.error(f\"Error getting diff content: {str(e)}\")\n\n    def _parse_pr_url(self, pr_url: str) -> Tuple[str, str, int]:\n        parsed_url = urlparse(pr_url)\n\n        if parsed_url.path.startswith('/api/v1'):\n            parsed_url = urlparse(pr_url.replace(\"/api/v1\", \"\"))\n\n        path_parts = parsed_url.path.strip('/').split('/')\n        if len(path_parts) < 4 or path_parts[2] != 'pulls':\n            raise ValueError(\"The provided URL does not appear to be a Gitea PR URL\")\n\n        try:\n            pr_number = int(path_parts[3])\n        except ValueError as e:\n            raise ValueError(\"Unable to convert PR number to integer\") from e\n\n        owner = path_parts[0]\n        repo = path_parts[1]\n\n        return owner, repo, pr_number\n\n    def _parse_issue_url(self, issue_url: str) -> Tuple[str, str, int]:\n        parsed_url = urlparse(issue_url)\n\n        if parsed_url.path.startswith('/api/v1'):\n            parsed_url = urlparse(issue_url.replace(\"/api/v1\", \"\"))\n\n        path_parts = parsed_url.path.strip('/').split('/')\n        if len(path_parts) < 4 or path_parts[2] != 'issues':\n            raise ValueError(\"The provided URL does not appear to be a Gitea issue URL\")\n\n        try:\n            issue_number = int(path_parts[3])\n        except ValueError as e:\n            raise ValueError(\"Unable to convert issue number to integer\") from e\n\n        owner = path_parts[0]\n        repo = path_parts[1]\n\n        return owner, repo, issue_number\n\n    def __set_repo_and_owner_from_pr(self):\n        \"\"\"Extract owner and repo from the PR URL\"\"\"\n        try:\n            owner, repo, pr_number = self._parse_pr_url(self.pr_url)\n            self.owner = owner\n            self.repo = repo\n            self.pr_number = pr_number\n            self.logger.info(f\"Owner: {self.owner}, Repo: {self.repo}, PR Number: 
{self.pr_number}\")\n        except ValueError as e:\n            self.logger.error(f\"Error parsing PR URL: {str(e)}\")\n        except Exception as e:\n            self.logger.error(f\"Unexpected error: {str(e)}\")\n\n    def __set_repo_and_owner_from_issue(self):\n        \"\"\"Extract owner and repo from the issue URL\"\"\"\n        try:\n            owner, repo, issue_number = self._parse_issue_url(self.issue_url)\n            self.owner = owner\n            self.repo = repo\n            self.issue_number = issue_number\n            self.logger.info(f\"Owner: {self.owner}, Repo: {self.repo}, Issue Number: {self.issue_number}\")\n        except ValueError as e:\n            self.logger.error(f\"Error parsing issue URL: {str(e)}\")\n        except Exception as e:\n            self.logger.error(f\"Unexpected error: {str(e)}\")\n\n    def get_pr_url(self) -> str:\n        return self.pr_url\n\n    def get_issue_url(self) -> str:\n        return self.issue_url\n\n    def get_latest_commit_url(self) -> str:\n        return self.last_commit.html_url\n\n    def get_comment_url(self, comment) -> str:\n        return comment.html_url\n\n    def publish_persistent_comment(self, pr_comment: str,\n                                   initial_header: str,\n                                   update_header: bool = True,\n                                   name='review',\n                                   final_update_message=True):\n        self.publish_persistent_comment_full(pr_comment, initial_header, update_header, name, final_update_message)\n\n    def publish_comment(self, comment: str,is_temporary: bool = False) -> None:\n        \"\"\"Publish a comment to the pull request\"\"\"\n        if is_temporary and not get_settings().config.publish_output_progress:\n            get_logger().debug(f\"Skipping publish_comment for temporary comment\")\n            return None\n\n        if self.enabled_issue:\n            index = self.issue_number\n        elif self.enabled_pr:\n  
          index = self.pr_number\n        else:\n            self.logger.error(\"Neither PR nor issue URL provided.\")\n            return None\n\n        comment = self.limit_output_characters(comment, self.max_comment_chars)\n        response = self.repo_api.create_comment(\n            owner=self.owner,\n            repo=self.repo,\n            index=index,\n            comment=comment\n        )\n\n        if not response:\n            self.logger.error(\"Failed to publish comment\")\n            return None\n\n        if is_temporary:\n            self.temp_comments.append(comment)\n\n        comment_obj = {\n            \"is_temporary\": is_temporary,\n            \"comment\": comment,\n            \"comment_id\": response.id if isinstance(response, tuple) else response.id\n        }\n        self.comments_list.append(comment_obj)\n        self.logger.info(\"Comment published\")\n        return comment_obj\n\n    def edit_comment(self, comment, body : str):\n        body = self.limit_output_characters(body, self.max_comment_chars)\n        try:\n            self.repo_api.edit_comment(\n                owner=self.owner,\n                repo=self.repo,\n                comment_id=comment.get(\"comment_id\") if isinstance(comment, dict) else comment.id,\n                comment=body\n            )\n        except ApiException as e:\n            self.logger.error(f\"Error editing comment: {e}\")\n            return None\n        except Exception as e:\n            self.logger.error(f\"Unexpected error: {e}\")\n            return None\n\n\n    def publish_inline_comment(self,body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):\n        \"\"\"Publish an inline comment on a specific line\"\"\"\n        body = self.limit_output_characters(body, self.max_comment_chars)\n        position, absolute_position = find_line_number_of_relevant_line_in_file(self.diff_files,\n                                                                      
          relevant_file.strip('`'),\n                                                                                relevant_line_in_file,\n                                                                                )\n        if position == -1:\n            get_logger().info(f\"Could not find position for {relevant_file} {relevant_line_in_file}\")\n            subject_type = \"FILE\"\n        else:\n            subject_type = \"LINE\"\n\n        path = relevant_file.strip()\n        payload = dict(body=body, path=path, old_position=position,new_position = absolute_position) if subject_type == \"LINE\" else {}\n        self.publish_inline_comments([payload])\n\n\n    def publish_inline_comments(self, comments: List[Dict[str, Any]],body : str = \"Inline comment\") -> None:\n        response = self.repo_api.create_inline_comment(\n            owner=self.owner,\n            repo=self.repo,\n            pr_number=self.pr_number if self.enabled_pr else self.issue_number,\n            body=body,\n            commit_id=self.last_commit.sha if self.last_commit else \"\",\n            comments=comments\n        )\n\n        if not response:\n            self.logger.error(\"Failed to publish inline comment\")\n            return\n\n        self.logger.info(\"Inline comment published\")\n\n    def publish_code_suggestions(self, suggestions: List[Dict[str, Any]]):\n        \"\"\"Publish code suggestions\"\"\"\n        for suggestion in suggestions:\n            body = suggestion.get(\"body\",\"\")\n            if not body:\n                self.logger.error(\"No body provided for the suggestion\")\n                continue\n\n            path = suggestion.get(\"relevant_file\",\"\")\n            new_position = suggestion.get(\"relevant_lines_start\",0)\n            old_position = suggestion.get(\"relevant_lines_start\",0) if \"original_suggestion\" not in suggestion else suggestion[\"original_suggestion\"].get(\"relevant_lines_start\",0)\n            title_body = 
suggestion[\"original_suggestion\"].get(\"suggestion_content\",\"\") if \"original_suggestion\" in suggestion else \"\"\n            payload = dict(body=body, path=path, old_position=old_position,new_position = new_position)\n            if title_body:\n                title_body = f\"**Suggestion:** {title_body}\"\n                self.publish_inline_comments([payload],title_body)\n            else:\n                self.publish_inline_comments([payload])\n\n    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:\n        \"\"\"Add eyes reaction to a comment\"\"\"\n        try:\n            if disable_eyes:\n                return None\n\n            comments = self.repo_api.list_all_comments(\n                owner=self.owner,\n                repo=self.repo,\n                index=self.pr_number if self.enabled_pr else self.issue_number\n            )\n\n            comment_ids = [comment.id for comment in comments]\n            if issue_comment_id not in comment_ids:\n                self.logger.error(f\"Comment ID {issue_comment_id} not found. 
Available IDs: {comment_ids}\")\n                return None\n\n            response = self.repo_api.add_reaction_comment(\n                owner=self.owner,\n                repo=self.repo,\n                comment_id=issue_comment_id,\n                reaction=\"eyes\"\n            )\n\n            if not response:\n                self.logger.error(\"Failed to add eyes reaction\")\n                return None\n\n            return response[0].id if isinstance(response, tuple) else response.id\n\n        except ApiException as e:\n            self.logger.error(f\"Error adding eyes reaction: {e}\")\n            return None\n        except Exception as e:\n            self.logger.error(f\"Unexpected error: {e}\")\n            return None\n\n    def remove_reaction(self, comment_id: int) -> None:\n        \"\"\"Remove reaction from a comment\"\"\"\n        try:\n            response = self.repo_api.remove_reaction_comment(\n                owner=self.owner,\n                repo=self.repo,\n                comment_id=comment_id\n            )\n            if not response:\n                self.logger.error(\"Failed to remove reaction\")\n        except ApiException as e:\n            self.logger.error(f\"Error removing reaction: {e}\")\n        except Exception as e:\n            self.logger.error(f\"Unexpected error: {e}\")\n\n    def get_commit_messages(self)-> str:\n        \"\"\"Get commit messages for the PR\"\"\"\n        max_tokens = get_settings().get(\"CONFIG.MAX_COMMITS_TOKENS\", None)\n        pr_commits = self.repo_api.get_pr_commits(\n            owner=self.owner,\n            repo=self.repo,\n            pr_number=self.pr_number\n        )\n\n        if not pr_commits:\n            self.logger.error(\"Failed to get commit messages\")\n            return \"\"\n\n        try:\n            commit_messages = [commit[\"commit\"][\"message\"] for commit in pr_commits if commit]\n\n            if not commit_messages:\n                self.logger.error(\"No 
commit messages found\")\n                return \"\"\n\n            commit_message = \"\".join(commit_messages)\n            if max_tokens:\n                commit_message = clip_tokens(commit_message, max_tokens)\n\n            return commit_message\n        except Exception as e:\n            self.logger.error(f\"Error processing commit messages: {str(e)}\")\n            return \"\"\n\n    def _get_file_content_from_base(self, filename: str) -> str:\n        return self.repo_api.get_file_content(\n            owner=self.owner,\n            repo=self.repo,\n            commit_sha=self.base_sha,\n            filepath=filename\n        )\n\n    def _get_file_content_from_latest_commit(self, filename: str) -> str:\n        return self.repo_api.get_file_content(\n            owner=self.owner,\n            repo=self.repo,\n            commit_sha=self.last_commit.sha,\n            filepath=filename\n        )\n\n    def get_diff_files(self) -> List[FilePatchInfo]:\n        \"\"\"Get files that were modified in the PR\"\"\"\n        if self.diff_files:\n            return self.diff_files\n\n        invalid_files_names = []\n        counter_valid = 0\n        diff_files = []\n        for file in self.git_files:\n            filename = file.get(\"filename\")\n            if not filename:\n                continue\n\n            if not is_valid_file(filename):\n                invalid_files_names.append(filename)\n                continue\n\n            counter_valid += 1\n            avoid_load = False\n            patch = self.file_diffs.get(filename,\"\")\n            head_file = \"\"\n            base_file = \"\"\n\n            if counter_valid >= MAX_FILES_ALLOWED_FULL and patch and not self.incremental.is_incremental:\n                avoid_load = True\n                if counter_valid == MAX_FILES_ALLOWED_FULL:\n                    self.logger.info(\"Too many files in PR, will avoid loading full content for rest of files\")\n\n            if avoid_load:\n            
    head_file = \"\"\n            else:\n                # Get file content from this pr\n                head_file = self.file_contents.get(filename,\"\")\n\n            if self.incremental.is_incremental and self.unreviewed_files_set:\n                base_file = self._get_file_content_from_latest_commit(filename)\n                self.unreviewed_files_set[filename] = patch\n            else:\n                if avoid_load:\n                    base_file = \"\"\n                else:\n                    base_file = self._get_file_content_from_base(filename)\n\n            num_plus_lines = file.get(\"additions\",0)\n            num_minus_lines = file.get(\"deletions\",0)\n            status = file.get(\"status\",\"\")\n\n            if status == 'added':\n                edit_type = EDIT_TYPE.ADDED\n            elif status == 'removed' or status == 'deleted':\n                edit_type = EDIT_TYPE.DELETED\n            elif status == 'renamed':\n                edit_type = EDIT_TYPE.RENAMED\n            elif status == 'modified' or status == 'changed':\n                edit_type = EDIT_TYPE.MODIFIED\n            else:\n                self.logger.error(f\"Unknown edit type: {status}\")\n                edit_type = EDIT_TYPE.UNKNOWN\n\n            file_patch_info = FilePatchInfo(\n                base_file=base_file,\n                head_file=head_file,\n                patch=patch,\n                filename=filename,\n                num_minus_lines=num_minus_lines,\n                num_plus_lines=num_plus_lines,\n                edit_type=edit_type\n            )\n            diff_files.append(file_patch_info)\n\n        if invalid_files_names:\n            self.logger.info(f\"Filtered out files with invalid extensions: {invalid_files_names}\")\n\n        self.diff_files = diff_files\n        return diff_files\n\n    def get_line_link(self, relevant_file, relevant_line_start, relevant_line_end = None) -> str:\n        if relevant_line_start == -1:\n            
link = f\"{self.base_url}/{self.owner}/{self.repo}/src/branch/{self.get_pr_branch()}/{relevant_file}\"\n        elif relevant_line_end:\n            link = f\"{self.base_url}/{self.owner}/{self.repo}/src/branch/{self.get_pr_branch()}/{relevant_file}#L{relevant_line_start}-L{relevant_line_end}\"\n        else:\n            link = f\"{self.base_url}/{self.owner}/{self.repo}/src/branch/{self.get_pr_branch()}/{relevant_file}#L{relevant_line_start}\"\n\n        self.logger.info(f\"Generated link: {link}\")\n        return link\n\n    def get_pr_id(self):\n        try:\n            pr_id = f\"{self.repo}/{self.pr_number}\"\n            return pr_id\n        except:\n            return \"\"\n\n    def get_files(self) -> List[Dict[str, Any]]:\n        \"\"\"Get all files in the PR\"\"\"\n        return [file.get(\"filename\",\"\") for file in self.git_files]\n\n    def get_num_of_files(self) -> int:\n        \"\"\"Get number of files changed in the PR\"\"\"\n        return len(self.git_files)\n\n    def get_issue_comments(self) -> List[Dict[str, Any]]:\n        \"\"\"Get all comments in the PR\"\"\"\n        index = self.issue_number if self.enabled_issue else self.pr_number\n        comments = self.repo_api.list_all_comments(\n            owner=self.owner,\n            repo=self.repo,\n            index=index\n        )\n        if not comments:\n            self.logger.error(\"Failed to get comments\")\n            return []\n\n        return comments\n\n    def get_languages(self) -> Set[str]:\n        \"\"\"Get programming languages used in the repository\"\"\"\n        languages = self.repo_api.get_languages(\n            owner=self.owner,\n            repo=self.repo\n        )\n\n        return languages\n\n    def get_pr_branch(self) -> str:\n        \"\"\"Get the branch name of the PR\"\"\"\n        if not self.pr:\n            self.logger.error(\"Failed to get PR branch\")\n            return \"\"\n\n        if not self.pr.head:\n            self.logger.error(\"PR 
head not found\")\n            return \"\"\n\n        return self.pr.head.ref if self.pr.head.ref else \"\"\n\n    def get_pr_description_full(self) -> str:\n        \"\"\"Get full PR description with metadata\"\"\"\n        if not self.pr:\n            self.logger.error(\"Failed to get PR description\")\n            return \"\"\n\n        return self.pr.body if self.pr.body else \"\"\n\n    def get_pr_labels(self,update=False) -> List[str]:\n        \"\"\"Get labels assigned to the PR\"\"\"\n        if not update:\n            if not self.pr.labels:\n                self.logger.error(\"Failed to get PR labels\")\n                return []\n            return [label.name for label in self.pr.labels]\n\n        labels = self.repo_api.get_issue_labels(\n            owner=self.owner,\n            repo=self.repo,\n            issue_number=self.pr_number\n        )\n        if not labels:\n            self.logger.error(\"Failed to get PR labels\")\n            return []\n\n        return [label.name for label in labels]\n\n    def get_repo_settings(self) -> str:\n        \"\"\"Get repository settings\"\"\"\n        if not self.repo_settings:\n            self.logger.error(\"Repository settings not found\")\n            return \"\"\n\n        response = self.repo_api.get_file_content(\n            owner=self.owner,\n            repo=self.repo,\n            commit_sha=self.sha,\n            filepath=self.repo_settings\n        )\n        if not response:\n            self.logger.error(\"Failed to get repository settings\")\n            return \"\"\n\n        return response\n\n    def get_user_id(self) -> str:\n        \"\"\"Get the ID of the authenticated user\"\"\"\n        return f\"{self.pr.user.id}\" if self.pr else \"\"\n\n    def is_supported(self, capability) -> bool:\n        \"\"\"Check if the provider is supported\"\"\"\n        return True\n\n    def get_git_repo_url(self, issues_or_pr_url: str) -> str:\n        return 
f\"{self.base_url}/{self.owner}/{self.repo}.git\" #base_url / <OWNER>/<REPO>.git\n\n    def publish_description(self, pr_title: str, pr_body: str) -> None:\n        \"\"\"Publish PR description\"\"\"\n        response = self.repo_api.edit_pull_request(\n            owner=self.owner,\n            repo=self.repo,\n            pr_number=self.pr_number if self.enabled_pr else self.issue_number,\n            title=pr_title,\n            body=pr_body\n        )\n\n        if not response:\n            self.logger.error(\"Failed to publish PR description\")\n            return None\n\n        self.logger.info(\"PR description published successfully\")\n        if self.enabled_pr:\n            self.pr = self.repo_api.get_pull_request(\n                owner=self.owner,\n                repo=self.repo,\n                pr_number=self.pr_number\n            )\n\n    def publish_labels(self, labels: List[int]) -> None:\n        \"\"\"Publish labels to the PR\"\"\"\n        if not labels:\n            self.logger.error(\"No labels provided to publish\")\n            return None\n\n        response = self.repo_api.add_labels(\n            owner=self.owner,\n            repo=self.repo,\n            issue_number=self.pr_number if self.enabled_pr else self.issue_number,\n            labels=labels\n        )\n\n        if response:\n            self.logger.info(\"Labels added successfully\")\n\n    def remove_comment(self, comment) -> None:\n        \"\"\"Remove a specific comment\"\"\"\n        if not comment:\n            return\n\n        try:\n            comment_id = comment.get(\"comment_id\") if isinstance(comment, dict) else comment.id\n            if not comment_id:\n                self.logger.error(\"Comment ID not found\")\n                return None\n            self.repo_api.remove_comment(\n                owner=self.owner,\n                repo=self.repo,\n                comment_id=comment_id\n            )\n\n            if self.comments_list and comment in 
self.comments_list:\n                self.comments_list.remove(comment)\n\n            self.logger.info(f\"Comment removed successfully: {comment}\")\n        except ApiException as e:\n            self.logger.error(f\"Error removing comment: {e}\")\n            raise e\n\n    def remove_initial_comment(self) -> None:\n        \"\"\"Remove the initial comment\"\"\"\n        for comment in self.comments_list:\n            try:\n                if not comment.get(\"is_temporary\"):\n                    continue\n                self.remove_comment(comment)\n            except Exception as e:\n                self.logger.error(f\"Error removing comment: {e}\")\n                continue\n            self.logger.info(f\"Removed initial comment: {comment.get('comment_id')}\")\n\n    #Clone related\n    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:\n        #For example, to clone:\n        #https://github.com/Codium-ai/pr-agent-pro.git\n        #Need to embed inside the github token:\n        #https://<token>@github.com/Codium-ai/pr-agent-pro.git\n\n        gitea_token = self.gitea_access_token\n        gitea_base_url = self.base_url\n        scheme = gitea_base_url.split(\"://\")[0]\n        scheme += \"://\"\n        if not all([gitea_token, gitea_base_url]):\n            get_logger().error(\"Either missing auth token or missing base url\")\n            return None\n        base_url = gitea_base_url.split(scheme)[1]\n        if not base_url:\n            get_logger().error(f\"Base url: {gitea_base_url} has an empty base url\")\n            return None\n        if base_url not in repo_url_to_clone:\n            get_logger().error(f\"url to clone: {repo_url_to_clone} does not contain {base_url}\")\n            return None\n        repo_full_name = repo_url_to_clone.split(base_url)[-1]\n        if not repo_full_name:\n            get_logger().error(f\"url to clone: {repo_url_to_clone} is malformed\")\n            return None\n\n        
clone_url = scheme\n        clone_url += f\"{gitea_token}@{base_url}{repo_full_name}\"\n        return clone_url\n\nclass RepoApi(giteapy.RepositoryApi):\n    def __init__(self, client: giteapy.ApiClient):\n        self.repository = giteapy.RepositoryApi(client)\n        self.issue = giteapy.IssueApi(client)\n        self.logger = get_logger()\n        super().__init__(client)\n\n    def create_inline_comment(self, owner: str, repo: str, pr_number: int, body : str ,commit_id : str, comments: List[Dict[str, Any]]):\n        body = {\n            \"body\": body,\n            \"comments\": comments,\n            \"commit_id\": commit_id,\n        }\n        return self.api_client.call_api(\n            '/repos/{owner}/{repo}/pulls/{pr_number}/reviews',\n            'POST',\n            path_params={'owner': owner, 'repo': repo, 'pr_number': pr_number},\n            body=body,\n            response_type='Repository',\n            auth_settings=['AuthorizationHeaderToken']\n        )\n\n    def create_comment(self, owner: str, repo: str, index: int, comment: str):\n        body = {\n            \"body\": comment\n        }\n        return self.issue.issue_create_comment(\n            owner=owner,\n            repo=repo,\n            index=index,\n            body=body\n        )\n\n    def edit_comment(self, owner: str, repo: str, comment_id: int, comment: str):\n        body = {\n            \"body\": comment\n        }\n        return self.issue.issue_edit_comment(\n            owner=owner,\n            repo=repo,\n            id=comment_id,\n            body=body\n        )\n\n    def remove_comment(self, owner: str, repo: str, comment_id: int):\n        return self.issue.issue_delete_comment(\n            owner=owner,\n            repo=repo,\n            id=comment_id\n        )\n\n    def list_all_comments(self, owner: str, repo: str, index: int):\n        return self.issue.issue_get_comments(\n            owner=owner,\n            repo=repo,\n            
index=index\n        )\n\n    def get_pull_request_diff(self, owner: str, repo: str, pr_number: int) -> str:\n        \"\"\"Get the diff content of a pull request using direct API call\"\"\"\n        try:\n            url = f'/repos/{owner}/{repo}/pulls/{pr_number}.diff'\n\n            response = self.api_client.call_api(\n                url,\n                'GET',\n                path_params={},\n                response_type=None,\n                _return_http_data_only=False,\n                _preload_content=False,\n                auth_settings=['AuthorizationHeaderToken']\n            )\n\n            if hasattr(response, 'data'):\n                raw_data = response.data.read()\n                return raw_data.decode('utf-8')\n            elif isinstance(response, tuple):\n                raw_data = response[0].read()\n                return raw_data.decode('utf-8')\n            else:\n                error_msg = f\"Unexpected response format received from API: {type(response)}\"\n                self.logger.error(error_msg)\n                raise RuntimeError(error_msg)\n\n        except ApiException as e:\n            self.logger.error(f\"Error getting diff: {str(e)}\")\n            raise e\n        except Exception as e:\n            self.logger.error(f\"Unexpected error: {str(e)}\")\n            raise e\n\n    def get_pull_request(self, owner: str, repo: str, pr_number: int):\n        \"\"\"Get pull request details including description\"\"\"\n        return self.repository.repo_get_pull_request(\n            owner=owner,\n            repo=repo,\n            index=pr_number\n        )\n\n    def edit_pull_request(self, owner: str, repo: str, pr_number: int,title : str, body: str):\n        \"\"\"Edit pull request description\"\"\"\n        body = {\n            \"body\": body,\n            \"title\" : title\n        }\n        return self.repository.repo_edit_pull_request(\n            owner=owner,\n            repo=repo,\n            
index=pr_number,\n            body=body\n        )\n\n    def get_change_file_pull_request(self, owner: str, repo: str, pr_number: int):\n        \"\"\"Get changed files in the pull request\"\"\"\n        try:\n            url = f'/repos/{owner}/{repo}/pulls/{pr_number}/files'\n\n            response = self.api_client.call_api(\n                url,\n                'GET',\n                path_params={},\n                response_type=None,\n                _return_http_data_only=False,\n                _preload_content=False,\n                auth_settings=['AuthorizationHeaderToken']\n            )\n\n            if hasattr(response, 'data'):\n                raw_data = response.data.read()\n                diff_content = raw_data.decode('utf-8')\n                return json.loads(diff_content) if isinstance(diff_content, str) else diff_content\n            elif isinstance(response, tuple):\n                raw_data = response[0].read()\n                diff_content = raw_data.decode('utf-8')\n                return json.loads(diff_content) if isinstance(diff_content, str) else diff_content\n\n            return []\n\n        except ApiException as e:\n            self.logger.error(f\"Error getting changed files: {e}\")\n            return []\n        except Exception as e:\n            self.logger.error(f\"Unexpected error: {e}\")\n            return []\n\n    def get_languages(self, owner: str, repo: str):\n        \"\"\"Get programming languages used in the repository\"\"\"\n        try:\n            url = f'/repos/{owner}/{repo}/languages'\n\n            response = self.api_client.call_api(\n                url,\n                'GET',\n                path_params={},\n                response_type=None,\n                _return_http_data_only=False,\n                _preload_content=False,\n                auth_settings=['AuthorizationHeaderToken']\n            )\n\n            if hasattr(response, 'data'):\n                raw_data = response.data.read()\n 
               return json.loads(raw_data.decode('utf-8'))\n            elif isinstance(response, tuple):\n                raw_data = response[0].read()\n                return json.loads(raw_data.decode('utf-8'))\n\n            return {}\n\n        except ApiException as e:\n            self.logger.error(f\"Error getting languages: {e}\")\n            return {}\n        except Exception as e:\n            self.logger.error(f\"Unexpected error: {e}\")\n            return {}\n\n    def get_file_content(self, owner: str, repo: str, commit_sha: str, filepath: str) -> str:\n        \"\"\"Get raw file content from a specific commit\"\"\"\n\n        try:\n            url = f'/repos/{owner}/{repo}/raw/{filepath}'\n            query_params = []\n            if commit_sha:\n                query_params.append(('ref', commit_sha))\n\n            response = self.api_client.call_api(\n                url,\n                'GET',\n                path_params={},\n                query_params=query_params,\n                response_type=None,\n                _return_http_data_only=False,\n                _preload_content=False,\n                auth_settings=['AuthorizationHeaderToken']\n            )\n\n            if hasattr(response, 'data'):\n                raw_data = response.data.read()\n                return raw_data.decode('utf-8')\n            elif isinstance(response, tuple):\n                raw_data = response[0].read()\n                return raw_data.decode('utf-8')\n\n            return \"\"\n\n        except ApiException as e:\n            self.logger.error(f\"Error getting file: {filepath}, content: {e}\")\n            return \"\"\n        except Exception as e:\n            self.logger.error(f\"Unexpected error: {e}\")\n            return \"\"\n\n    def get_issue_labels(self, owner: str, repo: str, issue_number: int):\n        \"\"\"Get labels assigned to the issue\"\"\"\n        return self.issue.issue_get_labels(\n            owner=owner,\n            
repo=repo,\n            index=issue_number\n        )\n\n    def list_all_commits(self, owner: str, repo: str):\n        return self.repository.repo_get_all_commits(\n            owner=owner,\n            repo=repo\n        )\n\n    def add_reviewer(self, owner: str, repo: str, pr_number: int, reviewers: List[str]):\n        body = {\n            \"reviewers\": reviewers\n        }\n        return self.api_client.call_api(\n            '/repos/{owner}/{repo}/pulls/{pr_number}/requested_reviewers',\n            'POST',\n            path_params={'owner': owner, 'repo': repo, 'pr_number': pr_number},\n            body=body,\n            response_type='Repository',\n            auth_settings=['AuthorizationHeaderToken']\n        )\n\n    def add_reaction_comment(self, owner: str, repo: str, comment_id: int, reaction: str):\n        body = {\n            \"content\": reaction\n        }\n        return self.api_client.call_api(\n            '/repos/{owner}/{repo}/issues/comments/{id}/reactions',\n            'POST',\n            path_params={'owner': owner, 'repo': repo, 'id': comment_id},\n            body=body,\n            response_type='Repository',\n            auth_settings=['AuthorizationHeaderToken']\n        )\n\n    def remove_reaction_comment(self, owner: str, repo: str, comment_id: int):\n        return self.api_client.call_api(\n            '/repos/{owner}/{repo}/issues/comments/{id}/reactions',\n            'DELETE',\n            path_params={'owner': owner, 'repo': repo, 'id': comment_id},\n            response_type='Repository',\n            auth_settings=['AuthorizationHeaderToken']\n        )\n\n    def add_labels(self, owner: str, repo: str, issue_number: int, labels: List[int]):\n        body = {\n            \"labels\": labels\n        }\n        return self.issue.issue_add_label(\n            owner=owner,\n            repo=repo,\n            index=issue_number,\n            body=body\n        )\n\n    def get_pr_commits(self, owner: str, repo: str, 
pr_number: int):\n        \"\"\"Get all commits in a pull request\"\"\"\n        try:\n            url = f'/repos/{owner}/{repo}/pulls/{pr_number}/commits'\n\n            response = self.api_client.call_api(\n                url,\n                'GET',\n                path_params={},\n                response_type=None,\n                _return_http_data_only=False,\n                _preload_content=False,\n                auth_settings=['AuthorizationHeaderToken']\n            )\n\n            if hasattr(response, 'data'):\n                raw_data = response.data.read()\n                commits_data = json.loads(raw_data.decode('utf-8'))\n                return commits_data\n            elif isinstance(response, tuple):\n                raw_data = response[0].read()\n                commits_data = json.loads(raw_data.decode('utf-8'))\n                return commits_data\n\n            return []\n\n        except ApiException as e:\n            self.logger.error(f\"Error getting PR commits: {e}\")\n            return []\n        except Exception as e:\n            self.logger.error(f\"Unexpected error: {e}\")\n            return []\n"
  },
  {
    "path": "pr_agent/git_providers/github_provider.py",
    "content": "import copy\nimport difflib\nimport hashlib\nimport itertools\nimport re\nimport time\nimport traceback\nimport json\nfrom datetime import datetime\nfrom typing import Optional, Tuple\nfrom urllib.parse import urlparse\n\nfrom github.Issue import Issue\nfrom github import AppAuthentication, Auth, Github, GithubException\nfrom retry import retry\nfrom starlette_context import context\n\nfrom ..algo.file_filter import filter_ignored\nfrom ..algo.git_patch_processing import extract_hunk_headers\nfrom ..algo.language_handler import is_valid_file\nfrom ..algo.types import EDIT_TYPE\nfrom ..algo.utils import (PRReviewHeader, Range, clip_tokens,\n                          find_line_number_of_relevant_line_in_file,\n                          load_large_diff, set_file_languages)\nfrom ..config_loader import get_settings\nfrom ..log import get_logger\nfrom ..servers.utils import RateLimitExceeded\nfrom .git_provider import (MAX_FILES_ALLOWED_FULL, FilePatchInfo, GitProvider,\n                           IncrementalPR)\n\n\nclass GithubProvider(GitProvider):\n    def __init__(self, pr_url: Optional[str] = None):\n        self.repo_obj = None\n        try:\n            self.installation_id = context.get(\"installation_id\", None)\n        except Exception:\n            self.installation_id = None\n        self.max_comment_chars = 65000\n        self.base_url = get_settings().get(\"GITHUB.BASE_URL\", \"https://api.github.com\").rstrip(\"/\") # \"https://api.github.com\"\n        self.base_url_html = self.base_url.split(\"api/\")[0].rstrip(\"/\") if \"api/\" in self.base_url else \"https://github.com\"\n        self.github_client = self._get_github_client()\n        self.repo = None\n        self.pr_num = None\n        self.pr = None\n        self.issue_main = None\n        self.github_user_id = None\n        self.diff_files = None\n        self.git_files = None\n        self.incremental = IncrementalPR(False)\n        if pr_url and 'pull' in pr_url:\n            
self.set_pr(pr_url)\n            self.pr_commits = list(self.pr.get_commits())\n            self.last_commit_id = self.pr_commits[-1]\n            self.pr_url = self.get_pr_url() # pr_url for github actions can be as api.github.com, so we need to get the url from the pr object\n        elif pr_url and 'issue' in pr_url: #url is an issue\n            self.issue_main = self._get_issue_handle(pr_url)\n        else: #Instantiated the provider without a PR / Issue\n            self.pr_commits = None\n\n    def _get_issue_handle(self, issue_url) -> Optional[Issue]:\n        repo_name, issue_number = self._parse_issue_url(issue_url)\n        if not repo_name or not issue_number:\n            get_logger().error(f\"Given url: {issue_url} is not a valid issue.\")\n            return None\n        # else: Check if can get a valid Repo handle:\n        try:\n            repo_obj = self.github_client.get_repo(repo_name)\n            if not repo_obj:\n                get_logger().error(f\"Given url: {issue_url}, belonging to owner/repo: {repo_name} does \"\n                                   f\"not have a valid repository: {self.get_git_repo_url(issue_url)}\")\n                return None\n            # else: Valid repo handle:\n            return repo_obj.get_issue(issue_number)\n        except Exception as e:\n            get_logger().exception(f\"Failed to get an issue object for issue: {issue_url}, belonging to owner/repo: {repo_name}\")\n            return None\n\n    def get_incremental_commits(self, incremental=IncrementalPR(False)):\n        self.incremental = incremental\n        if self.incremental.is_incremental:\n            self.unreviewed_files_set = dict()\n            self._get_incremental_commits()\n\n    def is_supported(self, capability: str) -> bool:\n        return True\n\n    def _get_owner_and_repo_path(self, given_url: str) -> str:\n        try:\n            repo_path = None\n            if 'issues' in given_url:\n                repo_path, _ = 
self._parse_issue_url(given_url)\n            elif 'pull' in given_url:\n                repo_path, _ = self._parse_pr_url(given_url)\n            elif given_url.endswith('.git'):\n                parsed_url = urlparse(given_url)\n                repo_path = (parsed_url.path.split('.git')[0])[1:] # /<owner>/<repo>.git -> <owner>/<repo>\n            if not repo_path:\n                get_logger().error(f\"url is neither an issues url nor a PR url nor a valid git url: {given_url}. Returning empty result.\")\n                return \"\"\n            return repo_path\n        except Exception as e:\n            get_logger().exception(f\"unable to parse url: {given_url}. Returning empty result.\")\n            return \"\"\n\n    def get_git_repo_url(self, issues_or_pr_url: str) -> str:\n        repo_path = self._get_owner_and_repo_path(issues_or_pr_url) #Return: <OWNER>/<REPO>\n        if not repo_path or repo_path not in issues_or_pr_url:\n            get_logger().error(f\"Unable to retrieve owner/path from url: {issues_or_pr_url}\")\n            return \"\"\n        return f\"{self.base_url_html}/{repo_path}.git\" #https://github.com / <OWNER>/<REPO>.git\n\n    # Given a git repo url, return prefix and suffix of the provider in order to view a given file belonging to that repo.\n    # Example: https://github.com/qodo-ai/pr-agent.git and branch: v0.8 -> prefix: \"https://github.com/qodo-ai/pr-agent/blob/v0.8\", suffix: \"\"\n    # In case git url is not provided, provider will use PR context (which includes branch) to determine the prefix and suffix.\n    def get_canonical_url_parts(self, repo_git_url:str, desired_branch:str) -> Tuple[str, str]:\n        owner = None\n        repo = None\n        scheme_and_netloc = None\n\n        if repo_git_url or self.issue_main: #Either user provided an external git url, which may be different than what this provider was initialized with, or an issue:\n            desired_branch = desired_branch if repo_git_url else 
self.issue_main.repository.default_branch\n            html_url = repo_git_url if repo_git_url else self.issue_main.html_url\n            parsed_git_url = urlparse(html_url)\n            scheme_and_netloc = parsed_git_url.scheme + \"://\" + parsed_git_url.netloc\n            repo_path = self._get_owner_and_repo_path(html_url)\n            if repo_path.count('/') == 1: #Has to have the form <owner>/<repo>\n                owner, repo = repo_path.split('/')\n            else:\n                get_logger().error(f\"Invalid repo_path: {repo_path} from url: {html_url}\")\n                return (\"\", \"\")\n\n        if (not owner or not repo) and self.repo: #\"else\" - User did not provide an external git url, or not an issue, use self.repo object\n            owner, repo = self.repo.split('/')\n            scheme_and_netloc = self.base_url_html\n            desired_branch = self.repo_obj.default_branch\n        if not all([scheme_and_netloc, owner, repo]): #\"else\": Not invoked from a PR context,but no provided git url for context\n            get_logger().error(f\"Unable to get canonical url parts since missing context (PR or explicit git url)\")\n            return (\"\", \"\")\n\n        prefix = f\"{scheme_and_netloc}/{owner}/{repo}/blob/{desired_branch}\"\n        suffix = \"\"  # github does not add a suffix\n        return (prefix, suffix)\n\n    def get_pr_url(self) -> str:\n        return self.pr.html_url\n\n    def set_pr(self, pr_url: str):\n        self.repo, self.pr_num = self._parse_pr_url(pr_url)\n        self.pr = self._get_pr()\n\n    def _get_incremental_commits(self):\n        if not self.pr_commits:\n            self.pr_commits = list(self.pr.get_commits())\n\n        self.previous_review = self.get_previous_review(full=True, incremental=True)\n        if self.previous_review:\n            self.incremental.commits_range = self.get_commit_range()\n            # Get all files changed during the commit range\n\n            for commit in 
self.incremental.commits_range:\n                if commit.commit.message.startswith(f\"Merge branch '{self._get_repo().default_branch}'\"):\n                    get_logger().info(f\"Skipping merge commit {commit.commit.message}\")\n                    continue\n                self.unreviewed_files_set.update({file.filename: file for file in commit.files})\n        else:\n            get_logger().info(\"No previous review found, will review the entire PR\")\n            self.incremental.is_incremental = False\n\n    def get_commit_range(self):\n        last_review_time = self.previous_review.created_at\n        first_new_commit_index = None\n        for index in range(len(self.pr_commits) - 1, -1, -1):\n            if self.pr_commits[index].commit.author.date > last_review_time:\n                self.incremental.first_new_commit = self.pr_commits[index]\n                first_new_commit_index = index\n            else:\n                self.incremental.last_seen_commit = self.pr_commits[index]\n                break\n        return self.pr_commits[first_new_commit_index:] if first_new_commit_index is not None else []\n\n    def get_previous_review(self, *, full: bool, incremental: bool):\n        if not (full or incremental):\n            raise ValueError(\"At least one of full or incremental must be True\")\n        if not getattr(self, \"comments\", None):\n            self.comments = list(self.pr.get_issue_comments())\n        prefixes = []\n        if full:\n            prefixes.append(PRReviewHeader.REGULAR.value)\n        if incremental:\n            prefixes.append(PRReviewHeader.INCREMENTAL.value)\n        for index in range(len(self.comments) - 1, -1, -1):\n            if any(self.comments[index].body.startswith(prefix) for prefix in prefixes):\n                return self.comments[index]\n\n    def get_files(self):\n        if self.incremental.is_incremental and self.unreviewed_files_set:\n            return self.unreviewed_files_set.values()\n        
try:\n            git_files = context.get(\"git_files\", None)\n            if git_files:\n                return git_files\n            self.git_files = list(self.pr.get_files()) # 'list' to handle pagination\n            context[\"git_files\"] = self.git_files\n            return self.git_files\n        except Exception:\n            if not self.git_files:\n                self.git_files = list(self.pr.get_files())\n            return self.git_files\n\n    def get_num_of_files(self):\n        if hasattr(self.git_files, \"totalCount\"):\n            return self.git_files.totalCount\n        else:\n            try:\n                return len(self.git_files)\n            except Exception as e:\n                return -1\n\n    @retry(exceptions=RateLimitExceeded,\n           tries=get_settings().github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3))\n    def get_diff_files(self) -> list[FilePatchInfo]:\n        \"\"\"\n        Retrieves the list of files that have been modified, added, deleted, or renamed in a pull request in GitHub,\n        along with their content and patch information.\n\n        Returns:\n            diff_files (List[FilePatchInfo]): List of FilePatchInfo objects representing the modified, added, deleted,\n            or renamed files in the merge request.\n        \"\"\"\n        try:\n            try:\n                diff_files = context.get(\"diff_files\", None)\n                if diff_files:\n                    return diff_files\n            except Exception:\n                pass\n\n            if self.diff_files:\n                return self.diff_files\n\n            # filter files using [ignore] patterns\n            files_original = self.get_files()\n            files = filter_ignored(files_original)\n            if files_original != files:\n                try:\n                    names_original = [file.filename for file in files_original]\n                    names_new = [file.filename for file in files]\n                   
 get_logger().info(f\"Filtered out [ignore] files for pull request:\", extra=\n                    {\"files\": names_original,\n                     \"filtered_files\": names_new})\n                except Exception:\n                    pass\n\n            diff_files = []\n            invalid_files_names = []\n            is_close_to_rate_limit = False\n\n            # The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created\n            # We can fix this by finding the merge base commit between the PR head and base branches\n            # Note that The pr.head.sha is actually correct as is - it points to the latest commit in your PR branch.\n            # This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch.\n            repo = self.repo_obj\n            pr = self.pr\n            try:\n                compare = repo.compare(pr.base.sha, pr.head.sha) # communication with GitHub\n                merge_base_commit = compare.merge_base_commit\n            except Exception as e:\n                get_logger().error(f\"Failed to get merge base commit: {e}\")\n                merge_base_commit = pr.base\n            if merge_base_commit.sha != pr.base.sha:\n                get_logger().info(\n                    f\"Using merge base commit {merge_base_commit.sha} instead of base commit \")\n\n            counter_valid = 0\n            for file in files:\n                if not is_valid_file(file.filename):\n                    invalid_files_names.append(file.filename)\n                    continue\n\n                patch = file.patch\n                if is_close_to_rate_limit:\n                    new_file_content_str = \"\"\n                    original_file_content_str = \"\"\n                else:\n                    # allow only a limited number of files to be fully loaded. 
We can manage the rest with diffs only\n                    counter_valid += 1\n                    avoid_load = False\n                    if counter_valid >= MAX_FILES_ALLOWED_FULL and patch and not self.incremental.is_incremental:\n                        avoid_load = True\n                        if counter_valid == MAX_FILES_ALLOWED_FULL:\n                            get_logger().info(f\"Too many files in PR, will avoid loading full content for rest of files\")\n\n                    if avoid_load:\n                        new_file_content_str = \"\"\n                    else:\n                        new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha)  # communication with GitHub\n\n                    if self.incremental.is_incremental and self.unreviewed_files_set:\n                        original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha)\n                        patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)\n                        self.unreviewed_files_set[file.filename] = patch\n                    else:\n                        if avoid_load:\n                            original_file_content_str = \"\"\n                        else:\n                            original_file_content_str = self._get_pr_file_content(file, merge_base_commit.sha)\n                            # original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)\n                        if not patch:\n                            patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)\n\n\n                if file.status == 'added':\n                    edit_type = EDIT_TYPE.ADDED\n                elif file.status == 'removed':\n                    edit_type = EDIT_TYPE.DELETED\n                elif file.status == 'renamed':\n                    edit_type = EDIT_TYPE.RENAMED\n                elif file.status == 'modified':\n  
                  edit_type = EDIT_TYPE.MODIFIED\n                else:\n                    get_logger().error(f\"Unknown edit type: {file.status}\")\n                    edit_type = EDIT_TYPE.UNKNOWN\n\n                # count number of lines added and removed\n                if hasattr(file, 'additions') and hasattr(file, 'deletions'):\n                    num_plus_lines = file.additions\n                    num_minus_lines = file.deletions\n                else:\n                    patch_lines = patch.splitlines(keepends=True)\n                    num_plus_lines = len([line for line in patch_lines if line.startswith('+')])\n                    num_minus_lines = len([line for line in patch_lines if line.startswith('-')])\n\n                file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch,\n                                                               file.filename, edit_type=edit_type,\n                                                               num_plus_lines=num_plus_lines,\n                                                               num_minus_lines=num_minus_lines,)\n                diff_files.append(file_patch_canonical_structure)\n            if invalid_files_names:\n                get_logger().info(f\"Filtered out files with invalid extensions: {invalid_files_names}\")\n\n            self.diff_files = diff_files\n            try:\n                context[\"diff_files\"] = diff_files\n            except Exception:\n                pass\n\n            return diff_files\n\n        except Exception as e:\n            get_logger().error(f\"Failing to get diff files: {e}\",\n                               artifact={\"traceback\": traceback.format_exc()})\n            raise RateLimitExceeded(\"Rate limit exceeded for GitHub API.\") from e\n\n    def publish_description(self, pr_title: str, pr_body: str):\n        self.pr.edit(title=pr_title, body=pr_body)\n\n    def get_latest_commit_url(self) -> 
str:\n        return self.last_commit_id.html_url\n\n    def get_comment_url(self, comment) -> str:\n        return comment.html_url\n\n    def publish_persistent_comment(self, pr_comment: str,\n                                   initial_header: str,\n                                   update_header: bool = True,\n                                   name='review',\n                                   final_update_message=True):\n        self.publish_persistent_comment_full(pr_comment, initial_header, update_header, name, final_update_message)\n\n    def publish_comment(self, pr_comment: str, is_temporary: bool = False):\n        if not self.pr and not self.issue_main:\n            get_logger().error(\"Cannot publish a comment if missing PR/Issue context\")\n            return None\n\n        if is_temporary and not get_settings().config.publish_output_progress:\n            get_logger().debug(f\"Skipping publish_comment for temporary comment: {pr_comment}\")\n            return None\n        pr_comment = self.limit_output_characters(pr_comment, self.max_comment_chars)\n\n        # In case this is an issue, can publish the comment on the issue.\n        if self.issue_main:\n            return self.issue_main.create_comment(pr_comment)\n\n        response = self.pr.create_issue_comment(pr_comment)\n        if hasattr(response, \"user\") and hasattr(response.user, \"login\"):\n            self.github_user_id = response.user.login\n        response.is_temporary = is_temporary\n        if not hasattr(self.pr, 'comments_list'):\n            self.pr.comments_list = []\n        self.pr.comments_list.append(response)\n        return response\n\n    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):\n        body = self.limit_output_characters(body, self.max_comment_chars)\n        self.publish_inline_comments([self.create_inline_comment(body, relevant_file, relevant_line_in_file)])\n\n\n    def 
create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,\n                              absolute_position: int = None):\n        body = self.limit_output_characters(body, self.max_comment_chars)\n        position, absolute_position = find_line_number_of_relevant_line_in_file(self.diff_files,\n                                                                                relevant_file.strip('`'),\n                                                                                relevant_line_in_file,\n                                                                                absolute_position)\n        if position == -1:\n            get_logger().info(f\"Could not find position for {relevant_file} {relevant_line_in_file}\")\n            subject_type = \"FILE\"\n        else:\n            subject_type = \"LINE\"\n        path = relevant_file.strip()\n        return dict(body=body, path=path, position=position) if subject_type == \"LINE\" else {}\n\n    def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False):\n        try:\n            # publish all comments in a single message\n            self.pr.create_review(commit=self.last_commit_id, comments=comments)\n        except Exception as e:\n            get_logger().info(f\"Initially failed to publish inline comments as committable\")\n\n            if (getattr(e, \"status\", None) == 422 and not disable_fallback):\n                pass  # continue to try _publish_inline_comments_fallback_with_verification\n            else:\n                raise e # will end up with publishing the comments one by one\n\n            try:\n                self._publish_inline_comments_fallback_with_verification(comments)\n            except Exception as e:\n                get_logger().error(f\"Failed to publish inline code comments fallback, error: {e}\")\n                raise e    \n    \n    def get_review_thread_comments(self, comment_id: int) -> list[dict]:\n       
 \"\"\"\n        Retrieves all comments in the same thread as the given comment.\n        \n        Args:\n            comment_id: Review comment ID\n                \n        Returns:\n            List of comments in the same thread\n        \"\"\"\n        try:\n            # Fetch all comments with a single API call\n            all_comments = list(self.pr.get_comments())\n            \n            # Find the target comment by ID\n            target_comment = next((c for c in all_comments if c.id == comment_id), None)\n            if not target_comment:\n                return []\n        \n            # Get root comment id\n            root_comment_id = target_comment.raw_data.get(\"in_reply_to_id\", target_comment.id)\n            # Build the thread - include the root comment and all replies to it\n            thread_comments = [\n                c for c in all_comments if\n                c.id == root_comment_id or c.raw_data.get(\"in_reply_to_id\") == root_comment_id\n            ]\n        \n        \n            return thread_comments\n                \n        except Exception as e:\n            get_logger().exception(f\"Failed to get review comments for an inline ask command\", artifact={\"comment_id\": comment_id, \"error\": e})\n            return []\n\n    def _publish_inline_comments_fallback_with_verification(self, comments: list[dict]):\n        \"\"\"\n        Check each inline comment separately against the GitHub API and discard of invalid comments,\n        then publish all the remaining valid comments in a single review.\n        For invalid comments, also try removing the suggestion part and posting the comment just on the first line.\n        \"\"\"\n        verified_comments, invalid_comments = self._verify_code_comments(comments)\n\n        # publish as a group the verified comments\n        if verified_comments:\n            try:\n                self.pr.create_review(commit=self.last_commit_id, comments=verified_comments)\n            
except:\n                pass\n\n        # try to publish one by one the invalid comments as a one-line code comment\n        if invalid_comments and get_settings().github.try_fix_invalid_inline_comments:\n            fixed_comments_as_one_liner = self._try_fix_invalid_inline_comments(\n                [comment for comment, _ in invalid_comments])\n            for comment in fixed_comments_as_one_liner:\n                try:\n                    self.publish_inline_comments([comment], disable_fallback=True)\n                    get_logger().info(f\"Published invalid comment as a single line comment: {comment}\")\n                except:\n                    get_logger().error(f\"Failed to publish invalid comment as a single line comment: {comment}\")\n\n    def _verify_code_comment(self, comment: dict):\n        is_verified = False\n        e = None\n        try:\n            # event =\"\" # By leaving this blank, you set the review action state to PENDING\n            input = dict(commit_id=self.last_commit_id.sha, comments=[comment])\n            headers, data = self.pr._requester.requestJsonAndCheck(\n                \"POST\", f\"{self.pr.url}/reviews\", input=input)\n            pending_review_id = data[\"id\"]\n            is_verified = True\n        except Exception as err:\n            is_verified = False\n            pending_review_id = None\n            e = err\n        if pending_review_id is not None:\n            try:\n                self.pr._requester.requestJsonAndCheck(\"DELETE\", f\"{self.pr.url}/reviews/{pending_review_id}\")\n            except Exception:\n                pass\n        return is_verified, e\n\n    def _verify_code_comments(self, comments: list[dict]) -> tuple[list[dict], list[tuple[dict, Exception]]]:\n        \"\"\"Very each comment against the GitHub API and return 2 lists: 1 of verified and 1 of invalid comments\"\"\"\n        verified_comments = []\n        invalid_comments = []\n        for comment in comments:\n            
time.sleep(1)  # for avoiding secondary rate limit\n            is_verified, e = self._verify_code_comment(comment)\n            if is_verified:\n                verified_comments.append(comment)\n            else:\n                invalid_comments.append((comment, e))\n        return verified_comments, invalid_comments\n\n    def _try_fix_invalid_inline_comments(self, invalid_comments: list[dict]) -> list[dict]:\n        \"\"\"\n        Try fixing invalid comments by removing the suggestion part and setting the comment just on the first line.\n        Return only comments that have been modified in some way.\n        This is a best-effort attempt to fix invalid comments, and should be verified accordingly.\n        \"\"\"\n        import copy\n        fixed_comments = []\n        for comment in invalid_comments:\n            try:\n                fixed_comment = copy.deepcopy(comment)  # avoid modifying the original comment dict for later logging\n                if \"```suggestion\" in comment[\"body\"]:\n                    fixed_comment[\"body\"] = comment[\"body\"].split(\"```suggestion\")[0]\n                if \"start_line\" in comment:\n                    fixed_comment[\"line\"] = comment[\"start_line\"]\n                    del fixed_comment[\"start_line\"]\n                if \"start_side\" in comment:\n                    fixed_comment[\"side\"] = comment[\"start_side\"]\n                    del fixed_comment[\"start_side\"]\n                if fixed_comment != comment:\n                    fixed_comments.append(fixed_comment)\n            except Exception as e:\n                get_logger().error(f\"Failed to fix inline comment, error: {e}\")\n        return fixed_comments\n\n    def publish_code_suggestions(self, code_suggestions: list) -> bool:\n        \"\"\"\n        Publishes code suggestions as comments on the PR.\n        \"\"\"\n        post_parameters_list = []\n\n        code_suggestions_validated = 
self.validate_comments_inside_hunks(code_suggestions)\n\n        for suggestion in code_suggestions_validated:\n            body = suggestion['body']\n            relevant_file = suggestion['relevant_file']\n            relevant_lines_start = suggestion['relevant_lines_start']\n            relevant_lines_end = suggestion['relevant_lines_end']\n\n            if not relevant_lines_start or relevant_lines_start == -1:\n                get_logger().exception(\n                    f\"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}\")\n                continue\n\n            if relevant_lines_end < relevant_lines_start:\n                get_logger().exception(f\"Failed to publish code suggestion, \"\n                                  f\"relevant_lines_end is {relevant_lines_end} and \"\n                                  f\"relevant_lines_start is {relevant_lines_start}\")\n                continue\n\n            if relevant_lines_end > relevant_lines_start:\n                post_parameters = {\n                    \"body\": body,\n                    \"path\": relevant_file,\n                    \"line\": relevant_lines_end,\n                    \"start_line\": relevant_lines_start,\n                    \"start_side\": \"RIGHT\",\n                }\n            else:  # API is different for single line comments\n                post_parameters = {\n                    \"body\": body,\n                    \"path\": relevant_file,\n                    \"line\": relevant_lines_start,\n                    \"side\": \"RIGHT\",\n                }\n            post_parameters_list.append(post_parameters)\n\n        try:\n            self.publish_inline_comments(post_parameters_list)\n            return True\n        except Exception as e:\n            get_logger().error(f\"Failed to publish code suggestion, error: {e}\")\n            return False\n\n    def edit_comment(self, comment, body: str):\n        try:\n            body = 
self.limit_output_characters(body, self.max_comment_chars)\n            comment.edit(body=body)\n        except GithubException as e:\n            if hasattr(e, \"status\") and e.status == 403:\n                # Log as warning for permission-related issues (usually due to polling)\n                get_logger().warning(\n                    \"Failed to edit github comment due to permission restrictions\",\n                    artifact={\"error\": e})\n            else:\n                get_logger().exception(f\"Failed to edit github comment\", artifact={\"error\": e})\n\n    def edit_comment_from_comment_id(self, comment_id: int, body: str):\n        try:\n            # self.pr.get_issue_comment(comment_id).edit(body)\n            body = self.limit_output_characters(body, self.max_comment_chars)\n            headers, data_patch = self.pr._requester.requestJsonAndCheck(\n                \"PATCH\", f\"{self.base_url}/repos/{self.repo}/issues/comments/{comment_id}\",\n                input={\"body\": body}\n            )\n        except Exception as e:\n            get_logger().exception(f\"Failed to edit comment, error: {e}\")\n\n    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):\n        try:\n            # self.pr.get_issue_comment(comment_id).edit(body)\n            body = self.limit_output_characters(body, self.max_comment_chars)\n            headers, data_patch = self.pr._requester.requestJsonAndCheck(\n                \"POST\", f\"{self.base_url}/repos/{self.repo}/pulls/{self.pr_num}/comments/{comment_id}/replies\",\n                input={\"body\": body}\n            )\n        except Exception as e:\n            get_logger().exception(f\"Failed to reply comment, error: {e}\")\n\n    def get_comment_body_from_comment_id(self, comment_id: int):\n        try:\n            # self.pr.get_issue_comment(comment_id).edit(body)\n            headers, data_patch = self.pr._requester.requestJsonAndCheck(\n                \"GET\", 
f\"{self.base_url}/repos/{self.repo}/issues/comments/{comment_id}\"\n            )\n            return data_patch.get(\"body\",\"\")\n        except Exception as e:\n            get_logger().exception(f\"Failed to edit comment, error: {e}\")\n            return None\n\n    def publish_file_comments(self, file_comments: list) -> bool:\n        try:\n            headers, existing_comments = self.pr._requester.requestJsonAndCheck(\n                \"GET\", f\"{self.pr.url}/comments\"\n            )\n            for comment in file_comments:\n                comment['commit_id'] = self.last_commit_id.sha\n                comment['body'] = self.limit_output_characters(comment['body'], self.max_comment_chars)\n\n                found = False\n                for existing_comment in existing_comments:\n                    comment['commit_id'] = self.last_commit_id.sha\n                    our_app_name = get_settings().get(\"GITHUB.APP_NAME\", \"\")\n                    same_comment_creator = False\n                    if self.deployment_type == 'app':\n                        same_comment_creator = our_app_name.lower() in existing_comment['user']['login'].lower()\n                    elif self.deployment_type == 'user':\n                        same_comment_creator = self.github_user_id == existing_comment['user']['login']\n                    if existing_comment['subject_type'] == 'file' and comment['path'] == existing_comment['path'] and same_comment_creator:\n\n                        headers, data_patch = self.pr._requester.requestJsonAndCheck(\n                            \"PATCH\", f\"{self.base_url}/repos/{self.repo}/pulls/comments/{existing_comment['id']}\", input={\"body\":comment['body']}\n                        )\n                        found = True\n                        break\n                if not found:\n                    headers, data_post = self.pr._requester.requestJsonAndCheck(\n                        \"POST\", f\"{self.pr.url}/comments\", 
input=comment\n                    )\n            return True\n        except Exception as e:\n            get_logger().error(f\"Failed to publish diffview file summary, error: {e}\")\n            return False\n\n    def remove_initial_comment(self):\n        try:\n            for comment in getattr(self.pr, 'comments_list', []):\n                if comment.is_temporary:\n                    self.remove_comment(comment)\n        except Exception as e:\n            get_logger().exception(f\"Failed to remove initial comment, error: {e}\")\n\n    def remove_comment(self, comment):\n        try:\n            comment.delete()\n        except Exception as e:\n            get_logger().exception(f\"Failed to remove comment, error: {e}\")\n\n    def get_title(self):\n        return self.pr.title\n\n    def get_languages(self):\n        languages = self._get_repo().get_languages()\n        return languages\n\n    def get_pr_branch(self):\n        return self.pr.head.ref\n\n    def get_pr_owner_id(self) -> str | None:\n        if not self.repo:\n            return None\n        return self.repo.split('/')[0]\n\n    def get_pr_description_full(self):\n        return self.pr.body\n\n    def get_user_id(self):\n        if not self.github_user_id:\n            try:\n                self.github_user_id = self.github_client.get_user().raw_data['login']\n            except Exception as e:\n                self.github_user_id = \"\"\n                # logging.exception(f\"Failed to get user id, error: {e}\")\n        return self.github_user_id\n\n    def get_notifications(self, since: datetime):\n        deployment_type = get_settings().get(\"GITHUB.DEPLOYMENT_TYPE\", \"user\")\n\n        if deployment_type != 'user':\n            raise ValueError(\"Deployment mode must be set to 'user' to get notifications\")\n\n        notifications = self.github_client.get_user().get_notifications(since=since)\n        return notifications\n\n    def get_issue_comments(self):\n        return 
self.pr.get_issue_comments()\n\n    def get_repo_settings(self):\n        try:\n            # contents = self.repo_obj.get_contents(\".pr_agent.toml\", ref=self.pr.head.sha).decoded_content\n\n            # more logical to take 'pr_agent.toml' from the default branch\n            contents = self.repo_obj.get_contents(\".pr_agent.toml\").decoded_content\n            return contents\n        except Exception:\n            return \"\"\n\n    def get_workspace_name(self):\n        return self.repo.split('/')[0]\n\n    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:\n        if disable_eyes:\n            return None\n        try:\n            headers, data_patch = self.pr._requester.requestJsonAndCheck(\n                \"POST\", f\"{self.base_url}/repos/{self.repo}/issues/comments/{issue_comment_id}/reactions\",\n                input={\"content\": \"eyes\"}\n            )\n            return data_patch.get(\"id\", None)\n        except Exception as e:\n            get_logger().warning(f\"Failed to add eyes reaction, error: {e}\")\n            return None\n\n    def remove_reaction(self, issue_comment_id: int, reaction_id: str) -> bool:\n        try:\n            # self.pr.get_issue_comment(issue_comment_id).delete_reaction(reaction_id)\n            headers, data_patch = self.pr._requester.requestJsonAndCheck(\n                \"DELETE\",\n                f\"{self.base_url}/repos/{self.repo}/issues/comments/{issue_comment_id}/reactions/{reaction_id}\"\n            )\n            return True\n        except Exception as e:\n            get_logger().exception(f\"Failed to remove eyes reaction, error: {e}\")\n            return False\n\n    def _parse_pr_url(self, pr_url: str) -> Tuple[str, int]:\n        parsed_url = urlparse(pr_url)\n\n        if parsed_url.path.startswith('/api/v3'):\n            parsed_url = urlparse(pr_url.replace(\"/api/v3\", \"\"))\n\n        path_parts = parsed_url.path.strip('/').split('/')\n       
 if 'api.github.com' in parsed_url.netloc or '/api/v3' in pr_url:\n            if len(path_parts) < 5 or path_parts[3] != 'pulls':\n                raise ValueError(\"The provided URL does not appear to be a GitHub PR URL\")\n            repo_name = '/'.join(path_parts[1:3])\n            try:\n                pr_number = int(path_parts[4])\n            except ValueError as e:\n                raise ValueError(\"Unable to convert PR number to integer\") from e\n            return repo_name, pr_number\n\n        if len(path_parts) < 4 or path_parts[2] != 'pull':\n            raise ValueError(\"The provided URL does not appear to be a GitHub PR URL\")\n\n        repo_name = '/'.join(path_parts[:2])\n        try:\n            pr_number = int(path_parts[3])\n        except ValueError as e:\n            raise ValueError(\"Unable to convert PR number to integer\") from e\n\n        return repo_name, pr_number\n\n    def _parse_issue_url(self, issue_url: str) -> Tuple[str, int]:\n        parsed_url = urlparse(issue_url)\n\n        if parsed_url.path.startswith('/api/v3'): #Check if came from github app\n            parsed_url = urlparse(issue_url.replace(\"/api/v3\", \"\"))\n\n        path_parts = parsed_url.path.strip('/').split('/')\n        if 'api.github.com' in parsed_url.netloc or '/api/v3' in issue_url: #Check if came from github app\n            if len(path_parts) < 5 or path_parts[3] != 'issues':\n                raise ValueError(\"The provided URL does not appear to be a GitHub ISSUE URL\")\n            repo_name = '/'.join(path_parts[1:3])\n            try:\n                issue_number = int(path_parts[4])\n            except ValueError as e:\n                raise ValueError(\"Unable to convert issue number to integer\") from e\n            return repo_name, issue_number\n\n        if len(path_parts) < 4 or path_parts[2] != 'issues':\n            raise ValueError(\"The provided URL does not appear to be a GitHub PR issue\")\n\n        repo_name = 
'/'.join(path_parts[:2])\n        try:\n            issue_number = int(path_parts[3])\n        except ValueError as e:\n            raise ValueError(\"Unable to convert issue number to integer\") from e\n\n        return repo_name, issue_number\n\n    def _get_github_client(self):\n        self.deployment_type = get_settings().get(\"GITHUB.DEPLOYMENT_TYPE\", \"user\")\n        self.auth = None\n        if self.deployment_type == 'app':\n            try:\n                private_key = get_settings().github.private_key\n                app_id = get_settings().github.app_id\n            except AttributeError as e:\n                raise ValueError(\"GitHub app ID and private key are required when using GitHub app deployment\") from e\n            if not self.installation_id:\n                raise ValueError(\"GitHub app installation ID is required when using GitHub app deployment\")\n            auth = AppAuthentication(app_id=app_id, private_key=private_key,\n                                     installation_id=self.installation_id)\n            self.auth = auth\n        elif self.deployment_type == 'user':\n            try:\n                token = get_settings().github.user_token\n            except AttributeError as e:\n                raise ValueError(\n                    \"GitHub token is required when using user deployment. 
See: \"\n                    \"https://github.com/Codium-ai/pr-agent#method-2-run-from-source\") from e\n            self.auth = Auth.Token(token)\n        if self.auth:\n            return Github(auth=self.auth, base_url=self.base_url)\n        else:\n            raise ValueError(\"Could not authenticate to GitHub\")\n\n    def _get_repo(self):\n        if hasattr(self, 'repo_obj') and \\\n                hasattr(self.repo_obj, 'full_name') and \\\n                self.repo_obj.full_name == self.repo:\n            return self.repo_obj\n        else:\n            self.repo_obj = self.github_client.get_repo(self.repo)\n            return self.repo_obj\n\n\n    def _get_pr(self):\n        return self._get_repo().get_pull(self.pr_num)\n\n    def get_pr_file_content(self, file_path: str, branch: str) -> str:\n        try:\n            file_content_str = str(\n                self._get_repo()\n                .get_contents(file_path, ref=branch)\n                .decoded_content.decode()\n            )\n        except Exception:\n            file_content_str = \"\"\n        return file_content_str\n\n    def create_or_update_pr_file(\n        self, file_path: str, branch: str, contents=\"\", message=\"\"\n    ) -> None:\n        try:\n            file_obj = self._get_repo().get_contents(file_path, ref=branch)\n            sha1=file_obj.sha\n        except Exception:\n            sha1=\"\"\n        self.repo_obj.update_file(\n            path=file_path,\n            message=message,\n            content=contents,\n            sha=sha1,\n            branch=branch,\n        )\n\n    def _get_pr_file_content(self, file: FilePatchInfo, sha: str) -> str:\n        return self.get_pr_file_content(file.filename, sha)\n\n    def publish_labels(self, pr_types):\n        try:\n            label_color_map = {\"Bug fix\": \"1d76db\", \"Tests\": \"e99695\", \"Bug fix with tests\": \"c5def5\",\n                               \"Enhancement\": \"bfd4f2\", \"Documentation\": \"d4c5f9\",\n 
                              \"Other\": \"d1bcf9\"}\n            post_parameters = []\n            for p in pr_types:\n                color = label_color_map.get(p, \"d1bcf9\")  # default to \"Other\" color\n                post_parameters.append({\"name\": p, \"color\": color})\n            headers, data = self.pr._requester.requestJsonAndCheck(\n                \"PUT\", f\"{self.pr.issue_url}/labels\", input=post_parameters\n            )\n        except Exception as e:\n            get_logger().warning(f\"Failed to publish labels, error: {e}\")\n\n    def get_pr_labels(self, update=False):\n        try:\n            if not update:\n                labels =self.pr.labels\n                return [label.name for label in labels]\n            else: # obtain the latest labels. Maybe they changed while the AI was running\n                headers, labels = self.pr._requester.requestJsonAndCheck(\n                    \"GET\", f\"{self.pr.issue_url}/labels\")\n                return [label['name'] for label in labels]\n\n        except Exception as e:\n            get_logger().exception(f\"Failed to get labels, error: {e}\")\n            return []\n\n    def get_repo_labels(self):\n        labels = self.repo_obj.get_labels()\n        return [label for label in itertools.islice(labels, 50)]\n\n    def get_commit_messages(self):\n        \"\"\"\n        Retrieves the commit messages of a pull request.\n\n        Returns:\n            str: A string containing the commit messages of the pull request.\n        \"\"\"\n        max_tokens = get_settings().get(\"CONFIG.MAX_COMMITS_TOKENS\", None)\n        try:\n            commit_list = self.pr.get_commits()\n            commit_messages = [commit.commit.message for commit in commit_list]\n            commit_messages_str = \"\\n\".join([f\"{i + 1}. 
{message}\" for i, message in enumerate(commit_messages)])\n        except Exception:\n            commit_messages_str = \"\"\n        if max_tokens:\n            commit_messages_str = clip_tokens(commit_messages_str, max_tokens)\n        return commit_messages_str\n\n    def generate_link_to_relevant_line_number(self, suggestion) -> str:\n        try:\n            relevant_file = suggestion['relevant_file'].strip('`').strip(\"'\").strip('\\n')\n            relevant_line_str = suggestion['relevant_line'].strip('\\n')\n            if not relevant_line_str:\n                return \"\"\n\n            position, absolute_position = find_line_number_of_relevant_line_in_file \\\n                (self.diff_files, relevant_file, relevant_line_str)\n\n            if absolute_position != -1:\n                # # link to right file only\n                # link = f\"https://github.com/{self.repo}/blob/{self.pr.head.sha}/{relevant_file}\" \\\n                #        + \"#\" + f\"L{absolute_position}\"\n\n                # link to diff\n                sha_file = hashlib.sha256(relevant_file.encode('utf-8')).hexdigest()\n                link = f\"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{absolute_position}\"\n                return link\n        except Exception as e:\n            get_logger().info(f\"Failed adding line link, error: {e}\")\n\n        return \"\"\n\n    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:\n        sha_file = hashlib.sha256(relevant_file.encode('utf-8')).hexdigest()\n        if relevant_line_start == -1:\n            link = f\"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}\"\n        elif relevant_line_end:\n            link = f\"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{relevant_line_start}-R{relevant_line_end}\"\n        else:\n            link = 
f\"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{relevant_line_start}\"\n        return link\n\n    def get_lines_link_original_file(self, filepath: str, component_range: Range) -> str:\n        \"\"\"\n        Returns the link to the original file on GitHub that corresponds to the given filepath and component range.\n\n        Args:\n            filepath (str): The path of the file.\n            component_range (Range): The range of lines that represent the component.\n\n        Returns:\n            str: The link to the original file on GitHub.\n\n        Example:\n            >>> filepath = \"path/to/file.py\"\n            >>> component_range = Range(line_start=10, line_end=20)\n            >>> link = get_lines_link_original_file(filepath, component_range)\n            >>> print(link)\n            \"https://github.com/{repo}/blob/{commit_sha}/{filepath}/#L11-L21\"\n        \"\"\"\n        line_start = component_range.line_start + 1\n        line_end = component_range.line_end + 1\n        # link = (f\"https://github.com/{self.repo}/blob/{self.last_commit_id.sha}/{filepath}/\"\n        #         f\"#L{line_start}-L{line_end}\")\n        link = (f\"{self.base_url_html}/{self.repo}/blob/{self.last_commit_id.sha}/{filepath}/\"\n                f\"#L{line_start}-L{line_end}\")\n\n        return link\n\n    def get_pr_id(self):\n        try:\n            pr_id = f\"{self.repo}/{self.pr_num}\"\n            return pr_id\n        except:\n            return \"\"\n\n    def fetch_sub_issues(self, issue_url):\n        \"\"\"\n        Fetch sub-issues linked to the given GitHub issue URL using GraphQL via PyGitHub.\n        \"\"\"\n        sub_issues = set()\n\n        # Extract owner, repo, and issue number from URL\n        parts = issue_url.rstrip(\"/\").split(\"/\")\n        owner, repo, issue_number = parts[-4], parts[-3], parts[-1]\n\n        try:\n            # Gets Issue ID from Issue Number\n            query = f\"\"\"\n            
query {{\n                repository(owner: \"{owner}\", name: \"{repo}\") {{\n                    issue(number: {issue_number}) {{\n                        id\n                    }}\n                }}\n            }}\n            \"\"\"\n            response_tuple = self.github_client._Github__requester.requestJson(\"POST\", \"/graphql\",\n                                                                               input={\"query\": query})\n\n            # Extract the JSON response from the tuple and parses it\n            if isinstance(response_tuple, tuple) and len(response_tuple) == 3:\n                response_json = json.loads(response_tuple[2])\n            else:\n                get_logger().error(f\"Unexpected response format: {response_tuple}\")\n                return sub_issues\n\n\n            issue_id = response_json.get(\"data\", {}).get(\"repository\", {}).get(\"issue\", {}).get(\"id\")\n\n            if not issue_id:\n                get_logger().warning(f\"Issue ID not found for {issue_url}\")\n                return sub_issues\n\n            # Fetch Sub-Issues\n            sub_issues_query = f\"\"\"\n            query {{\n                node(id: \"{issue_id}\") {{\n                    ... 
on Issue {{\n                        subIssues(first: 10) {{\n                            nodes {{\n                                url\n                            }}\n                        }}\n                    }}\n                }}\n            }}\n            \"\"\"\n            sub_issues_response_tuple = self.github_client._Github__requester.requestJson(\"POST\", \"/graphql\", input={\n                \"query\": sub_issues_query})\n\n            # Extract the JSON response from the tuple and parses it\n            if isinstance(sub_issues_response_tuple, tuple) and len(sub_issues_response_tuple) == 3:\n                sub_issues_response_json = json.loads(sub_issues_response_tuple[2])\n            else:\n                get_logger().error(\"Unexpected sub-issues response format\", artifact={\"response\": sub_issues_response_tuple})\n                return sub_issues\n\n            if not sub_issues_response_json.get(\"data\", {}).get(\"node\", {}).get(\"subIssues\"):\n                get_logger().error(\"Invalid sub-issues response structure\")\n                return sub_issues\n    \n            nodes = sub_issues_response_json.get(\"data\", {}).get(\"node\", {}).get(\"subIssues\", {}).get(\"nodes\", [])\n            get_logger().info(f\"Github Sub-issues fetched: {len(nodes)}\", artifact={\"nodes\": nodes})\n\n            for sub_issue in nodes:\n                if \"url\" in sub_issue:\n                    sub_issues.add(sub_issue[\"url\"])\n\n        except Exception as e:\n            get_logger().exception(f\"Failed to fetch sub-issues. 
Error: {e}\")\n\n        return sub_issues\n\n    def auto_approve(self) -> bool:\n        try:\n            res = self.pr.create_review(event=\"APPROVE\")\n            if res.state == \"APPROVED\":\n                return True\n            return False\n        except Exception as e:\n            get_logger().exception(f\"Failed to auto-approve, error: {e}\")\n            return False\n\n    def calc_pr_statistics(self, pull_request_data: dict):\n            return {}\n\n    def validate_comments_inside_hunks(self, code_suggestions):\n        \"\"\"\n        validate that all committable comments are inside PR hunks - this is a must for committable comments in GitHub\n        \"\"\"\n        code_suggestions_copy = copy.deepcopy(code_suggestions)\n        diff_files = self.get_diff_files()\n        RE_HUNK_HEADER = re.compile(\n            r\"^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@[ ]?(.*)\")\n\n        diff_files = set_file_languages(diff_files)\n\n        for suggestion in code_suggestions_copy:\n            try:\n                relevant_file_path = suggestion['relevant_file']\n                for file in diff_files:\n                    if file.filename == relevant_file_path:\n\n                        # generate on-demand the patches range for the relevant file\n                        patch_str = file.patch\n                        if not hasattr(file, 'patches_range'):\n                            file.patches_range = []\n                            patch_lines = patch_str.splitlines()\n                            for i, line in enumerate(patch_lines):\n                                if line.startswith('@@'):\n                                    match = RE_HUNK_HEADER.match(line)\n                                    # identify hunk header\n                                    if match:\n                                        section_header, size1, size2, start1, start2 = extract_hunk_headers(match)\n                                        
file.patches_range.append({'start': start2, 'end': start2 + size2 - 1})\n\n                        patches_range = file.patches_range\n                        comment_start_line = suggestion.get('relevant_lines_start', None)\n                        comment_end_line = suggestion.get('relevant_lines_end', None)\n                        original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code\n                        if not comment_start_line or not comment_end_line or not original_suggestion:\n                            continue\n\n                        # check if the comment is inside a valid hunk\n                        is_valid_hunk = False\n                        min_distance = float('inf')\n                        patch_range_min = None\n                        # find the hunk that contains the comment, or the closest one\n                        for i, patch_range in enumerate(patches_range):\n                            d1 = comment_start_line - patch_range['start']\n                            d2 = patch_range['end'] - comment_end_line\n                            if d1 >= 0 and d2 >= 0:  # found a valid hunk\n                                is_valid_hunk = True\n                                min_distance = 0\n                                patch_range_min = patch_range\n                                break\n                            elif d1 * d2 <= 0:  # comment is possibly inside the hunk\n                                d1_clip = abs(min(0, d1))\n                                d2_clip = abs(min(0, d2))\n                                d = max(d1_clip, d2_clip)\n                                if d < min_distance:\n                                    patch_range_min = patch_range\n                                    min_distance = min(min_distance, d)\n                        if not is_valid_hunk:\n                            if min_distance < 10:  # 10 lines - a reasonable distance to consider the comment inside 
the hunk\n                                # make the suggestion non-committable, yet multi line\n                                suggestion['relevant_lines_start'] = max(suggestion['relevant_lines_start'], patch_range_min['start'])\n                                suggestion['relevant_lines_end'] = min(suggestion['relevant_lines_end'], patch_range_min['end'])\n                                body = suggestion['body'].strip()\n\n                                # present new diff code in collapsible\n                                existing_code = original_suggestion['existing_code'].rstrip() + \"\\n\"\n                                improved_code = original_suggestion['improved_code'].rstrip() + \"\\n\"\n                                diff = difflib.unified_diff(existing_code.split('\\n'),\n                                                            improved_code.split('\\n'), n=999)\n                                patch_orig = \"\\n\".join(diff)\n                                patch = \"\\n\".join(patch_orig.splitlines()[5:]).strip('\\n')\n                                diff_code = f\"\\n\\n<details><summary>New proposed code:</summary>\\n\\n```diff\\n{patch.rstrip()}\\n```\"\n                                # replace ```suggestion ... 
``` with diff_code, using regex:\n                                body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)\n                                body += \"\\n\\n</details>\"\n                                suggestion['body'] = body\n                                get_logger().info(f\"Comment was moved to a valid hunk, \"\n                                                  f\"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}\")\n                            else:\n                                get_logger().error(f\"Comment is not inside a valid hunk, \"\n                                                   f\"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}\")\n            except Exception as e:\n                get_logger().error(f\"Failed to process patch for committable comment, error: {e}\")\n        return code_suggestions_copy\n\n    #Clone related\n    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:\n        scheme = \"https://\"\n\n        #For example, to clone:\n        #https://github.com/Codium-ai/pr-agent-pro.git\n        #Need to embed the github token inside the URL:\n        #https://<token>@github.com/Codium-ai/pr-agent-pro.git\n\n        github_token = self.auth.token\n        github_base_url = self.base_url_html\n        if not all([github_token, github_base_url]):\n            get_logger().error(\"Either missing auth token or missing base url\")\n            return None\n        if scheme not in github_base_url:\n            get_logger().error(f\"Base url: {github_base_url} is missing prefix: {scheme}\")\n            return None\n        github_com = github_base_url.split(scheme)[1]  # e.g. 
'github.com' or github.<org>.com\n        if not github_com:\n            get_logger().error(f\"Base url: {github_base_url} has an empty base url\")\n            return None\n        if github_com not in repo_url_to_clone:\n            get_logger().error(f\"url to clone: {repo_url_to_clone} does not contain {github_com}\")\n            return None\n        repo_full_name = repo_url_to_clone.split(github_com)[-1]\n        if not repo_full_name:\n            get_logger().error(f\"url to clone: {repo_url_to_clone} is malformed\")\n            return None\n\n        clone_url = scheme\n        if self.deployment_type == 'app':\n            clone_url += \"git:\"\n        clone_url += f\"{github_token}@{github_com}{repo_full_name}\"\n        return clone_url\n"
  },
  {
    "path": "pr_agent/git_providers/gitlab_provider.py",
    "content": "import difflib\nimport hashlib\nimport re\nimport urllib.parse\nfrom typing import Any, Optional, Tuple, Union\nfrom urllib.parse import parse_qs, urlparse\n\nimport gitlab\nimport requests\nfrom gitlab import (GitlabAuthenticationError, GitlabCreateError,\n                    GitlabGetError, GitlabUpdateError)\n\nfrom pr_agent.algo.types import EDIT_TYPE, FilePatchInfo\n\nfrom ..algo.file_filter import filter_ignored\nfrom ..algo.git_patch_processing import decode_if_bytes\nfrom ..algo.language_handler import is_valid_file\nfrom ..algo.utils import (clip_tokens,\n                          find_line_number_of_relevant_line_in_file,\n                          load_large_diff)\nfrom ..config_loader import get_settings\nfrom ..log import get_logger\nfrom .git_provider import MAX_FILES_ALLOWED_FULL, GitProvider\n\n\nclass DiffNotFoundError(Exception):\n    \"\"\"Raised when the diff for a merge request cannot be found.\"\"\"\n    pass\n\nclass GitLabProvider(GitProvider):\n\n    def __init__(self, merge_request_url: Optional[str] = None, incremental: Optional[bool] = False):\n        gitlab_url = get_settings().get(\"GITLAB.URL\", None)\n        if not gitlab_url:\n            raise ValueError(\"GitLab URL is not set in the config file\")\n        self.gitlab_url = gitlab_url\n        ssl_verify = get_settings().get(\"GITLAB.SSL_VERIFY\", True)\n        gitlab_access_token = get_settings().get(\"GITLAB.PERSONAL_ACCESS_TOKEN\", None)\n        if not gitlab_access_token:\n            raise ValueError(\"GitLab personal access token is not set in the config file\")\n        # Authentication method selection via configuration\n        auth_method = get_settings().get(\"GITLAB.AUTH_TYPE\", \"oauth_token\")\n\n        # Basic validation of authentication type\n        if auth_method not in [\"oauth_token\", \"private_token\"]:\n            raise ValueError(f\"Unsupported GITLAB.AUTH_TYPE: '{auth_method}'. 
\"\n                           f\"Must be 'oauth_token' or 'private_token'.\")\n\n        # Create GitLab instance based on authentication method\n        try:\n            if auth_method == \"oauth_token\":\n                self.gl = gitlab.Gitlab(\n                    url=gitlab_url,\n                    oauth_token=gitlab_access_token,\n                    ssl_verify=ssl_verify\n                )\n            else:  # private_token\n                self.gl = gitlab.Gitlab(\n                    url=gitlab_url,\n                    private_token=gitlab_access_token,\n                    ssl_verify=ssl_verify\n                )\n        except Exception as e:\n            get_logger().error(f\"Failed to create GitLab instance: {e}\")\n            raise ValueError(f\"Unable to authenticate with GitLab: {e}\")\n        self.max_comment_chars = 65000\n        self.id_project = None\n        self.id_mr = None\n        self.mr = None\n        self.diff_files = None\n        self.git_files = None\n        self.temp_comments = []\n        self._submodule_cache: dict[tuple[str, str, str], list[dict]] = {}\n        self.pr_url = merge_request_url\n        self._set_merge_request(merge_request_url)\n        self.RE_HUNK_HEADER = re.compile(\n            r\"^@@ -(\\d+)(?:,(\\d+))? \\+(\\d+)(?:,(\\d+))? @@[ ]?(.*)\")\n        self.incremental = incremental\n\n    # --- submodule expansion helpers (opt-in) ---\n    def _get_gitmodules_map(self) -> dict[str, str]:\n        \"\"\"\n        Return {submodule_path -> repo_url} from '.gitmodules' (best effort).\n        Tries target branch first, then source branch. 
Always returns text.\n        \"\"\"\n        try:\n            proj = self.gl.projects.get(self.id_project)\n        except Exception:\n            return {}\n\n        import base64\n\n        def _read_text(ref: str | None) -> str | None:\n            if not ref:\n                return None\n            try:\n                f = proj.files.get(file_path=\".gitmodules\", ref=ref)\n            except Exception:\n                return None\n\n            # 1) python-gitlab File.decode() – usually returns BYTES\n            try:\n                raw = f.decode()\n                if isinstance(raw, (bytes, bytearray)):\n                    return raw.decode(\"utf-8\", \"ignore\")\n                if isinstance(raw, str):\n                    return raw\n            except Exception:\n                pass\n\n            # 2) fallback: base64 decode f.content\n            try:\n                c = getattr(f, \"content\", None)\n                if c:\n                    return base64.b64decode(c).decode(\"utf-8\", \"ignore\")\n            except Exception:\n                pass\n\n            return None\n\n        content = (\n            _read_text(getattr(self.mr, \"target_branch\", None))\n            or _read_text(getattr(self.mr, \"source_branch\", None))\n        )\n        if not content:\n            return {}\n\n        import configparser\n\n        parser = configparser.ConfigParser(\n            delimiters=(\"=\",),\n            interpolation=None,\n            inline_comment_prefixes=(\"#\", \";\"),\n            strict=False,\n        )\n        try:\n            parser.read_string(content)\n        except Exception:\n            return {}\n\n        out: dict[str, str] = {}\n        for section in parser.sections():\n            if not section.lower().startswith(\"submodule\"):\n                continue\n            path = parser.get(section, \"path\", fallback=None)\n            url = parser.get(section, \"url\", fallback=None)\n            if path 
and url:\n                path = path.strip().strip('\"').strip(\"'\")\n                url = url.strip().strip('\"').strip(\"'\")\n                out[path] = url\n        return out\n\n    def _url_to_project_path(self, url: str) -> str | None:\n        \"\"\"\n        Convert ssh/https GitLab URL to 'group/subgroup/repo' project path.\n        \"\"\"\n        try:\n            if url.startswith(\"git@\") and \":\" in url:\n                path = url.split(\":\", 1)[1]\n            else:\n                path = urllib.parse.urlparse(url).path.lstrip(\"/\")\n            if path.endswith(\".git\"):\n                path = path[:-4]\n            return path or None\n        except Exception:\n            return None\n\n    def _project_by_path(self, proj_path: str):\n        \"\"\"\n        Resolve a project by path with multiple strategies:\n        1) URL-encoded path_with_namespace\n        2) Raw path_with_namespace\n        3) Search fallback + exact match on path_with_namespace (case-insensitive)\n        Returns a project object or None.\n        \"\"\"\n        if not proj_path:\n            return None\n\n        # 1) Encoded\n        try:\n            enc = urllib.parse.quote_plus(proj_path)\n            return self.gl.projects.get(enc)\n        except Exception:\n            pass\n\n        # 2) Raw\n        try:\n            return self.gl.projects.get(proj_path)\n        except Exception:\n            pass\n\n        # 3) Search fallback\n        try:\n            name = proj_path.split(\"/\")[-1]\n            # membership=True so we don't leak other people's repos\n            matches = self.gl.projects.list(search=name, simple=True, membership=True, per_page=100)\n            # prefer exact path_with_namespace match (case-insensitive)\n            for p in matches:\n                pwn = getattr(p, \"path_with_namespace\", \"\")\n                if pwn.lower() == proj_path.lower():\n                    return self.gl.projects.get(p.id)\n            if 
matches:\n                get_logger().warning(f\"[submodule] no exact match for {proj_path} (skip)\")\n        except Exception:\n            pass\n\n        return None\n\n    def _compare_submodule(self, proj_path: str, old_sha: str, new_sha: str) -> list[dict]:\n        \"\"\"\n        Call repository_compare on submodule project; return list of diffs.\n        \"\"\"\n        key = (proj_path, old_sha, new_sha)\n        if key in self._submodule_cache:\n            return self._submodule_cache[key]\n        try:\n            proj = self._project_by_path(proj_path)\n            if proj is None:\n                get_logger().warning(f\"[submodule] resolve failed for {proj_path}\")\n                self._submodule_cache[key] = []\n                return []\n            cmp = proj.repository_compare(old_sha, new_sha)\n            if isinstance(cmp, dict):\n                diffs = cmp.get(\"diffs\", []) or []\n            else:\n                diffs = []\n            self._submodule_cache[key] = diffs\n            return diffs\n        except Exception as e:\n            get_logger().warning(f\"[submodule] compare failed for {proj_path} {old_sha}..{new_sha}: {e}\")\n            self._submodule_cache[key] = []\n            return []\n\n    def _expand_submodule_changes(self, changes: list[dict]) -> list[dict]:\n        \"\"\"\n        If enabled, expand 'Subproject commit' bumps into real file diffs from the submodule.\n        Soft-fail on any issue.\n        \"\"\"\n        try:\n            if not bool(get_settings().get(\"GITLAB.EXPAND_SUBMODULE_DIFFS\", False)):\n                return changes\n        except Exception:\n            return changes\n\n        gitmodules = self._get_gitmodules_map()\n        if not gitmodules:\n            return changes\n\n        out = list(changes)\n        for ch in changes:\n            patch = ch.get(\"diff\") or \"\"\n            if \"Subproject commit\" not in patch:\n                continue\n\n            # Extract 
old/new SHAs from the hunk\n            old_m = re.search(r\"^-Subproject commit ([0-9a-f]{7,40})\", patch, re.M)\n            new_m = re.search(r\"^\\+Subproject commit ([0-9a-f]{7,40})\", patch, re.M)\n            if not (old_m and new_m):\n                continue\n            old_sha, new_sha = old_m.group(1), new_m.group(1)\n\n            sub_path = ch.get(\"new_path\") or ch.get(\"old_path\") or \"\"\n            repo_url = gitmodules.get(sub_path)\n            if not repo_url:\n                get_logger().warning(f\"[submodule] no url for '{sub_path}' in .gitmodules (skip)\")\n                continue\n\n            proj_path = self._url_to_project_path(repo_url)\n            if not proj_path:\n                get_logger().warning(f\"[submodule] cannot parse project path from url '{repo_url}' (skip)\")\n                continue\n\n            get_logger().info(f\"[submodule] {sub_path} url={repo_url} -> proj_path={proj_path}\")\n            sub_diffs = self._compare_submodule(proj_path, old_sha, new_sha)\n            for sd in sub_diffs:\n                sd_diff = sd.get(\"diff\") or \"\"\n                sd_old = sd.get(\"old_path\") or sd.get(\"a_path\") or \"\"\n                sd_new = sd.get(\"new_path\") or sd.get(\"b_path\") or sd_old\n                out.append({\n                    \"old_path\": f\"{sub_path}/{sd_old}\" if sd_old else sub_path,\n                    \"new_path\": f\"{sub_path}/{sd_new}\" if sd_new else sub_path,\n                    \"diff\": sd_diff,\n                    \"new_file\": sd.get(\"new_file\", False),\n                    \"deleted_file\": sd.get(\"deleted_file\", False),\n                    \"renamed_file\": sd.get(\"renamed_file\", False),\n                })\n        return out\n\n    def is_supported(self, capability: str) -> bool:\n        if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments',\n            'publish_file_comments']: # gfm_markdown is supported in gitlab !\n     
       return False\n        return True\n\n    def _get_project_path_from_pr_or_issue_url(self, pr_or_issue_url: str) -> str:\n        repo_project_path = None\n        if 'issues' in pr_or_issue_url:\n            #replace 'issues' with 'merge_requests', since gitlab provider does not support issue urls, just to get the git repo url:\n            pr_or_issue_url = pr_or_issue_url.replace('issues', 'merge_requests')\n        if 'merge_requests' in pr_or_issue_url:\n            repo_project_path, _ = self._parse_merge_request_url(pr_or_issue_url)\n        if not repo_project_path:\n            get_logger().error(f\"url is not a valid merge requests url: {pr_or_issue_url}\")\n            return \"\"\n        return repo_project_path\n\n    def get_git_repo_url(self, issues_or_pr_url: str) -> str:\n        provider_url = issues_or_pr_url\n        repo_path = self._get_project_path_from_pr_or_issue_url(provider_url)\n        if not repo_path or repo_path not in issues_or_pr_url:\n            get_logger().error(f\"Unable to retrieve project path from url: {issues_or_pr_url}\")\n            return \"\"\n        return f\"{issues_or_pr_url.split(repo_path)[0]}{repo_path}.git\"\n\n    # Given a git repo url, return prefix and suffix of the provider in order to view a given file belonging to that repo.\n    # Example: https://gitlab.com/codiumai/pr-agent.git and branch: t1 -> prefix: \"https://gitlab.com/codiumai/pr-agent/-/blob/t1\", suffix: \"?ref_type=heads\"\n    # In case git url is not provided, provider will use PR context (which includes branch) to determine the prefix and suffix.\n    def get_canonical_url_parts(self, repo_git_url:str=None, desired_branch:str=None) -> Tuple[str, str]:\n        repo_path = \"\"\n        if not repo_git_url and not self.pr_url:\n            get_logger().error(\"Cannot get canonical URL parts: missing either context PR URL or a repo GIT URL\")\n            return (\"\", \"\")\n        if not repo_git_url: #Use PR url as context\n      
      repo_path = self._get_project_path_from_pr_or_issue_url(self.pr_url)\n            try:\n                desired_branch = self.gl.projects.get(self.id_project).default_branch\n            except Exception as e:\n                get_logger().exception(f\"Cannot get PR: {self.pr_url} default branch. Tried project ID: {self.id_project}\")\n                return (\"\", \"\")\n        else: #Use repo git url\n            repo_path = repo_git_url.split('.git')[0].split('.com/')[-1]\n        prefix = f\"{self.gitlab_url}/{repo_path}/-/blob/{desired_branch}\"\n        suffix = \"?ref_type=heads\"  # gitlab cloud adds this suffix. gitlab server does not, but it is harmless.\n        return (prefix, suffix)\n\n    @property\n    def pr(self):\n        '''The GitLab terminology is merge request (MR) instead of pull request (PR)'''\n        return self.mr\n\n    def _set_merge_request(self, merge_request_url: str):\n        self.id_project, self.id_mr = self._parse_merge_request_url(merge_request_url)\n        self.mr = self._get_merge_request()\n        try:\n            self.last_diff = self.mr.diffs.list(get_all=True)[-1]\n        except IndexError as e:\n            get_logger().error(f\"Could not get diff for merge request {self.id_mr}\")\n            raise DiffNotFoundError(f\"Could not get diff for merge request {self.id_mr}\") from e\n\n    def get_pr_file_content(self, file_path: str, branch: str) -> str:\n        try:\n            file_obj = self.gl.projects.get(self.id_project).files.get(file_path, branch)\n            content = file_obj.decode()\n            return decode_if_bytes(content)\n        except GitlabGetError:\n            # In case of file creation the method returns GitlabGetError (404 file not found).\n            # In this case we return an empty string for the diff.\n            return ''\n        except Exception as e:\n            get_logger().warning(f\"Error retrieving file {file_path} from branch {branch}: {e}\")\n            return 
''\n\n    def create_or_update_pr_file(self, file_path: str, branch: str, contents=\"\", message=\"\") -> None:\n        \"\"\"Create or update a file in the GitLab repository.\"\"\"\n        try:\n            project = self.gl.projects.get(self.id_project)\n\n            if not message:\n                action = \"Update\" if contents else \"Create\"\n                message = f\"{action} {file_path}\"\n\n            try:\n                existing_file = project.files.get(file_path, branch)\n                existing_file.content = contents\n                existing_file.save(branch=branch, commit_message=message)\n                get_logger().debug(f\"Updated file {file_path} in branch {branch}\")\n            except GitlabGetError:\n                project.files.create({\n                    'file_path': file_path,\n                    'branch': branch,\n                    'content': contents,\n                    'commit_message': message\n                })\n                get_logger().debug(f\"Created file {file_path} in branch {branch}\")\n        except GitlabAuthenticationError as e:\n            get_logger().error(f\"Authentication failed while creating/updating file {file_path} in branch {branch}: {e}\")\n            raise\n        except (GitlabCreateError, GitlabUpdateError) as e:\n            get_logger().error(f\"Permission denied or validation error for file {file_path} in branch {branch}: {e}\")\n            raise\n        except Exception as e:\n            get_logger().exception(f\"Unexpected error creating/updating file {file_path} in branch {branch}: {e}\")\n            raise\n\n    def get_diff_files(self) -> list[FilePatchInfo]:\n        \"\"\"\n        Retrieves the list of files that have been modified, added, deleted, or renamed in a pull request in GitLab,\n        along with their content and patch information.\n\n        Returns:\n            diff_files (List[FilePatchInfo]): List of FilePatchInfo objects representing the modified, 
added, deleted,\n            or renamed files in the merge request.\n        \"\"\"\n\n        if self.diff_files:\n            return self.diff_files\n\n        # filter files using [ignore] patterns\n        raw_changes = self.mr.changes().get('changes', [])\n        raw_changes = self._expand_submodule_changes(raw_changes)\n        diffs_original = raw_changes\n        diffs = filter_ignored(diffs_original, 'gitlab')\n        if diffs != diffs_original:\n            try:\n                names_original = [diff['new_path'] for diff in diffs_original]\n                names_filtered = [diff['new_path'] for diff in diffs]\n                get_logger().info(f\"Filtered out [ignore] files for merge request {self.id_mr}\", extra={\n                    'original_files': names_original,\n                    'filtered_files': names_filtered\n                })\n            except Exception as e:\n                pass\n\n        diff_files = []\n        invalid_files_names = []\n        counter_valid = 0\n        for diff in diffs:\n            if not is_valid_file(diff['new_path']):\n                invalid_files_names.append(diff['new_path'])\n                continue\n\n            # allow only a limited number of files to be fully loaded. 
We can manage the rest with diffs only\n            counter_valid += 1\n            if counter_valid < MAX_FILES_ALLOWED_FULL or not diff['diff']:\n                original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha'])\n                new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha'])\n            else:\n                if counter_valid == MAX_FILES_ALLOWED_FULL:\n                    get_logger().info(f\"Too many files in PR, will avoid loading full content for rest of files\")\n                original_file_content_str = ''\n                new_file_content_str = ''\n\n            # Ensure content is properly decoded\n            original_file_content_str = decode_if_bytes(original_file_content_str)\n            new_file_content_str = decode_if_bytes(new_file_content_str)\n\n            edit_type = EDIT_TYPE.MODIFIED\n            if diff['new_file']:\n                edit_type = EDIT_TYPE.ADDED\n            elif diff['deleted_file']:\n                edit_type = EDIT_TYPE.DELETED\n            elif diff['renamed_file']:\n                edit_type = EDIT_TYPE.RENAMED\n\n            filename = diff['new_path']\n            patch = diff['diff']\n            if not patch:\n                patch = load_large_diff(filename, new_file_content_str, original_file_content_str)\n\n\n            # count number of lines added and removed\n            patch_lines = patch.splitlines(keepends=True)\n            num_plus_lines = len([line for line in patch_lines if line.startswith('+')])\n            num_minus_lines = len([line for line in patch_lines if line.startswith('-')])\n            diff_files.append(\n                FilePatchInfo(original_file_content_str, new_file_content_str,\n                              patch=patch,\n                              filename=filename,\n                              edit_type=edit_type,\n                              old_filename=None if 
diff['old_path'] == diff['new_path'] else diff['old_path'],\n                              num_plus_lines=num_plus_lines,\n                              num_minus_lines=num_minus_lines, ))\n        if invalid_files_names:\n            get_logger().info(f\"Filtered out files with invalid extensions: {invalid_files_names}\")\n\n        self.diff_files = diff_files\n        return diff_files\n\n    def get_files(self) -> list:\n        if not self.git_files:\n            raw_changes = self.mr.changes().get('changes', [])\n            raw_changes = self._expand_submodule_changes(raw_changes)\n            self.git_files = [c.get('new_path') for c in raw_changes if c.get('new_path')]\n        return self.git_files\n\n    def publish_description(self, pr_title: str, pr_body: str):\n        try:\n            self.mr.title = pr_title\n            self.mr.description = pr_body\n            self.mr.save()\n        except Exception as e:\n            get_logger().exception(f\"Could not update merge request {self.id_mr} description: {e}\")\n\n    def get_latest_commit_url(self):\n        try:\n            return self.mr.commits().next().web_url\n        except StopIteration: # no commits\n            return \"\"\n        except Exception as e:\n            get_logger().exception(f\"Could not get latest commit URL: {e}\")\n            return \"\"\n\n    def get_comment_url(self, comment):\n        return f\"{self.mr.web_url}#note_{comment.id}\"\n\n    def publish_persistent_comment(self, pr_comment: str,\n                                   initial_header: str,\n                                   update_header: bool = True,\n                                   name='review',\n                                   final_update_message=True):\n        self.publish_persistent_comment_full(pr_comment, initial_header, update_header, name, final_update_message)\n\n    def publish_comment(self, mr_comment: str, is_temporary: bool = False):\n        if is_temporary and not 
get_settings().config.publish_output_progress:\n            get_logger().debug(f\"Skipping publish_comment for temporary comment: {mr_comment}\")\n            return None\n        mr_comment = self.limit_output_characters(mr_comment, self.max_comment_chars)\n        comment = self.mr.notes.create({'body': mr_comment})\n        if is_temporary:\n            self.temp_comments.append(comment)\n        return comment\n\n    def edit_comment(self, comment, body: str):\n        body = self.limit_output_characters(body, self.max_comment_chars)\n        self.mr.notes.update(comment.id,{'body': body} )\n\n    def edit_comment_from_comment_id(self, comment_id: int, body: str):\n        body = self.limit_output_characters(body, self.max_comment_chars)\n        comment = self.mr.notes.get(comment_id)\n        comment.body = body\n        comment.save()\n\n    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):\n        body = self.limit_output_characters(body, self.max_comment_chars)\n        discussion = self.mr.discussions.get(comment_id)\n        discussion.notes.create({'body': body})\n\n    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):\n        body = self.limit_output_characters(body, self.max_comment_chars)\n        edit_type, found, source_line_no, target_file, target_line_no = self.search_line(relevant_file,\n                                                                                         relevant_line_in_file)\n        self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,\n                                 target_file, target_line_no, original_suggestion)\n\n    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, absolute_position: int = None):\n        raise NotImplementedError(\"Gitlab provider does not support creating inline comments yet\")\n\n    def 
create_inline_comments(self, comments: list[dict]):\n        raise NotImplementedError(\"Gitlab provider does not support publishing inline comments yet\")\n\n    def get_comment_body_from_comment_id(self, comment_id: int):\n        comment = self.mr.notes.get(comment_id).body\n        return comment\n\n    def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_file: str,\n                            relevant_line_in_file: str,\n                            source_line_no: int, target_file: str, target_line_no: int,\n                            original_suggestion=None) -> None:\n        if not found:\n            get_logger().info(f\"Could not find position for {relevant_file} {relevant_line_in_file}\")\n        else:\n            # in order to have exact sha's we have to find correct diff for this change\n            diff = self.get_relevant_diff(relevant_file, relevant_line_in_file)\n            if diff is None:\n                get_logger().error(f\"Could not get diff for merge request {self.id_mr}\")\n                raise DiffNotFoundError(f\"Could not get diff for merge request {self.id_mr}\")\n            pos_obj = {'position_type': 'text',\n                       'new_path': target_file.filename,\n                       'old_path': target_file.old_filename if target_file.old_filename else target_file.filename,\n                       'base_sha': diff.base_commit_sha, 'start_sha': diff.start_commit_sha, 'head_sha': diff.head_commit_sha}\n            if edit_type == 'deletion':\n                pos_obj['old_line'] = source_line_no - 1\n            elif edit_type == 'addition':\n                pos_obj['new_line'] = target_line_no - 1\n            else:\n                pos_obj['new_line'] = target_line_no - 1\n                pos_obj['old_line'] = source_line_no - 1\n            get_logger().debug(f\"Creating comment in MR {self.id_mr} with body {body} and position {pos_obj}\")\n            try:\n                
self.mr.discussions.create({'body': body, 'position': pos_obj})\n            except Exception as e:\n                try:\n                    # fallback - create a general note on the file in the MR\n                    if 'suggestion_orig_location' in original_suggestion:\n                        line_start = original_suggestion['suggestion_orig_location']['start_line']\n                        line_end = original_suggestion['suggestion_orig_location']['end_line']\n                        old_code_snippet = original_suggestion['prev_code_snippet']\n                        new_code_snippet = original_suggestion['new_code_snippet']\n                        content = original_suggestion['suggestion_summary']\n                        label = original_suggestion['category']\n                        if 'score' in original_suggestion:\n                            score = original_suggestion['score']\n                        else:\n                            score = 7\n                    else:\n                        line_start = original_suggestion['relevant_lines_start']\n                        line_end = original_suggestion['relevant_lines_end']\n                        old_code_snippet = original_suggestion['existing_code']\n                        new_code_snippet = original_suggestion['improved_code']\n                        content = original_suggestion['suggestion_content']\n                        label = original_suggestion['label']\n                        score = original_suggestion.get('score', 7)\n\n                    if hasattr(self, 'main_language'):\n                        language = self.main_language\n                    else:\n                        language = ''\n                    link = self.get_line_link(relevant_file, line_start, line_end)\n                    body_fallback =f\"**Suggestion:** {content} [{label}, importance: {score}]\\n\\n\"\n                    body_fallback +=f\"\\n\\n<details><summary>[{target_file.filename} 
[{line_start}-{line_end}]]({link}):</summary>\\n\\n\"\n                    body_fallback += f\"\\n\\n___\\n\\n`(Cannot implement directly - GitLab API allows committable suggestions strictly on MR diff lines)`\"\n                    body_fallback+=\"</details>\\n\\n\"\n                    diff_patch = difflib.unified_diff(old_code_snippet.split('\\n'),\n                                                new_code_snippet.split('\\n'), n=999)\n                    patch_orig = \"\\n\".join(diff_patch)\n                    patch = \"\\n\".join(patch_orig.splitlines()[5:]).strip('\\n')\n                    diff_code = f\"\\n\\n```diff\\n{patch.rstrip()}\\n```\"\n                    body_fallback += diff_code\n\n                    # Create a general note on the file in the MR\n                    self.mr.notes.create({\n                        'body': body_fallback,\n                        'position': {\n                            'base_sha': diff.base_commit_sha,\n                            'start_sha': diff.start_commit_sha,\n                            'head_sha': diff.head_commit_sha,\n                            'position_type': 'text',\n                            'file_path': f'{target_file.filename}',\n                        }\n                    })\n                    get_logger().debug(f\"Created fallback comment in MR {self.id_mr} with position {pos_obj}\")\n\n                    # get_logger().debug(\n                    #     f\"Failed to create comment in MR {self.id_mr} with position {pos_obj} (probably not a '+' line)\")\n                except Exception as e:\n                    get_logger().exception(f\"Failed to create comment in MR {self.id_mr}\")\n\n    def get_relevant_diff(self, relevant_file: str, relevant_line_in_file: str) -> Optional[dict]:\n        _changes = self.mr.changes()  # dict\n        _changes['changes'] = self._expand_submodule_changes(_changes.get('changes', []))\n        changes = _changes\n        if not changes:\n            
get_logger().error('No changes found for the merge request.')\n            return None\n        all_diffs = self.mr.diffs.list(get_all=True)\n        if not all_diffs:\n            get_logger().error('No diffs found for the merge request.')\n            return None\n        for diff in all_diffs:\n            for change in changes['changes']:\n                if change['new_path'] == relevant_file and relevant_line_in_file in change['diff']:\n                    return diff\n            get_logger().debug(\n                f'No relevant diff found for {relevant_file} {relevant_line_in_file}. Falling back to last diff.')\n        return self.last_diff  # fallback to last_diff if no relevant diff is found\n\n    def publish_code_suggestions(self, code_suggestions: list) -> bool:\n        for suggestion in code_suggestions:\n            try:\n                if suggestion and 'original_suggestion' in suggestion:\n                    original_suggestion = suggestion['original_suggestion']\n                else:\n                    original_suggestion = suggestion\n                body = suggestion['body']\n                relevant_file = suggestion['relevant_file']\n                relevant_lines_start = suggestion['relevant_lines_start']\n                relevant_lines_end = suggestion['relevant_lines_end']\n\n                diff_files = self.get_diff_files()\n                target_file = None\n                for file in diff_files:\n                    if file.filename == relevant_file:\n                        if file.filename == relevant_file:\n                            target_file = file\n                            break\n                range = relevant_lines_end - relevant_lines_start # no need to add 1\n                body = body.replace('```suggestion', f'```suggestion:-0+{range}')\n                lines = target_file.head_file.splitlines()\n                relevant_line_in_file = lines[relevant_lines_start - 1]\n\n                # edit_type, found, 
source_line_no, target_file, target_line_no = self.find_in_file(target_file,\n                #                                                                            relevant_line_in_file)\n                # for code suggestions, we want to edit the new code\n                source_line_no = -1\n                target_line_no = relevant_lines_start + 1\n                found = True\n                edit_type = 'addition'\n\n                self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,\n                                         target_file, target_line_no, original_suggestion)\n            except Exception as e:\n                get_logger().exception(f\"Could not publish code suggestion:\\nsuggestion: {suggestion}\\nerror: {e}\")\n\n        # note that we publish suggestions one-by-one. so, if one fails, the rest will still be published\n        return True\n\n    def publish_file_comments(self, file_comments: list) -> bool:\n        pass\n\n    def search_line(self, relevant_file, relevant_line_in_file):\n        target_file = None\n\n        edit_type = self.get_edit_type(relevant_line_in_file)\n        for file in self.get_diff_files():\n            if file.filename == relevant_file:\n                edit_type, found, source_line_no, target_file, target_line_no = self.find_in_file(file,\n                                                                                                  relevant_line_in_file)\n        return edit_type, found, source_line_no, target_file, target_line_no\n\n    def find_in_file(self, file, relevant_line_in_file):\n        edit_type = 'context'\n        source_line_no = 0\n        target_line_no = 0\n        found = False\n        target_file = file\n        patch = file.patch\n        patch_lines = patch.splitlines()\n        for line in patch_lines:\n            if line.startswith('@@'):\n                match = self.RE_HUNK_HEADER.match(line)\n                if not 
match:\n                    continue\n                start_old, size_old, start_new, size_new, _ = match.groups()\n                source_line_no = int(start_old)\n                target_line_no = int(start_new)\n                continue\n            if line.startswith('-'):\n                source_line_no += 1\n            elif line.startswith('+'):\n                target_line_no += 1\n            elif line.startswith(' '):\n                source_line_no += 1\n                target_line_no += 1\n            if relevant_line_in_file in line:\n                found = True\n                edit_type = self.get_edit_type(line)\n                break\n            elif relevant_line_in_file[0] == '+' and relevant_line_in_file[1:].lstrip() in line:\n                # The model often adds a '+' to the beginning of the relevant_line_in_file even if originally\n                # it's a context line\n                found = True\n                edit_type = self.get_edit_type(line)\n                break\n        return edit_type, found, source_line_no, target_file, target_line_no\n\n    def get_edit_type(self, relevant_line_in_file):\n        edit_type = 'context'\n        if relevant_line_in_file[0] == '-':\n            edit_type = 'deletion'\n        elif relevant_line_in_file[0] == '+':\n            edit_type = 'addition'\n        return edit_type\n\n    def remove_initial_comment(self):\n        try:\n            for comment in self.temp_comments:\n                self.remove_comment(comment)\n        except Exception as e:\n            get_logger().exception(f\"Failed to remove temp comments, error: {e}\")\n\n    def remove_comment(self, comment):\n        try:\n            comment.delete()\n        except Exception as e:\n            get_logger().exception(f\"Failed to remove comment, error: {e}\")\n\n    def get_title(self):\n        return self.mr.title\n\n    def get_languages(self):\n        languages = self.gl.projects.get(self.id_project).languages()\n       
 return languages\n\n    def get_pr_branch(self):\n        return self.mr.source_branch\n\n    def get_pr_owner_id(self) -> str | None:\n        if not self.gitlab_url or 'gitlab.com' in self.gitlab_url:\n            if not self.id_project:\n                return None\n            return self.id_project.split('/')[0]\n        # extract host name\n        host = urlparse(self.gitlab_url).hostname\n        return host\n\n    def get_pr_description_full(self):\n        return self.mr.description\n\n    def get_issue_comments(self):\n        return self.mr.notes.list(get_all=True)[::-1]\n\n    def get_repo_settings(self):\n        try:\n            main_branch = self.gl.projects.get(self.id_project).default_branch\n            contents = self.gl.projects.get(self.id_project).files.get(file_path='.pr_agent.toml', ref=main_branch).decode()\n            return contents\n        except Exception:\n            return \"\"\n\n    def get_workspace_name(self):\n        return self.id_project.split('/')[0]\n\n    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:\n        if disable_eyes:\n            return None\n        try:\n            if not self.id_mr:\n                get_logger().warning(\"Cannot add eyes reaction: merge request ID is not set.\")\n                return None\n\n            mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr)\n            comment = mr.notes.get(issue_comment_id)\n\n            if not comment:\n                get_logger().warning(f\"Comment with ID {issue_comment_id} not found in merge request {self.id_mr}.\")\n                return None\n\n            award_emoji = comment.awardemojis.create({\n                'name': 'eyes'\n            })\n            return award_emoji.id\n        except Exception as e:\n            get_logger().warning(f\"Failed to add eyes reaction, error: {e}\")\n            return None\n\n    def remove_reaction(self, issue_comment_id: int, 
reaction_id: str) -> bool:\n        try:\n            if not self.id_mr:\n                get_logger().warning(\"Cannot remove reaction: merge request ID is not set.\")\n                return False\n\n            mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr)\n            comment = mr.notes.get(issue_comment_id)\n\n            if not comment:\n                get_logger().warning(f\"Comment with ID {issue_comment_id} not found in merge request {self.id_mr}.\")\n                return False\n\n            reactions = comment.awardemojis.list()\n            for reaction in reactions:\n                if reaction.name == reaction_id:\n                    reaction.delete()\n                    return True\n\n            get_logger().warning(f\"Reaction '{reaction_id}' not found in comment {issue_comment_id}.\")\n            return False\n        except Exception as e:\n            get_logger().warning(f\"Failed to remove reaction, error: {e}\")\n            return False\n\n    def _parse_merge_request_url(self, merge_request_url: str) -> Tuple[str, int]:\n        parsed_url = urlparse(merge_request_url)\n\n        path_parts = parsed_url.path.strip('/').split('/')\n        if 'merge_requests' not in path_parts:\n            raise ValueError(\"The provided URL does not appear to be a GitLab merge request URL\")\n\n        mr_index = path_parts.index('merge_requests')\n        # Ensure there is an ID after 'merge_requests'\n        if len(path_parts) <= mr_index + 1:\n            raise ValueError(\"The provided URL does not contain a merge request ID\")\n\n        try:\n            mr_id = int(path_parts[mr_index + 1])\n        except ValueError as e:\n            raise ValueError(\"Unable to convert merge request ID to integer\") from e\n\n        # Handle special delimiter (-)\n        project_path = \"/\".join(path_parts[:mr_index])\n        if project_path.endswith('/-'):\n            project_path = project_path[:-2]\n\n        # Return the 
path before 'merge_requests' and the ID\n        return project_path, mr_id\n\n    def _get_merge_request(self):\n        mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr)\n        return mr\n\n    def get_user_id(self):\n        return None\n\n    def publish_labels(self, pr_types):\n        try:\n            self.mr.labels = list(set(pr_types))\n            self.mr.save()\n        except Exception as e:\n            get_logger().warning(f\"Failed to publish labels, error: {e}\")\n\n    def publish_inline_comments(self, comments: list[dict]):\n        pass\n\n    def get_pr_labels(self, update=False):\n        return self.mr.labels\n\n    def get_repo_labels(self):\n        return self.gl.projects.get(self.id_project).labels.list()\n\n    def get_commit_messages(self):\n        \"\"\"\n        Retrieves the commit messages of a pull request.\n\n        Returns:\n            str: A string containing the commit messages of the pull request.\n        \"\"\"\n        max_tokens = get_settings().get(\"CONFIG.MAX_COMMITS_TOKENS\", None)\n        try:\n            commit_messages_list = [commit['message'] for commit in self.mr.commits()._list]\n            commit_messages_str = \"\\n\".join([f\"{i + 1}. 
{message}\" for i, message in enumerate(commit_messages_list)])\n        except Exception:\n            commit_messages_str = \"\"\n        if max_tokens:\n            commit_messages_str = clip_tokens(commit_messages_str, max_tokens)\n        return commit_messages_str\n\n    def get_pr_id(self):\n        try:\n            pr_id = self.mr.web_url\n            return pr_id\n        except:\n            return \"\"\n\n    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:\n        if relevant_line_start == -1:\n            link = f\"{self.gl.url}/{self.id_project}/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads\"\n        elif relevant_line_end:\n            link = f\"{self.gl.url}/{self.id_project}/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{relevant_line_start}-{relevant_line_end}\"\n        else:\n            link = f\"{self.gl.url}/{self.id_project}/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{relevant_line_start}\"\n        return link\n\n\n    def generate_link_to_relevant_line_number(self, suggestion) -> str:\n        try:\n            relevant_file = suggestion['relevant_file'].strip('`').strip(\"'\").rstrip()\n            relevant_line_str = suggestion['relevant_line'].rstrip()\n            if not relevant_line_str:\n                return \"\"\n\n            position, absolute_position = find_line_number_of_relevant_line_in_file \\\n                (self.diff_files, relevant_file, relevant_line_str)\n\n            if absolute_position != -1:\n                # link to right file only\n                link = f\"{self.gl.url}/{self.id_project}/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{absolute_position}\"\n\n                # # link to diff\n                # sha_file = hashlib.sha1(relevant_file.encode('utf-8')).hexdigest()\n                # link = 
f\"{self.pr.web_url}/diffs#{sha_file}_{absolute_position}_{absolute_position}\"\n                return link\n        except Exception as e:\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Failed adding line link, error: {e}\")\n\n        return \"\"\n    #Clone related\n    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:\n        if \"gitlab.\" not in repo_url_to_clone:\n            get_logger().error(f\"Repo URL: {repo_url_to_clone} is not a valid gitlab URL.\")\n            return None\n        (scheme, base_url) = repo_url_to_clone.split(\"gitlab.\")\n        access_token = getattr(self.gl, 'oauth_token', None) or getattr(self.gl, 'private_token', None)\n        if not all([scheme, access_token, base_url]):\n            get_logger().error(f\"Either no access token found, or repo URL: {repo_url_to_clone} \"\n                               f\"is missing prefix: {scheme} and/or base URL: {base_url}.\")\n            return None\n\n        #Note that the \"\"official\"\" method found here:\n        # https://docs.gitlab.com/user/profile/personal_access_tokens/#clone-repository-using-personal-access-token\n        # requires a username, which may not be applicable.\n        # The following solution is taken from: https://stackoverflow.com/questions/25409700/using-gitlab-token-to-clone-without-authentication/35003812#35003812\n        # For example: For repo url: https://gitlab.codium-inc.com/qodo/autoscraper.git\n        # Then to clone one will issue: 'git clone https://oauth2:<access token>@gitlab.codium-inc.com/qodo/autoscraper.git'\n\n        clone_url = f\"{scheme}oauth2:{access_token}@gitlab.{base_url}\"\n        return clone_url\n"
  },
  {
    "path": "pr_agent/git_providers/local_git_provider.py",
    "content": "from collections import Counter\nfrom pathlib import Path\nfrom typing import List\n\nfrom git import Repo\n\nfrom pr_agent.algo.types import EDIT_TYPE, FilePatchInfo\nfrom pr_agent.config_loader import _find_repository_root, get_settings\nfrom pr_agent.git_providers.git_provider import GitProvider\nfrom pr_agent.log import get_logger\n\n\nclass PullRequestMimic:\n    \"\"\"\n    This class mimics the PullRequest class from the PyGithub library for the LocalGitProvider.\n    \"\"\"\n\n    def __init__(self, title: str, diff_files: List[FilePatchInfo]):\n        self.title = title\n        self.diff_files = diff_files\n\n\nclass LocalGitProvider(GitProvider):\n    \"\"\"\n    This class implements the GitProvider interface for local git repositories.\n    It mimics the PR functionality of the GitProvider interface,\n    but does not require a hosted git repository.\n    Instead of providing a PR url, the user provides a local branch path to generate a diff-patch.\n    For the MVP it only supports the /review and /describe capabilities.\n    \"\"\"\n\n    def __init__(self, target_branch_name, incremental=False):\n        self.repo_path = _find_repository_root()\n        if self.repo_path is None:\n            raise ValueError('Could not find repository root')\n        self.repo = Repo(self.repo_path)\n        self.head_branch_name = self.repo.head.ref.name\n        self.target_branch_name = target_branch_name\n        self._prepare_repo()\n        self.diff_files = None\n        self.pr = PullRequestMimic(self.get_pr_title(), self.get_diff_files())\n        self.description_path = get_settings().get('local.description_path') \\\n            if get_settings().get('local.description_path') is not None else self.repo_path / 'description.md'\n        self.review_path = get_settings().get('local.review_path') \\\n            if get_settings().get('local.review_path') is not None else self.repo_path / 'review.md'\n        # inline code comments are not 
supported for local git repositories\n        get_settings().pr_reviewer.inline_code_comments = False\n\n    def _prepare_repo(self):\n        \"\"\"\n        Prepare the repository for PR-mimic generation.\n        \"\"\"\n        get_logger().debug('Preparing repository for PR-mimic generation...')\n        if self.repo.is_dirty():\n            raise ValueError('The repository is not in a clean state. Please commit or stash pending changes.')\n        if self.target_branch_name not in self.repo.heads:\n            raise KeyError(f'Branch: {self.target_branch_name} does not exist')\n\n    def is_supported(self, capability: str) -> bool:\n        if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', 'get_labels',\n                          'gfm_markdown']:\n            return False\n        return True\n\n    def get_diff_files(self) -> list[FilePatchInfo]:\n        diffs = self.repo.head.commit.diff(\n            self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]),\n            create_patch=True,\n            R=True\n        )\n        diff_files = []\n        for diff_item in diffs:\n            if diff_item.a_blob is not None:\n                original_file_content_str = diff_item.a_blob.data_stream.read().decode('utf-8')\n            else:\n                original_file_content_str = \"\"  # empty file\n            if diff_item.b_blob is not None:\n                new_file_content_str = diff_item.b_blob.data_stream.read().decode('utf-8')\n            else:\n                new_file_content_str = \"\"  # empty file\n            edit_type = EDIT_TYPE.MODIFIED\n            if diff_item.new_file:\n                edit_type = EDIT_TYPE.ADDED\n            elif diff_item.deleted_file:\n                edit_type = EDIT_TYPE.DELETED\n            elif diff_item.renamed_file:\n                edit_type = EDIT_TYPE.RENAMED\n            diff_files.append(\n                
FilePatchInfo(original_file_content_str,\n                              new_file_content_str,\n                              diff_item.diff.decode('utf-8'),\n                              diff_item.b_path,\n                              edit_type=edit_type,\n                              old_filename=None if diff_item.a_path == diff_item.b_path else diff_item.a_path\n                              )\n            )\n        self.diff_files = diff_files\n        return diff_files\n\n    def get_files(self) -> List[str]:\n        \"\"\"\n        Returns a list of files with changes in the diff.\n        \"\"\"\n        diff_index = self.repo.head.commit.diff(\n            self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]),\n            R=True\n        )\n        # Get the list of changed files\n        diff_files = [item.a_path for item in diff_index]\n        return diff_files\n\n    def publish_description(self, pr_title: str, pr_body: str):\n        with open(self.description_path, \"w\") as file:\n            # Write the string to the file\n            file.write(pr_title + '\\n' + pr_body)\n\n    def publish_comment(self, pr_comment: str, is_temporary: bool = False):\n        with open(self.review_path, \"w\") as file:\n            # Write the string to the file\n            file.write(pr_comment)\n\n    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):\n        raise NotImplementedError('Publishing inline comments is not implemented for the local git provider')\n\n    def publish_inline_comments(self, comments: list[dict]):\n        raise NotImplementedError('Publishing inline comments is not implemented for the local git provider')\n\n    def publish_code_suggestion(self, body: str, relevant_file: str,\n                                relevant_lines_start: int, relevant_lines_end: int):\n        raise NotImplementedError('Publishing code suggestions is not 
implemented for the local git provider')\n\n    def publish_code_suggestions(self, code_suggestions: list) -> bool:\n        raise NotImplementedError('Publishing code suggestions is not implemented for the local git provider')\n\n    def publish_labels(self, labels):\n        pass  # Not applicable to the local git provider, but required by the interface\n\n    def remove_initial_comment(self):\n        pass  # Not applicable to the local git provider, but required by the interface\n\n    def remove_comment(self, comment):\n        pass  # Not applicable to the local git provider, but required by the interface\n\n    def add_eyes_reaction(self, comment):\n        pass  # Not applicable to the local git provider, but required by the interface\n\n    def get_commit_messages(self):\n        pass  # Not applicable to the local git provider, but required by the interface\n\n    def get_repo_settings(self):\n        pass  # Not applicable to the local git provider, but required by the interface\n\n    def remove_reaction(self, comment):\n        pass  # Not applicable to the local git provider, but required by the interface\n\n    def get_languages(self):\n        \"\"\"\n        Calculate percentage of languages in repository. 
Used for hunk prioritisation.\n        \"\"\"\n        # Get all files in repository\n        filepaths = [Path(item.path) for item in self.repo.tree().traverse() if item.type == 'blob']\n        # Identify language by file extension and count\n        lang_count = Counter(ext.lstrip('.') for filepath in filepaths for ext in [filepath.suffix.lower()])\n        # Convert counts to percentages\n        total_files = len(filepaths)\n        lang_percentage = {lang: count / total_files * 100 for lang, count in lang_count.items()}\n        return lang_percentage\n\n    def get_pr_branch(self):\n        return self.repo.head\n\n    def get_user_id(self):\n        return -1  # Not used anywhere for the local provider, but required by the interface\n\n    def get_pr_description_full(self):\n        commits_diff = list(self.repo.iter_commits(self.target_branch_name + '..HEAD'))\n        # Get the commit messages and concatenate\n        commit_messages = \" \".join([commit.message for commit in commits_diff])\n        # TODO Handle the description better - maybe use gpt-3.5 summarisation here?\n        return commit_messages[:200]  # Use max 200 characters\n\n    def get_pr_title(self):\n        \"\"\"\n        Substitutes the branch-name as the PR-mimic title.\n        \"\"\"\n        return self.head_branch_name\n\n    def get_issue_comments(self):\n        raise NotImplementedError('Getting issue comments is not implemented for the local git provider')\n\n    def get_pr_labels(self, update=False):\n        raise NotImplementedError('Getting labels is not implemented for the local git provider')\n"
  },
  {
    "path": "pr_agent/git_providers/utils.py",
    "content": "import copy\nimport os\nimport tempfile\nimport traceback\n\nfrom dynaconf import Dynaconf\nfrom starlette_context import context\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider_with_context\nfrom pr_agent.log import get_logger\n\n\ndef apply_repo_settings(pr_url):\n    os.environ[\"AUTO_CAST_FOR_DYNACONF\"] = \"false\"\n    git_provider = get_git_provider_with_context(pr_url)\n    if get_settings().config.use_repo_settings_file:\n        repo_settings_file = None\n        try:\n            try:\n                repo_settings = context.get(\"repo_settings\", None)\n            except Exception:\n                repo_settings = None\n                pass\n            if repo_settings is None:  # None is different from \"\", which is a valid value\n                repo_settings = git_provider.get_repo_settings()\n                try:\n                    context[\"repo_settings\"] = repo_settings\n                except Exception:\n                    pass\n\n            error_local = None\n            if repo_settings:\n                repo_settings_file = None\n                category = 'local'\n                try:\n                    fd, repo_settings_file = tempfile.mkstemp(suffix='.toml')\n                    os.write(fd, repo_settings)\n\n                    try:\n                        dynconf_kwargs = {'core_loaders': [],  # DISABLE default loaders, otherwise will load toml files more than once.\n                             'loaders': ['pr_agent.custom_merge_loader'],\n                             # Use a custom loader to merge sections, but overwrite their overlapping values. 
Don't involve ENV variables.\n                             'merge_enabled': True  # Merge multiple files; ensures [XYZ] sections only overwrite overlapping keys, not whole sections.\n                         }\n\n                        new_settings = Dynaconf(settings_files=[repo_settings_file],\n                                                # Disable all dynamic loading features\n                                                load_dotenv=False,  # Don't load .env files\n                                                envvar_prefix=False,  # Drop DYNACONF for env. variables\n                                                **dynconf_kwargs\n                                                )\n                    except TypeError as e:\n                        # Fallback for older Dynaconf versions that don't support these parameters\n                        get_logger().warning(\n                            \"Your Dynaconf version does not support disabled 'load_dotenv'/'merge_enabled' parameters. \"\n                            \"Loading repo settings without these security features. 
\"\n                            \"Please upgrade Dynaconf for better security.\",\n                            artifact={\"error\": e, \"traceback\": traceback.format_exc()})\n                        new_settings = Dynaconf(settings_files=[repo_settings_file])\n\n                    for section, contents in new_settings.as_dict().items():\n                        if not contents:\n                            # Skip excluded items, such as forbidden to load env.\n                            get_logger().debug(f\"Skipping a section: {section} which is not allowed\")\n                            continue\n                        section_dict = copy.deepcopy(get_settings().as_dict().get(section, {}))\n                        for key, value in contents.items():\n                            section_dict[key] = value\n                        get_settings().unset(section)\n                        get_settings().set(section, section_dict, merge=False)\n                    get_logger().info(f\"Applying repo settings:\\n{new_settings.as_dict()}\")\n                except Exception as e:\n                    get_logger().warning(f\"Failed to apply repo {category} settings, error: {str(e)}\")\n                    error_local = {'error': str(e), 'settings': repo_settings, 'category': category}\n\n                if error_local:\n                    handle_configurations_errors([error_local], git_provider)\n        except Exception as e:\n            get_logger().exception(\"Failed to apply repo settings\", e)\n        finally:\n            if repo_settings_file:\n                try:\n                    os.remove(repo_settings_file)\n                except Exception as e:\n                    get_logger().error(f\"Failed to remove temporary settings file {repo_settings_file}\", e)\n\n    # enable switching models with a short definition\n    if get_settings().config.model.lower() == 'claude-3-5-sonnet':\n        set_claude_model()\n\n\ndef 
handle_configurations_errors(config_errors, git_provider):\n    try:\n        if not any(config_errors):\n            return\n\n        for err in config_errors:\n            if err:\n                configuration_file_content = err['settings'].decode()\n                err_message = err['error']\n                config_type = err['category']\n                header = f\"❌ **PR-Agent failed to apply '{config_type}' repo settings**\"\n                body = f\"{header}\\n\\nThe configuration file needs to be a valid [TOML](https://qodo-merge-docs.qodo.ai/usage-guide/configuration_options/), please fix it.\\n\\n\"\n                body += f\"___\\n\\n**Error message:**\\n`{err_message}`\\n\\n\"\n                if git_provider.is_supported(\"gfm_markdown\"):\n                    body += f\"\\n\\n<details><summary>Configuration content:</summary>\\n\\n```toml\\n{configuration_file_content}\\n```\\n\\n</details>\"\n                else:\n                    body += f\"\\n\\n**Configuration content:**\\n\\n```toml\\n{configuration_file_content}\\n```\\n\\n\"\n                get_logger().warning(f\"Sending a 'configuration error' comment to the PR\", artifact={'body': body})\n                # git_provider.publish_comment(body)\n                if hasattr(git_provider, 'publish_persistent_comment'):\n                    git_provider.publish_persistent_comment(body,\n                                                            initial_header=header,\n                                                            update_header=False,\n                                                            final_update_message=False)\n                else:\n                    git_provider.publish_comment(body)\n    except Exception as e:\n        get_logger().exception(f\"Failed to handle configurations errors\", e)\n\n\ndef set_claude_model():\n    \"\"\"\n    set the claude-sonnet-3.5 model easily (even by users), just by stating: --config.model='claude-3-5-sonnet'\n    \"\"\"\n    
model_claude = \"bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0\"\n    get_settings().set('config.model', model_claude)\n    get_settings().set('config.model_weak', model_claude)\n    get_settings().set('config.fallback_models', [model_claude])\n"
  },
  {
    "path": "pr_agent/identity_providers/__init__.py",
    "content": "from pr_agent.config_loader import get_settings\nfrom pr_agent.identity_providers.default_identity_provider import \\\n    DefaultIdentityProvider\n\n_IDENTITY_PROVIDERS = {\n    'default': DefaultIdentityProvider\n}\n\n\ndef get_identity_provider():\n    identity_provider_id = get_settings().get(\"CONFIG.IDENTITY_PROVIDER\", \"default\")\n    if identity_provider_id not in _IDENTITY_PROVIDERS:\n        raise ValueError(f\"Unknown identity provider: {identity_provider_id}\")\n    return _IDENTITY_PROVIDERS[identity_provider_id]()\n"
  },
  {
    "path": "pr_agent/identity_providers/default_identity_provider.py",
    "content": "from pr_agent.identity_providers.identity_provider import (Eligibility,\n                                                           IdentityProvider)\n\n\nclass DefaultIdentityProvider(IdentityProvider):\n    def verify_eligibility(self, git_provider, git_provider_id, pr_url):\n        return Eligibility.ELIGIBLE\n\n    def inc_invocation_count(self, git_provider, git_provider_id):\n        pass\n"
  },
  {
    "path": "pr_agent/identity_providers/identity_provider.py",
    "content": "from abc import ABC, abstractmethod\nfrom enum import Enum\n\n\nclass Eligibility(Enum):\n    NOT_ELIGIBLE = 0\n    ELIGIBLE = 1\n    TRIAL = 2\n\n\nclass IdentityProvider(ABC):\n    @abstractmethod\n    def verify_eligibility(self, git_provider, git_provider_id, pr_url):\n        pass\n\n    @abstractmethod\n    def inc_invocation_count(self, git_provider, git_provider_id):\n        pass\n"
  },
  {
    "path": "pr_agent/log/__init__.py",
    "content": "import os\nos.environ[\"AUTO_CAST_FOR_DYNACONF\"] = \"false\"\nimport json\nimport logging\nimport sys\nfrom enum import Enum\n\nfrom loguru import logger\n\nfrom pr_agent.config_loader import get_settings\n\n\nclass LoggingFormat(str, Enum):\n    CONSOLE = \"CONSOLE\"\n    JSON = \"JSON\"\n\n\ndef json_format(record: dict) -> str:\n    return record[\"message\"]\n\n\ndef analytics_filter(record: dict) -> bool:\n    return record.get(\"extra\", {}).get(\"analytics\", False)\n\n\ndef inv_analytics_filter(record: dict) -> bool:\n    return not record.get(\"extra\", {}).get(\"analytics\", False)\n\n\ndef setup_logger(level: str = \"INFO\", fmt: LoggingFormat = LoggingFormat.CONSOLE):\n    level: int = logging.getLevelName(level.upper())\n    if type(level) is not int:\n        level = logging.INFO\n\n    if fmt == LoggingFormat.JSON and os.getenv(\"LOG_SANE\", \"0\").lower() == \"0\":  # better debugging github_app\n        logger.remove(None)\n        logger.add(\n            sys.stdout,\n            filter=inv_analytics_filter,\n            level=level,\n            format=\"{message}\",\n            colorize=False,\n            serialize=True,\n        )\n    elif fmt == LoggingFormat.CONSOLE: # does not print the 'extra' fields\n        logger.remove(None)\n        logger.add(sys.stdout, level=level, colorize=True, filter=inv_analytics_filter)\n\n    log_folder = get_settings().get(\"CONFIG.ANALYTICS_FOLDER\", \"\")\n    if log_folder:\n        pid = os.getpid()\n        log_file = os.path.join(log_folder, f\"pr-agent.{pid}.log\")\n        logger.add(\n            log_file,\n            filter=analytics_filter,\n            level=level,\n            format=\"{message}\",\n            colorize=False,\n            serialize=True,\n        )\n\n    return logger\n\n\ndef get_logger(*args, **kwargs):\n    return logger\n"
  },
  {
    "path": "pr_agent/secret_providers/__init__.py",
    "content": "from pr_agent.config_loader import get_settings\n\n\ndef get_secret_provider():\n    if not get_settings().get(\"CONFIG.SECRET_PROVIDER\"):\n        return None\n\n    provider_id = get_settings().config.secret_provider\n    if provider_id == 'google_cloud_storage':\n        try:\n            from pr_agent.secret_providers.google_cloud_storage_secret_provider import \\\n                GoogleCloudStorageSecretProvider\n            return GoogleCloudStorageSecretProvider()\n        except Exception as e:\n            raise ValueError(f\"Failed to initialize google_cloud_storage secret provider {provider_id}\") from e\n    elif provider_id == 'aws_secrets_manager':\n        try:\n            from pr_agent.secret_providers.aws_secrets_manager_provider import \\\n                AWSSecretsManagerProvider\n            return AWSSecretsManagerProvider()\n        except Exception as e:\n            raise ValueError(f\"Failed to initialize aws_secrets_manager secret provider {provider_id}\") from e\n    else:\n        raise ValueError(\"Unknown SECRET_PROVIDER\")\n"
  },
  {
    "path": "pr_agent/secret_providers/aws_secrets_manager_provider.py",
    "content": "import json\nimport boto3\nfrom botocore.exceptions import ClientError\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger\nfrom pr_agent.secret_providers.secret_provider import SecretProvider\n\n\nclass AWSSecretsManagerProvider(SecretProvider):\n    def __init__(self):\n        try:\n            region_name = get_settings().get(\"aws_secrets_manager.region_name\") or \\\n                         get_settings().get(\"aws.AWS_REGION_NAME\")\n            if region_name:\n                self.client = boto3.client('secretsmanager', region_name=region_name)\n            else:\n                self.client = boto3.client('secretsmanager')\n\n            self.secret_arn = get_settings().get(\"aws_secrets_manager.secret_arn\")\n            if not self.secret_arn:\n                raise ValueError(\"AWS Secrets Manager ARN is not configured\")\n        except Exception as e:\n            get_logger().error(f\"Failed to initialize AWS Secrets Manager Provider: {e}\")\n            raise e\n\n    def get_secret(self, secret_name: str) -> str:\n        \"\"\"\n        Retrieve individual secret by name (for webhook tokens)\n        \"\"\"\n        try:\n            response = self.client.get_secret_value(SecretId=secret_name)\n            return response['SecretString']\n        except Exception as e:\n            get_logger().warning(f\"Failed to get secret {secret_name} from AWS Secrets Manager: {e}\")\n            return \"\"\n\n    def get_all_secrets(self) -> dict:\n        \"\"\"\n        Retrieve all secrets for configuration override\n        \"\"\"\n        try:\n            response = self.client.get_secret_value(SecretId=self.secret_arn)\n            return json.loads(response['SecretString'])\n        except Exception as e:\n            get_logger().error(f\"Failed to get secrets from AWS Secrets Manager {self.secret_arn}: {e}\")\n            return {}\n\n    def store_secret(self, secret_name: str, secret_value: 
str):\n        try:\n            self.client.put_secret_value(\n                SecretId=secret_name,\n                SecretString=secret_value\n            )\n        except Exception as e:\n            get_logger().error(f\"Failed to store secret {secret_name} in AWS Secrets Manager: {e}\")\n            raise e \n"
  },
  {
    "path": "pr_agent/secret_providers/google_cloud_storage_secret_provider.py",
    "content": "import ujson\nfrom google.cloud import storage\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger\nfrom pr_agent.secret_providers.secret_provider import SecretProvider\n\n\nclass GoogleCloudStorageSecretProvider(SecretProvider):\n    def __init__(self):\n        try:\n            self.client = storage.Client.from_service_account_info(ujson.loads(get_settings().google_cloud_storage.\n                                                                               service_account))\n            self.bucket_name = get_settings().google_cloud_storage.bucket_name\n            self.bucket = self.client.bucket(self.bucket_name)\n        except Exception as e:\n            get_logger().error(f\"Failed to initialize Google Cloud Storage Secret Provider: {e}\")\n            raise e\n\n    def get_secret(self, secret_name: str) -> str:\n        try:\n            blob = self.bucket.blob(secret_name)\n            return blob.download_as_string()\n        except Exception as e:\n            get_logger().warning(f\"Failed to get secret {secret_name} from Google Cloud Storage: {e}\")\n            return \"\"\n\n    def store_secret(self, secret_name: str, secret_value: str):\n        try:\n            blob = self.bucket.blob(secret_name)\n            blob.upload_from_string(secret_value)\n        except Exception as e:\n            get_logger().error(f\"Failed to store secret {secret_name} in Google Cloud Storage: {e}\")\n            raise e\n"
  },
  {
    "path": "pr_agent/secret_providers/secret_provider.py",
    "content": "from abc import ABC, abstractmethod\n\n\nclass SecretProvider(ABC):\n\n    @abstractmethod\n    def get_secret(self, secret_name: str) -> str:\n        pass\n\n    @abstractmethod\n    def store_secret(self, secret_name: str, secret_value: str):\n        pass\n"
  },
  {
    "path": "pr_agent/servers/__init__.py",
    "content": ""
  },
  {
    "path": "pr_agent/servers/atlassian-connect-qodo-merge.json",
    "content": "{\n  \"name\": \"Qodo Merge\",\n  \"description\": \"Qodo Merge\",\n  \"key\": \"app_key\",\n  \"vendor\": {\n    \"name\": \"Qodo\",\n    \"url\": \"https://qodo.ai\"\n  },\n  \"authentication\": {\n    \"type\": \"jwt\"\n  },\n  \"baseUrl\": \"base_url\",\n  \"lifecycle\": {\n    \"installed\": \"/installed\",\n    \"uninstalled\": \"/uninstalled\"\n  },\n  \"scopes\": [\n    \"account\",\n    \"repository:write\",\n    \"pullrequest:write\",\n    \"wiki\"\n  ],\n  \"contexts\": [\n    \"account\"\n  ],\n  \"modules\": {\n    \"webhooks\": [\n      {\n        \"event\": \"*\",\n        \"url\": \"/webhook\"\n      }\n    ]\n  },\n  \"links\": {\n    \"privacy\": \"https://qodo.ai/privacy-policy\",\n    \"terms\": \"https://qodo.ai/terms\"\n  }\n}\n"
  },
  {
    "path": "pr_agent/servers/atlassian-connect.json",
    "content": "{\n  \"name\": \"CodiumAI PR-Agent\",\n  \"description\": \"CodiumAI PR-Agent\",\n  \"key\": \"app_key\",\n  \"vendor\": {\n    \"name\": \"CodiumAI\",\n    \"url\": \"https://codium.ai\"\n  },\n  \"authentication\": {\n    \"type\": \"jwt\"\n  },\n  \"baseUrl\": \"base_url\",\n  \"lifecycle\": {\n    \"installed\": \"/installed\",\n    \"uninstalled\": \"/uninstalled\"\n  },\n  \"scopes\": [\n    \"account\",\n    \"repository:write\",\n    \"pullrequest:write\",\n    \"wiki\"\n  ],\n  \"contexts\": [\n    \"account\"\n  ],\n  \"modules\": {\n    \"webhooks\": [\n      {\n        \"event\": \"*\",\n        \"url\": \"/webhook\"\n      }\n    ]\n  },\n  \"links\": {\n    \"privacy\": \"https://qodo.ai/privacy-policy\",\n    \"terms\": \"https://qodo.ai/terms\"\n  }\n}\n"
  },
  {
    "path": "pr_agent/servers/azuredevops_server_webhook.py",
    "content": "# This file contains the code for the Azure DevOps Server webhook server.\n# The server listens for incoming webhooks from Azure DevOps Server and forwards them to the PR Agent.\n# ADO webhook documentation: https://learn.microsoft.com/en-us/azure/devops/service-hooks/services/webhooks?view=azure-devops\n\nimport json\nimport os\nimport re\nimport secrets\nfrom urllib.parse import unquote\n\nimport uvicorn\nfrom fastapi import APIRouter, Depends, FastAPI, HTTPException, Request\nfrom fastapi.encoders import jsonable_encoder\nfrom fastapi.security import HTTPBasic, HTTPBasicCredentials\nfrom starlette import status\nfrom starlette.background import BackgroundTasks\nfrom starlette.middleware import Middleware\nfrom starlette.requests import Request\nfrom starlette.responses import JSONResponse\nfrom starlette_context.middleware import RawContextMiddleware\n\nfrom pr_agent.agent.pr_agent import PRAgent, command2class\nfrom pr_agent.algo.utils import update_settings_from_args\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider_with_context\nfrom pr_agent.git_providers.azuredevops_provider import AzureDevopsProvider\nfrom pr_agent.git_providers.utils import apply_repo_settings\nfrom pr_agent.log import LoggingFormat, get_logger, setup_logger\n\nsetup_logger(fmt=LoggingFormat.JSON, level=get_settings().get(\"CONFIG.LOG_LEVEL\", \"DEBUG\"))\nsecurity = HTTPBasic(auto_error=False)\nrouter = APIRouter()\navailable_commands_rgx = re.compile(r\"^\\/(\" + \"|\".join(command2class.keys()) + r\")\\s*\")\nazure_devops_server = get_settings().get(\"azure_devops_server\")\nWEBHOOK_USERNAME = azure_devops_server.get(\"webhook_username\", None)\nWEBHOOK_PASSWORD = azure_devops_server.get(\"webhook_password\", None)\n\nasync def handle_request_comment(url: str, body: str, thread_id: int, comment_id: int, log_context: dict):\n    log_context[\"action\"] = body\n    log_context[\"api_url\"] = url\n    try:\n        with 
get_logger().contextualize(**log_context):\n            agent = PRAgent()\n            provider = get_git_provider_with_context(pr_url=url)\n            body = handle_line_comment(body, thread_id, provider)\n            handled = await agent.handle_request(url, body, notify=lambda: provider.reply_to_thread(thread_id, \"On it! ⏳\", True))\n            # mark command comment as closed\n            if handled:\n                provider.set_thread_status(thread_id, \"closed\")\n                provider.remove_initial_comment()\n    except Exception as e:\n        get_logger().exception(f\"Failed to handle webhook\", artifact={\"url\": url, \"body\": body}, error=str(e))\n\ndef handle_line_comment(body: str, thread_id: int, provider: AzureDevopsProvider):\n    body = body.strip()\n    if not body.startswith('/ask '):\n        return body\n    thread_context = provider.get_thread_context(thread_id)\n    if not thread_context:\n        return body\n    \n    path = thread_context.file_path\n    if thread_context.left_file_end or thread_context.left_file_start:\n        start_line = thread_context.left_file_start.line\n        end_line = thread_context.left_file_end.line\n        side = \"left\"\n    elif thread_context.right_file_end or thread_context.right_file_start:\n        start_line = thread_context.right_file_start.line\n        end_line = thread_context.right_file_end.line\n        side = \"right\"\n    else:\n        get_logger().info(\"No line range found in thread context\", artifact={\"thread_context\": thread_context})\n        return body\n    \n    question = body[5:].lstrip() # remove 4 chars: '/ask '\n    return f\"/ask_line --line_start={start_line} --line_end={end_line} --side={side} --file_name={path} --comment_id={thread_id} {question}\"\n\n# currently only basic auth is supported with azure webhooks\n# for this reason, https must be enabled to ensure the credentials are not sent in clear text\ndef authorize(credentials: HTTPBasicCredentials = 
Depends(security)):\n    if WEBHOOK_USERNAME is None or WEBHOOK_PASSWORD is None:\n        return\n    \n    is_user_ok = secrets.compare_digest(credentials.username, WEBHOOK_USERNAME)\n    is_pass_ok = secrets.compare_digest(credentials.password, WEBHOOK_PASSWORD)\n    if not (is_user_ok and is_pass_ok):\n        raise HTTPException(\n            status_code=status.HTTP_401_UNAUTHORIZED,\n            detail='Incorrect username or password.',\n            headers={'WWW-Authenticate': 'Basic'},\n        )\n\n\nasync def _perform_commands_azure(commands_conf: str, agent: PRAgent, api_url: str, log_context: dict):\n    apply_repo_settings(api_url)\n    if commands_conf == \"pr_commands\" and get_settings().config.disable_auto_feedback:  # auto commands for PR, and auto feedback is disabled\n        get_logger().info(f\"Auto feedback is disabled, skipping auto commands for PR {api_url=}\", **log_context)\n        return\n    commands = get_settings().get(f\"azure_devops_server.{commands_conf}\")\n    if not commands:\n        return\n\n    get_settings().set(\"config.is_auto_command\", True)\n    for command in commands:\n        try:\n            split_command = command.split(\" \")\n            command = split_command[0]\n            args = split_command[1:]\n            other_args = update_settings_from_args(args)\n            new_command = ' '.join([command] + other_args)\n            get_logger().info(f\"Performing command: {new_command}\")\n            with get_logger().contextualize(**log_context):\n                await agent.handle_request(api_url, new_command)\n        except Exception as e:\n            get_logger().error(f\"Failed to perform command {command}: {e}\")\n\n\nasync def handle_request_azure(data, log_context):\n    if data[\"eventType\"] == \"git.pullrequest.created\":\n        # API V1 (latest)\n        pr_url = unquote(data[\"resource\"][\"_links\"][\"web\"][\"href\"].replace(\"_apis/git/repositories\", \"_git\"))\n        
log_context[\"event\"] = data[\"eventType\"]\n        log_context[\"api_url\"] = pr_url\n        await _perform_commands_azure(\"pr_commands\", PRAgent(), pr_url, log_context)\n        return JSONResponse(\n            status_code=status.HTTP_202_ACCEPTED,\n            content=jsonable_encoder({\"message\": \"webhook triggered successfully\"})\n        )\n    elif data[\"eventType\"] == \"ms.vss-code.git-pullrequest-comment-event\" and \"content\" in data[\"resource\"][\"comment\"]:\n        comment = data[\"resource\"][\"comment\"]\n        if available_commands_rgx.match(comment[\"content\"]):\n            if(data[\"resourceVersion\"] == \"2.0\"):\n                repo = data[\"resource\"][\"pullRequest\"][\"repository\"][\"webUrl\"]\n                pr_url = unquote(f'{repo}/pullrequest/{data[\"resource\"][\"pullRequest\"][\"pullRequestId\"]}')\n                action = comment[\"content\"]\n                thread_url = comment[\"_links\"][\"threads\"][\"href\"]\n                thread_id = int(thread_url.split(\"/\")[-1])\n                comment_id = int(comment[\"id\"])\n                pass\n            else:\n                # API V1 not supported as it does not contain the PR URL\n                return JSONResponse(\n                    status_code=status.HTTP_400_BAD_REQUEST,\n                    content=json.dumps({\"message\": \"version 1.0 webhook for Azure Devops PR comment is not supported. 
please upgrade to version 2.0\"})),\n        else:\n            return JSONResponse(\n                status_code=status.HTTP_400_BAD_REQUEST,\n                content=json.dumps({\"message\": \"Unsupported command\"}),\n            )\n    else:\n        return JSONResponse(\n            status_code=status.HTTP_204_NO_CONTENT,\n            content=json.dumps({\"message\": \"Unsupported event\"}),\n        )\n\n    log_context[\"event\"] = data[\"eventType\"]\n    log_context[\"api_url\"] = pr_url\n\n    try:\n        await handle_request_comment(pr_url, action, thread_id, comment_id, log_context)\n    except Exception as e:\n        get_logger().error(\"Azure DevOps Trigger failed. Error:\" + str(e))\n        return JSONResponse(\n            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,\n            content=json.dumps({\"message\": \"Internal server error\"}),\n        )\n    return JSONResponse(\n        status_code=status.HTTP_202_ACCEPTED, content=jsonable_encoder({\"message\": \"webhook triggered successfully\"})\n    )\n\n@router.post(\"/\", dependencies=[Depends(authorize)])\nasync def handle_webhook(background_tasks: BackgroundTasks, request: Request):\n    log_context = {\"server_type\": \"azure_devops_server\"}\n    data = await request.json()\n    # get_logger().info(json.dumps(data))\n\n    background_tasks.add_task(handle_request_azure, data, log_context)\n\n    return JSONResponse(\n        status_code=status.HTTP_202_ACCEPTED, content=jsonable_encoder({\"message\": \"webhook triggered successfully\"})\n    )\n\n@router.get(\"/\")\nasync def root():\n    return {\"status\": \"ok\"}\n\ndef start():\n    app = FastAPI(middleware=[Middleware(RawContextMiddleware)])\n    app.include_router(router)\n    uvicorn.run(app, host=\"0.0.0.0\", port=int(os.environ.get(\"PORT\", \"3000\")))\n\nif __name__ == \"__main__\":\n    start()\n"
  },
  {
    "path": "pr_agent/servers/bitbucket_app.py",
    "content": "import base64\nimport copy\nimport hashlib\nimport json\nimport os\nimport re\nimport time\n\nimport jwt\nimport requests\nimport uvicorn\nfrom fastapi import APIRouter, FastAPI, Request, Response\nfrom starlette.background import BackgroundTasks\nfrom starlette.middleware import Middleware\nfrom starlette.responses import JSONResponse\nfrom starlette_context import context\nfrom starlette_context.middleware import RawContextMiddleware\n\nfrom pr_agent.agent.pr_agent import PRAgent\nfrom pr_agent.algo.utils import update_settings_from_args\nfrom pr_agent.config_loader import get_settings, global_settings\nfrom pr_agent.git_providers.utils import apply_repo_settings\nfrom pr_agent.identity_providers import get_identity_provider\nfrom pr_agent.identity_providers.identity_provider import Eligibility\nfrom pr_agent.log import LoggingFormat, get_logger, setup_logger\nfrom pr_agent.secret_providers import get_secret_provider\n\nsetup_logger(fmt=LoggingFormat.JSON, level=get_settings().get(\"CONFIG.LOG_LEVEL\", \"DEBUG\"))\nrouter = APIRouter()\nsecret_provider = get_secret_provider() if get_settings().get(\"CONFIG.SECRET_PROVIDER\") else None\n\n\nasync def get_bearer_token(shared_secret: str, client_key: str):\n    try:\n        now = int(time.time())\n        url = \"https://bitbucket.org/site/oauth2/access_token\"\n        canonical_url = \"GET&/site/oauth2/access_token&\"\n        qsh = hashlib.sha256(canonical_url.encode(\"utf-8\")).hexdigest()\n        app_key = get_settings().bitbucket.app_key\n\n        payload = {\n            \"iss\": app_key,\n            \"iat\": now,\n            \"exp\": now + 240,\n            \"qsh\": qsh,\n            \"sub\": client_key,\n            }\n        token = jwt.encode(payload, shared_secret, algorithm=\"HS256\")\n        payload = 'grant_type=urn%3Abitbucket%3Aoauth2%3Ajwt'\n        headers = {\n            'Authorization': f'JWT {token}',\n            'Content-Type': 'application/x-www-form-urlencoded'\n     
   }\n        response = requests.request(\"POST\", url, headers=headers, data=payload)\n        bearer_token = response.json()[\"access_token\"]\n        return bearer_token\n    except Exception as e:\n        get_logger().error(f\"Failed to get bearer token: {e}\")\n        raise e\n\n@router.get(\"/\")\nasync def handle_manifest(request: Request, response: Response):\n    cur_dir = os.path.dirname(os.path.abspath(__file__))\n    manifest = open(os.path.join(cur_dir, \"atlassian-connect.json\"), \"rt\").read()\n    try:\n        manifest = manifest.replace(\"app_key\", get_settings().bitbucket.app_key)\n        manifest = manifest.replace(\"base_url\", get_settings().bitbucket.base_url)\n    except:\n        get_logger().error(\"Failed to replace api_key in Bitbucket manifest, trying to continue\")\n    manifest_obj = json.loads(manifest)\n    return JSONResponse(manifest_obj)\n\n\ndef _get_username(data):\n    actor = data.get(\"data\", {}).get(\"actor\", {})\n    if actor:\n        if \"username\" in actor:\n            return actor[\"username\"]\n        elif \"display_name\" in actor:\n            return actor[\"display_name\"]\n        elif \"nickname\" in actor:\n            return actor[\"nickname\"]\n    return \"\"\n\n\nasync def _validate_time_from_last_commit_to_pr_update(data: dict) -> bool:\n    is_valid_push = False\n    try:\n        data_inner = data.get('data', {})\n        if not data_inner:\n            get_logger().error(\"No data found in the webhook payload\")\n            return True\n        pull_request = data_inner.get('pullrequest', {})\n        commits_api = pull_request.get('links', {}).get('commits', {}).get('href')\n        if not commits_api:\n            return False\n        if not pull_request.get('updated_on'):\n            return False\n        bearer_token = context.get('bitbucket_bearer_token')\n        headers = {\n            'Authorization': f'Bearer {bearer_token}',\n            'Accept': 'application/json'\n        }\n 
       response = requests.get(commits_api, headers=headers)\n        if response.status_code != 200:\n            get_logger().warning(f\"Bitbucket commits API returned {response.status_code} for {commits_api}\")\n            return False\n\n        username =_get_username(data)\n        commits_data = response.json() or {}\n        values = commits_data.get('values') or []\n        if (not values or not isinstance(values, list) or not values[0].get('author') or not values[0]['author'].get('user')\n                or not values[0]['author']['user'].get('display_name')):\n            get_logger().warning(\"No commits returned for pull request or one of the required fields missing; skipping push validation\",\n                                 artifact={'values': values})\n            return False\n        commit_username = commits_data['values'][0]['author']['user']['display_name']\n        if username != commit_username:\n            get_logger().warning(f\"Mismatch in username {username} vs. 
commit_username {commit_username}\")\n            return False\n\n        time_pr_updated = pull_request['updated_on']\n        time_last_commit = commits_data['values'][0]['date']\n        from datetime import datetime\n        ts1 = datetime.fromisoformat(time_pr_updated)\n        ts2 = datetime.fromisoformat(time_last_commit)\n        diff = (ts1 - ts2).total_seconds()\n        max_delta_seconds = 15\n        if diff > 0 and diff < max_delta_seconds:\n            is_valid_push = True\n        else:\n            get_logger().debug(f\"Too much time passed since last commit\",\n                               artifact={'updated': time_pr_updated, 'last_commit': time_last_commit})\n    except Exception as e:\n        get_logger().exception(f\"Failed to validate time difference between last commit and PR update\",\n                               artifact={'error': e, 'data': data})\n    return is_valid_push\n\nasync def _perform_commands_bitbucket(commands_conf: str, agent: PRAgent, api_url: str, log_context: dict, data: dict):\n    apply_repo_settings(api_url)\n    if commands_conf == \"pr_commands\" and get_settings().config.disable_auto_feedback:  # auto commands for PR, and auto feedback is disabled\n        get_logger().info(f\"Auto feedback is disabled, skipping auto commands for PR {api_url=}\")\n        return\n    if commands_conf == \"push_commands\":\n        if not get_settings().get(\"bitbucket_app.handle_push_trigger\"):\n            get_logger().info(\n                \"Bitbucket push trigger handling disabled via config; skipping push commands\")\n            return\n    if data.get(\"event\", \"\") == \"pullrequest:created\":\n        if not should_process_pr_logic(data):\n            return\n    commands = get_settings().get(f\"bitbucket_app.{commands_conf}\", {})\n    get_settings().set(\"config.is_auto_command\", True)\n    if commands_conf == \"push_commands\":\n        is_valid_push = await _validate_time_from_last_commit_to_pr_update(data)\n     
   if not is_valid_push:\n            get_logger().info(f\"Bitbucket skipping 'pullrequest:updated' for push commands\")\n            return\n    for command in commands:\n        try:\n            split_command = command.split(\" \")\n            command = split_command[0]\n            args = split_command[1:]\n            other_args = update_settings_from_args(args)\n            new_command = ' '.join([command] + other_args)\n            get_logger().info(f\"Performing command: {new_command}\")\n            with get_logger().contextualize(**log_context):\n                await agent.handle_request(api_url, new_command)\n        except Exception as e:\n            get_logger().error(f\"Failed to perform command {command}: {e}\")\n\n\ndef is_bot_user(data) -> bool:\n    try:\n        actor = data.get(\"data\", {}).get(\"actor\", {})\n        # allow actor type: user . if it's \"AppUser\" or \"team\" then it is a bot user\n        allowed_actor_types = {\"user\"}\n        if actor and actor[\"type\"].lower() not in allowed_actor_types:\n            get_logger().info(f\"BitBucket actor type is not 'user', skipping: {actor}\")\n            return True\n    except Exception as e:\n        get_logger().error(f\"Failed 'is_bot_user' logic: {e}\")\n    return False\n\n\ndef should_process_pr_logic(data) -> bool:\n    try:\n        pr_data = data.get(\"data\", {}).get(\"pullrequest\", {})\n        title = pr_data.get(\"title\", \"\")\n        source_branch = pr_data.get(\"source\", {}).get(\"branch\", {}).get(\"name\", \"\")\n        target_branch = pr_data.get(\"destination\", {}).get(\"branch\", {}).get(\"name\", \"\")\n        sender = _get_username(data)\n        repo_full_name = pr_data.get(\"destination\", {}).get(\"repository\", {}).get(\"full_name\", \"\")\n\n        # logic to ignore PRs from specific repositories\n        ignore_repos = get_settings().get(\"CONFIG.IGNORE_REPOSITORIES\", [])\n        if repo_full_name and ignore_repos:\n            if 
any(re.search(regex, repo_full_name) for regex in ignore_repos):\n                get_logger().info(f\"Ignoring PR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting\")\n                return False\n\n        # logic to ignore PRs from specific users\n        ignore_pr_users = get_settings().get(\"CONFIG.IGNORE_PR_AUTHORS\", [])\n        if ignore_pr_users and sender:\n            if any(re.search(regex, sender) for regex in ignore_pr_users):\n                get_logger().info(f\"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' setting\")\n                return False\n\n        # logic to ignore PRs with specific titles\n        if title:\n            ignore_pr_title_re = get_settings().get(\"CONFIG.IGNORE_PR_TITLE\", [])\n            if not isinstance(ignore_pr_title_re, list):\n                ignore_pr_title_re = [ignore_pr_title_re]\n            if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re):\n                get_logger().info(f\"Ignoring PR with title '{title}' due to config.ignore_pr_title setting\")\n                return False\n\n        ignore_pr_source_branches = get_settings().get(\"CONFIG.IGNORE_PR_SOURCE_BRANCHES\", [])\n        ignore_pr_target_branches = get_settings().get(\"CONFIG.IGNORE_PR_TARGET_BRANCHES\", [])\n        if (ignore_pr_source_branches or ignore_pr_target_branches):\n            if any(re.search(regex, source_branch) for regex in ignore_pr_source_branches):\n                get_logger().info(\n                    f\"Ignoring PR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings\")\n                return False\n            if any(re.search(regex, target_branch) for regex in ignore_pr_target_branches):\n                get_logger().info(\n                    f\"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings\")\n                return False\n    except Exception 
as e:\n        get_logger().error(f\"Failed 'should_process_pr_logic': {e}\")\n    return True\n\n\n@router.post(\"/webhook\")\nasync def handle_github_webhooks(background_tasks: BackgroundTasks, request: Request):\n    app_name = get_settings().get(\"CONFIG.APP_NAME\", \"Unknown\")\n    log_context = {\"server_type\": \"bitbucket_app\", \"app_name\": app_name}\n    get_logger().debug(request.headers)\n    jwt_header = request.headers.get(\"authorization\", None)\n    if jwt_header:\n        input_jwt = jwt_header.split(\" \")[1]\n    data = await request.json()\n    get_logger().debug(data)\n\n    async def inner():\n        try:\n            # ignore bot users\n            if is_bot_user(data):\n                return \"OK\"\n\n            # Check if the PR should be processed\n            if data.get(\"event\", \"\") == \"pullrequest:created\":\n                if not should_process_pr_logic(data):\n                    return \"OK\"\n\n            # Get the username of the sender\n            log_context[\"sender\"] = _get_username(data)\n\n            sender_id = data.get(\"data\", {}).get(\"actor\", {}).get(\"account_id\", \"\")\n            log_context[\"sender_id\"] = sender_id\n            jwt_parts = input_jwt.split(\".\")\n            claim_part = jwt_parts[1]\n            claim_part += \"=\" * (-len(claim_part) % 4)\n            decoded_claims = base64.urlsafe_b64decode(claim_part)\n            claims = json.loads(decoded_claims)\n            client_key = claims[\"iss\"]\n            secrets = json.loads(secret_provider.get_secret(client_key))\n            shared_secret = secrets[\"shared_secret\"]\n            jwt.decode(input_jwt, shared_secret, audience=client_key, algorithms=[\"HS256\"])\n            bearer_token = await get_bearer_token(shared_secret, client_key)\n            context['bitbucket_bearer_token'] = bearer_token\n            context[\"settings\"] = copy.deepcopy(global_settings)\n            event = data[\"event\"]\n            agent = 
PRAgent()\n            if event == \"pullrequest:created\":\n                pr_url = data[\"data\"][\"pullrequest\"][\"links\"][\"html\"][\"href\"]\n                log_context[\"api_url\"] = pr_url\n                log_context[\"event\"] = \"pull_request\"\n                if pr_url:\n                    with get_logger().contextualize(**log_context):\n                        if get_identity_provider().verify_eligibility(\"bitbucket\",\n                                                        sender_id, pr_url) is not Eligibility.NOT_ELIGIBLE:\n                            if get_settings().get(\"bitbucket_app.pr_commands\"):\n                                await _perform_commands_bitbucket(\"pr_commands\", agent, pr_url, log_context, data)\n            elif event == \"pullrequest:updated\": # PR updated, might be from a push (we will validate this later)\n                pr_url = data[\"data\"][\"pullrequest\"][\"links\"][\"html\"][\"href\"]\n                log_context[\"api_url\"] = pr_url\n                log_context[\"event\"] = \"pull_request\"\n                if pr_url:\n                    with get_logger().contextualize(**log_context):\n                        if get_identity_provider().verify_eligibility(\"bitbucket\",\n                                                        sender_id, pr_url) is not Eligibility.NOT_ELIGIBLE:\n\n                            if get_settings().get(\"bitbucket_app.push_commands\"):\n                                await _perform_commands_bitbucket(\"push_commands\", agent, pr_url, log_context, data)\n            elif event == \"pullrequest:comment_created\":\n                pr_url = data[\"data\"][\"pullrequest\"][\"links\"][\"html\"][\"href\"]\n                log_context[\"api_url\"] = pr_url\n                log_context[\"event\"] = \"comment\"\n                comment_body = data[\"data\"][\"comment\"][\"content\"][\"raw\"]\n                with get_logger().contextualize(**log_context):\n                    if 
get_identity_provider().verify_eligibility(\"bitbucket\",\n                                                                     sender_id, pr_url) is not Eligibility.NOT_ELIGIBLE:\n                        await agent.handle_request(pr_url, comment_body)\n        except Exception as e:\n            get_logger().error(f\"Failed to handle webhook: {e}\")\n    background_tasks.add_task(inner)\n    return \"OK\"\n\n@router.get(\"/webhook\")\nasync def handle_github_webhooks(request: Request, response: Response):\n    return \"Webhook server online!\"\n\n@router.post(\"/installed\")\nasync def handle_installed_webhooks(request: Request, response: Response):\n    try:\n        get_logger().info(\"handle_installed_webhooks\")\n        get_logger().info(request.headers)\n        data = await request.json()\n        get_logger().info(data)\n        shared_secret = data[\"sharedSecret\"]\n        client_key = data[\"clientKey\"]\n        username = data[\"principal\"][\"username\"]\n        secrets = {\n            \"shared_secret\": shared_secret,\n            \"client_key\": client_key\n        }\n        secret_provider.store_secret(username, json.dumps(secrets))\n    except Exception as e:\n        get_logger().error(f\"Failed to register user: {e}\")\n        return JSONResponse({\"error\": \"Unable to register user\"}, status_code=500)\n\n@router.post(\"/uninstalled\")\nasync def handle_uninstalled_webhooks(request: Request, response: Response):\n    get_logger().info(\"handle_uninstalled_webhooks\")\n\n    data = await request.json()\n    get_logger().info(data)\n\n\ndef start():\n    get_settings().set(\"CONFIG.PUBLISH_OUTPUT_PROGRESS\", False)\n    get_settings().set(\"CONFIG.GIT_PROVIDER\", \"bitbucket\")\n    get_settings().set(\"PR_DESCRIPTION.PUBLISH_DESCRIPTION_AS_COMMENT\", True)\n    middleware = [Middleware(RawContextMiddleware)]\n    app = FastAPI(middleware=middleware)\n    app.include_router(router)\n\n    uvicorn.run(app, host=\"0.0.0.0\", 
port=int(os.getenv(\"PORT\", \"3000\")))\n\n\nif __name__ == '__main__':\n    start()\n"
  },
  {
    "path": "pr_agent/servers/bitbucket_server_webhook.py",
    "content": "import ast\nimport json\nimport os\nimport re\nfrom typing import List\n\nimport uvicorn\nfrom fastapi import APIRouter, FastAPI\nfrom fastapi.encoders import jsonable_encoder\nfrom fastapi.responses import RedirectResponse\nfrom starlette import status\nfrom starlette.background import BackgroundTasks\nfrom starlette.middleware import Middleware\nfrom starlette.requests import Request\nfrom starlette.responses import JSONResponse\nfrom starlette_context.middleware import RawContextMiddleware\n\nfrom pr_agent.agent.pr_agent import PRAgent\nfrom pr_agent.algo.utils import update_settings_from_args\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers.utils import apply_repo_settings\nfrom pr_agent.log import LoggingFormat, get_logger, setup_logger\nfrom pr_agent.servers.utils import verify_signature\n\nsetup_logger(fmt=LoggingFormat.JSON, level=get_settings().get(\"CONFIG.LOG_LEVEL\", \"DEBUG\"))\nrouter = APIRouter()\n\n\ndef handle_request(\n    background_tasks: BackgroundTasks, url: str, body: str, log_context: dict\n):\n    log_context[\"action\"] = body\n    log_context[\"api_url\"] = url\n\n    async def inner():\n        try:\n            with get_logger().contextualize(**log_context):\n                await PRAgent().handle_request(url, body)\n        except Exception as e:\n            get_logger().error(f\"Failed to handle webhook: {e}\")\n\n    background_tasks.add_task(inner)\n\ndef should_process_pr_logic(data) -> bool:\n    try:\n        pr_data = data.get(\"pullRequest\", {})\n        title = pr_data.get(\"title\", \"\")\n        \n        from_ref = pr_data.get(\"fromRef\", {})\n        source_branch = from_ref.get(\"displayId\", \"\") if from_ref else \"\"\n        \n        to_ref = pr_data.get(\"toRef\", {})\n        target_branch = to_ref.get(\"displayId\", \"\") if to_ref else \"\"\n        \n        author = pr_data.get(\"author\", {})\n        user = author.get(\"user\", {}) if author else {}\n        
sender = user.get(\"name\", \"\") if user else \"\"\n        \n        repository = to_ref.get(\"repository\", {}) if to_ref else {}\n        project = repository.get(\"project\", {}) if repository else {}\n        project_key = project.get(\"key\", \"\") if project else \"\"\n        repo_slug = repository.get(\"slug\", \"\") if repository else \"\"\n        \n        repo_full_name = f\"{project_key}/{repo_slug}\" if project_key and repo_slug else \"\"\n        pr_id = pr_data.get(\"id\", None)\n\n        # To ignore PRs from specific repositories\n        ignore_repos = get_settings().get(\"CONFIG.IGNORE_REPOSITORIES\", [])\n        if repo_full_name and ignore_repos:\n            if any(re.search(regex, repo_full_name) for regex in ignore_repos):\n                get_logger().info(f\"Ignoring PR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting\")\n                return False\n\n        # To ignore PRs from specific users\n        ignore_pr_users = get_settings().get(\"CONFIG.IGNORE_PR_AUTHORS\", [])\n        if ignore_pr_users and sender:\n            if any(re.search(regex, sender) for regex in ignore_pr_users):\n                get_logger().info(f\"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' setting\")\n                return False\n\n        # To ignore PRs with specific titles\n        if title:\n            ignore_pr_title_re = get_settings().get(\"CONFIG.IGNORE_PR_TITLE\", [])\n            if not isinstance(ignore_pr_title_re, list):\n                ignore_pr_title_re = [ignore_pr_title_re]\n            if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re):\n                get_logger().info(f\"Ignoring PR with title '{title}' due to config.ignore_pr_title setting\")\n                return False\n\n        ignore_pr_source_branches = get_settings().get(\"CONFIG.IGNORE_PR_SOURCE_BRANCHES\", [])\n        ignore_pr_target_branches = 
get_settings().get(\"CONFIG.IGNORE_PR_TARGET_BRANCHES\", [])\n        if (ignore_pr_source_branches or ignore_pr_target_branches):\n            if any(re.search(regex, source_branch) for regex in ignore_pr_source_branches):\n                get_logger().info(\n                    f\"Ignoring PR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings\")\n                return False\n            if any(re.search(regex, target_branch) for regex in ignore_pr_target_branches):\n                get_logger().info(\n                    f\"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings\")\n                return False\n\n        # Allow_only_specific_folders\n        allowed_folders = get_settings().config.get(\"allow_only_specific_folders\", [])\n        if allowed_folders and pr_id and project_key and repo_slug:\n            from pr_agent.git_providers.bitbucket_server_provider import BitbucketServerProvider\n            bitbucket_server_url = get_settings().get(\"BITBUCKET_SERVER.URL\", \"\")\n            pr_url = f\"{bitbucket_server_url}/projects/{project_key}/repos/{repo_slug}/pull-requests/{pr_id}\"\n            provider = BitbucketServerProvider(pr_url=pr_url)\n            changed_files = provider.get_files()\n            if changed_files:\n                # Check if ALL files are outside allowed folders\n                all_files_outside = True\n                for file_path in changed_files:\n                    if any(file_path.startswith(folder) for folder in allowed_folders):\n                        all_files_outside = False\n                        break\n                \n                if all_files_outside:\n                    get_logger().info(f\"Ignoring PR because all files {changed_files} are outside allowed folders {allowed_folders}\")\n                    return False\n    except Exception as e:\n        get_logger().error(f\"Failed 'should_process_pr_logic': {e}\")\n  
      return True # On exception - we continue. Otherwise, we could just end up with filtering all PRs\n    return True\n\n@router.post(\"/\")\nasync def redirect_to_webhook():\n    return RedirectResponse(url=\"/webhook\")\n\n@router.post(\"/webhook\")\nasync def handle_webhook(background_tasks: BackgroundTasks, request: Request):\n    log_context = {\"server_type\": \"bitbucket_server\"}\n    data = await request.json()\n    get_logger().info(json.dumps(data))\n\n    webhook_secret = get_settings().get(\"BITBUCKET_SERVER.WEBHOOK_SECRET\", None)\n    if webhook_secret:\n        body_bytes = await request.body()\n        if body_bytes.decode('utf-8') == '{\"test\": true}':\n            return JSONResponse(\n                status_code=status.HTTP_200_OK, content=jsonable_encoder({\"message\": \"connection test successful\"})\n            )\n        signature_header = request.headers.get(\"x-hub-signature\", None)\n        verify_signature(body_bytes, webhook_secret, signature_header)\n\n    pr_id = data[\"pullRequest\"][\"id\"]\n    repository_name = data[\"pullRequest\"][\"toRef\"][\"repository\"][\"slug\"]\n    project_name = data[\"pullRequest\"][\"toRef\"][\"repository\"][\"project\"][\"key\"]\n    bitbucket_server = get_settings().get(\"BITBUCKET_SERVER.URL\")\n    pr_url = f\"{bitbucket_server}/projects/{project_name}/repos/{repository_name}/pull-requests/{pr_id}\"\n\n    log_context[\"api_url\"] = pr_url\n    log_context[\"event\"] = \"pull_request\"\n\n    commands_to_run = []\n\n    if (data[\"eventKey\"] == \"pr:opened\"\n            or (data[\"eventKey\"] == \"repo:refs_changed\" and data.get(\"pullRequest\", {}).get(\"id\", -1) != -1)):  # push event; -1 for push unassigned to a PR: #Check auto commands for creation/updating\n        apply_repo_settings(pr_url)\n        if not should_process_pr_logic(data):\n            get_logger().info(f\"PR ignored due to config settings\", **log_context)\n            return JSONResponse(\n                
status_code=status.HTTP_200_OK, content=jsonable_encoder({\"message\": \"PR ignored by config\"})\n            )\n        if get_settings().config.disable_auto_feedback:  # auto commands for PR, and auto feedback is disabled\n            get_logger().info(f\"Auto feedback is disabled, skipping auto commands for PR {pr_url}\", **log_context)\n            return JSONResponse(\n                status_code=status.HTTP_200_OK, content=jsonable_encoder({\"message\": \"PR ignored due to auto feedback not enabled\"})\n            )\n        get_settings().set(\"config.is_auto_command\", True)\n        if data[\"eventKey\"] == \"pr:opened\":\n            commands_to_run.extend(_get_commands_list_from_settings('BITBUCKET_SERVER.PR_COMMANDS'))\n        else: #Has to be: data[\"eventKey\"] == \"pr:from_ref_updated\"\n            if not get_settings().get(\"BITBUCKET_SERVER.HANDLE_PUSH_TRIGGER\"):\n                get_logger().info(f\"Push trigger is disabled, skipping push commands for PR {pr_url}\", **log_context)\n                return JSONResponse(\n                    status_code=status.HTTP_200_OK, content=jsonable_encoder({\"message\": \"PR ignored due to push trigger not enabled\"})\n                )\n\n            get_settings().set(\"config.is_new_pr\", False)\n            commands_to_run.extend(_get_commands_list_from_settings('BITBUCKET_SERVER.PUSH_COMMANDS'))\n    elif data[\"eventKey\"] == \"pr:comment:added\":\n        commands_to_run.append(data[\"comment\"][\"text\"])\n    else:\n        return JSONResponse(\n            status_code=status.HTTP_400_BAD_REQUEST,\n            content=json.dumps({\"message\": \"Unsupported event\"}),\n        )\n\n    async def inner():\n        try:\n            await _run_commands_sequentially(commands_to_run, pr_url, log_context)\n        except Exception as e:\n            get_logger().error(f\"Failed to handle webhook: {e}\")\n\n    background_tasks.add_task(inner)\n\n    return JSONResponse(\n        
status_code=status.HTTP_200_OK, content=jsonable_encoder({\"message\": \"success\"})\n    )\n\n\nasync def _run_commands_sequentially(commands: List[str], url: str, log_context: dict):\n    get_logger().info(f\"Running commands sequentially: {commands}\")\n    if commands is None:\n        return\n\n    for command in commands:\n        try:\n            body = _process_command(command, url)\n\n            log_context[\"action\"] = body\n            log_context[\"api_url\"] = url\n\n            with get_logger().contextualize(**log_context):\n                await PRAgent().handle_request(url, body)\n        except Exception as e:\n            get_logger().error(f\"Failed to handle command: {command} , error: {e}\")\n\ndef _process_command(command: str, url) -> str:\n    # don't think we need this\n    apply_repo_settings(url)\n    # Process the command string\n    split_command = command.split(\" \")\n    command = split_command[0]\n    args = split_command[1:]\n    # do I need this? if yes, shouldn't this be done in PRAgent?\n    other_args = update_settings_from_args(args)\n    new_command = ' '.join([command] + other_args)\n    return new_command\n\n\ndef _to_list(command_string: str) -> list:\n    try:\n        # Use ast.literal_eval to safely parse the string into a list\n        commands = ast.literal_eval(command_string)\n        # Check if the parsed object is a list of strings\n        if isinstance(commands, list) and all(isinstance(cmd, str) for cmd in commands):\n            return commands\n        else:\n            raise ValueError(\"Parsed data is not a list of strings.\")\n    except (SyntaxError, ValueError, TypeError) as e:\n        raise ValueError(f\"Invalid command string: {e}\")\n\n\ndef _get_commands_list_from_settings(setting_key:str ) -> list:\n    try:\n        return get_settings().get(setting_key, [])\n    except ValueError as e:\n        get_logger().error(f\"Failed to get commands list from settings {setting_key}: 
{e}\")\n\n\n@router.get(\"/\")\nasync def root():\n    return {\"status\": \"ok\"}\n\n\ndef start():\n    app = FastAPI(middleware=[Middleware(RawContextMiddleware)])\n    app.include_router(router)\n    uvicorn.run(app, host=\"0.0.0.0\", port=int(os.environ.get(\"PORT\", \"3000\")))\n\n\nif __name__ == \"__main__\":\n    start()\n"
  },
  {
    "path": "pr_agent/servers/gerrit_server.py",
    "content": "import copy\nfrom enum import Enum\nfrom json import JSONDecodeError\n\nimport uvicorn\nfrom fastapi import APIRouter, FastAPI, HTTPException\nfrom pydantic import BaseModel\nfrom starlette.middleware import Middleware\nfrom starlette_context import context\nfrom starlette_context.middleware import RawContextMiddleware\n\nfrom pr_agent.agent.pr_agent import PRAgent\nfrom pr_agent.config_loader import get_settings, global_settings\nfrom pr_agent.log import get_logger, setup_logger\n\nsetup_logger()\nrouter = APIRouter()\n\n\nclass Action(str, Enum):\n    review = \"review\"\n    describe = \"describe\"\n    ask = \"ask\"\n    improve = \"improve\"\n    reflect = \"reflect\"\n    answer = \"answer\"\n\n\nclass Item(BaseModel):\n    refspec: str\n    project: str\n    msg: str\n\n\n@router.post(\"/api/v1/gerrit/{action}\")\nasync def handle_gerrit_request(action: Action, item: Item):\n    get_logger().debug(\"Received a Gerrit request\")\n    context[\"settings\"] = copy.deepcopy(global_settings)\n\n    if action == Action.ask:\n        if not item.msg:\n            return HTTPException(\n                status_code=400,\n                detail=\"msg is required for ask command\"\n            )\n    await PRAgent().handle_request(\n        f\"{item.project}:{item.refspec}\",\n        f\"/{item.msg.strip()}\"\n    )\n\n\nasync def get_body(request):\n    try:\n        body = await request.json()\n    except JSONDecodeError as e:\n        get_logger().error(\"Error parsing request body\", e)\n        return {}\n    return body\n\n\n@router.get(\"/\")\nasync def root():\n    return {\"status\": \"ok\"}\n\n\ndef start():\n    # to prevent adding help messages with the output\n    get_settings().set(\"CONFIG.CLI_MODE\", True)\n    middleware = [Middleware(RawContextMiddleware)]\n    app = FastAPI(middleware=middleware)\n    app.include_router(router)\n\n    uvicorn.run(app, host=\"0.0.0.0\", port=3000)\n\n\nif __name__ == '__main__':\n    start()\n"
  },
  {
    "path": "pr_agent/servers/gitea_app.py",
    "content": "import copy\nimport os\nimport re\nfrom typing import Any, Dict\n\nfrom fastapi import APIRouter, FastAPI, HTTPException, Request, Response\nfrom starlette.background import BackgroundTasks\nfrom starlette.middleware import Middleware\nfrom starlette_context import context\nfrom starlette_context.middleware import RawContextMiddleware\n\nfrom pr_agent.agent.pr_agent import PRAgent\nfrom pr_agent.algo.utils import update_settings_from_args\nfrom pr_agent.config_loader import get_settings, global_settings\nfrom pr_agent.git_providers.utils import apply_repo_settings\nfrom pr_agent.log import LoggingFormat, get_logger, setup_logger\nfrom pr_agent.servers.utils import verify_signature\n\n# Setup logging and router\nsetup_logger(fmt=LoggingFormat.JSON, level=get_settings().get(\"CONFIG.LOG_LEVEL\", \"DEBUG\"))\nrouter = APIRouter()\n\n@router.post(\"/api/v1/gitea_webhooks\")\nasync def handle_gitea_webhooks(background_tasks: BackgroundTasks, request: Request, response: Response):\n    \"\"\"Handle incoming Gitea webhook requests\"\"\"\n    get_logger().debug(\"Received a Gitea webhook\")\n\n    body = await get_body(request)\n\n    # Set context for the request\n    context[\"settings\"] = copy.deepcopy(global_settings)\n    context[\"git_provider\"] = {}\n\n    # Handle the webhook in background\n    background_tasks.add_task(handle_request, body, event=request.headers.get(\"X-Gitea-Event\", None))\n    return {}\n\nasync def get_body(request: Request):\n    \"\"\"Parse and verify webhook request body\"\"\"\n    try:\n        body = await request.json()\n    except Exception as e:\n        get_logger().error(\"Error parsing request body\", artifact={'error': e})\n        raise HTTPException(status_code=400, detail=\"Error parsing request body\") from e\n\n\n    # Verify webhook signature\n    webhook_secret = getattr(get_settings().gitea, 'webhook_secret', None)\n    if webhook_secret:\n        body_bytes = await request.body()\n        signature_header 
= request.headers.get('x-gitea-signature', None)\n        if not signature_header:\n            get_logger().error(\"Missing signature header\")\n            raise HTTPException(status_code=400, detail=\"Missing signature header\")\n\n        try:\n            verify_signature(body_bytes, webhook_secret, f\"sha256={signature_header}\")\n        except Exception as ex:\n            get_logger().error(f\"Invalid signature: {ex}\")\n            raise HTTPException(status_code=401, detail=\"Invalid signature\")\n\n    return body\n\nasync def handle_request(body: Dict[str, Any], event: str):\n    \"\"\"Process Gitea webhook events\"\"\"\n    action = body.get(\"action\")\n    if not action:\n        get_logger().debug(\"No action found in request body\")\n        return {}\n\n    agent = PRAgent()\n\n    # Handle different event types\n    if event == \"pull_request\":\n        if not should_process_pr_logic(body):\n            get_logger().debug(f\"Request ignored: PR logic filtering\")\n            return {}\n        if action in [\"opened\", \"reopened\", \"synchronized\"]:\n            await handle_pr_event(body, event, action, agent)\n    elif event == \"issue_comment\":\n        if action == \"created\":\n            await handle_comment_event(body, event, action, agent)\n\n    return {}\n\nasync def handle_pr_event(body: Dict[str, Any], event: str, action: str, agent: PRAgent):\n    \"\"\"Handle pull request events\"\"\"\n    pr = body.get(\"pull_request\", {})\n    if not pr:\n        return\n\n    api_url = pr.get(\"url\")\n    if not api_url:\n        return\n\n    # Handle PR based on action\n    if action in [\"opened\", \"reopened\"]:\n        # commands = get_settings().get(\"gitea.pr_commands\", [])\n        await _perform_commands_gitea(\"pr_commands\", agent, body, api_url)\n        # for command in commands:\n        #     await agent.handle_request(api_url, command)\n    elif action == \"synchronized\":\n        # Handle push to PR\n        
commands_on_push = get_settings().get(f\"gitea.push_commands\", {})\n        handle_push_trigger = get_settings().get(f\"gitea.handle_push_trigger\", False)\n        if not commands_on_push or not handle_push_trigger:\n            get_logger().info(\"Push event, but no push commands found or push trigger is disabled\")\n            return\n        get_logger().debug(f'A push event has been received: {api_url}')\n        await _perform_commands_gitea(\"push_commands\", agent, body, api_url)\n        # for command in commands_on_push:\n        #     await agent.handle_request(api_url, command)\n\nasync def handle_comment_event(body: Dict[str, Any], event: str, action: str, agent: PRAgent):\n    \"\"\"Handle comment events\"\"\"\n    comment = body.get(\"comment\", {})\n    if not comment:\n        return\n\n    comment_body = comment.get(\"body\", \"\")\n    if not comment_body or not comment_body.startswith(\"/\"):\n        return\n\n    pr_url = body.get(\"pull_request\", {}).get(\"url\")\n    if not pr_url:\n        return\n\n    await agent.handle_request(pr_url, comment_body)\n\nasync def _perform_commands_gitea(commands_conf: str, agent: PRAgent, body: dict, api_url: str):\n    apply_repo_settings(api_url)\n    if commands_conf == \"pr_commands\" and get_settings().config.disable_auto_feedback:  # auto commands for PR, and auto feedback is disabled\n        get_logger().info(f\"Auto feedback is disabled, skipping auto commands for PR {api_url=}\")\n        return\n    if not should_process_pr_logic(body): # Here we already updated the configuration with the repo settings\n        return {}\n    commands = get_settings().get(f\"gitea.{commands_conf}\")\n    if not commands:\n        get_logger().info(f\"New PR, but no auto commands configured\")\n        return\n    get_settings().set(\"config.is_auto_command\", True)\n    for command in commands:\n        split_command = command.split(\" \")\n        command = split_command[0]\n        args = 
split_command[1:]\n        other_args = update_settings_from_args(args)\n        new_command = ' '.join([command] + other_args)\n        get_logger().info(f\"{commands_conf}. Performing auto command '{new_command}', for {api_url=}\")\n        await agent.handle_request(api_url, new_command)\n\ndef should_process_pr_logic(body) -> bool:\n    try:\n        pull_request = body.get(\"pull_request\", {})\n        title = pull_request.get(\"title\", \"\")\n        pr_labels = pull_request.get(\"labels\", [])\n        source_branch = pull_request.get(\"head\", {}).get(\"ref\", \"\")\n        target_branch = pull_request.get(\"base\", {}).get(\"ref\", \"\")\n        sender = body.get(\"sender\", {}).get(\"login\")\n        repo_full_name = body.get(\"repository\", {}).get(\"full_name\", \"\")\n\n        # logic to ignore PRs from specific repositories\n        ignore_repos = get_settings().get(\"CONFIG.IGNORE_REPOSITORIES\", [])\n        if ignore_repos and repo_full_name:\n            if any(re.search(regex, repo_full_name) for regex in ignore_repos):\n                get_logger().info(f\"Ignoring PR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting\")\n                return False\n\n        # logic to ignore PRs from specific users\n        ignore_pr_users = get_settings().get(\"CONFIG.IGNORE_PR_AUTHORS\", [])\n        if ignore_pr_users and sender:\n            if any(re.search(regex, sender) for regex in ignore_pr_users):\n                get_logger().info(f\"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' setting\")\n                return False\n\n        # logic to ignore PRs with specific titles\n        if title:\n            ignore_pr_title_re = get_settings().get(\"CONFIG.IGNORE_PR_TITLE\", [])\n            if not isinstance(ignore_pr_title_re, list):\n                ignore_pr_title_re = [ignore_pr_title_re]\n            if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re):\n     
           get_logger().info(f\"Ignoring PR with title '{title}' due to config.ignore_pr_title setting\")\n                return False\n\n        # logic to ignore PRs with specific labels or source branches or target branches.\n        ignore_pr_labels = get_settings().get(\"CONFIG.IGNORE_PR_LABELS\", [])\n        if pr_labels and ignore_pr_labels:\n            labels = [label['name'] for label in pr_labels]\n            if any(label in ignore_pr_labels for label in labels):\n                labels_str = \", \".join(labels)\n                get_logger().info(f\"Ignoring PR with labels '{labels_str}' due to config.ignore_pr_labels settings\")\n                return False\n\n        # logic to ignore PRs with specific source or target branches\n        ignore_pr_source_branches = get_settings().get(\"CONFIG.IGNORE_PR_SOURCE_BRANCHES\", [])\n        ignore_pr_target_branches = get_settings().get(\"CONFIG.IGNORE_PR_TARGET_BRANCHES\", [])\n        if pull_request and (ignore_pr_source_branches or ignore_pr_target_branches):\n            if any(re.search(regex, source_branch) for regex in ignore_pr_source_branches):\n                get_logger().info(\n                    f\"Ignoring PR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings\")\n                return False\n            if any(re.search(regex, target_branch) for regex in ignore_pr_target_branches):\n                get_logger().info(\n                    f\"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings\")\n                return False\n    except Exception as e:\n        get_logger().error(f\"Failed 'should_process_pr_logic': {e}\")\n    return True\n\n# FastAPI app setup\nmiddleware = [Middleware(RawContextMiddleware)]\napp = FastAPI(middleware=middleware)\napp.include_router(router)\n\ndef start():\n    \"\"\"Start the Gitea webhook server\"\"\"\n    port = int(os.environ.get(\"PORT\", \"3000\"))\n    import uvicorn\n  
  uvicorn.run(app, host=\"0.0.0.0\", port=port)\n\nif __name__ == \"__main__\":\n    start()\n"
  },
  {
    "path": "pr_agent/servers/github_action_runner.py",
    "content": "import asyncio\nimport json\nimport os\nfrom typing import Union\n\nfrom pr_agent.agent.pr_agent import PRAgent\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider\nfrom pr_agent.git_providers.utils import apply_repo_settings\nfrom pr_agent.log import get_logger\nfrom pr_agent.servers.github_app import handle_line_comments\nfrom pr_agent.tools.pr_code_suggestions import PRCodeSuggestions\nfrom pr_agent.tools.pr_description import PRDescription\nfrom pr_agent.tools.pr_reviewer import PRReviewer\n\n\ndef is_true(value: Union[str, bool]) -> bool:\n    if isinstance(value, bool):\n        return value\n    if isinstance(value, str):\n        return value.lower() == 'true'\n    return False\n\n\ndef get_setting_or_env(key: str, default: Union[str, bool] = None) -> Union[str, bool]:\n    try:\n        value = get_settings().get(key, default)\n    except AttributeError:  # TBD still need to debug why this happens on GitHub Actions\n        value = os.getenv(key, None) or os.getenv(key.upper(), None) or os.getenv(key.lower(), None) or default\n    return value\n\n\nasync def run_action():\n    # Get environment variables\n    GITHUB_EVENT_NAME = os.environ.get('GITHUB_EVENT_NAME')\n    GITHUB_EVENT_PATH = os.environ.get('GITHUB_EVENT_PATH')\n    OPENAI_KEY = os.environ.get('OPENAI_KEY') or os.environ.get('OPENAI.KEY')\n    OPENAI_ORG = os.environ.get('OPENAI_ORG') or os.environ.get('OPENAI.ORG')\n    GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')\n    # get_settings().set(\"CONFIG.PUBLISH_OUTPUT_PROGRESS\", False)\n\n    # Check if required environment variables are set\n    if not GITHUB_EVENT_NAME:\n        print(\"GITHUB_EVENT_NAME not set\")\n        return\n    if not GITHUB_EVENT_PATH:\n        print(\"GITHUB_EVENT_PATH not set\")\n        return\n    if not GITHUB_TOKEN:\n        print(\"GITHUB_TOKEN not set\")\n        return\n\n    # Set the environment variables in the settings\n    if OPENAI_KEY:\n 
       get_settings().set(\"OPENAI.KEY\", OPENAI_KEY)\n    else:\n        # Might not be set if the user is using models not from OpenAI\n        print(\"OPENAI_KEY not set\")\n    if OPENAI_ORG:\n        get_settings().set(\"OPENAI.ORG\", OPENAI_ORG)\n    get_settings().set(\"GITHUB.USER_TOKEN\", GITHUB_TOKEN)\n    get_settings().set(\"GITHUB.DEPLOYMENT_TYPE\", \"user\")\n    enable_output = get_setting_or_env(\"GITHUB_ACTION_CONFIG.ENABLE_OUTPUT\", True)\n    get_settings().set(\"GITHUB_ACTION_CONFIG.ENABLE_OUTPUT\", enable_output)\n\n    # Load the event payload\n    try:\n        with open(GITHUB_EVENT_PATH, 'r') as f:\n            event_payload = json.load(f)\n    except json.decoder.JSONDecodeError as e:\n        print(f\"Failed to parse JSON: {e}\")\n        return\n\n    try:\n        get_logger().info(\"Applying repo settings\")\n        pr_url = event_payload.get(\"pull_request\", {}).get(\"html_url\")\n        if pr_url:\n            apply_repo_settings(pr_url)\n            get_logger().info(f\"enable_custom_labels: {get_settings().config.enable_custom_labels}\")\n    except Exception as e:\n        get_logger().info(f\"github action: failed to apply repo settings: {e}\")\n\n    # Append the response language in the extra instructions\n    try:\n        response_language = get_settings().config.get('response_language', 'en-us')\n        if response_language.lower() != 'en-us':\n            get_logger().info(f'User has set the response language to: {response_language}')\n\n            lang_instruction_text = f\"Your response MUST be written in the language corresponding to locale code: '{response_language}'. 
This is crucial.\"\n            separator_text = \"\\n======\\n\\nIn addition, \"\n\n            for key in get_settings():\n                setting = get_settings().get(key)\n                if str(type(setting)) == \"<class 'dynaconf.utils.boxing.DynaBox'>\":\n                    if key.lower() in ['pr_description', 'pr_code_suggestions', 'pr_reviewer']:\n                        if hasattr(setting, 'extra_instructions'):\n                            extra_instructions = setting.extra_instructions\n\n                            if lang_instruction_text not in str(extra_instructions):\n                                updated_instructions = (\n                                    str(extra_instructions) + separator_text + lang_instruction_text\n                                    if extra_instructions else lang_instruction_text\n                                )\n                                setting.extra_instructions = updated_instructions\n    except Exception as e:\n        get_logger().info(f\"github action: failed to apply language-specific instructions: {e}\")\n    # Handle pull request opened event\n    if GITHUB_EVENT_NAME == \"pull_request\" or GITHUB_EVENT_NAME == \"pull_request_target\":\n        action = event_payload.get(\"action\")\n\n        # Retrieve the list of actions from the configuration\n        pr_actions = get_settings().get(\"GITHUB_ACTION_CONFIG.PR_ACTIONS\", [\"opened\", \"reopened\", \"ready_for_review\", \"review_requested\"])\n\n        if action in pr_actions:\n            pr_url = event_payload.get(\"pull_request\", {}).get(\"url\")\n            if pr_url:\n                # legacy - supporting both GITHUB_ACTION and GITHUB_ACTION_CONFIG\n                auto_review = get_setting_or_env(\"GITHUB_ACTION.AUTO_REVIEW\", None)\n                if auto_review is None:\n                    auto_review = get_setting_or_env(\"GITHUB_ACTION_CONFIG.AUTO_REVIEW\", None)\n                auto_describe = 
get_setting_or_env(\"GITHUB_ACTION.AUTO_DESCRIBE\", None)\n                if auto_describe is None:\n                    auto_describe = get_setting_or_env(\"GITHUB_ACTION_CONFIG.AUTO_DESCRIBE\", None)\n                auto_improve = get_setting_or_env(\"GITHUB_ACTION.AUTO_IMPROVE\", None)\n                if auto_improve is None:\n                    auto_improve = get_setting_or_env(\"GITHUB_ACTION_CONFIG.AUTO_IMPROVE\", None)\n\n                # Set the configuration for auto actions\n                get_settings().config.is_auto_command = True  # Set the flag to indicate that the command is auto\n                get_settings().pr_description.final_update_message = False  # No final update message when auto_describe is enabled\n                get_logger().info(f\"Running auto actions: auto_describe={auto_describe}, auto_review={auto_review}, auto_improve={auto_improve}\")\n\n                # invoke by default all three tools\n                if auto_describe is None or is_true(auto_describe):\n                    await PRDescription(pr_url).run()\n                if auto_review is None or is_true(auto_review):\n                    await PRReviewer(pr_url).run()\n                if auto_improve is None or is_true(auto_improve):\n                    await PRCodeSuggestions(pr_url).run()\n        else:\n            get_logger().info(f\"Skipping action: {action}\")\n\n    # Handle issue comment event\n    elif GITHUB_EVENT_NAME == \"issue_comment\" or GITHUB_EVENT_NAME == \"pull_request_review_comment\":\n        action = event_payload.get(\"action\")\n        if action in [\"created\", \"edited\"]:\n            comment_body = event_payload.get(\"comment\", {}).get(\"body\")\n            try:\n                if GITHUB_EVENT_NAME == \"pull_request_review_comment\":\n                    if '/ask' in comment_body:\n                        comment_body = handle_line_comments(event_payload, comment_body)\n            except Exception as e:\n                
get_logger().error(f\"Failed to handle line comments: {e}\")\n                return\n            if comment_body:\n                is_pr = False\n                disable_eyes = False\n                # check if issue is pull request\n                if event_payload.get(\"issue\", {}).get(\"pull_request\"):\n                    url = event_payload.get(\"issue\", {}).get(\"pull_request\", {}).get(\"url\")\n                    is_pr = True\n                elif event_payload.get(\"comment\", {}).get(\"pull_request_url\"):  # for 'pull_request_review_comment\n                    url = event_payload.get(\"comment\", {}).get(\"pull_request_url\")\n                    is_pr = True\n                    disable_eyes = True\n                else:\n                    url = event_payload.get(\"issue\", {}).get(\"url\")\n\n                if url:\n                    body = comment_body.strip().lower()\n                    comment_id = event_payload.get(\"comment\", {}).get(\"id\")\n                    provider = get_git_provider()(pr_url=url)\n                    if is_pr:\n                        await PRAgent().handle_request(\n                            url, body, notify=lambda: provider.add_eyes_reaction(\n                                comment_id, disable_eyes=disable_eyes\n                            )\n                        )\n                    else:\n                        await PRAgent().handle_request(url, body)\n\n\nif __name__ == '__main__':\n    asyncio.run(run_action())\n"
  },
  {
    "path": "pr_agent/servers/github_app.py",
    "content": "import asyncio.locks\nimport copy\nimport os\nimport re\nimport uuid\nfrom typing import Any, Dict, Tuple\n\nimport uvicorn\nfrom fastapi import APIRouter, FastAPI, HTTPException, Request, Response\nfrom starlette.background import BackgroundTasks\nfrom starlette.middleware import Middleware\nfrom starlette_context import context\nfrom starlette_context.middleware import RawContextMiddleware\n\nfrom pr_agent.agent.pr_agent import PRAgent\nfrom pr_agent.algo.utils import update_settings_from_args\nfrom pr_agent.config_loader import get_settings, global_settings\nfrom pr_agent.git_providers import (get_git_provider,\n                                    get_git_provider_with_context)\nfrom pr_agent.git_providers.git_provider import IncrementalPR\nfrom pr_agent.git_providers.utils import apply_repo_settings\nfrom pr_agent.identity_providers import get_identity_provider\nfrom pr_agent.identity_providers.identity_provider import Eligibility\nfrom pr_agent.log import LoggingFormat, get_logger, setup_logger\nfrom pr_agent.servers.utils import DefaultDictWithTimeout, verify_signature\n\nsetup_logger(fmt=LoggingFormat.JSON, level=get_settings().get(\"CONFIG.LOG_LEVEL\", \"DEBUG\"))\nbase_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))\nbuild_number_path = os.path.join(base_path, \"build_number.txt\")\nif os.path.exists(build_number_path):\n    with open(build_number_path) as f:\n        build_number = f.read().strip()\nelse:\n    build_number = \"unknown\"\nrouter = APIRouter()\n\n\n@router.post(\"/api/v1/github_webhooks\")\nasync def handle_github_webhooks(background_tasks: BackgroundTasks, request: Request, response: Response):\n    \"\"\"\n    Receives and processes incoming GitHub webhook requests.\n    Verifies the request signature, parses the request body, and passes it to the handle_request function for further\n    processing.\n    \"\"\"\n    get_logger().debug(\"Received a GitHub webhook\")\n\n    body = await 
get_body(request)\n\n    installation_id = body.get(\"installation\", {}).get(\"id\")\n    context[\"installation_id\"] = installation_id\n    context[\"settings\"] = copy.deepcopy(global_settings)\n    context[\"git_provider\"] = {}\n    background_tasks.add_task(handle_request, body, event=request.headers.get(\"X-GitHub-Event\", None))\n    return {}\n\n\n@router.post(\"/api/v1/marketplace_webhooks\")\nasync def handle_marketplace_webhooks(request: Request, response: Response):\n    body = await get_body(request)\n    get_logger().info(f'Request body:\\n{body}')\n\n\nasync def get_body(request):\n    try:\n        body = await request.json()\n    except Exception as e:\n        get_logger().error(\"Error parsing request body\", artifact={'error': e})\n        raise HTTPException(status_code=400, detail=\"Error parsing request body\") from e\n    webhook_secret = getattr(get_settings().github, 'webhook_secret', None)\n    if webhook_secret:\n        body_bytes = await request.body()\n        signature_header = request.headers.get('x-hub-signature-256', None)\n        verify_signature(body_bytes, webhook_secret, signature_header)\n    return body\n\n\n_duplicate_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_trigger_pending_tasks_ttl)\n_pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_trigger_pending_tasks_ttl)\n\nasync def handle_comments_on_pr(body: Dict[str, Any],\n                                event: str,\n                                sender: str,\n                                sender_id: str,\n                                action: str,\n                                log_context: Dict[str, Any],\n                                agent: PRAgent):\n    if \"comment\" not in body:\n        return {}\n    comment_body = body.get(\"comment\", {}).get(\"body\")\n    if comment_body and isinstance(comment_body, str) and not 
comment_body.lstrip().startswith(\"/\"):\n        if '/ask' in comment_body and comment_body.strip().startswith('> ![image]'):\n            comment_body_split = comment_body.split('/ask')\n            comment_body = '/ask' + comment_body_split[1] +' \\n' +comment_body_split[0].strip().lstrip('>')\n            get_logger().info(f\"Reformatting comment_body so command is at the beginning: {comment_body}\")\n        else:\n            get_logger().info(\"Ignoring comment not starting with /\")\n            return {}\n    disable_eyes = False\n    if \"issue\" in body and \"pull_request\" in body[\"issue\"] and \"url\" in body[\"issue\"][\"pull_request\"]:\n        api_url = body[\"issue\"][\"pull_request\"][\"url\"]\n    elif \"comment\" in body and \"pull_request_url\" in body[\"comment\"]:\n        api_url = body[\"comment\"][\"pull_request_url\"]\n        try:\n            if ('/ask' in comment_body and\n                    'subject_type' in body[\"comment\"] and body[\"comment\"][\"subject_type\"] == \"line\"):\n                # comment on a code line in the \"files changed\" tab\n                comment_body = handle_line_comments(body, comment_body)\n                disable_eyes = True\n        except Exception as e:\n            get_logger().error(\"Failed to get log context\", artifact={'error': e})\n    else:\n        return {}\n    log_context[\"api_url\"] = api_url\n    comment_id = body.get(\"comment\", {}).get(\"id\")\n    provider = get_git_provider_with_context(pr_url=api_url)\n    with get_logger().contextualize(**log_context):\n        if get_identity_provider().verify_eligibility(\"github\", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:\n            get_logger().info(f\"Processing comment on PR {api_url=}, comment_body={comment_body}\")\n            await agent.handle_request(api_url, comment_body,\n                        notify=lambda: provider.add_eyes_reaction(comment_id, disable_eyes=disable_eyes))\n        else:\n            
get_logger().info(f\"User {sender=} is not eligible to process comment on PR {api_url=}\")\n\nasync def handle_new_pr_opened(body: Dict[str, Any],\n                               event: str,\n                               sender: str,\n                               sender_id: str,\n                               action: str,\n                               log_context: Dict[str, Any],\n                               agent: PRAgent):\n    title = body.get(\"pull_request\", {}).get(\"title\", \"\")\n\n    pull_request, api_url = _check_pull_request_event(action, body, log_context)\n    if not (pull_request and api_url):\n        get_logger().info(f\"Invalid PR event: {action=} {api_url=}\")\n        return {}\n    if action in get_settings().github_app.handle_pr_actions:  # ['opened', 'reopened', 'ready_for_review']\n        # logic to ignore PRs with specific titles (e.g. \"[Auto] ...\")\n        apply_repo_settings(api_url)\n        if get_identity_provider().verify_eligibility(\"github\", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:\n            await _perform_auto_commands_github(\"pr_commands\", agent, body, api_url, log_context)\n        else:\n            get_logger().info(f\"User {sender=} is not eligible to process PR {api_url=}\")\n\nasync def handle_push_trigger_for_new_commits(body: Dict[str, Any],\n                        event: str,\n                        sender: str,\n                        sender_id: str,\n                        action: str,\n                        log_context: Dict[str, Any],\n                        agent: PRAgent):\n    pull_request, api_url = _check_pull_request_event(action, body, log_context)\n    if not (pull_request and api_url):\n        return {}\n\n    apply_repo_settings(api_url) # we need to apply the repo settings to get the correct settings for the PR. 
This is quite expensive - a call to the git provider is made for each PR event.\n    if not get_settings().github_app.handle_push_trigger:\n        return {}\n\n    # TODO: do we still want to get the list of commits to filter bot/merge commits?\n    before_sha = body.get(\"before\")\n    after_sha = body.get(\"after\")\n    merge_commit_sha = pull_request.get(\"merge_commit_sha\")\n    if before_sha == after_sha:\n        return {}\n    if get_settings().github_app.push_trigger_ignore_merge_commits and after_sha == merge_commit_sha:\n        return {}\n\n    # Prevent triggering multiple times for subsequent push triggers when one is enough:\n    # The first push will trigger the processing, and if there's a second push in the meanwhile it will wait.\n    # Any more events will be discarded, because they will all trigger the exact same processing on the PR.\n    # We let the second event wait instead of discarding it because while the first event was being processed,\n    # more commits may have been pushed that led to the subsequent events,\n    # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting.\n    current_active_tasks = _duplicate_push_triggers.setdefault(api_url, 0)\n    max_active_tasks = 2 if get_settings().github_app.push_trigger_pending_tasks_backlog else 1\n    if current_active_tasks < max_active_tasks:\n        # first task can enter, and second tasks too if backlog is enabled\n        get_logger().info(\n            f\"Continue processing push trigger for {api_url=} because there are {current_active_tasks} active tasks\"\n        )\n        _duplicate_push_triggers[api_url] += 1\n    else:\n        get_logger().info(\n            f\"Skipping push trigger for {api_url=} because another event already triggered the same processing\"\n        )\n        return {}\n    async with _pending_task_duplicate_push_conditions[api_url]:\n        if current_active_tasks == 1:\n            # second task 
waits\n            get_logger().info(\n                f\"Waiting to process push trigger for {api_url=} because the first task is still in progress\"\n            )\n            await _pending_task_duplicate_push_conditions[api_url].wait()\n            get_logger().info(f\"Finished waiting to process push trigger for {api_url=} - continue with flow\")\n\n    try:\n        if get_identity_provider().verify_eligibility(\"github\", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:\n            get_logger().info(f\"Performing incremental review for {api_url=} because of {event=} and {action=}\")\n            await _perform_auto_commands_github(\"push_commands\", agent, body, api_url, log_context)\n\n    finally:\n        # release the waiting task block\n        async with _pending_task_duplicate_push_conditions[api_url]:\n            _pending_task_duplicate_push_conditions[api_url].notify(1)\n            _duplicate_push_triggers[api_url] -= 1\n\n\ndef handle_closed_pr(body, event, action, log_context):\n    pull_request = body.get(\"pull_request\", {})\n    is_merged = pull_request.get(\"merged\", False)\n    if not is_merged:\n        return\n    api_url = pull_request.get(\"url\", \"\")\n    pr_statistics = get_git_provider()(pr_url=api_url).calc_pr_statistics(pull_request)\n    log_context[\"api_url\"] = api_url\n    get_logger().info(\"PR-Agent statistics for closed PR\", analytics=True, pr_statistics=pr_statistics, **log_context)\n\n\ndef get_log_context(body, event, action, build_number):\n    sender = \"\"\n    sender_id = \"\"\n    sender_type = \"\"\n    try:\n        sender = body.get(\"sender\", {}).get(\"login\")\n        sender_id = body.get(\"sender\", {}).get(\"id\")\n        sender_type = body.get(\"sender\", {}).get(\"type\")\n        repo = body.get(\"repository\", {}).get(\"full_name\", \"\")\n        git_org = body.get(\"organization\", {}).get(\"login\", \"\")\n        installation_id = body.get(\"installation\", {}).get(\"id\", \"\")\n        
app_name = get_settings().get(\"CONFIG.APP_NAME\", \"Unknown\")\n        log_context = {\"action\": action, \"event\": event, \"sender\": sender, \"server_type\": \"github_app\",\n                       \"request_id\": uuid.uuid4().hex, \"build_number\": build_number, \"app_name\": app_name,\n                        \"repo\": repo, \"git_org\": git_org, \"installation_id\": installation_id}\n    except Exception as e:\n        get_logger().error(f\"Failed to get log context\", artifact={'error': e})\n        log_context = {}\n    return log_context, sender, sender_id, sender_type\n\n\ndef is_bot_user(sender, sender_type):\n    try:\n        # logic to ignore PRs opened by bot\n        if get_settings().get(\"GITHUB_APP.IGNORE_BOT_PR\", False) and sender_type == \"Bot\":\n            if 'pr-agent' not in sender:\n                get_logger().info(f\"Ignoring PR from '{sender=}' because it is a bot\")\n            return True\n    except Exception as e:\n        get_logger().error(f\"Failed 'is_bot_user' logic: {e}\")\n    return False\n\n\ndef should_process_pr_logic(body) -> bool:\n    try:\n        pull_request = body.get(\"pull_request\", {})\n        title = pull_request.get(\"title\", \"\")\n        pr_labels = pull_request.get(\"labels\", [])\n        source_branch = pull_request.get(\"head\", {}).get(\"ref\", \"\")\n        target_branch = pull_request.get(\"base\", {}).get(\"ref\", \"\")\n        sender = body.get(\"sender\", {}).get(\"login\")\n        repo_full_name = body.get(\"repository\", {}).get(\"full_name\", \"\")\n\n        # logic to ignore PRs from specific repositories\n        ignore_repos = get_settings().get(\"CONFIG.IGNORE_REPOSITORIES\", [])\n        if ignore_repos and repo_full_name:\n            if any(re.search(regex, repo_full_name) for regex in ignore_repos):\n                get_logger().info(f\"Ignoring PR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting\")\n                return False\n\n        # 
logic to ignore PRs from specific users\n        ignore_pr_users = get_settings().get(\"CONFIG.IGNORE_PR_AUTHORS\", [])\n        if ignore_pr_users and sender:\n            if any(re.search(regex, sender) for regex in ignore_pr_users):\n                get_logger().info(f\"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' setting\")\n                return False\n\n        # logic to ignore PRs with specific titles\n        if title:\n            ignore_pr_title_re = get_settings().get(\"CONFIG.IGNORE_PR_TITLE\", [])\n            if not isinstance(ignore_pr_title_re, list):\n                ignore_pr_title_re = [ignore_pr_title_re]\n            if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re):\n                get_logger().info(f\"Ignoring PR with title '{title}' due to config.ignore_pr_title setting\")\n                return False\n\n        # logic to ignore PRs with specific labels or source branches or target branches.\n        ignore_pr_labels = get_settings().get(\"CONFIG.IGNORE_PR_LABELS\", [])\n        if pr_labels and ignore_pr_labels:\n            labels = [label['name'] for label in pr_labels]\n            if any(label in ignore_pr_labels for label in labels):\n                labels_str = \", \".join(labels)\n                get_logger().info(f\"Ignoring PR with labels '{labels_str}' due to config.ignore_pr_labels settings\")\n                return False\n\n        # logic to ignore PRs with specific source or target branches\n        ignore_pr_source_branches = get_settings().get(\"CONFIG.IGNORE_PR_SOURCE_BRANCHES\", [])\n        ignore_pr_target_branches = get_settings().get(\"CONFIG.IGNORE_PR_TARGET_BRANCHES\", [])\n        if pull_request and (ignore_pr_source_branches or ignore_pr_target_branches):\n            if any(re.search(regex, source_branch) for regex in ignore_pr_source_branches):\n                get_logger().info(\n                    f\"Ignoring PR with source branch 
'{source_branch}' due to config.ignore_pr_source_branches settings\")\n                return False\n            if any(re.search(regex, target_branch) for regex in ignore_pr_target_branches):\n                get_logger().info(\n                    f\"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings\")\n                return False\n    except Exception as e:\n        get_logger().error(f\"Failed 'should_process_pr_logic': {e}\")\n    return True\n\n\nasync def handle_request(body: Dict[str, Any], event: str):\n    \"\"\"\n    Handle incoming GitHub webhook requests.\n\n    Args:\n        body: The request body.\n        event: The GitHub event type (e.g. \"pull_request\", \"issue_comment\", etc.).\n    \"\"\"\n    action = body.get(\"action\")  # \"created\", \"opened\", \"reopened\", \"ready_for_review\", \"review_requested\", \"synchronize\"\n    get_logger().debug(f\"Handling request with event: {event}, action: {action}\")\n    if not action:\n        get_logger().debug(f\"No action found in request body, exiting handle_request\")\n        return {}\n    agent = PRAgent()\n    log_context, sender, sender_id, sender_type = get_log_context(body, event, action, build_number)\n\n    # logic to ignore PRs opened by bot, PRs with specific titles, labels, source branches, or target branches\n    if is_bot_user(sender, sender_type) and 'check_run' not in body:\n        get_logger().debug(f\"Request ignored: bot user detected\")\n        return {}\n    if action != 'created' and 'check_run' not in body:\n        if not should_process_pr_logic(body):\n            get_logger().debug(f\"Request ignored: PR logic filtering\")\n            return {}\n\n    if 'check_run' in body:  # handle failed checks\n        # get_logger().debug(f'Request body', artifact=body, event=event) # added inside handle_checks\n        pass\n    # handle comments on PRs\n    elif action == 'created':\n        get_logger().debug(f'Request body', 
artifact=body, event=event)\n        await handle_comments_on_pr(body, event, sender, sender_id, action, log_context, agent)\n    # handle new PRs\n    elif event == 'pull_request' and action != 'synchronize' and action != 'closed':\n        get_logger().debug(f'Request body', artifact=body, event=event)\n        await handle_new_pr_opened(body, event, sender, sender_id, action, log_context, agent)\n    elif event == \"issue_comment\" and 'edited' in action:\n        pass # handle_checkbox_clicked\n    # handle pull_request event with synchronize action - \"push trigger\" for new commits\n    elif event == 'pull_request' and action == 'synchronize':\n        await handle_push_trigger_for_new_commits(body, event, sender,sender_id,  action, log_context, agent)\n    elif event == 'pull_request' and action == 'closed':\n        if get_settings().get(\"CONFIG.ANALYTICS_FOLDER\", \"\"):\n            handle_closed_pr(body, event, action, log_context)\n    else:\n        get_logger().info(f\"event {event=} action {action=} does not require any handling\")\n    return {}\n\n\ndef handle_line_comments(body: Dict, comment_body: [str, Any]) -> str:\n    if not comment_body:\n        return \"\"\n    start_line = body[\"comment\"][\"start_line\"]\n    end_line = body[\"comment\"][\"line\"]\n    start_line = end_line if not start_line else start_line\n    question = comment_body.replace('/ask', '').strip()\n    diff_hunk = body[\"comment\"][\"diff_hunk\"]\n    get_settings().set(\"ask_diff_hunk\", diff_hunk)\n    path = body[\"comment\"][\"path\"]\n    side = body[\"comment\"][\"side\"]\n    comment_id = body[\"comment\"][\"id\"]\n    if '/ask' in comment_body:\n        comment_body = f\"/ask_line --line_start={start_line} --line_end={end_line} --side={side} --file_name={path} --comment_id={comment_id} {question}\"\n    return comment_body\n\n\ndef _check_pull_request_event(action: str, body: dict, log_context: dict) -> Tuple[Dict[str, Any], str]:\n    invalid_result = {}, 
\"\"\n    pull_request = body.get(\"pull_request\")\n    if not pull_request:\n        return invalid_result\n    api_url = pull_request.get(\"url\")\n    if not api_url:\n        return invalid_result\n    log_context[\"api_url\"] = api_url\n    if pull_request.get(\"draft\", True) or pull_request.get(\"state\") != \"open\":\n        return invalid_result\n    if action in (\"review_requested\", \"synchronize\") and pull_request.get(\"created_at\") == pull_request.get(\"updated_at\"):\n        # avoid double reviews when opening a PR for the first time\n        return invalid_result\n    return pull_request, api_url\n\n\nasync def _perform_auto_commands_github(commands_conf: str, agent: PRAgent, body: dict, api_url: str,\n                                        log_context: dict):\n    apply_repo_settings(api_url)\n    if commands_conf == \"pr_commands\" and get_settings().config.disable_auto_feedback:  # auto commands for PR, and auto feedback is disabled\n        get_logger().info(f\"Auto feedback is disabled, skipping auto commands for PR {api_url=}\")\n        return\n    if not should_process_pr_logic(body): # Here we already updated the configuration with the repo settings\n        return {}\n    commands = get_settings().get(f\"github_app.{commands_conf}\")\n    if not commands:\n        get_logger().info(f\"New PR, but no auto commands configured\")\n        return\n    get_settings().set(\"config.is_auto_command\", True)\n    for command in commands:\n        split_command = command.split(\" \")\n        command = split_command[0]\n        args = split_command[1:]\n        other_args = update_settings_from_args(args)\n        new_command = ' '.join([command] + other_args)\n        get_logger().info(f\"{commands_conf}. 
Performing auto command '{new_command}', for {api_url=}\")\n        await agent.handle_request(api_url, new_command)\n\n\n@router.get(\"/\")\nasync def root():\n    return {\"status\": \"ok\"}\n\n\nif get_settings().github_app.override_deployment_type:\n    # Override the deployment type to app\n    get_settings().set(\"GITHUB.DEPLOYMENT_TYPE\", \"app\")\n# get_settings().set(\"CONFIG.PUBLISH_OUTPUT_PROGRESS\", False)\nmiddleware = [Middleware(RawContextMiddleware)]\napp = FastAPI(middleware=middleware)\napp.include_router(router)\n\n\ndef start():\n    uvicorn.run(app, host=\"0.0.0.0\", port=int(os.environ.get(\"PORT\", \"3000\")))\n\n\nif __name__ == '__main__':\n    start()\n"
  },
  {
    "path": "pr_agent/servers/github_lambda_webhook.py",
    "content": "from fastapi import FastAPI\nfrom mangum import Mangum\nfrom starlette.middleware import Middleware\nfrom starlette_context.middleware import RawContextMiddleware\n\nfrom pr_agent.servers.github_app import router\n\ntry:\n    from pr_agent.config_loader import apply_secrets_manager_config\n    apply_secrets_manager_config()\nexcept Exception as e:\n    try:\n        from pr_agent.log import get_logger\n        get_logger().debug(f\"AWS Secrets Manager initialization failed, falling back to environment variables: {e}\")\n    except:\n        # Fail completely silently if log module is not available\n        pass\n\nmiddleware = [Middleware(RawContextMiddleware)]\napp = FastAPI(middleware=middleware)\napp.include_router(router)\n\nhandler = Mangum(app, lifespan=\"off\")\n\n\ndef lambda_handler(event, context):\n    return handler(event, context)"
  },
  {
    "path": "pr_agent/servers/github_polling.py",
    "content": "import asyncio\nimport multiprocessing\nimport time\nimport traceback\nfrom collections import deque\nfrom datetime import datetime, timezone\n\nimport aiohttp\nimport requests\n\nfrom pr_agent.agent.pr_agent import PRAgent\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider\nfrom pr_agent.log import LoggingFormat, get_logger, setup_logger\n\nsetup_logger(fmt=LoggingFormat.JSON, level=get_settings().get(\"CONFIG.LOG_LEVEL\", \"DEBUG\"))\nNOTIFICATION_URL = \"https://api.github.com/notifications\"\n\n\nasync def mark_notification_as_read(headers, notification, session):\n    async with session.patch(\n            f\"https://api.github.com/notifications/threads/{notification['id']}\",\n            headers=headers) as mark_read_response:\n        if mark_read_response.status != 205:\n            get_logger().error(\n                f\"Failed to mark notification as read. Status code: {mark_read_response.status}\")\n\n\ndef now() -> str:\n    \"\"\"\n    Get the current UTC time in ISO 8601 format.\n\n    Returns:\n        str: The current UTC time in ISO 8601 format.\n    \"\"\"\n    now_utc = datetime.now(timezone.utc).isoformat()\n    now_utc = now_utc.replace(\"+00:00\", \"Z\")\n    return now_utc\n\nasync def async_handle_request(pr_url, rest_of_comment, comment_id, git_provider):\n    agent = PRAgent()\n    success = await agent.handle_request(\n        pr_url,\n        rest_of_comment,\n        notify=lambda: git_provider.add_eyes_reaction(comment_id)\n    )\n    return success\n\ndef run_handle_request(pr_url, rest_of_comment, comment_id, git_provider):\n    return asyncio.run(async_handle_request(pr_url, rest_of_comment, comment_id, git_provider))\n\n\ndef process_comment_sync(pr_url, rest_of_comment, comment_id):\n    try:\n        # Run the async handle_request in a separate function\n        git_provider = get_git_provider()(pr_url=pr_url)\n        success = run_handle_request(pr_url, 
rest_of_comment, comment_id, git_provider)\n    except Exception as e:\n        get_logger().error(f\"Error processing comment: {e}\", artifact={\"traceback\": traceback.format_exc()})\n\n\nasync def process_comment(pr_url, rest_of_comment, comment_id):\n    try:\n        git_provider = get_git_provider()(pr_url=pr_url)\n        git_provider.set_pr(pr_url)\n        agent = PRAgent()\n        success = await agent.handle_request(\n            pr_url,\n            rest_of_comment,\n            notify=lambda: git_provider.add_eyes_reaction(comment_id)\n        )\n        get_logger().info(f\"Finished processing comment for PR: {pr_url}\")\n    except Exception as e:\n        get_logger().error(f\"Error processing comment: {e}\", artifact={\"traceback\": traceback.format_exc()})\n\nasync def is_valid_notification(notification, headers, handled_ids, session, user_id):\n    try:\n        if 'reason' in notification and notification['reason'] == 'mention':\n            if 'subject' in notification and notification['subject']['type'] == 'PullRequest':\n                pr_url = notification['subject']['url']\n                latest_comment = notification['subject']['latest_comment_url']\n                if not latest_comment or not isinstance(latest_comment, str):\n                    get_logger().debug(f\"no latest_comment\")\n                    return False, handled_ids\n                async with session.get(latest_comment, headers=headers) as comment_response:\n                    check_prev_comments = False\n                    user_tag = \"@\" + user_id\n                    if comment_response.status == 200:\n                        comment = await comment_response.json()\n                        if 'id' in comment:\n                            if comment['id'] in handled_ids:\n                                get_logger().debug(f\"comment['id'] in handled_ids\")\n                                return False, handled_ids\n                            else:\n            
                    handled_ids.add(comment['id'])\n                        if 'user' in comment and 'login' in comment['user']:\n                            if comment['user']['login'] == user_id:\n                                get_logger().debug(f\"comment['user']['login'] == user_id\")\n                                check_prev_comments = True\n                        comment_body = comment.get('body', '')\n                        if not comment_body:\n                            get_logger().debug(f\"no comment_body\")\n                            check_prev_comments = True\n                        else:\n                            if user_tag not in comment_body:\n                                get_logger().debug(f\"user_tag not in comment_body\")\n                                check_prev_comments = True\n                            else:\n                                get_logger().info(f\"Polling, pr_url: {pr_url}\",\n                                                  artifact={\"comment\": comment_body})\n\n                        if not check_prev_comments:\n                            return True, handled_ids, comment, comment_body, pr_url, user_tag\n                        else: # we could not find the user tag in the latest comment. 
Check previous comments\n                            # get all comments in the PR\n                            requests_url = f\"{pr_url}/comments\".replace(\"pulls\", \"issues\")\n                            comments_response = requests.get(requests_url, headers=headers)\n                            comments = comments_response.json()[::-1]\n                            max_comment_to_scan = 4\n                            for comment in comments[:max_comment_to_scan]:\n                                if 'user' in comment and 'login' in comment['user']:\n                                    if comment['user']['login'] == user_id:\n                                        continue\n                                comment_body = comment.get('body', '')\n                                if not comment_body:\n                                    continue\n                                if user_tag in comment_body:\n                                    get_logger().info(\"found user tag in previous comments\")\n                                    get_logger().info(f\"Polling, pr_url: {pr_url}\",\n                                                      artifact={\"comment\": comment_body})\n                                    return True, handled_ids, comment, comment_body, pr_url, user_tag\n\n                            get_logger().warning(f\"Failed to fetch comments for PR: {pr_url}\",\n                                                    artifact={\"comments\": comments})\n                            return False, handled_ids\n\n        return False, handled_ids\n    except Exception as e:\n        get_logger().exception(f\"Error processing polling notification\",\n                               artifact={\"notification\": notification, \"error\": e})\n        return False, handled_ids\n\n\n\nasync def polling_loop():\n    \"\"\"\n    Polls for notifications and handles them accordingly.\n    \"\"\"\n    handled_ids = set()\n    since = [now()]\n    last_modified = [None]\n  
  git_provider = get_git_provider()()\n    user_id = git_provider.get_user_id()\n    get_settings().set(\"CONFIG.PUBLISH_OUTPUT_PROGRESS\", False)\n    get_settings().set(\"pr_description.publish_description_as_comment\", True)\n\n    try:\n        deployment_type = get_settings().github.deployment_type\n        token = get_settings().github.user_token\n    except AttributeError:\n        deployment_type = 'none'\n        token = None\n\n    if deployment_type != 'user':\n        raise ValueError(\"Deployment mode must be set to 'user' to get notifications\")\n    if not token:\n        raise ValueError(\"User token must be set to get notifications\")\n\n    async with aiohttp.ClientSession() as session:\n        while True:\n            try:\n                await asyncio.sleep(5)\n                headers = {\n                    \"Accept\": \"application/vnd.github.v3+json\",\n                    \"Authorization\": f\"Bearer {token}\"\n                }\n                params = {\n                    \"participating\": \"true\"\n                }\n                if since[0]:\n                    params[\"since\"] = since[0]\n                if last_modified[0]:\n                    headers[\"If-Modified-Since\"] = last_modified[0]\n\n                async with session.get(NOTIFICATION_URL, headers=headers, params=params) as response:\n                    if response.status == 200:\n                        if 'Last-Modified' in response.headers:\n                            last_modified[0] = response.headers['Last-Modified']\n                            since[0] = None\n                        notifications = await response.json()\n                        if not notifications:\n                            continue\n                        get_logger().info(f\"Received {len(notifications)} notifications\")\n                        task_queue = deque()\n                        for notification in notifications:\n                            if not notification:\n  
                              continue\n                            # mark notification as read\n                            await mark_notification_as_read(headers, notification, session)\n\n                            handled_ids.add(notification['id'])\n                            output = await is_valid_notification(notification, headers, handled_ids, session, user_id)\n                            if output[0]:\n                                _, handled_ids, comment, comment_body, pr_url, user_tag = output\n                                rest_of_comment = comment_body.split(user_tag)[1].strip()\n                                comment_id = comment['id']\n\n                                # Add to the task queue\n                                get_logger().info(\n                                    f\"Adding comment processing to task queue for PR, {pr_url}, comment_body: {comment_body}\")\n                                task_queue.append((process_comment_sync, (pr_url, rest_of_comment, comment_id)))\n                                get_logger().info(f\"Queued comment processing for PR: {pr_url}\")\n                            else:\n                                get_logger().debug(f\"Skipping comment processing for PR\")\n\n                        max_allowed_parallel_tasks = 10\n                        if task_queue:\n                            processes = []\n                            for i, (func, args) in enumerate(task_queue):  # Create  parallel tasks\n                                p = multiprocessing.Process(target=func, args=args)\n                                processes.append(p)\n                                p.start()\n                                if i > max_allowed_parallel_tasks:\n                                    get_logger().error(\n                                        f\"Dropping {len(task_queue) - max_allowed_parallel_tasks} tasks from polling session\")\n                                    break\n                         
   task_queue.clear()\n\n                            # Dont wait for all processes to complete. Move on to the next iteration\n                            # for p in processes:\n                            #     p.join()\n\n                    elif response.status != 304:\n                        print(f\"Failed to fetch notifications. Status code: {response.status}\")\n\n            except Exception as e:\n                get_logger().error(f\"Polling exception during processing of a notification: {e}\",\n                                   artifact={\"traceback\": traceback.format_exc()})\n\n\nif __name__ == '__main__':\n    asyncio.run(polling_loop())\n"
  },
  {
    "path": "pr_agent/servers/gitlab_lambda_webhook.py",
    "content": "from fastapi import FastAPI\nfrom mangum import Mangum\nfrom starlette.middleware import Middleware\nfrom starlette_context.middleware import RawContextMiddleware\n\nfrom pr_agent.servers.gitlab_webhook import router\n\ntry:\n    from pr_agent.config_loader import apply_secrets_manager_config\n    apply_secrets_manager_config()\nexcept Exception as e:\n    try:\n        from pr_agent.log import get_logger\n        get_logger().debug(f\"AWS Secrets Manager initialization failed, falling back to environment variables: {e}\")\n    except:\n        # Fail completely silently if log module is not available\n        pass\n\nmiddleware = [Middleware(RawContextMiddleware)]\napp = FastAPI(middleware=middleware)\napp.include_router(router)\n\nhandler = Mangum(app, lifespan=\"off\")\n\n\ndef lambda_handler(event, context):\n    return handler(event, context)"
  },
  {
    "path": "pr_agent/servers/gitlab_webhook.py",
    "content": "import copy\nimport json\nimport os\nimport re\nfrom datetime import datetime\n\nimport uvicorn\nfrom fastapi import APIRouter, FastAPI, Request, status\nfrom fastapi.encoders import jsonable_encoder\nfrom fastapi.responses import JSONResponse\nfrom starlette.background import BackgroundTasks\nfrom starlette.middleware import Middleware\nfrom starlette_context import context\nfrom starlette_context.middleware import RawContextMiddleware\n\nfrom pr_agent.agent.pr_agent import PRAgent\nfrom pr_agent.algo.utils import update_settings_from_args\nfrom pr_agent.config_loader import get_settings, global_settings\nfrom pr_agent.git_providers.utils import apply_repo_settings\nfrom pr_agent.log import LoggingFormat, get_logger, setup_logger\nfrom pr_agent.secret_providers import get_secret_provider\nfrom pr_agent.git_providers import get_git_provider_with_context\n\nsetup_logger(fmt=LoggingFormat.JSON, level=get_settings().get(\"CONFIG.LOG_LEVEL\", \"DEBUG\"))\nrouter = APIRouter()\n\nsecret_provider = get_secret_provider() if get_settings().get(\"CONFIG.SECRET_PROVIDER\") else None\n\n\nasync def handle_request(api_url: str, body: str, log_context: dict, sender_id: str, notify=None):\n    log_context[\"action\"] = body\n    log_context[\"event\"] = \"pull_request\" if body == \"/review\" else \"comment\"\n    log_context[\"api_url\"] = api_url\n    log_context[\"app_name\"] = get_settings().get(\"CONFIG.APP_NAME\", \"Unknown\")\n\n    with get_logger().contextualize(**log_context):\n        await PRAgent().handle_request(api_url, body, notify)\n\nasync def _perform_commands_gitlab(commands_conf: str, agent: PRAgent, api_url: str,\n                                   log_context: dict, data: dict):\n    apply_repo_settings(api_url)\n    if commands_conf == \"pr_commands\" and get_settings().config.disable_auto_feedback:  # auto commands for PR, and auto feedback is disabled\n        get_logger().info(f\"Auto feedback is disabled, skipping auto commands for PR 
{api_url=}\", **log_context)\n        return\n    if not should_process_pr_logic(data): # Here we already updated the configurations\n        return\n    commands = get_settings().get(f\"gitlab.{commands_conf}\", {})\n    get_settings().set(\"config.is_auto_command\", True)\n    for command in commands:\n        try:\n            split_command = command.split(\" \")\n            command = split_command[0]\n            args = split_command[1:]\n            other_args = update_settings_from_args(args)\n            new_command = ' '.join([command] + other_args)\n            get_logger().info(f\"Performing command: {new_command}\")\n            with get_logger().contextualize(**log_context):\n                await agent.handle_request(api_url, new_command)\n        except Exception as e:\n            get_logger().error(f\"Failed to perform command {command}: {e}\")\n\n\ndef is_bot_user(data) -> bool:\n    try:\n        # logic to ignore bot users (unlike Github, no direct flag for bot users in gitlab)\n        sender_name = data.get(\"user\", {}).get(\"name\", \"unknown\").lower()\n        bot_indicators = ['codium', 'bot_', 'bot-', '_bot', '-bot']\n        if any(indicator in sender_name for indicator in bot_indicators):\n            get_logger().info(f\"Skipping GitLab bot user: {sender_name}\")\n            return True\n    except Exception as e:\n        get_logger().error(f\"Failed 'is_bot_user' logic: {e}\")\n    return False\n\ndef is_draft(data) -> bool:\n    try:\n        if 'draft' in data.get('object_attributes', {}):\n            return data['object_attributes']['draft']\n\n        # for gitlab server version before 16\n        elif 'Draft:' in data.get('object_attributes', {}).get('title'):\n            return True\n    except Exception as e:\n        get_logger().error(f\"Failed 'is_draft' logic: {e}\")\n    return False\n\ndef is_draft_ready(data) -> bool:\n    try:\n        if 'draft' in data.get('changes', {}):\n            # Handle both boolean values 
and string values for compatibility\n            previous = data['changes']['draft']['previous']\n            current = data['changes']['draft']['current']\n\n            # Convert to boolean if they're strings\n            if isinstance(previous, str):\n                previous = previous.lower() == 'true'\n            if isinstance(current, str):\n                current = current.lower() == 'true'\n\n            if previous is True and current is False:\n                return True\n\n        # for gitlab server version before 16\n        elif 'title' in data.get('changes', {}):\n            if 'Draft:' in data['changes']['title']['previous'] and 'Draft:' not in data['changes']['title']['current']:\n                return True\n    except Exception as e:\n        get_logger().error(f\"Failed 'is_draft_ready' logic: {e}\")\n    return False\n\ndef should_process_pr_logic(data) -> bool:\n    try:\n        if not data.get('object_attributes', {}):\n            return False\n        title = data['object_attributes'].get('title')\n        sender = data.get(\"user\", {}).get(\"username\", \"\")\n        repo_full_name = data.get('project', {}).get('path_with_namespace', \"\")\n\n        # logic to ignore PRs from specific repositories\n        ignore_repos = get_settings().get(\"CONFIG.IGNORE_REPOSITORIES\", [])\n        if ignore_repos and repo_full_name:\n            if any(re.search(regex, repo_full_name) for regex in ignore_repos):\n                get_logger().info(f\"Ignoring MR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting\")\n                return False\n\n        # logic to ignore PRs from specific users\n        ignore_pr_users = get_settings().get(\"CONFIG.IGNORE_PR_AUTHORS\", [])\n        if ignore_pr_users and sender:\n            if any(re.search(regex, sender) for regex in ignore_pr_users):\n                get_logger().info(f\"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' settings\")\n              
  return False\n\n        # logic to ignore MRs for titles, labels and source, target branches.\n        ignore_mr_title = get_settings().get(\"CONFIG.IGNORE_PR_TITLE\", [])\n        ignore_mr_labels = get_settings().get(\"CONFIG.IGNORE_PR_LABELS\", [])\n        ignore_mr_source_branches = get_settings().get(\"CONFIG.IGNORE_PR_SOURCE_BRANCHES\", [])\n        ignore_mr_target_branches = get_settings().get(\"CONFIG.IGNORE_PR_TARGET_BRANCHES\", [])\n\n        #\n        if ignore_mr_source_branches:\n            source_branch = data['object_attributes'].get('source_branch')\n            if any(re.search(regex, source_branch) for regex in ignore_mr_source_branches):\n                get_logger().info(\n                    f\"Ignoring MR with source branch '{source_branch}' due to gitlab.ignore_mr_source_branches settings\")\n                return False\n\n        if ignore_mr_target_branches:\n            target_branch = data['object_attributes'].get('target_branch')\n            if any(re.search(regex, target_branch) for regex in ignore_mr_target_branches):\n                get_logger().info(\n                    f\"Ignoring MR with target branch '{target_branch}' due to gitlab.ignore_mr_target_branches settings\")\n                return False\n\n        if ignore_mr_labels:\n            labels = [label['title'] for label in data['object_attributes'].get('labels', [])]\n            if any(label in ignore_mr_labels for label in labels):\n                labels_str = \", \".join(labels)\n                get_logger().info(f\"Ignoring MR with labels '{labels_str}' due to gitlab.ignore_mr_labels settings\")\n                return False\n\n        if ignore_mr_title:\n            if any(re.search(regex, title) for regex in ignore_mr_title):\n                get_logger().info(f\"Ignoring MR with title '{title}' due to gitlab.ignore_mr_title settings\")\n                return False\n    except Exception as e:\n        get_logger().error(f\"Failed 
'should_process_pr_logic': {e}\")\n    return True\n\n\n@router.post(\"/webhook\")\nasync def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):\n    start_time = datetime.now()\n    request_json = await request.json()\n    context[\"settings\"] = copy.deepcopy(global_settings)\n\n    async def inner(data: dict):\n        log_context = {\"server_type\": \"gitlab_app\"}\n        get_logger().debug(\"Received a GitLab webhook\")\n        if request.headers.get(\"X-Gitlab-Token\") and secret_provider:\n            request_token = request.headers.get(\"X-Gitlab-Token\")\n            secret = secret_provider.get_secret(request_token)\n            if not secret:\n                get_logger().warning(f\"Empty secret retrieved, request_token: {request_token}\")\n                return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED,\n                                    content=jsonable_encoder({\"message\": \"unauthorized\"}))\n            try:\n                secret_dict = json.loads(secret)\n                gitlab_token = secret_dict[\"gitlab_token\"]\n                log_context[\"token_id\"] = secret_dict.get(\"token_name\", secret_dict.get(\"id\", \"unknown\"))\n                context[\"settings\"].gitlab.personal_access_token = gitlab_token\n            except Exception as e:\n                get_logger().error(f\"Failed to validate secret {request_token}: {e}\")\n                return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({\"message\": \"unauthorized\"}))\n        elif get_settings().get(\"GITLAB.SHARED_SECRET\"):\n            secret = get_settings().get(\"GITLAB.SHARED_SECRET\")\n            if not request.headers.get(\"X-Gitlab-Token\") == secret:\n                get_logger().error(\"Failed to validate secret\")\n                return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({\"message\": \"unauthorized\"}))\n        else:\n            
get_logger().error(\"Failed to validate secret\")\n            return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({\"message\": \"unauthorized\"}))\n        gitlab_token = get_settings().get(\"GITLAB.PERSONAL_ACCESS_TOKEN\", None)\n        if not gitlab_token:\n            get_logger().error(\"No gitlab token found\")\n            return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({\"message\": \"unauthorized\"}))\n\n        get_logger().info(\"GitLab data\", artifact=data)\n        sender = data.get(\"user\", {}).get(\"username\", \"unknown\")\n        sender_id = data.get(\"user\", {}).get(\"id\", \"unknown\")\n\n        # ignore bot users\n        if is_bot_user(data):\n            return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({\"message\": \"success\"}))\n\n        log_context[\"sender\"] = sender\n        if data.get('object_kind') == 'merge_request':\n            # ignore MRs based on title, labels, source and target branches\n            if not should_process_pr_logic(data):\n                return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({\"message\": \"success\"}))\n            object_attributes = data.get('object_attributes', {})\n            if object_attributes.get('action') in ['open', 'reopen']:\n                url = object_attributes.get('url')\n                get_logger().info(f\"New merge request: {url}\")\n                if is_draft(data):\n                    get_logger().info(f\"Skipping draft MR: {url}\")\n                    return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({\"message\": \"success\"}))\n\n                await _perform_commands_gitlab(\"pr_commands\", PRAgent(), url, log_context, data)\n\n            # for push event triggered merge requests\n            elif object_attributes.get('action') == 'update' and object_attributes.get('oldrev'):\n                url = 
object_attributes.get('url')\n                get_logger().info(f\"New merge request: {url}\")\n                if is_draft(data):\n                    get_logger().info(f\"Skipping draft MR: {url}\")\n                    return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({\"message\": \"success\"}))\n\n                # Apply repo settings before checking push commands or handle_push_trigger\n                apply_repo_settings(url)\n\n                commands_on_push = get_settings().get(f\"gitlab.push_commands\", {})\n                handle_push_trigger = get_settings().get(f\"gitlab.handle_push_trigger\", False)\n                if not commands_on_push or not handle_push_trigger:\n                    get_logger().info(\"Push event, but no push commands found or push trigger is disabled\")\n                    return JSONResponse(status_code=status.HTTP_200_OK,\n                                        content=jsonable_encoder({\"message\": \"success\"}))\n\n                get_logger().debug(f'A push event has been received: {url}')\n                await _perform_commands_gitlab(\"push_commands\", PRAgent(), url, log_context, data)\n                \n            # for draft to ready triggered merge requests\n            elif object_attributes.get('action') == 'update' and is_draft_ready(data):\n                url = object_attributes.get('url')\n                get_logger().info(f\"Draft MR is ready: {url}\")\n\n                # same as open MR\n                await _perform_commands_gitlab(\"pr_commands\", PRAgent(), url, log_context, data)\n\n        elif data.get('object_kind') == 'note' and data.get('event_type') == 'note': # comment on MR\n            if 'merge_request' in data:\n                mr = data['merge_request']\n                url = mr.get('url')\n                comment_id = data.get('object_attributes', {}).get('id')\n                provider = get_git_provider_with_context(pr_url=url)\n\n                
get_logger().info(f\"A comment has been added to a merge request: {url}\")\n                body = data.get('object_attributes', {}).get('note')\n                if data.get('object_attributes', {}).get('type') == 'DiffNote' and '/ask' in body: # /ask_line\n                    body = handle_ask_line(body, data)\n\n                await handle_request(url, body, log_context, sender_id, notify=lambda: provider.add_eyes_reaction(comment_id))\n\n    background_tasks.add_task(inner, request_json)\n    end_time = datetime.now()\n    get_logger().info(f\"Processing time: {end_time - start_time}\", request=request_json)\n    return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({\"message\": \"success\"}))\n\n\ndef handle_ask_line(body, data):\n    try:\n        line_range_ = data['object_attributes']['position']['line_range']\n        # if line_range_['start']['type'] == 'new':\n        start_line = line_range_['start']['new_line']\n        end_line = line_range_['end']['new_line']\n        # else:\n        #     start_line = line_range_['start']['old_line']\n        #     end_line = line_range_['end']['old_line']\n        question = body.replace('/ask', '').strip()\n        path = data['object_attributes']['position']['new_path']\n        side = 'RIGHT'  # if line_range_['start']['type'] == 'new' else 'LEFT'\n        comment_id = data['object_attributes'][\"discussion_id\"]\n        get_logger().info(\"Handling line \")\n        body = f\"/ask_line --line_start={start_line} --line_end={end_line} --side={side} --file_name={path} --comment_id={comment_id} {question}\"\n    except Exception as e:\n        get_logger().error(f\"Failed to handle ask line comment: {e}\")\n    return body\n\n\n@router.get(\"/\")\nasync def root():\n    return {\"status\": \"ok\"}\n\ngitlab_url = get_settings().get(\"GITLAB.URL\", None)\nif not gitlab_url:\n    raise ValueError(\"GITLAB.URL is not set\")\nget_settings().config.git_provider = \"gitlab\"\nmiddleware = 
[Middleware(RawContextMiddleware)]\napp = FastAPI(middleware=middleware)\napp.include_router(router)\n\n\ndef start():\n    \"\"\"\n    Start the GitLab webhook server.\n\n    The server port can be configured via the PORT environment variable.\n    Defaults to 3000 if PORT is not set or invalid.\n    \"\"\"\n    raw_port = os.environ.get(\"PORT\")\n    try:\n        port = int(raw_port) if raw_port else 3000\n        if not (1 <= port <= 65535):\n            raise ValueError(f\"Port {port} is out of valid range\")\n        if raw_port:\n            get_logger().info(f\"Using custom PORT from environment: {port}\")\n    except ValueError as e:\n        get_logger().warning(f\"Invalid PORT environment variable ({e}), using default port 3000\")\n        port = 3000\n    uvicorn.run(app, host=\"0.0.0.0\", port=port)\n\n\nif __name__ == '__main__':\n    start()\n"
  },
  {
    "path": "pr_agent/servers/gunicorn_config.py",
    "content": "import multiprocessing\nimport os\n\n# from prometheus_client import multiprocess\n\n# Sample Gunicorn configuration file.\n\n#\n# Server socket\n#\n#   bind - The socket to bind.\n#\n#       A string of the form: 'HOST', 'HOST:PORT', 'unix:PATH'.\n#       An IP is a valid HOST.\n#\n#   backlog - The number of pending connections. This refers\n#       to the number of clients that can be waiting to be\n#       served. Exceeding this number results in the client\n#       getting an error when attempting to connect. It should\n#       only affect servers under significant load.\n#\n#       Must be a positive integer. Generally set in the 64-2048\n#       range.\n#\n\n# bind = '0.0.0.0:5000'\nbind = '0.0.0.0:3000'\nbacklog = 2048\n\n#\n# Worker processes\n#\n#   workers - The number of worker processes that this server\n#       should keep alive for handling requests.\n#\n#       A positive integer generally in the 2-4 x $(NUM_CORES)\n#       range. You'll want to vary this a bit to find the best\n#       for your particular application's work load.\n#\n#   worker_class - The type of workers to use. The default\n#       sync class should handle most 'normal' types of work\n#       loads. You'll want to read\n#       http://docs.gunicorn.org/en/latest/design.html#choosing-a-worker-type\n#       for information on when you might want to choose one\n#       of the other worker classes.\n#\n#       A string referring to a Python path to a subclass of\n#       gunicorn.workers.base.Worker. 
The default provided values\n#       can be seen at\n#       http://docs.gunicorn.org/en/latest/settings.html#worker-class\n#\n#   worker_connections - For the eventlet and gevent worker classes\n#       this limits the maximum number of simultaneous clients that\n#       a single process can handle.\n#\n#       A positive integer generally set to around 1000.\n#\n#   timeout - If a worker does not notify the master process in this\n#       number of seconds it is killed and a new worker is spawned\n#       to replace it.\n#\n#       Generally set to thirty seconds. Only set this noticeably\n#       higher if you're sure of the repercussions for sync workers.\n#       For the non sync workers it just means that the worker\n#       process is still communicating and is not tied to the length\n#       of time required to handle a single request.\n#\n#   keepalive - The number of seconds to wait for the next request\n#       on a Keep-Alive HTTP connection.\n#\n#       A positive integer. Generally set in the 1-5 seconds range.\n#\n\nif os.getenv('GUNICORN_WORKERS', None):\n    workers = int(os.getenv('GUNICORN_WORKERS'))\nelse:\n    cores = multiprocessing.cpu_count()\n    workers = cores * 2 + 1\nworker_connections = 1000\ntimeout = 240\nkeepalive = 2\n\n#\n#   spew - Install a trace function that spews every line of Python\n#       that is executed when running the server. 
This is the\n#       nuclear option.\n#\n#       True or False\n#\n\nspew = False\n\n#\n# Server mechanics\n#\n#   daemon - Detach the main Gunicorn process from the controlling\n#       terminal with a standard fork/fork sequence.\n#\n#       True or False\n#\n#   raw_env - Pass environment variables to the execution environment.\n#\n#   pidfile - The path to a pid file to write\n#\n#       A path string or None to not write a pid file.\n#\n#   user - Switch worker processes to run as this user.\n#\n#       A valid user id (as an integer) or the name of a user that\n#       can be retrieved with a call to pwd.getpwnam(value) or None\n#       to not change the worker process user.\n#\n#   group - Switch worker process to run as this group.\n#\n#       A valid group id (as an integer) or the name of a user that\n#       can be retrieved with a call to pwd.getgrnam(value) or None\n#       to change the worker processes group.\n#\n#   umask - A mask for file permissions written by Gunicorn. Note that\n#       this affects unix socket permissions.\n#\n#       A valid value for the os.umask(mode) call or a string\n#       compatible with int(value, 0) (0 means Python guesses\n#       the base, so values like \"0\", \"0xFF\", \"0022\" are valid\n#       for decimal, hex, and octal representations)\n#\n#   tmp_upload_dir - A directory to store temporary request data when\n#       requests are read. This will most likely be disappearing soon.\n#\n#       A path to a directory where the process owner can write. Or\n#       None to signal that Python should choose one on its own.\n#\n\ndaemon = False\nraw_env = []\npidfile = None\numask = 0\nuser = None\ngroup = None\ntmp_upload_dir = None\n\n#\n#   Logging\n#\n#   logfile - The path to a log file to write to.\n#\n#       A path string. 
\"-\" means log to stdout.\n#\n#   loglevel - The granularity of log output\n#\n#       A string of \"debug\", \"info\", \"warning\", \"error\", \"critical\"\n#\n\nerrorlog = '-'\nloglevel = 'info'\naccesslog = None\naccess_log_format = '%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\"'\n\n#\n# Process naming\n#\n#   proc_name - A base to use with setproctitle to change the way\n#       that Gunicorn processes are reported in the system process\n#       table. This affects things like 'ps' and 'top'. If you're\n#       going to be running more than one instance of Gunicorn you'll\n#       probably want to set a name to tell them apart. This requires\n#       that you install the setproctitle module.\n#\n#       A string or None to choose a default of something like 'gunicorn'.\n#\n\nproc_name = None\n\n\n#\n# Server hooks\n#\n#   post_fork - Called just after a worker has been forked.\n#\n#       A callable that takes a server and worker instance\n#       as arguments.\n#\n#   pre_fork - Called just prior to forking the worker subprocess.\n#\n#       A callable that accepts the same arguments as after_fork\n#\n#   pre_exec - Called just prior to forking off a secondary\n#       master process during things like config reloading.\n#\n#       A callable that takes a server instance as the sole argument.\n#\n"
  },
  {
    "path": "pr_agent/servers/help.py",
    "content": "class HelpMessage:\n    @staticmethod\n    def get_general_commands_text():\n       commands_text = \"> - **/review**: Request a review of your Pull Request.   \\n\" \\\n                \"> - **/describe**: Update the PR title and description based on the contents of the PR.   \\n\" \\\n                \"> - **/improve [--extended]**: Suggest code improvements. Extended mode provides a higher quality feedback.   \\n\" \\\n                \"> - **/ask \\\\<QUESTION\\\\>**: Ask a question about the PR.   \\n\" \\\n                \"> - **/update_changelog**: Update the changelog based on the PR's contents.   \\n\" \\\n                \"> - **/help_docs \\\\<QUESTION\\\\>**: Given a path to documentation (either for this repository or for a given one), ask a question.   \\n\" \\\n                \"> - **/add_docs**: Generate docstring for new components introduced in the PR.   \\n\" \\\n                \"> - **/generate_labels**: Generate labels for the PR based on the PR's contents.   \\n\\n\" \\\n                \">See the [tools guide](https://pr-agent-docs.codium.ai/tools/) for more details.\\n\" \\\n                \">To list the possible configuration parameters, add a **/config** comment.   \\n\"\n       return commands_text\n\n\n    @staticmethod\n    def get_general_bot_help_text():\n        output = f\"> To invoke the PR-Agent, add a comment using one of the following commands:  \\n{HelpMessage.get_general_commands_text()} \\n\"\n        return output\n\n    @staticmethod\n    def get_review_usage_guide():\n        output =\"**Overview:**\\n\"\n        output +=(\"The `review` tool scans the PR code changes, and generates a PR review which includes several types of feedbacks, such as possible PR issues, security threats and relevant test in the PR. 
More feedbacks can be [added](https://pr-agent-docs.codium.ai/tools/review/#general-configurations) by configuring the tool.\\n\\n\"\n                  \"The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on any PR.\\n\")\n        output +=\"\"\"\\\n- When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L23) related to the review tool (`pr_reviewer` section), use the following template:\n```\n/review --pr_reviewer.some_config1=... --pr_reviewer.some_config2=...\n```\n- With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template:\n```\n[pr_reviewer]\nsome_config1=...\nsome_config2=...\n```\n    \"\"\"\n\n        output += f\"\\n\\nSee the review [usage page](https://pr-agent-docs.codium.ai/tools/review/) for a comprehensive guide on using this tool.\\n\\n\"\n\n        return output\n\n\n\n    @staticmethod\n    def get_describe_usage_guide():\n        output = \"**Overview:**\\n\"\n        output += \"The `describe` tool scans the PR code changes, and generates a description for the PR - title, type, summary, walkthrough and labels. \"\n        output += \"The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on a PR.\\n\"\n        output += \"\"\"\\\n\nWhen commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L46) related to the describe tool (`pr_description` section), use the following template:\n```\n/describe --pr_description.some_config1=... 
--pr_description.some_config2=...\n```\nWith a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template:\n```\n[pr_description]\nsome_config1=...\nsome_config2=...\n```\n\"\"\"\n        output += \"\\n\\n<table>\"\n\n        # automation\n        output += \"<tr><td><details> <summary><strong> Enabling\\\\disabling automation </strong></summary><hr>\\n\\n\"\n        output += \"\"\"\\\n- When you first install the app, the [default mode](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) for the describe tool is:\n```\npr_commands = [\"/describe\", ...]\n```\nmeaning the `describe` tool will run automatically on every PR.\n\n- Markers are an alternative way to control the generated description, to give maximal control to the user. If you set:\n```\npr_commands = [\"/describe --pr_description.use_description_markers=true\", ...]\n```\nthe tool will replace every marker of the form `pr_agent:marker_name` in the PR description with the relevant content, where `marker_name` is one of the following:\n  - `type`: the PR type.\n  - `summary`: the PR summary.\n  - `walkthrough`: the PR walkthrough.\n  - `diagram`: the PR sequence diagram (if enabled).\n\nNote that when markers are enabled, if the original PR description does not contain any markers, the tool will not alter the description at all.\n\n\"\"\"\n        output += \"\\n\\n</details></td></tr>\\n\\n\"\n\n        # custom labels\n        output += \"<tr><td><details> <summary><strong> Custom labels </strong></summary><hr>\\n\\n\"\n        output += \"\"\"\\\nThe default labels of the `describe` tool are quite generic: [`Bug fix`, `Tests`, `Enhancement`, `Documentation`, `Other`].\n\nIf you specify [custom labels](https://pr-agent-docs.codium.ai/tools/describe/#handle-custom-labels-from-the-repos-labels-page) in the repo's labels page or via configuration file, you can get tailored labels 
for your use cases.\nExamples for custom labels:\n- `Main topic:performance` - pr_agent:The main topic of this PR is performance\n- `New endpoint` - pr_agent:A new endpoint was added in this PR\n- `SQL query` - pr_agent:A new SQL query was added in this PR\n- `Dockerfile changes` - pr_agent:The PR contains changes in the Dockerfile\n- ...\n\nThe list above is eclectic, and aims to give an idea of different possibilities. Define custom labels that are relevant for your repo and use cases.\nNote that Labels are not mutually exclusive, so you can add multiple label categories.\nMake sure to provide proper title, and a detailed and well-phrased description for each label, so the tool will know when to suggest it.\n\"\"\"\n        output += \"\\n\\n</details></td></tr>\\n\\n\"\n\n        # extra instructions\n        output += \"<tr><td><details> <summary><strong> Utilizing extra instructions</strong></summary><hr>\\n\\n\"\n        output += '''\\\nThe `describe` tool can be configured with extra instructions, to guide the model to a feedback tailored to the needs of your project.\n\nBe specific, clear, and concise in the instructions. With extra instructions, you are the prompter. Notice that the general structure of the description is fixed, and cannot be changed. Extra instructions can change the content or style of each sub-section of the PR description.\n\nExamples for extra instructions:\n```\n[pr_description]\nextra_instructions=\"\"\"\\\n- The PR title should be in the format: '<PR type>: <title>'\n- The title should be short and concise (up to 10 words)\n- ...\n\"\"\"\n```\nUse triple quotes to write multi-line instructions. 
Use bullet points to make the instructions more readable.\n'''\n        output += \"\\n\\n</details></td></tr>\\n\\n\"\n\n\n        # general\n        output += \"\\n\\n<tr><td><details> <summary><strong> More PR-Agent commands</strong></summary><hr> \\n\\n\"\n        output += HelpMessage.get_general_bot_help_text()\n        output += \"\\n\\n</details></td></tr>\\n\\n\"\n\n        output += \"</table>\"\n\n        output += f\"\\n\\nSee the [describe usage](https://pr-agent-docs.codium.ai/tools/describe/) page for a comprehensive guide on using this tool.\\n\\n\"\n\n        return output\n\n    @staticmethod\n    def get_ask_usage_guide():\n        output = \"**Overview:**\\n\"\n        output += \"\"\"\\\nThe `ask` tool answers questions about the PR, based on the PR code changes.\nIt can be invoked manually by commenting on any PR:\n```\n/ask \"...\"\n```\n\nNote that the tool does not have \"memory\" of previous questions, and answers each question independently.\nYou can ask questions about the entire PR, about specific code lines, or about an image related to the PR code changes.\n        \"\"\"\n        # output += \"\\n\\n<table>\"\n        #\n        # # # general\n        # # output += \"\\n\\n<tr><td><details> <summary><strong> More PR-Agent commands</strong></summary><hr> \\n\\n\"\n        # # output += HelpMessage.get_general_bot_help_text()\n        # # output += \"\\n\\n</details></td></tr>\\n\\n\"\n        #\n        # output += \"</table>\"\n\n        output += f\"\\n\\nSee the [ask usage](https://pr-agent-docs.codium.ai/tools/ask/) page for a comprehensive guide on using this tool.\\n\\n\"\n\n        return output\n\n\n    @staticmethod\n    def get_improve_usage_guide():\n        output = \"**Overview:**\\n\"\n        output += \"The code suggestions tool, named `improve`, scans the PR code changes, and automatically generates code suggestions for improving the PR.\"\n        output += \"The tool can be triggered 
[automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on a PR.\\n\"\n        output += \"\"\"\\\n- When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L78) related to the improve tool (`pr_code_suggestions` section), use the following template:\n\n```\n/improve --pr_code_suggestions.some_config1=... --pr_code_suggestions.some_config2=...\n```\n\n- With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template:\n\n```\n[pr_code_suggestions]\nsome_config1=...\nsome_config2=...\n```\n\n\"\"\"\n\n        output += f\"\\n\\nSee the improve [usage page](https://pr-agent-docs.codium.ai/tools/improve/) for a comprehensive guide on using this tool.\\n\\n\"\n\n        return output\n\n\n    @staticmethod\n    def get_help_docs_usage_guide():\n        output = \"**Overview:**\\n\"\n        output += \"\"\"\\\nThe help docs tool, named `help_docs`, answers a question based on a given relative path of documentation, either from the repository of this merge request or from a given one.\"\nIt can be invoked manually by commenting on any PR:\n```\n/help_docs \"...\"\n```\n\"\"\"\n        output += f\"\\n\\nSee the [help_docs usage](https://pr-agent-docs.codium.ai/tools/help_docs/) page for a comprehensive guide on using this tool.\\n\\n\"\n        return output\n"
  },
  {
    "path": "pr_agent/servers/utils.py",
    "content": "import hashlib\nimport hmac\nimport time\nfrom collections import defaultdict\nfrom typing import Any, Callable\n\nfrom fastapi import HTTPException\n\n\ndef verify_signature(payload_body, secret_token, signature_header):\n    \"\"\"Verify that the payload was sent from GitHub by validating SHA256.\n\n    Raise and return 403 if not authorized.\n\n    Args:\n        payload_body: original request body to verify (request.body())\n        secret_token: GitHub app webhook token (WEBHOOK_SECRET)\n        signature_header: header received from GitHub (x-hub-signature-256)\n    \"\"\"\n    if not signature_header:\n        raise HTTPException(status_code=403, detail=\"x-hub-signature-256 header is missing!\")\n    hash_object = hmac.new(secret_token.encode('utf-8'), msg=payload_body, digestmod=hashlib.sha256)\n    expected_signature = \"sha256=\" + hash_object.hexdigest()\n    if not hmac.compare_digest(expected_signature, signature_header):\n        raise HTTPException(status_code=403, detail=\"Request signatures didn't match!\")\n\n\nclass RateLimitExceeded(Exception):\n    \"\"\"Raised when the git provider API rate limit has been exceeded.\"\"\"\n    pass\n\n\nclass DefaultDictWithTimeout(defaultdict):\n    \"\"\"A defaultdict with a time-to-live (TTL).\"\"\"\n\n    def __init__(\n        self,\n        default_factory: Callable[[], Any] = None,\n        ttl: int = None,\n        refresh_interval: int = 60,\n        update_key_time_on_get: bool = True,\n        *args,\n        **kwargs,\n    ):\n        \"\"\"\n        Args:\n            default_factory: The default factory to use for keys that are not in the dictionary.\n            ttl: The time-to-live (TTL) in seconds.\n            refresh_interval: How often to refresh the dict and delete items older than the TTL.\n            update_key_time_on_get: Whether to update the access time of a key also on get (or only when set).\n        \"\"\"\n        super().__init__(default_factory, *args, 
**kwargs)\n        self.__key_times = dict()\n        self.__ttl = ttl\n        self.__refresh_interval = refresh_interval\n        self.__update_key_time_on_get = update_key_time_on_get\n        self.__last_refresh = self.__time() - self.__refresh_interval\n\n    @staticmethod\n    def __time():\n        return time.monotonic()\n\n    def __refresh(self):\n        if self.__ttl is None:\n            return\n        request_time = self.__time()\n        if request_time - self.__last_refresh > self.__refresh_interval:\n            return\n        to_delete = [key for key, key_time in self.__key_times.items() if request_time - key_time > self.__ttl]\n        for key in to_delete:\n            del self[key]\n        self.__last_refresh = request_time\n\n    def __getitem__(self, __key):\n        if self.__update_key_time_on_get:\n            self.__key_times[__key] = self.__time()\n        self.__refresh()\n        return super().__getitem__(__key)\n\n    def __setitem__(self, __key, __value):\n        self.__key_times[__key] = self.__time()\n        return super().__setitem__(__key, __value)\n\n    def __delitem__(self, __key):\n        del self.__key_times[__key]\n        return super().__delitem__(__key)\n"
  },
  {
    "path": "pr_agent/settings/.secrets_template.toml",
    "content": "# QUICKSTART:\n# Copy this file to .secrets.toml in the same folder.\n# The minimum workable settings - set openai.key to your API key.\n# Set github.deployment_type to \"user\" and github.user_token to your GitHub personal access token.\n# This will allow you to run the CLI scripts in the scripts/ folder and the github_polling server.\n#\n# See README for details about GitHub App deployment.\n\n[openai]\nkey = \"\"  # Acquire through https://platform.openai.com\n#org = \"<ORGANIZATION>\"  # Optional, may be commented out.\n# Uncomment the following for Azure OpenAI\n#api_type = \"azure\"\n#api_version = '2023-05-15'  # Check Azure documentation for the current API version\n#api_base = \"\"  # The base URL for your Azure OpenAI resource. e.g. \"https://<your resource name>.openai.azure.com\"\n#deployment_id = \"\"  # The deployment name you chose when you deployed the engine\n#fallback_deployments = []  # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id\n\n# OpenAI Flex Processing (optional, for cost savings)\n# [litellm]\n# extra_body='{\"processing_mode\": \"flex\"}'\n# model_id = \"\" # Optional: Custom inference profile ID for Amazon Bedrock\n\n[pinecone]\napi_key = \"...\"\nenvironment = \"gcp-starter\"\n\n[qdrant]\n# For Qdrant Cloud or self-hosted Qdrant\nurl = \"\"  # e.g., https://xxxxxxxx-xxxxxxxx.eu-central-1-0.aws.cloud.qdrant.io\napi_key = \"\"\n\n[anthropic]\nkey = \"\" # Optional, uncomment if you want to use Anthropic. Acquire through https://www.anthropic.com/\n\n[cohere]\nkey = \"\" # Optional, uncomment if you want to use Cohere. Acquire through https://dashboard.cohere.ai/\n\n[replicate]\nkey = \"\" # Optional, uncomment if you want to use Replicate. Acquire through https://replicate.com/\n\n[groq]\nkey = \"\" # Acquire through https://console.groq.com/keys\n\n[xai]\nkey = \"\" # Optional, uncomment if you want to use xAI. 
Acquire through https://console.x.ai/\n\n[huggingface]\nkey = \"\" # Optional, uncomment if you want to use Huggingface Inference API. Acquire through https://huggingface.co/docs/api-inference/quicktour\napi_base = \"\" # the base url for your huggingface inference endpoint\n\n[ollama]\napi_base = \"\" # the base url for your local Llama 2, Code Llama, and other models inference endpoint. Acquire through https://ollama.ai/\n\n[vertexai]\nvertex_project = \"\" # the google cloud platform project name for your vertexai deployment\nvertex_location = \"\" # the google cloud platform location for your vertexai deployment\n\n[google_ai_studio]\ngemini_api_key = \"\" # the google AI Studio API key\n\n[github]\n# ---- Set the following only for deployment type == \"user\"\nuser_token = \"\"  # A GitHub personal access token with 'repo' scope.\ndeployment_type = \"user\" #set to user by default\n\n# ---- Set the following only for deployment type == \"app\", see README for details.\nprivate_key = \"\"\"\\\n-----BEGIN RSA PRIVATE KEY-----\n<GITHUB PRIVATE KEY>\n-----END RSA PRIVATE KEY-----\n\"\"\"\napp_id = 123456  # The GitHub App ID, replace with your own.\nwebhook_secret = \"<WEBHOOK SECRET>\"  # Optional, may be commented out.\n\n[gitlab]\n# Gitlab personal access token\npersonal_access_token = \"\"\nshared_secret = \"\"  # webhook secret\n\n[gitea]\n# Gitea personal access token\npersonal_access_token=\"\"\nwebhook_secret=\"\" # webhook secret\n\n[bitbucket]\n# For Bitbucket authentication\nauth_type = \"bearer\"  # \"bearer\" or \"basic\"\n# For bearer token authentication\nbearer_token = \"\"\n# For basic authentication (uses token only)\nbasic_token = \"\"\n\n[bitbucket_server]\n# For Bitbucket Server bearer token\nbearer_token = \"\"\nwebhook_secret = \"\"\n\n# For Bitbucket app\napp_key = \"\"\nurl = \"\"\n\n[azure_devops]\n# For Azure devops personal access token\norg = \"\"\npat = \"\"\n\n[azure_devops_server]\n# For Azure devops Server basic auth - configured 
in the webhook creation\n# Optional, uncomment if you want to use Azure devops webhooks. Value assigned when you create the webhook\n# webhook_username = \"<basic auth user>\"\n# webhook_password = \"<basic auth password>\"\n\n[deepseek]\nkey = \"\"\n\n[deepinfra]\nkey = \"\"\n\n[azure_ad]\n# Azure AD authentication for OpenAI services\nclient_id = \"\"  # Your Azure AD application client ID\nclient_secret = \"\"  # Your Azure AD application client secret\ntenant_id = \"\"  # Your Azure AD tenant ID\napi_base = \"\"  # Your Azure OpenAI service base URL (e.g., https://openai.xyz.com/)\n\n[openrouter]\nkey = \"\"\napi_base = \"\"\n\n[aws]\nAWS_ACCESS_KEY_ID = \"\"\nAWS_SECRET_ACCESS_KEY = \"\"\nAWS_REGION_NAME = \"\"\n\n[aws_secrets_manager]\nsecret_arn = \"\"         # The ARN of the AWS Secrets Manager secret containing PR-Agent configuration\nregion_name = \"\"        # Optional: specific AWS region (defaults to AWS_REGION_NAME or Lambda region)\n"
  },
  {
    "path": "pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml",
    "content": "[pr_code_suggestions_prompt]\nsystem=\"\"\"You are PR-Reviewer, an AI specializing in Pull Request (PR) code analysis and suggestions.\n{%- if not focus_only_on_problems %}\nYour task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix possible bugs and problems, and enhance code quality and performance.\n{%- else %}\nYour task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix critical bugs and problems.\n{%- endif %}\n\nThe PR code diff will be in the following structured format:\n======\n## File: 'src/file1.py'\n{%- if is_ai_metadata %}\n### AI-generated changes summary:\n* ...\n* ...\n{%- endif %}\n\n@@ ... @@ def func1():\n__new hunk__\n unchanged code line0\n unchanged code line1\n+new code line2 added\n unchanged code line3\n__old hunk__\n unchanged code line0\n unchanged code line1\n-old code line2 removed\n unchanged code line3\n\n@@ ... @@ def func2():\n__new hunk__\n unchanged code line4\n+new code line5 added\n unchanged code line6\n\n## File: 'src/file2.py'\n...\n======\n\nImportant notes about the structured diff format above:\n1. Each PR code chunk is decoupled into separate '__new hunk__' and '__old hunk__' sections:\n  - The '__new hunk__' section shows the code chunk AFTER the PR changes.\n  - The '__old hunk__' section shows the code chunk BEFORE the PR changes. If no code was removed from the chunk, the '__old hunk__' section will be omitted.\n2. The diff uses line prefixes to show changes:\n  '+' → new line code added (will appear only in '__new hunk__')\n  '-' → line code removed (will appear only in '__old hunk__')\n  ' ' → unchanged context lines (will appear in both sections)\n{%- if is_ai_metadata %}\n3. When available, an AI-generated summary will precede each file's diff, with a high-level overview of the changes. 
Note that this summary may not be fully accurate or complete.\n{%- endif %}\n\n\nSpecific guidelines for generating code suggestions:\n{%- if not focus_only_on_problems %}\n- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions.\n{%- else %}\n- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions. Return fewer suggestions if no pertinent ones are applicable.\n{%- endif %}\n- DO NOT suggest implementing changes that are already present in the '+' lines compared to the '-' lines.\n- Focus your suggestions ONLY on new code introduced in the PR ('+' lines in '__new hunk__' sections).\n{%- if not focus_only_on_problems %}\n- Prioritize suggestions that address potential issues, critical problems, and bugs in the PR code. Avoid repeating changes already implemented in the PR. If no pertinent suggestions are applicable, return an empty list.\n- Don't suggest to add docstring, type hints, or comments, to remove unused imports, or to use more specific exception types.\n{%- else %}\n- Only give suggestions that address critical problems and bugs in the PR code. If no relevant suggestions are applicable, return an empty list.\n- DO NOT suggest the following:\n    - change packages version\n    - add missing import statement\n    - declare undefined variable, or remove unused variable\n    - use more specific exception types\n    - repeat changes already done in the PR code\n{%- endif %}\n- Be aware that your input consists only of partial code segments (PR diff code), not the complete codebase. Therefore, avoid making suggestions that might duplicate existing functionality, and refrain from questioning code elements (such as variable declarations or import statements) that may be defined elsewhere in the codebase.\n- When mentioning code elements (variables, names, or files) in your response, surround them with backticks (`). 
For example: \"verify that `user_id` is...\"\n\n{%- if extra_instructions %}\n\n\nExtra user-provided instructions (should be addressed with high priority):\n======\n{{ extra_instructions }}\n======\n{%- endif %}\n\n\nThe output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:\n=====\nclass CodeSuggestion(BaseModel):\n    relevant_file: str = Field(description=\"Full path of the relevant file\")\n    language: str = Field(description=\"Programming language used by the relevant file\")\n    existing_code: str = Field(description=\"A short code snippet, from a '__new hunk__' section after the PR changes, that the suggestion aims to enhance or fix. Include only complete code lines. Use ellipsis (...) for brevity if needed. This snippet should represent the specific PR code targeted for improvement.\")\n    suggestion_content: str = Field(description=\"An actionable suggestion to enhance, improve or fix the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise\")\n    improved_code: str = Field(description=\"A refined code snippet that replaces the 'existing_code' snippet after implementing the suggestion.\")\n    one_sentence_summary: str = Field(description=\"A concise, single-sentence overview (up to 6 words) of the suggested improvement. Focus on the 'what'. Be general, and avoid method or variable names.\")\n{%- if not focus_only_on_problems %}\n    label: str = Field(description=\"A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', 'typo'. Other relevant labels are also acceptable.\")\n{%- else %}\n    label: str = Field(description=\"A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'critical bug', 'general'. 
The 'general' section should be used for suggestions that address a major issue, but are not necessarily on a critical level.\")\n{%- endif %}\n\n\nclass PRCodeSuggestions(BaseModel):\n    code_suggestions: List[CodeSuggestion]\n=====\n\n\nExample output:\n```yaml\ncode_suggestions:\n- relevant_file: |\n    src/file1.py\n  language: |\n    python\n  existing_code: |\n    ...\n  suggestion_content: |\n    ...\n  improved_code: |\n    ...\n  one_sentence_summary: |\n    ...\n  label: |\n    ...\n```\n\nEach YAML output MUST be after a newline, indented, with block scalar indicator ('|').\n\"\"\"\n\nuser=\"\"\"--PR Info--\n\nTitle: '{{title}}'\n\n{%- if date %}\n\nToday's Date: {{date}}\n{%- endif %}\n\nThe PR Diff:\n======\n{{ diff_no_line_numbers|trim }}\n======\n\n{%- if duplicate_prompt_examples %}\n\n\nExample output:\n```yaml\ncode_suggestions:\n- relevant_file: |\n    src/file1.py\n  language: |\n    python\n  existing_code: |\n    ...\n  suggestion_content: |\n    ...\n  improved_code: |\n    ...\n  one_sentence_summary: |\n    ...\n  label: |\n    ...\n```\n(replace '...' with actual content)\n{%- endif %}\n\n\nResponse (should be a valid YAML, and nothing else):\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml",
    "content": "[pr_code_suggestions_prompt_not_decoupled]\nsystem=\"\"\"You are PR-Reviewer, an AI specializing in Pull Request (PR) code analysis and suggestions.\n{%- if not focus_only_on_problems %}\nYour task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix possible bugs and problems, and enhance code quality and performance.\n{%- else %}\nYour task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix critical bugs and problems.\n{%- endif %}\n\n\nThe PR code diff will be in the following structured format:\n======\n## File: 'src/file1.py'\n{%- if is_ai_metadata %}\n### AI-generated changes summary:\n* ...\n* ...\n{%- endif %}\n\n@@ ... @@ def func1():\n unchanged code line0\n unchanged code line1\n+new code line2\n-removed code line2\n unchanged code line3\n\n@@ ... @@ def func2():\n...\n\n\n## File: 'src/file2.py'\n...\n======\nThe diff structure above uses line prefixes to show changes:\n'+' → new line code added\n'-' → line code removed\n' ' → unchanged context lines\n{%- if is_ai_metadata %}\n\nWhen available, an AI-generated summary will precede each file's diff, with a high-level overview of the changes. Note that this summary may not be fully accurate or complete.\n{%- endif %}\n\n\nSpecific guidelines for generating code suggestions:\n{%- if not focus_only_on_problems %}\n- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions.\n{%- else %}\n- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions. Return less suggestions if no pertinent ones are applicable.\n{%- endif %}\n- Focus your suggestions ONLY on improving the new code introduced in the PR (lines starting with '+' in the diff). 
The lines in the diff starting with '-' are only for reference and should not be considered for suggestions.\n{%- if not focus_only_on_problems %}\n- Prioritize suggestions that address potential issues, critical problems, and bugs in the PR code. Avoid repeating changes already implemented in the PR. If no pertinent suggestions are applicable, return an empty list.\n- Don't suggest to add docstring, type hints, or comments, to remove unused imports, or to use more specific exception types.\n{%- else %}\n- Only give suggestions that address critical problems and bugs in the PR code. If no relevant suggestions are applicable, return an empty list.\n- DO NOT suggest the following:\n    - change packages version\n    - add missing import statement\n    - declare undefined variable, add missing imports, etc.\n    - use more specific exception types\n{%- endif %}\n- When mentioning code elements (variables, names, or files) in your response, surround them with markdown backticks (`). For example: \"verify that `user_id` is...\"\n- Note that you will only see partial code segments that were changed (diff hunks in a PR code), and not the entire codebase. Avoid suggestions that might duplicate existing functionality of the outer codebase. In addition, the absence of a definition, declaration, import, or initialization for any entity in the PR code is NEVER a basis for a suggestion.\n- Also note that if the code ends at an opening brace or statement that begins a new scope (like 'if', 'for', 'try'), don't treat it as incomplete. 
Instead, acknowledge the visible scope boundary and analyze only the code shown.\n\n{%- if extra_instructions %}\n\n\nExtra user-provided instructions (should be addressed with high priority):\n======\n{{ extra_instructions }}\n======\n{%- endif %}\n\n\nThe output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:\n=====\nclass CodeSuggestion(BaseModel):\n    relevant_file: str = Field(description=\"Full path of the relevant file\")\n    language: str = Field(description=\"Programming language used by the relevant file\")\n    existing_code: str = Field(description=\"A short code snippet, from the final state of the PR diff, that the suggestion will address. Select only the specific span of code that will be modified - without surrounding unchanged code. Preserve all indentation, newlines, and original formatting. Show the code snippet without the '+'/'-'/' ' prefixes. When providing suggestions for long code sections, shorten the presented code with ellipsis (...) for brevity where possible.\")\n    suggestion_content: str = Field(description=\"An actionable suggestion to enhance, improve or fix the new code introduced in the PR. Use 2-3 short sentences.\")\n    improved_code: str = Field(description=\"A refined code snippet that replaces the 'existing_code' snippet after implementing the suggestion.\")\n    one_sentence_summary: str = Field(description=\"A single-sentence overview (up to 6 words) of the suggestion. Focus on the 'what'. Be general, and avoid mentioning method or variable names.\")\n{%- if not focus_only_on_problems %}\n    label: str = Field(description=\"A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', 'typo'. 
Other relevant labels are also acceptable.\")\n{%- else %}\n    label: str = Field(description=\"A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'critical bug', 'general'. The 'general' section should be used for suggestions that address a major issue, but are not necessarily on a critical level.\")\n{%- endif %}\n\n\nclass PRCodeSuggestions(BaseModel):\n    code_suggestions: List[CodeSuggestion]\n=====\n\n\nExample output:\n```yaml\ncode_suggestions:\n- relevant_file: |\n    src/file1.py\n  language: |\n    python\n  existing_code: |\n    ...\n  suggestion_content: |\n    ...\n  improved_code: |\n    ...\n  one_sentence_summary: |\n    ...\n  label: |\n    ...\n```\n\nEach YAML output MUST be after a newline, indented, with block scalar indicator ('|').\n\"\"\"\n\nuser=\"\"\"--PR Info--\n\nTitle: '{{title}}'\n\n{%- if date %}\n\nToday's Date: {{date}}\n{%- endif %}\n\nThe PR Diff:\n======\n{{ diff_no_line_numbers|trim }}\n======\n\n{%- if duplicate_prompt_examples %}\n\n\nExample output:\n```yaml\ncode_suggestions:\n- relevant_file: |\n    src/file1.py\n  language: |\n    python\n  existing_code: |\n    ...\n  suggestion_content: |\n    ...\n  improved_code: |\n    ...\n  one_sentence_summary: |\n    ...\n  label: |\n    ...\n```\n(replace '...' with actual content)\n{%- endif %}\n\n\nResponse (should be a valid YAML, and nothing else):\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/code_suggestions/pr_code_suggestions_reflect_prompts.toml",
    "content": "[pr_code_suggestions_reflect_prompt]\nsystem=\"\"\"You are an AI language model specialized in reviewing and evaluating code suggestions for a Pull Request (PR).\nYour task is to analyze a PR code diff and evaluate the correctness and importance set of AI-generated code suggestions.\nIn addition to evaluating the suggestion correctness and importance, another sub-task you have is to detect the line numbers in the '__new hunk__' of the PR code diff section that correspond to the 'existing_code' snippet.\n\nExamine each suggestion meticulously, assessing its quality, relevance, and accuracy within the context of PR. Keep in mind that the suggestions may vary in their correctness, accuracy and impact.\nConsider the following components of each suggestion:\n    1. 'one_sentence_summary' - A one-liner summary of the suggestion's purpose\n    2. 'suggestion_content' - The suggestion content, explaining the proposed modification\n    3. 'existing_code' - a code snippet from a __new hunk__ section in the PR code diff that the suggestion addresses\n    4. 'improved_code' - a code snippet demonstrating how the 'existing_code' should be after the suggestion is applied\n\nBe particularly vigilant for suggestions that:\n    - Overlook crucial details in the PR code\n    - The 'improved_code' section does not accurately reflect the suggested changes, in relation to the 'existing_code'\n    - Contradict or ignore parts of the PR's modifications\nIn such cases, assign the suggestion a score of 0.\n\nEvaluate each valid suggestion by scoring its potential impact on the PR's correctness, quality and functionality.\nKey guidelines for evaluation:\n- Thoroughly examine both the suggestion content and the corresponding PR code diff. 
Be vigilant for potential errors in each suggestion, ensuring they are logically sound, accurate, and directly derived from the PR code diff.\n- Extend your review beyond the specifically mentioned code lines to encompass surrounding PR code context, verifying the suggestions' contextual accuracy.\n- Validate the 'existing_code' field by confirming it matches or is accurately derived from code lines within a '__new hunk__' section of the PR code diff.\n- Ensure the 'improved_code' section accurately reflects the 'existing_code' segment after the suggested modification is applied.\n- Apply a nuanced scoring system:\n  - Reserve high scores (8-10) for suggestions addressing critical issues such as major bugs or security concerns.\n  - Assign moderate scores (3-7) to suggestions that tackle minor issues, improve code style, enhance readability, or boost maintainability.\n  - Avoid inflating scores for suggestions that, while correct, offer only marginal improvements or optimizations.\n- Maintain the original order of suggestions in your feedback, corresponding to their input sequence.\n\nAdditional scoring considerations:\n- If the suggestion only asks the user to verify or ensure a change done in the PR, it should not receive a score above 7 (and may be lower).\n- Error handling or type checking suggestions should not receive a score above 8 (and may be lower).\n- If the 'existing_code' snippet is equal to the 'improved_code' snippet, it should not receive a score above 7 (and may be lower).\n- Assume each suggestion is independent and is not influenced by the other suggestions.\n- Assign a score of 0 to suggestions aiming at:\n   - Adding docstring, type hints, or comments\n   - Removing unused imports or variables\n   - Adding missing import statements\n   - Using more specific exception types.\n   - Questioning the definition, declaration, import, or initialization of any entity in the PR code, that might be done in the outer codebase.\n\n\n\nThe PR code diff will be 
presented in the following structured format:\n======\n## File: 'src/file1.py'\n{%- if is_ai_metadata %}\n### AI-generated changes summary:\n* ...\n* ...\n{%- endif %}\n\n@@ ... @@ def func1():\n__new hunk__\n11  unchanged code line0\n12  unchanged code line1\n13 +new code line2 added\n14  unchanged code line3\n__old hunk__\n unchanged code line0\n unchanged code line1\n-old code line2 removed\n unchanged code line3\n\n@@ ... @@ def func2():\n__new hunk__\n...\n__old hunk__\n...\n\n\n## File: 'src/file2.py'\n...\n======\n- In the format above, the diff is organized into separate '__new hunk__' and '__old hunk__' sections for each code chunk. '__new hunk__' contains the updated code, while '__old hunk__' shows the removed code. If no code was added or removed in a specific chunk, the corresponding section will be omitted.\n- Line numbers are included for the '__new hunk__' sections to enable referencing specific lines in the code suggestions. These numbers are for reference only and are not part of the actual code.\n- Code lines are prefixed with symbols: '+' for new code added in the PR, '-' for code removed, and ' ' for unchanged code.\n{%- if is_ai_metadata %}\n- When available, an AI-generated summary will precede each file's diff, with a high-level overview of the changes. Note that this summary may not be fully accurate or comprehensive.\n{%- endif %}\n\n\nThe output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback, according to the following Pydantic definitions:\n=====\nclass CodeSuggestionFeedback(BaseModel):\n    suggestion_summary: str = Field(description=\"Repeated from the input\")\n    relevant_file: str = Field(description=\"Repeated from the input\")\n    relevant_lines_start: int = Field(description=\"The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). 
Should be derived from the added '__new hunk__' line numbers, and correspond to the first line of the relevant 'existing code' snippet.\")\n    relevant_lines_end: int = Field(description=\"The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the added '__new hunk__' line numbers, and correspond to the end of the relevant 'existing code' snippet\")\n    suggestion_score: int = Field(description=\"Evaluate the suggestion and assign a score from 0 to 10. Give 0 if the suggestion is wrong. For valid suggestions, score from 1 (lowest impact/importance) to 10 (highest impact/importance).\")\n    why: str = Field(description=\"Briefly explain the score given in 1-2 short sentences, focusing on the suggestion's impact, relevance, and accuracy. When mentioning code elements (variables, names, or files) in your response, surround them with markdown backticks (`).\")\n\nclass PRCodeSuggestionsFeedback(BaseModel):\n    code_suggestions: List[CodeSuggestionFeedback]\n=====\n\n\nExample output:\n```yaml\ncode_suggestions:\n- suggestion_summary: |\n    Use a more descriptive variable name here\n  relevant_file: \"src/file1.py\"\n  relevant_lines_start: 13\n  relevant_lines_end: 14\n  suggestion_score: 6\n  why: |\n    The variable name 't' is not descriptive enough\n- ...\n```\n\n\nEach YAML output MUST be after a newline, indented, with block scalar indicator ('|').\n\"\"\"\n\nuser=\"\"\"You are given a Pull Request (PR) code diff:\n======\n{{ diff|trim }}\n======\n\n\nBelow are {{ num_code_suggestions }} AI-generated code suggestions for the Pull Request:\n======\n{{ suggestion_str|trim }}\n======\n\n\n{%- if duplicate_prompt_examples %}\n\n\nExample output:\n```yaml\ncode_suggestions:\n- suggestion_summary: |\n    ...\n  relevant_file: \"...\"\n  relevant_lines_start: ...\n  relevant_lines_end: ...\n  suggestion_score: ...\n  why: |\n    ...\n- ...\n```\n(replace '...' 
with actual content)\n{%- endif %}\n\nResponse (should be a valid YAML, and nothing else):\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/configuration.toml",
    "content": "# Important: This file contains all available configuration options.\n# Do not copy this entire file to your repository configuration.\n# Your repository configuration should only include options you wish to override from the defaults.\n\n[config]\n# models\nmodel=\"gpt-5.4-2026-03-05\"\nfallback_models=[\"o4-mini\"]\n#model_reasoning=\"o4-mini\" # dedicated reasoning model for self-reflection\n#model_weak=\"gpt-4o\" # optional, a weaker model to use for some easier tasks\n# CLI\ngit_provider=\"github\"\npublish_output=true\npublish_output_progress=true\nverbosity_level=0 # 0,1,2\nuse_extra_bad_extensions=false\n# Log\nlog_level=\"DEBUG\"\n# Configurations\nuse_wiki_settings_file=true\nuse_repo_settings_file=true\nuse_global_settings_file=true\ndisable_auto_feedback = false\nai_timeout=120 # 2minutes\nskip_keys = []\ncustom_reasoning_model = false # when true, disables system messages and temperature controls for models that don't support chat-style inputs\nresponse_language=\"en-US\" # Language locales code for PR responses in ISO 3166 and ISO 639 format (e.g., \"en-US\", \"it-IT\", \"zh-CN\", ...)\n# token limits\nmax_description_tokens = 500\nmax_commits_tokens = 500\nmax_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.\ncustom_model_max_tokens=-1 # for models not in the default list\nmodel_token_count_estimate_factor=0.3 # factor to increase the token count estimate, in order to reduce likelihood of model failure due to too many tokens - applicable only when requesting an accurate estimate.\n# patch extension logic\npatch_extension_skip_types =[\".md\",\".txt\"]\nallow_dynamic_context=true\nmax_extra_lines_before_dynamic_context = 10 # will try to include up to 10 extra lines before the hunk in the patch, until we reach an enclosing function or class\npatch_extra_lines_before = 5 # Number of extra lines (+3 default ones) to include before each hunk in the 
patch\npatch_extra_lines_after = 1 # Number of extra lines (+3 default ones) to include after each hunk in the patch\nsecret_provider=\"\" # \"\" (disabled), \"google_cloud_storage\", or \"aws_secrets_manager\" for secure secret management\ncli_mode=false\noutput_relevant_configurations=false\nlarge_patch_policy = \"clip\" # \"clip\", \"skip\"\nduplicate_prompt_examples = false\n# seed\nseed=-1 # set positive value to fix the seed (and ensure temperature=0)\ntemperature=0.2\n# ignore logic\nignore_pr_title = [\"^\\\\[Auto\\\\]\", \"^Auto\"] # a list of regular expressions to match against the PR title to ignore the PR agent\nignore_pr_target_branches = [] # a list of regular expressions of target branches to ignore from PR agent when a PR is created\nignore_pr_source_branches = [] # a list of regular expressions of source branches to ignore from PR agent when a PR is created\nignore_pr_labels = [] # labels to ignore from PR agent when a PR is created\nignore_pr_authors = [] # authors to ignore from PR agent when a PR is created\nignore_repositories = [] # a list of regular expressions of repository full names (e.g. \"org/repo\") to ignore from PR agent processing\nignore_language_framework = [] # a list of code-generation languages or frameworks (e.g. 'protobuf', 'go_gen') whose auto-generated source files will be excluded from analysis\n#\nis_auto_command = false # will be auto-set to true if the command is triggered by an automation\nenable_ai_metadata = false # will enable adding ai metadata\nreasoning_effort = \"medium\" # \"low\", \"medium\", \"high\"\n# extended thinking for Claude reasoning models\nenable_claude_extended_thinking = false # Set to true to enable extended thinking feature\nextended_thinking_budget_tokens = 2048\nextended_thinking_max_output_tokens = 4096\n# Extract issue number from PR source branch name (e.g. feature/1-auth-google -> issue #1). 
When true, branch-derived\n# issue URLs are merged with tickets from the PR description for compliance. Set to false to restore description-only behaviour.\n# Note: Branch-name extraction is GitHub-only for now; other providers planned for later.\nextract_issue_from_branch = true\n# Optional: custom regex with exactly one capturing group for the issue number (validated at runtime; falls back\n# to default if missing). If empty, uses default pattern: first 1-6 digits at start of branch or after a slash,\n# followed by hyphen or end (e.g. feature/1-test, 123-fix). GitHub only; other providers planned for later.\nbranch_issue_regex = \"\"\n\n\n[pr_reviewer] # /review #\n# enable/disable features\nrequire_score_review=false\nrequire_tests_review=true\nrequire_estimate_effort_to_review=true\nrequire_can_be_split_review=false\nrequire_security_review=true\nrequire_estimate_contribution_time_cost=false\nrequire_todo_scan=false\nrequire_ticket_analysis_review=true\n# general options\npublish_output_no_suggestions=true # Set to \"false\" if you only need the reviewer's remarks (not labels, not \"security audit\", etc.) and want to avoid noisy \"No major issues detected\" comments.\npersistent_comment=true\nextra_instructions = \"\"\nnum_max_findings = 3\nfinal_update_message = true\n# review labels\nenable_review_labels_security=true\nenable_review_labels_effort=true\n# specific configurations for incremental review (/review -i)\nrequire_all_thresholds_for_incremental_review=false\nminimal_commits_for_incremental_review=0\nminimal_minutes_for_incremental_review=0\nenable_intro_text=true\nenable_help_text=false # Determines whether to include help text in the PR review. 
Disabled by default.\n\n[pr_description] # /describe #\npublish_labels=false\nadd_original_user_description=true\ngenerate_ai_title=false\nuse_bullet_points=true\nextra_instructions = \"\"\nenable_pr_type=true\nfinal_update_message = true\nenable_help_text=false\nenable_help_comment=false\nenable_pr_diagram=true # adds a section with a diagram of the PR changes\n# describe as comment\npublish_description_as_comment=false\npublish_description_as_comment_persistent=true\n## changes walkthrough section\nenable_semantic_files_types=true\ncollapsible_file_list='adaptive' # true, false, 'adaptive'\ncollapsible_file_list_threshold=6\ninline_file_summary=false # false, true, 'table'\n# markers\nuse_description_markers=false\nenable_large_pr_handling=true\ninclude_generated_by_header=true\n#custom_labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other']\nmax_ai_calls=4\nasync_ai_calls=true\n[pr_questions] # /ask #\nenable_help_text=false\nuse_conversation_history=true\n\n\n[pr_code_suggestions] # /improve #\ncommitable_code_suggestions = false\ndual_publishing_score_threshold=-1 # -1 to disable, [0-10] to set the threshold (>=) for publishing a code suggestion both in a table and as commitable\nfocus_only_on_problems=true\n#\nextra_instructions = \"\"\nenable_help_text=false\nenable_chat_text=false\npersistent_comment=true\nmax_history_len=4\npublish_output_no_suggestions=true\n# suggestions scoring\nsuggestions_score_threshold=0 # [0-10]| recommend not to set this value above 8, since above it may clip highly relevant suggestions\nnew_score_mechanism=true\nnew_score_mechanism_th_high=9\nnew_score_mechanism_th_medium=7\n# params for '/improve --extended' mode\nauto_extended_mode=true\nnum_code_suggestions_per_chunk=3\nmax_number_of_calls = 3\nparallel_calls = true\n\nfinal_clip_factor = 0.8\ndecouple_hunks = false\n# self-review checkbox\ndemand_code_suggestions_self_review=false # add a checkbox for the author to self-review the code 
suggestions\ncode_suggestions_self_review_text= \"**Author self-review**: I have reviewed the PR code suggestions, and addressed the relevant ones.\"\napprove_pr_on_self_review=false # if true, the PR will be auto-approved after the author clicks on the self-review checkbox\nfold_suggestions_on_self_review=true # if true, the code suggestions will be folded after the author clicks on the self-review checkbox\n\n[pr_custom_prompt] # /custom_prompt #\nprompt = \"\"\"\\\nThe code suggestions should focus only on the following:\n- ...\n- ...\n...\n\"\"\"\nsuggestions_score_threshold=0\nnum_code_suggestions_per_chunk=3\nself_reflect_on_custom_suggestions=true\nenable_help_text=false\n\n\n[pr_add_docs] # /add_docs #\nextra_instructions = \"\"\ndocs_style = \"Sphinx\" # \"Google Style with Args, Returns, Attributes...etc\", \"Numpy Style\", \"Sphinx Style\", \"PEP257\", \"reStructuredText\"\nfile = \"\"              # in case there are several components with the same name, you can specify the relevant file\nclass_name = \"\"        # in case there are several methods with the same name in the same file, you can specify the relevant class name\n\n[pr_update_changelog] # /update_changelog #\npush_changelog_changes=false\nextra_instructions = \"\"\nadd_pr_link=true\nskip_ci_on_push=true\n\n[pr_analyze] # /analyze #\nenable_help_text=true\n\n[pr_test] # /test #\nextra_instructions = \"\"\ntesting_framework = \"\" # specify the testing framework you want to use\nnum_tests=3            # number of tests to generate. 
max 5.\navoid_mocks=true       # if true, the generated tests will prefer to use real objects instead of mocks\nfile = \"\"              # in case there are several components with the same name, you can specify the relevant file\nclass_name = \"\"        # in case there are several methods with the same name in the same file, you can specify the relevant class name\nenable_help_text=false\n\n[pr_improve_component] # /improve_component #\nnum_code_suggestions=4\nextra_instructions = \"\"\nfile = \"\"              # in case there are several components with the same name, you can specify the relevant file\nclass_name = \"\"        # in case there are several methods with the same name in the same file, you can specify the relevant class name\n\n[pr_help] # /help #\nforce_local_db=false\nnum_retrieved_snippets=5\n\n[pr_config] # /config #\n\n[pr_help_docs]\nrepo_url = \"\" #If not overwritten, will use the repo from where the context came from (issue or PR)\nrepo_default_branch = \"main\"\ndocs_path = \"docs\"\nexclude_root_readme = false\nsupported_doc_exts = [\".md\", \".mdx\", \".rst\"]\nenable_help_text=false\n\n[github]\n# The type of deployment to create. 
Valid values are 'app' or 'user'.\ndeployment_type = \"user\"\nratelimit_retries = 5\nbase_url = \"https://api.github.com\"\npublish_inline_comments_fallback_with_verification = true\ntry_fix_invalid_inline_comments = true\napp_name = \"pr-agent\"\nignore_bot_pr = true\n\n[github_action_config]\n# auto_review = true    # set as env var in .github/workflows/pr-agent.yaml\n# auto_describe = true  # set as env var in .github/workflows/pr-agent.yaml\n# auto_improve = true   # set as env var in .github/workflows/pr-agent.yaml\n# pr_actions = ['opened', 'reopened', 'ready_for_review', 'review_requested']\n\n[github_app]\n# these toggles allow running the github app from custom deployments\nbot_user = \"github-actions[bot]\"\noverride_deployment_type = true\n# settings for \"pull_request\" event\nhandle_pr_actions = ['opened', 'reopened', 'ready_for_review']\npr_commands = [\n    \"/describe --pr_description.final_update_message=false\",\n    \"/review\",\n    \"/improve\",\n]\n# settings for \"pull_request\" event with \"synchronize\" action - used to detect and handle push triggers for new commits\nhandle_push_trigger = false\npush_trigger_ignore_bot_commits = true\npush_trigger_ignore_merge_commits = true\npush_trigger_wait_for_initial_review = true\npush_trigger_pending_tasks_backlog = true\npush_trigger_pending_tasks_ttl = 300\npush_commands = [\n    \"/describe\",\n    \"/review\",\n]\n\n[gitlab]\nurl = \"https://gitlab.com\"\nexpand_submodule_diffs = false\npr_commands = [\n    \"/describe --pr_description.final_update_message=false\",\n    \"/review\",\n    \"/improve\",\n]\nhandle_push_trigger = false\npush_commands = [\n    \"/describe\",\n    \"/review\",\n]\n# Configure SSL validation for GitLab. 
Can be either set to the path of a custom CA or disabled entirely.\n# ssl_verify = true\n\n[gitea]\nurl = \"https://gitea.com\"\nhandle_push_trigger = false\npr_commands = [\n    \"/describe\",\n    \"/review\",\n    \"/improve\",\n]\npush_commands = [\n    \"/describe\",\n    \"/review\",\n]\n\n[bitbucket_app]\npr_commands = [\n    \"/describe --pr_description.final_update_message=false\",\n    \"/review\",\n    \"/improve --pr_code_suggestions.commitable_code_suggestions=true\",\n]\navoid_full_files = false\n\n[local]\n# LocalGitProvider settings - uncomment to use paths other than default\n# description_path= \"path/to/description.md\"\n# review_path= \"path/to/review.md\"\n\n[gerrit]\n# endpoint to the gerrit service\n# url = \"ssh://gerrit.example.com:29418\"\n# user for gerrit authentication\n# user = \"ai-reviewer\"\n# patch server where patches will be saved\n# patch_server_endpoint = \"http://127.0.0.1:5000/patch\"\n# token to authenticate in the patch server\n# patch_server_token = \"\"\n\n[bitbucket_server]\n# URL to the BitBucket Server instance\n# url = \"https://git.bitbucket.com\"\nurl = \"\"\npr_commands = [\n    \"/describe --pr_description.final_update_message=false\",\n    \"/review\",\n    \"/improve --pr_code_suggestions.commitable_code_suggestions=true\",\n]\n\n[litellm]\n# use_client = false\n# drop_params = false\nenable_callbacks = false\nsuccess_callback = []\nfailure_callback = []\nservice_callback = []\n# model_id = \"\" # Optional: Custom inference profile ID for Amazon Bedrock\n\n[pr_similar_issue]\nskip_comments = false\nforce_update_dataset = false\nmax_issues_to_scan = 500\nvectordb = \"pinecone\" # options: \"pinecone\", \"lancedb\", \"qdrant\"\n\n[pr_find_similar_component]\nclass_name = \"\"\nfile = \"\"\nsearch_from_org = false\nallow_fallback_less_words = true\nnumber_of_keywords = 5\nnumber_of_results = 5\n\n[pinecone]\n# fill and place in .secrets.toml\n#api_key = ...\n# environment = \"gcp-starter\"\n\n[lancedb]\nuri = 
\"./lancedb\"\n\n[qdrant]\n# fill and place credentials in .secrets.toml\n# url = \"https://YOUR-QDRANT-URL\"\n# api_key = \"...\"\n\n[best_practices]\ncontent = \"\"\norganization_name = \"\"\nmax_lines_allowed = 800\nenable_global_best_practices = false\n\n[auto_best_practices]\nenable_auto_best_practices = true # public - general flag to disable all auto best practices usage\nutilize_auto_best_practices = true # public - disable usage of auto best practices in the 'improve' tool\nextra_instructions = \"\" # public - extra instructions to the auto best practices generation prompt\ncontent = \"\"\nmax_patterns = 5 # max number of patterns to be detected\n\n[azure_devops]\ndefault_comment_status = \"closed\"\n\n[azure_devops_server]\npr_commands = [\n    \"/describe\",\n    \"/review\",\n    \"/improve\",\n]\n"
  },
  {
    "path": "pr_agent/settings/custom_labels.toml",
    "content": "[config]\nenable_custom_labels=false\n\n## template for custom labels\n#[custom_labels.\"Bug fix\"]\n#description = \"\"\"Fixes a bug in the code\"\"\"\n#[custom_labels.\"Tests\"]\n#description = \"\"\"Adds or modifies tests\"\"\"\n#[custom_labels.\"Bug fix with tests\"]\n#description = \"\"\"Fixes a bug in the code and adds or modifies tests\"\"\"\n#[custom_labels.\"Enhancement\"]\n#description = \"\"\"Adds new features or modifies existing ones\"\"\"\n#[custom_labels.\"Documentation\"]\n#description = \"\"\"Adds or modifies documentation\"\"\"\n#[custom_labels.\"Other\"]\n#description = \"\"\"Other changes that do not fit in any of the above categories\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/generated_code_ignore.toml",
    "content": "[generated_code]\n\n# Protocol Buffers\nprotobuf = [\n  \"**/*.pb.go\",\n  \"**/*.pb.cc\",\n  \"**/*_pb2.py\",\n  \"**/*.pb.swift\",\n  \"**/*.pb.rb\",\n  \"**/*.pb.php\",\n  \"**/*.pb.h\"\n]\n\n# OpenAPI / Swagger stubs\nopenapi = [\n  \"**/__generated__/**\",\n  \"**/openapi_client/**\",\n  \"**/openapi_server/**\"\n]\nswagger = [\n  \"**/swagger.json\",\n  \"**/swagger.yaml\"\n]\n\n# GraphQL codegen\ngraphql = [\n  \"**/*.graphql.ts\",\n  \"**/*.generated.ts\",\n  \"**/*.graphql.js\"\n]\n\n# RPC / gRPC Generators \ngrpc_python      = [\"**/*_grpc.py\"]\ngrpc_java        = [\"**/*Grpc.java\"]\ngrpc_csharp      = [\"**/*Grpc.cs\"]\ngrpc_typescript  = [\"**/*_grpc.ts\", \"**/*_grpc.js\"]\n\n# Go code generators\ngo_gen = [\n  \"**/*_gen.go\",\n  \"**/*generated.go\"\n]\n"
  },
  {
    "path": "pr_agent/settings/ignore.toml",
    "content": "[ignore]\n\nglob = [\n    # Ignore files and directories matching these glob patterns.\n    # See https://docs.python.org/3/library/glob.html\n    'vendor/**',\n]\nregex = [\n    # Ignore files and directories matching these regex patterns.\n    # See https://learnbyexample.github.io/python-regex-cheatsheet/\n    # for example: regex = ['.*\\.toml$']\n]\n"
  },
  {
    "path": "pr_agent/settings/language_extensions.toml",
    "content": "[bad_extensions]\ndefault = [\n    'app',\n    'bin',\n    'bmp',\n    'bz2',\n    'class',\n    'csv',\n    'dat',\n    'db',\n    'dll',\n    'dylib',\n    'egg',\n    'eot',\n    'exe',\n    'gif',\n    'gitignore',\n    'glif',\n    'gradle',\n    'gz',\n    'ico',\n    'jar',\n    'jpeg',\n    'jpg',\n    'lo',\n    'lock',\n    'log',\n    'mp3',\n    'mp4',\n    'nar',\n    'o',\n    'ogg',\n    'otf',\n    'p',\n    'pdf',\n    'png',\n    'pickle',\n    'pkl',\n    'pyc',\n    'pyd',\n    'pyo',\n    'rkt',\n    'so',\n    'ss',\n    'svg',\n    'tar',\n    'tgz',\n    'tsv',\n    'ttf',\n    'war',\n    'webm',\n    'woff',\n    'woff2',\n    'xz',\n    'zip',\n    'zst',\n    'snap',\n    'lockb'\n]\nextra = [\n    'md',\n    'txt'\n]\n\n[language_extension_map_org]\n\"1C Enterprise\" = [\"*.bsl\", ]\nABAP = [\".abap\", ]\n\"AGS Script\" = [\".ash\", ]\nAMPL = [\".ampl\", ]\nANTLR = [\".g4\", ]\n\"API Blueprint\" = [\".apib\", ]\nAPL = [\".apl\", \".dyalog\", ]\nASP = [\".asp\", \".asax\", \".ascx\", \".ashx\", \".asmx\", \".aspx\", \".axd\", ]\nATS = [\".dats\", \".hats\", \".sats\", ]\nActionScript = [\".as\", ]\nAda = [\".adb\", \".ada\", \".ads\", ]\nAgda = [\".agda\", ]\nAlloy = [\".als\", ]\nApacheConf = [\".apacheconf\", \".vhost\", ]\nAppleScript = [\".applescript\", \".scpt\", ]\nArc = [\".arc\", ]\nArduino = [\".ino\", ]\nAsciiDoc = [\".asciidoc\", \".adoc\", ]\nAspectJ = [\".aj\", ]\nAssembly = [\".asm\", \".a51\", \".nasm\", ]\nAugeas = [\".aug\", ]\nAutoHotkey = [\".ahk\", \".ahkl\", ]\nAutoIt = [\".au3\", ]\nAwk = [\".awk\", \".auk\", \".gawk\", \".mawk\", \".nawk\", ]\nBatchfile = [\".bat\", \".cmd\", ]\nBefunge = [\".befunge\", ]\nBison = [\".bison\", ]\nBitBake = [\".bb\", ]\nBlitzBasic = [\".decls\", ]\nBlitzMax = [\".bmx\", ]\nBluespec = [\".bsv\", ]\nBoo = [\".boo\", ]\nBrainfuck = [\".bf\", ]\nBrightscript = [\".brs\", ]\nBro = [\".bro\", ]\nC = [\".c\", \".cats\", \".h\", \".idc\", \".w\", ]\n\"C#\" = [\".cs\", 
\".cake\", \".cshtml\", \".csx\", ]\n\"C++\" = [\".cpp\", \".c++\", \".cc\", \".cp\", \".cxx\", \".h++\", \".hh\", \".hpp\", \".hxx\", \".inl\", \".ipp\", \".tcc\", \".tpp\", \".C\", \".H\", ]\nC-ObjDump = [\".c-objdump\", ]\n\"C2hs Haskell\" = [\".chs\", ]\nCLIPS = [\".clp\", ]\nCMake = [\".cmake\", \".cmake.in\", ]\nCOBOL = [\".cob\", \".cbl\", \".ccp\", \".cobol\", \".cpy\", ]\nCSS = [\".css\", ]\nCSV = [\".csv\", ]\n\"Cap'n Proto\" = [\".capnp\", ]\nCartoCSS = [\".mss\", ]\nCeylon = [\".ceylon\", ]\nChapel = [\".chpl\", ]\nChucK = [\".ck\", ]\nCirru = [\".cirru\", ]\nClarion = [\".clw\", ]\nClean = [\".icl\", \".dcl\", ]\nClick = [\".click\", ]\nClojure = [\".clj\", \".boot\", \".cl2\", \".cljc\", \".cljs\", \".cljs.hl\", \".cljscm\", \".cljx\", \".hic\", ]\nCoffeeScript = [\".coffee\", \"._coffee\", \".cjsx\", \".cson\", \".iced\", ]\nColdFusion = [\".cfm\", \".cfml\", ]\n\"ColdFusion CFC\" = [\".cfc\", ]\n\"Common Lisp\" = [\".lisp\", \".asd\", \".lsp\", \".ny\", \".podsl\", \".sexp\", ]\n\"Component Pascal\" = [\".cps\", ]\nCoq = [\".coq\", ]\nCpp-ObjDump = [\".cppobjdump\", \".c++-objdump\", \".c++objdump\", \".cpp-objdump\", \".cxx-objdump\", ]\nCreole = [\".creole\", ]\nCrystal = [\".cr\", ]\nCsound = [\".csd\", ]\nCucumber = [\".feature\", ]\nCuda = [\".cu\", \".cuh\", ]\nCycript = [\".cy\", ]\nCython = [\".pyx\", \".pxd\", \".pxi\", ]\nD = [\".di\", ]\nD-ObjDump = [\".d-objdump\", ]\n\"DIGITAL Command Language\" = [\".com\", ]\nDM = [\".dm\", ]\n\"DNS Zone\" = [\".zone\", \".arpa\", ]\n\"Darcs Patch\" = [\".darcspatch\", \".dpatch\", ]\nDart = [\".dart\", ]\nDiff = [\".diff\", \".patch\", ]\nDockerfile = [\".dockerfile\", \"Dockerfile\", ]\nDogescript = [\".djs\", ]\nDylan = [\".dylan\", \".dyl\", \".intr\", \".lid\", ]\nE = [\".E\", ]\nECL = [\".ecl\", \".eclxml\", ]\nEagle = [\".sch\", \".brd\", ]\n\"Ecere Projects\" = [\".epj\", ]\nEiffel = [\".e\", ]\nElixir = [\".ex\", \".exs\", ]\nElm = [\".elm\", ]\n\"Emacs Lisp\" = [\".el\", \".emacs\", 
\".emacs.desktop\", ]\nEmberScript = [\".em\", \".emberscript\", ]\nErlang = [\".erl\", \".escript\", \".hrl\", \".xrl\", \".yrl\", ]\n\"F#\" = [\".fs\", \".fsi\", \".fsx\", ]\nFLUX = [\".flux\", ]\nFORTRAN = [\".f90\", \".f\", \".f03\", \".f08\", \".f77\", \".f95\", \".for\", \".fpp\", ]\nFactor = [\".factor\", ]\nFancy = [\".fy\", \".fancypack\", ]\nFantom = [\".fan\", ]\nFormatted = [\".eam.fs\", ]\nForth = [\".fth\", \".4th\", \".forth\", \".frt\", ]\nFreeMarker = [\".ftl\", ]\nG-code = [\".g\", \".gco\", \".gcode\", ]\nGAMS = [\".gms\", ]\nGAP = [\".gap\", \".gi\", ]\nGAS = [\".s\", ]\nGDScript = [\".gd\", ]\nGLSL = [\".glsl\", \".fp\", \".frag\", \".frg\", \".fsh\", \".fshader\", \".geo\", \".geom\", \".glslv\", \".gshader\", \".shader\", \".vert\", \".vrx\", \".vsh\", \".vshader\", ]\nGenshi = [\".kid\", ]\n\"Gentoo Ebuild\" = [\".ebuild\", ]\n\"Gentoo Eclass\" = [\".eclass\", ]\n\"Gettext Catalog\" = [\".po\", \".pot\", ]\nGlyph = [\".glf\", ]\nGnuplot = [\".gp\", \".gnu\", \".gnuplot\", \".plot\", \".plt\", ]\nGo = [\".go\", ]\nGolo = [\".golo\", ]\nGosu = [\".gst\", \".gsx\", \".vark\", ]\nGrace = [\".grace\", ]\nGradle = [\".gradle\", ]\n\"Grammatical Framework\" = [\".gf\", ]\nGraphQL = [\".graphql\", ]\n\"Graphviz (DOT)\" = [\".dot\", \".gv\", ]\nGroff = [\".man\", \".1\", \".1in\", \".1m\", \".1x\", \".2\", \".3\", \".3in\", \".3m\", \".3qt\", \".3x\", \".4\", \".5\", \".6\", \".7\", \".8\", \".9\", \".me\", \".rno\", \".roff\", ]\nGroovy = [\".groovy\", \".grt\", \".gtpl\", \".gvy\", ]\n\"Groovy Server Pages\" = [\".gsp\", ]\nHCL = [\".hcl\", \".tf\", ]\nHLSL = [\".hlsl\", \".fxh\", \".hlsli\", ]\nHTML = [\".html\", \".htm\", \".html.hl\", \".xht\", \".xhtml\", ]\n\"HTML+Django\" = [\".mustache\", \".jinja\", ]\n\"HTML+EEX\" = [\".eex\", ]\n\"HTML+ERB\" = [\".erb\", \".erb.deface\", ]\n\"HTML+PHP\" = [\".phtml\", ]\nHTTP = [\".http\", ]\nHaml = [\".haml\", \".haml.deface\", ]\nHandlebars = [\".handlebars\", \".hbs\", ]\nHarbour = [\".hb\", ]\nHaskell 
= [\".hs\", \".hsc\", ]\nHaxe = [\".hx\", \".hxsl\", ]\nHy = [\".hy\", ]\nIDL = [\".dlm\", ]\n\"IGOR Pro\" = [\".ipf\", ]\nINI = [\".ini\", \".cfg\", \".prefs\", \".properties\", ]\n\"IRC log\" = [\".irclog\", \".weechatlog\", ]\nIdris = [\".idr\", \".lidr\", ]\n\"Inform 7\" = [\".ni\", \".i7x\", ]\n\"Inno Setup\" = [\".iss\", ]\nIo = [\".io\", ]\nIoke = [\".ik\", ]\nIsabelle = [\".thy\", ]\nJ = [\".ijs\", ]\nJFlex = [\".flex\", \".jflex\", ]\nJSON = [\".json\", \".geojson\", \".lock\", \".topojson\", ]\nJSON5 = [\".json5\", ]\nJSONLD = [\".jsonld\", ]\nJSONiq = [\".jq\", ]\nJSX = [\".jsx\", ]\nJade = [\".jade\", ]\nJasmin = [\".j\", ]\nJava = [\".java\", ]\n\"Java Server Pages\" = [\".jsp\", ]\nJavaScript = [\".js\", \"._js\", \".bones\", \".es6\", \".jake\", \".jsb\", \".jscad\", \".jsfl\", \".jsm\", \".jss\", \".njs\", \".pac\", \".sjs\", \".ssjs\", \".xsjs\", \".xsjslib\", ]\nJulia = [\".jl\", ]\n\"Jupyter Notebook\" = [\".ipynb\", ]\nKRL = [\".krl\", ]\nKiCad = [\".kicad_pcb\", ]\nKit = [\".kit\", ]\nKotlin = [\".kt\", \".ktm\", \".kts\", ]\nLFE = [\".lfe\", ]\nLLVM = [\".ll\", ]\nLOLCODE = [\".lol\", ]\nLSL = [\".lsl\", \".lslp\", ]\nLabVIEW = [\".lvproj\", ]\nLasso = [\".lasso\", \".las\", \".lasso8\", \".lasso9\", \".ldml\", ]\nLatte = [\".latte\", ]\nLean = [\".lean\", \".hlean\", ]\nLess = [\".less\", ]\nLex = [\".lex\", ]\nLilyPond = [\".ly\", \".ily\", ]\n\"Linker Script\" = [\".ld\", \".lds\", ]\nLiquid = [\".liquid\", ]\n\"Literate Agda\" = [\".lagda\", ]\n\"Literate CoffeeScript\" = [\".litcoffee\", ]\n\"Literate Haskell\" = [\".lhs\", ]\nLiveScript = [\".ls\", \"._ls\", ]\nLogos = [\".xm\", \".x\", \".xi\", ]\nLogtalk = [\".lgt\", \".logtalk\", ]\nLookML = [\".lookml\", ]\nLua = [\".lua\", \".nse\", \".pd_lua\", \".rbxs\", \".wlua\", ]\nM = [\".mumps\", ]\nM4 = [\".m4\", ]\nMAXScript = [\".mcr\", ]\nMTML = [\".mtml\", ]\nMUF = [\".muf\", ]\nMakefile = [\".mak\", \".mk\", \".mkfile\", \"Makefile\", ]\nMako = [\".mako\", \".mao\", ]\nMaple = 
[\".mpl\", ]\nMarkdown = [\".md\", \".markdown\", \".mkd\", \".mkdn\", \".mkdown\", \".ron\", ]\nMask = [\".mask\", ]\nMathematica = [\".mathematica\", \".cdf\", \".ma\", \".mt\", \".nb\", \".nbp\", \".wl\", \".wlt\", ]\nMatlab = [\".matlab\", ]\nMax = [\".maxpat\", \".maxhelp\", \".maxproj\", \".mxt\", \".pat\", ]\nMediaWiki = [\".mediawiki\", \".wiki\", ]\nMetal = [\".metal\", ]\nMiniD = [\".minid\", ]\nMirah = [\".druby\", \".duby\", \".mir\", \".mirah\", ]\nModelica = [\".mo\", ]\n\"Module Management System\" = [\".mms\", \".mmk\", ]\nMonkey = [\".monkey\", ]\nMoonScript = [\".moon\", ]\nMyghty = [\".myt\", ]\nNSIS = [\".nsi\", \".nsh\", ]\nNetLinx = [\".axs\", \".axi\", ]\n\"NetLinx+ERB\" = [\".axs.erb\", \".axi.erb\", ]\nNetLogo = [\".nlogo\", ]\nNginx = [\".nginxconf\", ]\nNimrod = [\".nim\", \".nimrod\", ]\nNinja = [\".ninja\", ]\nNit = [\".nit\", ]\nNix = [\".nix\", ]\nNu = [\".nu\", ]\nNumPy = [\".numpy\", \".numpyw\", \".numsc\", ]\nOCaml = [\".ml\", \".eliom\", \".eliomi\", \".ml4\", \".mli\", \".mll\", \".mly\", ]\nObjDump = [\".objdump\", ]\n\"Objective-C++\" = [\".mm\", ]\nObjective-J = [\".sj\", ]\nOctave = [\".oct\", ]\nOmgrofl = [\".omgrofl\", ]\nOpa = [\".opa\", ]\nOpal = [\".opal\", ]\nOpenCL = [\".cl\", \".opencl\", ]\n\"OpenEdge ABL\" = [\".p\", ]\nOpenSCAD = [\".scad\", ]\nOrg = [\".org\", ]\nOx = [\".ox\", \".oxh\", \".oxo\", ]\nOxygene = [\".oxygene\", ]\nOz = [\".oz\", ]\nPAWN = [\".pwn\", ]\nPHP = [\".php\", \".aw\", \".ctp\", \".php3\", \".php4\", \".php5\", \".phps\", \".phpt\", ]\n\"POV-Ray SDL\" = [\".pov\", ]\nPan = [\".pan\", ]\nPapyrus = [\".psc\", ]\nParrot = [\".parrot\", ]\n\"Parrot Assembly\" = [\".pasm\", ]\n\"Parrot Internal Representation\" = [\".pir\", ]\nPascal = [\".pas\", \".dfm\", \".dpr\", \".lpr\", ]\nPerl = [\".pl\", \".al\", \".perl\", \".ph\", \".plx\", \".pm\", \".psgi\", \".t\", ]\nPerl6 = [\".6pl\", \".6pm\", \".nqp\", \".p6\", \".p6l\", \".p6m\", \".pl6\", \".pm6\", ]\nPickle = [\".pkl\", ]\nPigLatin = 
[\".pig\", ]\nPike = [\".pike\", \".pmod\", ]\nPod = [\".pod\", ]\nPogoScript = [\".pogo\", ]\nPony = [\".pony\", ]\nPostScript = [\".ps\", \".eps\", ]\nPowerShell = [\".ps1\", \".psd1\", \".psm1\", ]\nProcessing = [\".pde\", ]\nProlog = [\".prolog\", \".yap\", ]\n\"Propeller Spin\" = [\".spin\", ]\n\"Protocol Buffer\" = [\".proto\", ]\n\"Public Key\" = [\".pub\", ]\n\"Pure Data\" = [\".pd\", ]\nPureBasic = [\".pb\", \".pbi\", ]\nPureScript = [\".purs\", ]\nPython = [\".py\", \".bzl\", \".gyp\", \".lmi\", \".pyde\", \".pyp\", \".pyt\", \".pyw\", \".tac\", \".wsgi\", \".xpy\", ]\n\"Python traceback\" = [\".pytb\", ]\nQML = [\".qml\", \".qbs\", ]\nQMake = [\".pri\", ]\nR = [\".r\", \".rd\", \".rsx\", ]\nRAML = [\".raml\", ]\nRDoc = [\".rdoc\", ]\nREALbasic = [\".rbbas\", \".rbfrm\", \".rbmnu\", \".rbres\", \".rbtbar\", \".rbuistate\", ]\nRHTML = [\".rhtml\", ]\nRMarkdown = [\".rmd\", ]\nRacket = [\".rkt\", \".rktd\", \".rktl\", \".scrbl\", ]\n\"Ragel in Ruby Host\" = [\".rl\", ]\n\"Raw token data\" = [\".raw\", ]\nRebol = [\".reb\", \".r2\", \".r3\", \".rebol\", ]\nRed = [\".red\", \".reds\", ]\nRedcode = [\".cw\", ]\n\"Ren'Py\" = [\".rpy\", ]\nRenderScript = [\".rsh\", ]\nRobotFramework = [\".robot\", ]\nRouge = [\".rg\", ]\nRuby = [\".rb\", \".builder\", \".gemspec\", \".god\", \".irbrc\", \".jbuilder\", \".mspec\", \".podspec\", \".rabl\", \".rake\", \".rbuild\", \".rbw\", \".rbx\", \".ru\", \".ruby\", \".thor\", \".watchr\", ]\nRust = [\".rs\", \".rs.in\", ]\nSAS = [\".sas\", ]\nSCSS = [\".scss\", ]\nSMT = [\".smt2\", \".smt\", ]\nSPARQL = [\".sparql\", \".rq\", ]\nSQF = [\".sqf\", \".hqf\", ]\nSQL = [\".pls\", \".pck\", \".pkb\", \".pks\", \".plb\", \".plsql\", \".sql\", \".cql\", \".ddl\", \".prc\", \".tab\", \".udf\", \".viw\", \".db2\", ]\nSTON = [\".ston\", ]\nSVG = [\".svg\", ]\nSage = [\".sage\", \".sagews\", ]\nSaltStack = [\".sls\", ]\nSass = [\".sass\", ]\nScala = [\".scala\", \".sbt\", ]\nScaml = [\".scaml\", ]\nScheme = [\".scm\", \".sld\", \".sps\", 
\".ss\", ]\nScilab = [\".sci\", \".sce\", ]\nSelf = [\".self\", ]\nShell = [\".sh\", \".bash\", \".bats\", \".command\", \".ksh\", \".sh.in\", \".tmux\", \".tool\", \".zsh\", ]\nShellSession = [\".sh-session\", ]\nShen = [\".shen\", ]\nSlash = [\".sl\", ]\nSlim = [\".slim\", ]\nSmali = [\".smali\", ]\nSmalltalk = [\".st\", ]\nSmarty = [\".tpl\", ]\nSolidity = [\".sol\", ]\nSourcePawn = [\".sp\", \".sma\", ]\nSquirrel = [\".nut\", ]\nStan = [\".stan\", ]\n\"Standard ML\" = [\".ML\", \".fun\", \".sig\", \".sml\", ]\nStata = [\".do\", \".ado\", \".doh\", \".ihlp\", \".mata\", \".matah\", \".sthlp\", ]\nStylus = [\".styl\", ]\nSuperCollider = [\".scd\", ]\nSwift = [\".swift\", ]\nSystemVerilog = [\".sv\", \".svh\", \".vh\", ]\nTOML = [\".toml\", ]\nTXL = [\".txl\", ]\nTcl = [\".tcl\", \".adp\", \".tm\", ]\nTcsh = [\".tcsh\", \".csh\", ]\nTeX = [\".tex\", \".aux\", \".bbx\", \".bib\", \".cbx\", \".dtx\", \".ins\", \".lbx\", \".ltx\", \".mkii\", \".mkiv\", \".mkvi\", \".sty\", \".toc\", ]\nTea = [\".tea\", ]\nText = [\".txt\", \".no\", ]\nTextile = [\".textile\", ]\nThrift = [\".thrift\", ]\nTuring = [\".tu\", ]\nTurtle = [\".ttl\", ]\nTwig = [\".twig\", ]\nTypeScript = [\".ts\", \".tsx\", ]\n\"Unified Parallel C\" = [\".upc\", ]\n\"Unity3D Asset\" = [\".anim\", \".asset\", \".mat\", \".meta\", \".prefab\", \".unity\", ]\nUno = [\".uno\", ]\nUnrealScript = [\".uc\", ]\nUrWeb = [\".ur\", \".urs\", ]\nVCL = [\".vcl\", ]\nVHDL = [\".vhdl\", \".vhd\", \".vhf\", \".vhi\", \".vho\", \".vhs\", \".vht\", \".vhw\", ]\nVala = [\".vala\", \".vapi\", ]\nVerilog = [\".veo\", ]\nVimL = [\".vim\", ]\n\"Visual Basic\" = [\".vb\", \".bas\", \".frm\", \".frx\", \".vba\", \".vbhtml\", \".vbs\", ]\nVolt = [\".volt\", ]\nVue = [\".vue\", ]\n\"Web Ontology Language\" = [\".owl\", ]\nWebAssembly = [\".wat\", ]\nWebIDL = [\".webidl\", ]\nX10 = [\".x10\", ]\nXC = [\".xc\", ]\nXML = [\".xml\", \".ant\", \".axml\", \".ccxml\", \".clixml\", \".cproject\", \".csl\", \".csproj\", \".ct\", \".dita\", 
\".ditamap\", \".ditaval\", \".dll.config\", \".dotsettings\", \".filters\", \".fsproj\", \".fxml\", \".glade\", \".grxml\", \".iml\", \".ivy\", \".jelly\", \".jsproj\", \".kml\", \".launch\", \".mdpolicy\", \".mxml\", \".nproj\", \".nuspec\", \".odd\", \".osm\", \".plist\", \".props\", \".ps1xml\", \".psc1\", \".pt\", \".rdf\", \".rss\", \".scxml\", \".srdf\", \".storyboard\", \".stTheme\", \".sublime-snippet\", \".targets\", \".tmCommand\", \".tml\", \".tmLanguage\", \".tmPreferences\", \".tmSnippet\", \".tmTheme\", \".ui\", \".urdf\", \".ux\", \".vbproj\", \".vcxproj\", \".vssettings\", \".vxml\", \".wsdl\", \".wsf\", \".wxi\", \".wxl\", \".wxs\", \".x3d\", \".xacro\", \".xaml\", \".xib\", \".xlf\", \".xliff\", \".xmi\", \".xml.dist\", \".xproj\", \".xsd\", \".xul\", \".zcml\", ]\nXPages = [\".xsp-config\", \".xsp.metadata\", ]\nXProc = [\".xpl\", \".xproc\", ]\nXQuery = [\".xquery\", \".xq\", \".xql\", \".xqm\", \".xqy\", ]\nXS = [\".xs\", ]\nXSLT = [\".xslt\", \".xsl\", ]\nXojo = [\".xojo_code\", \".xojo_menu\", \".xojo_report\", \".xojo_script\", \".xojo_toolbar\", \".xojo_window\", ]\nXtend = [\".xtend\", ]\nYAML = [\".yml\", \".reek\", \".rviz\", \".sublime-syntax\", \".syntax\", \".yaml\", \".yaml-tmlanguage\", ]\nYANG = [\".yang\", ]\nYacc = [\".y\", \".yacc\", \".yy\", ]\nZephir = [\".zep\", ]\nZig = [\".zig\", ]\nZimpl = [\".zimpl\", \".zmpl\", \".zpl\", ]\ndesktop = [\".desktop\", \".desktop.in\", ]\neC = [\".ec\", \".eh\", ]\nedn = [\".edn\", ]\nfish = [\".fish\", ]\nmupad = [\".mu\", ]\nnesC = [\".nc\", ]\nooc = [\".ooc\", ]\nreStructuredText = [\".rst\", \".rest\", \".rest.txt\", \".rst.txt\", ]\nwisp = [\".wisp\", ]\nxBase = [\".prg\", \".prw\", ]\n\n[docs_blacklist_extensions]\n# Disable docs for these extensions of text files and scripts that are not programming languages of function, classes and methods\ndocs_blacklist = ['sql', 'txt', 'yaml', 'json', 'xml', 'md', 'rst', 'rest', 'rest.txt', 'rst.txt', 'mdpolicy', 'mdown', 'markdown', 'mdwn', 
'mkd', 'mkdn', 'mkdown', 'sh']\n"
  },
  {
    "path": "pr_agent/settings/pr_add_docs.toml",
    "content": "[pr_add_docs_prompt]\nsystem=\"\"\"You are PR-Doc, a language model that specializes in generating documentation for code components in a Pull Request (PR).\nYour task is to generate {{ docs_for_language }} for code components in the PR Diff.\n\n\nExample for the PR Diff format:\n======\n## File: 'src/file1.py'\n\n@@ -12,3 +12,4 @@ def func1():\n__new hunk__\n12  code line1 that remained unchanged in the PR\n14 +new code line1 added in the PR\n15 +new code line2 added in the PR\n16  code line2 that remained unchanged in the PR\n__old hunk__\n code line1 that remained unchanged in the PR\n-code line that was removed in the PR\n code line2 that remained unchanged in the PR\n\n@@ ... @@ def func2():\n__new hunk__\n...\n__old hunk__\n...\n\n\n## File: 'src/file2.py'\n...\n======\n\n\nSpecific instructions:\n- Try to identify edited/added code components (classes/functions/methods...) that are undocumented, and generate {{ docs_for_language }} for each one.\n- If there are documented (any type of {{ language }} documentation) code components in the PR, Don't generate {{ docs_for_language }} for them.\n- Ignore code components that don't appear fully in the '__new hunk__' section. 
For example, you must see the component header and body.\n- Make sure the {{ docs_for_language }} starts and ends with standard {{ language }} {{ docs_for_language }} signs.\n- The {{ docs_for_language }} should be in standard format.\n- Provide the exact line number (inclusive) where the {{ docs_for_language }} should be added.\n\n\n{%- if extra_instructions %}\n\nExtra instructions from the user:\n======\n{{ extra_instructions }}\n======\n{%- endif %}\n\n\nYou must use the following YAML schema to format your answer:\n```yaml\nCode Documentation:\n  type: array\n  uniqueItems: true\n  items:\n    relevant file:\n      type: string\n      description: The full file path of the relevant file.\n    relevant line:\n      type: integer\n      description: |-\n        The relevant line number from a '__new hunk__' section where the {{ docs_for_language }} should be added.\n    doc placement:\n      type: string\n      enum:\n        - before\n        - after\n      description: |-\n        The {{ docs_for_language }} placement relative to the relevant line (code component).\n        For example, in Python the docs are placed after the function signature, but in Java they are placed before.\n    documentation:\n      type: string\n      description: |-\n        The {{ docs_for_language }} content. 
It should be complete, correctly formatted and indented, and without line numbers.\n```\n\nExample output:\n```yaml\nCode Documentation:\n-   relevant file: |-\n        src/file1.py\n    relevant line: 12\n    doc placement: after\n    documentation: |-\n        \\\"\\\"\\\"\n        This is a python docstring for func1.\n        \\\"\\\"\\\"\n- ...\n...\n```\n\n\nEach YAML output MUST be after a newline, indented, with block scalar indicator ('|-').\nDon't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.\n\"\"\"\n\nuser=\"\"\"PR Info:\n\nTitle: '{{ title }}'\n\nBranch: '{{ branch }}'\n\n{%- if description %}\n\nDescription:\n======\n{{ description|trim }}\n======\n{%- endif %}\n\n{%- if language %}\n\nMain PR language: '{{language}}'\n{%- endif %}\n\n\nThe PR Diff:\n======\n{{ diff|trim }}\n======\n\n\nResponse (should be a valid YAML, and nothing else):\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_custom_labels.toml",
    "content": "[pr_custom_labels_prompt]\nsystem=\"\"\"You are PR-Reviewer, a language model designed to review a Git Pull Request (PR).\nYour task is to provide labels that describe the PR content.\n{%- if enable_custom_labels %}\nThoroughly read the labels name and the provided description, and decide whether the label is relevant to the PR.\n{%- endif %}\n\n{%- if extra_instructions %}\n\nExtra instructions from the user:\n======\n{{ extra_instructions }}\n======\n{% endif %}\n\n\nThe output must be a YAML object equivalent to type $Labels, according to the following Pydantic definitions:\n======\n{%- if enable_custom_labels %}\n\n{{ custom_labels_class }}\n\n{%- else %}\nclass Label(str, Enum):\n    bug_fix = \"Bug fix\"\n    tests = \"Tests\"\n    enhancement = \"Enhancement\"\n    documentation = \"Documentation\"\n    other = \"Other\"\n{%- endif %}\n\nclass Labels(BaseModel):\n    labels: List[Label] =  Field(min_items=0, description=\"choose the relevant custom labels that describe the PR content, and return their keys. Use the value field of the Label object to better understand the label meaning.\")\n======\n\n\nExample output:\n\n```yaml\nlabels:\n- ...\n- ...\n```\n\nAnswer should be a valid YAML, and nothing else.\n\"\"\"\n\nuser=\"\"\"PR Info:\n\nPrevious title: '{{title}}'\n\nBranch: '{{ branch }}'\n\n{%- if description %}\n\nDescription:\n======\n{{ description|trim }}\n======\n{%- endif %}\n\n{%- if language %}\n\nMain PR language: '{{ language }}'\n{%- endif %}\n{%- if commit_messages_str %}\n\n\nCommit messages:\n======\n{{ commit_messages_str|trim }}\n======\n{%- endif %}\n\n\nThe PR Git Diff:\n======\n{{ diff|trim }}\n======\n\nNote that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines.\n\n\nResponse (should be a valid YAML, and nothing else):\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_description_prompts.toml",
    "content": "[pr_description_prompt]\nsystem=\"\"\"You are PR-Reviewer, a language model designed to review a Git Pull Request (PR).\nYour task is to provide a full description for the PR content: type, description, title, and files walkthrough.\n- Focus on the new PR code (lines starting with '+' in the 'PR Git Diff' section).\n- Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference.\n- The generated title and description should prioritize the most significant changes.\n- If needed, each YAML output should be in block scalar indicator ('|')\n- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote (').\n- When needed, use '- ' as bullets\n\n{%- if extra_instructions %}\n\nExtra instructions from the user:\n=====\n{{extra_instructions}}\n=====\n{% endif %}\n\n\nThe output must be a YAML object equivalent to type $PRDescription, according to the following Pydantic definitions:\n=====\nclass PRType(str, Enum):\n    bug_fix = \"Bug fix\"\n    tests = \"Tests\"\n    enhancement = \"Enhancement\"\n    documentation = \"Documentation\"\n    other = \"Other\"\n\n{%- if enable_custom_labels %}\n\n{{ custom_labels_class }}\n\n{%- endif %}\n\n{%- if enable_semantic_files_types %}\n\nclass FileDescription(BaseModel):\n    filename: str = Field(description=\"The full file path of the relevant file\")\n{%- if include_file_summary_changes %}\n    changes_summary: str = Field(description=\"concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).\")\n{%- endif %}\n    changes_title: str = Field(description=\"one-line summary (5-10 words) capturing the main theme of changes in the file\")\n    label: str = Field(description=\"a single semantic label that represents a type of code changes that occurred in the File. 
Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...\")\n{%- endif %}\n\nclass PRDescription(BaseModel):\n    type: List[PRType] = Field(description=\"one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')\")\n    description: str = Field(description=\"summarize the PR changes with 1-4 bullet points, each up to 8 words. For large PRs, add sub-bullets for each bullet if needed. Order bullets by importance, with each bullet highlighting a key change group.\")\n    title: str = Field(description=\"a concise and descriptive title that captures the PR's main theme\")\n{%- if enable_pr_diagram %}\n    changes_diagram: str = Field(description='a horizontal diagram that represents the main PR changes, in the format of a valid mermaid LR flowchart. The diagram should be concise and easy to read. Leave empty if no diagram is relevant. To create robust Mermaid diagrams, follow this two-step process: (1) Declare the nodes: nodeID[\"node description\"]. (2) Then define the links: nodeID1 -- \"link text\" --> nodeID2. Node description must always be surrounded with double quotation marks')\n{%- endif %}\n{%- if enable_semantic_files_types %}\n    pr_files: List[FileDescription] = Field(max_items=20, description=\"a list of all the files that were changed in the PR, and summary of their changes. 
Each file must be analyzed regardless of change size.\")\n{%- endif %}\n=====\n\n\nExample output:\n\n```yaml\ntype:\n- ...\n- ...\ndescription: |\n  - ...\n  - ...\ntitle: |\n  ...\n{%- if enable_pr_diagram %}\nchanges_diagram: |\n  ```mermaid\n  flowchart LR\n    ...\n  ```\n{%- endif %}\n{%- if enable_semantic_files_types %}\npr_files:\n- filename: |\n    ...\n{%- if include_file_summary_changes %}\n  changes_summary: |\n    ...\n{%- endif %}\n  changes_title: |\n    ...\n  label: |\n    label_key_1\n...\n{%- endif %}\n```\n\nAnswer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')\n\"\"\"\n\nuser=\"\"\"\n{%- if related_tickets %}\nRelated Ticket Info:\n{% for ticket in related_tickets %}\n=====\nTicket Title: '{{ ticket.title }}'\n{%- if ticket.labels %}\nTicket Labels: {{ ticket.labels }}\n{%- endif %}\n{%- if ticket.body %}\nTicket Description:\n#####\n{{ ticket.body }}\n#####\n{%- endif %}\n=====\n{% endfor %}\n{%- endif %}\n\nPR Info:\n\nPrevious title: '{{title}}'\n\n{%- if description %}\n\nPrevious description:\n=====\n{{ description|trim }}\n=====\n{%- endif %}\n\nBranch: '{{branch}}'\n\n{%- if commit_messages_str %}\n\nCommit messages:\n=====\n{{ commit_messages_str|trim }}\n=====\n{%- endif %}\n\n\nThe PR Git Diff:\n=====\n{{ diff|trim }}\n=====\n\nNote that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines.\n\n{%- if duplicate_prompt_examples %}\n\n\nExample output:\n```yaml\ntype:\n- Bug fix\n- Refactoring\n- ...\ndescription: |\n  - ...\n  - ...\ntitle: |\n  ...\n{%- if enable_pr_diagram %}\nchanges_diagram: |\n  ```mermaid\n  flowchart LR\n    ...\n  ```\n{%- endif %}\n{%- if enable_semantic_files_types %}\npr_files:\n- filename: |\n    ...\n{%- if include_file_summary_changes %}\n  changes_summary: |\n    ...\n{%- endif %}\n  changes_title: |\n    
...\n  label: |\n    label_key_1\n...\n{%- endif %}\n```\n(replace '...' with the actual values)\n{%- endif %}\n\n\nResponse (should be a valid YAML, and nothing else):\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_evaluate_prompt_response.toml",
    "content": "[pr_evaluate_prompt]\nprompt=\"\"\"\\\nYou are the PR-task-evaluator, a language model that compares and ranks the quality of two responses provided in response to a lengthy task regarding a Pull Request (PR) code diff.\n\n\nThe task to be evaluated is:\n\n***** Start of Task *****\n{{pr_task|trim}}\n\n***** End of Task *****\n\n\n\nResponse 1 to the task is:\n\n***** Start of Response 1 *****\n\n{{pr_response1|trim}}\n\n***** End of Response 1 *****\n\n\n\nResponse 2 to the task is:\n\n***** Start of Response 2 *****\n\n{{pr_response2|trim}}\n\n***** End of Response 2 *****\n\n\n\nGuidelines to evaluate the responses:\n- Thoroughly read the 'Task' part. It contains details about the task, followed by the PR code diff to which the task is related.\n- Thoroughly read 'Response1' and 'Response2' parts. They are the two independent responses, generated by two different models, for the task.\n\nAfter that, rank each response. Criterions to rank each response:\n- How well does the response follow the specific task instructions and requirements?\n- How well does the response analyze and understand the PR code diff?\n- How well will a person perceive it as a good response that correctly addresses the task?\n- How well does the response prioritize key feedback, related to the task instructions, that a human reader seeing that feedback would also consider as important?\n- Don't necessarily rank higher a response that is longer. A shorter response might be better if it is more concise, and still addresses the task better.\n\n\nThe output must be a YAML object equivalent to type $PRRankRespones, according to the following Pydantic definitions:\n=====\nclass PRRankRespones(BaseModel):\n    which_response_was_better: Literal[0, 1, 2] = Field(description=\"A number indicating which response was better. 
0 means both responses are equally good.\")\n    why: str = Field(description=\"In a short and concise manner, explain why the chosen response is better than the other. Be specific and give examples if relevant.\")\n    score_response1: int = Field(description=\"A score between 1 and 10, indicating the quality of the response1, based on the criterions mentioned in the prompt.\")\n    score_response2: int = Field(description=\"A score between 1 and 10, indicating the quality of the response2, based on the criterions mentioned in the prompt.\")\n=====\n\n\nExample output:\n```yaml\nwhich_response_was_better: \"X\"\nwhy: \"Response X is better because it is more practical, and addresses the task requirements better since ...\"\nscore_response1: ...\nscore_response2: ...\n```\n\n\nResponse (should be a valid YAML, and nothing else):\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_help_docs_headings_prompts.toml",
    "content": "\n[pr_help_docs_headings_prompts]\nsystem=\"\"\"You are Doc-helper, a language model that ranks documentation files based on their relevance to user questions.\nYou will receive a question, a repository url and file names along with optional groups of headings extracted from such files from that repository (either as markdown or as restructred text).\nYour task is to rank file paths based on how likely they contain the answer to a user's question, using only the headings from each such file and the file name.\n\n======\n==file name==\n\n'src/file1.py'\n\n==index==\n\n0 based integer\n\n==file headings==\nheading #1\nheading #2\n...\n\n==file name==\n\n'src/file2.py'\n\n==index==\n\n0 based integer\n\n==file headings==\nheading #1\nheading #2\n...\n\n...\n======\n\nAdditional instructions:\n- Consider only the file names and section headings within each document\n- Present the most relevant files first, based strictly on how well their headings and file names align with user question\n\nThe output must be a YAML object equivalent to type $DocHeadingsHelper, according to the following Pydantic definitions:\n=====\nclass file_idx_and_path(BaseModel):\n    idx: int = Field(description=\"The zero based index of file_name, as it appeared in the original list of headings. 
Cannot be negative.\")\n    file_name: str = Field(description=\"The file_name exactly as it appeared in the question\")\n\nclass DocHeadingsHelper(BaseModel):\n    user_question: str = Field(description=\"The user's question\")\n    relevant_files_ranking: List[file_idx_and_path] = Field(description=\"Files sorted in descending order by relevance to question\")\n=====\n\n\nExample output:\n```yaml\nuser_question: |\n  ...\nrelevant_files_ranking:\n- idx: 101\n  file_name: \"src/file1.py\"\n- ...\n\"\"\"\n\nuser=\"\"\"\\\nDocumentation url: '{{ docs_url|trim }}'\n-----\n\n\nUser's Question:\n=====\n{{ question|trim }}\n=====\n\n\nFilenames with optional headings from documentation website content:\n=====\n{{ snippets|trim }}\n=====\n\n\nReminder: The output must be a YAML object equivalent to type $DocHeadingsHelper, similar to the following example output:\n=====\n\n\nExample output:\n```yaml\nuser_question: |\n  ...\nrelevant_files_ranking:\n- idx: 101\n  file_name: \"src/file1.py\"\n- ...\n=====\n\nImportant Notes:\n1. Output most relevant file names first, by descending order of relevancy.\n2. Only include files with non-negative indices\n\n\nResponse (should be a valid YAML, and nothing else).\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_help_docs_prompts.toml",
    "content": "[pr_help_docs_prompts]\nsystem=\"\"\"You are Doc-helper, a language model designed to answer questions about a documentation website for a given repository.\nYou will receive a question, a repository url and the full documentation content for that repository (either as markdown or as restructred text).\nYour goal is to provide the best answer to the question using the documentation provided.\n\nAdditional instructions:\n- Be short and concise in your answers. Give examples if needed.\n- Answer only questions that are related to the documentation website content. If the question is completely unrelated to the documentation, return an empty response.\n\n\nThe output must be a YAML object equivalent to type $DocHelper, according to the following Pydantic definitions:\n=====\nclass relevant_section(BaseModel):\n    file_name: str = Field(description=\"The name of the relevant file\")\n    relevant_section_header_string: str = Field(description=\"The exact text of the relevant markdown/restructured text section heading from the relevant file  (starting with '#', '##', etc.). Return empty string if the entire file is the relevant section, or if the relevant section has no heading\")\n\nclass DocHelper(BaseModel):\n    user_question: str = Field(description=\"The user's question\")\n    response: str = Field(description=\"The response to the user's question\")\n    relevant_sections: List[relevant_section] = Field(description=\"A list of the relevant markdown/restructured text sections in the documentation that answer the user's question, ordered by importance (most relevant first)\")\n    question_is_relevant: int = Field(description=\"Return 1 if the question is somewhat relevant to documentation. 
0 - otherwise\")\n=====\n\n\nExample output:\n```yaml\nuser_question: |\n  ...\nresponse: |\n  ...\nrelevant_sections:\n- file_name: \"src/file1.py\"\n  relevant_section_header_string: |\n    ...\n- ...\nquestion_is_relevant: |\n  1\n\"\"\"\n\nuser=\"\"\"\\\nDocumentation url: '{{ docs_url| trim }}'\n-----\n\n\nUser's Question:\n=====\n{{ question|trim }}\n=====\n\n\nDocumentation website content:\n=====\n{{ snippets|trim }}\n=====\n\n\nReminder: The output must be a YAML object equivalent to type $DocHelper, similar to the following example output:\n=====\nExample output:\n```yaml\nuser_question: |\n  ...\nresponse: |\n  ...\nrelevant_sections:\n- file_name: \"src/file1.py\"\n  relevant_section_header_string: |\n    ...\n- ...\nquestion_is_relevant: |\n  1\n=====\n\n\nResponse (should be a valid YAML, and nothing else).\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_help_prompts.toml",
    "content": "[pr_help_prompts]\nsystem=\"\"\"You are Doc-helper, a language models designed to answer questions about a documentation website for an open-soure project called \"PR-Agent\" (recently renamed to \"Qodo Merge\").\nYou will receive a question, and the full documentation website content.\nYour goal is to provide the best answer to the question using the documentation provided.\n\nAdditional instructions:\n- Try to be short and concise in your answers. Try to give examples if needed.\n- The main tools of PR-Agent are 'describe', 'review', 'improve'. If there is ambiguity to which tool the user is referring to, prioritize snippets of these tools over others.\n- If the question has ambiguity and can relate to different tools or platforms, provide the best answer possible based on what is available, but also state in your answer what additional information would be needed to give a more accurate answer.\n\n\nThe output must be a YAML object equivalent to type $DocHelper, according to the following Pydantic definitions:\n=====\nclass relevant_section(BaseModel):\n    file_name: str = Field(description=\"The name of the relevant file\")\n    relevant_section_header_string: str = Field(description=\"The exact text of the relevant markdown section heading from the relevant file  (starting with '#', '##', etc.). 
Return empty string if the entire file is the relevant section, or if the relevant section has no heading\")\n\nclass DocHelper(BaseModel):\n    user_question: str = Field(description=\"The user's question\")\n    response: str = Field(description=\"The response to the user's question\")\n    relevant_sections: List[relevant_section] = Field(description=\"A list of the relevant markdown sections in the documentation that answer the user's question, ordered by importance (most relevant first)\")\n=====\n\n\nExample output:\n```yaml\nuser_question: |\n  ...\nresponse: |\n  ...\nrelevant_sections:\n- file_name: \"src/file1.py\"\n  relevant_section_header_string: |\n    ...\n- ...\n\"\"\"\n\nuser=\"\"\"\\\nUser's Question:\n=====\n{{ question|trim }}\n=====\n\n\nDocumentation website content:\n=====\n{{ snippets|trim }}\n=====\n\n\nResponse (should be a valid YAML, and nothing else):\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_information_from_user_prompts.toml",
    "content": "[pr_information_from_user_prompt]\nsystem=\"\"\"You are PR-Reviewer, a language model designed to review a Git Pull Request (PR).\nGiven the PR Info and the PR Git Diff, generate 3 short questions about the PR code for the PR author.\nThe goal of the questions is to help the language model understand the PR better, so the questions should be insightful, informative, non-trivial, and relevant to the PR.\nYou should prefer asking yes/no questions, or multiple choice questions. Also add at least one open-ended question, but make sure they are not too difficult, and can be answered in a sentence or two.\n\n\nExample output:\n'\nQuestions to better understand the PR:\n1) ...\n2) ...\n...\n'\n\"\"\"\n\nuser=\"\"\"PR Info:\nTitle: '{{title}}'\n\nBranch: '{{branch}}'\n\n{%- if description %}\n\nDescription:\n======\n{{ description|trim }}\n======\n{%- endif %}\n\n{%- if language %}\n\nMain PR language: '{{ language }}'\n{%- endif %}\n{%- if commit_messages_str %}\n\n\nCommit messages:\n======\n{{ commit_messages_str|trim }}\n======\n{%- endif %}\n\n\nThe PR Git Diff:\n======\n{{ diff|trim }}\n======\n\nNote that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines\n\n\nResponse:\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_line_questions_prompts.toml",
    "content": "[pr_line_questions_prompt]\nsystem=\"\"\"You are PR-Reviewer, a language model designed to answer questions about a Git Pull Request (PR).\n\nYour goal is to answer questions\\\\tasks about specific lines of code in the PR, and provide feedback.\nBe informative, constructive, and give examples. Try to be as specific as possible.\nDon't avoid answering the questions. You must answer the questions, as best as you can, without adding any unrelated content.\n\nAdditional guidelines:\n- When quoting variables or names from the code, use backticks (`) instead of single quote (').\n- If relevant, use bullet points.\n- Be short and to the point.\n\nExample Hunk Structure:\n======\n## File: 'src/file1.py'\n\n@@ -12,5 +12,5 @@ def func1():\ncode line 1 that remained unchanged in the PR\ncode line 2 that remained unchanged in the PR\n-code line that was removed in the PR\n+code line added in the PR\ncode line 3 that remained unchanged in the PR\n======\n\n\"\"\"\n\nuser=\"\"\"PR Info:\n\nTitle: '{{title}}'\n\nBranch: '{{branch}}'\n\n\nHere is a context hunk from the PR diff:\n======\n{{ full_hunk|trim }}\n======\n\n\nNow focus on the selected lines from the hunk:\n======\n{{ selected_lines|trim }}\n======\nNote that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines\n\n{%- if conversation_history %}\n\nPrevious discussion on this code:\n======\n{{ conversation_history|trim }}\n======\n\nConsider this conversation history (format: \"N. Username: Message\", where numbers indicate the comment order). 
When responding:\n- Maintain consistency with previous technical explanations\n- Address unresolved issues from earlier discussions\n- Build upon existing knowledge without contradictions\n- Incorporate relevant context while focusing on the current question\n{%- endif %}\n\nA question about the selected lines:\n======\n{{ question|trim }}\n======\n\nResponse to the question:\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_questions_prompts.toml",
    "content": "[pr_questions_prompt]\nsystem=\"\"\"You are PR-Reviewer, a language model designed to answer questions about a Git Pull Request (PR).\n\nYour goal is to answer questions\\\\tasks about the new code introduced in the PR (lines starting with '+' in the 'PR Git Diff' section), and provide feedback.\nBe informative, constructive, and give examples. Try to be as specific as possible.\nDon't avoid answering the questions. You must answer the questions, as best as you can, without adding any unrelated content.\n\"\"\"\n\nuser=\"\"\"PR Info:\n\nTitle: '{{title}}'\n\nBranch: '{{branch}}'\n\n{%- if description %}\n\nDescription:\n======\n{{ description|trim }}\n======\n{%- endif %}\n\n{%- if language %}\n\nMain PR language: '{{ language }}'\n{%- endif %}\n\n\nThe PR Git Diff:\n======\n{{ diff|trim }}\n======\nNote that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines\n\n\nThe PR Questions:\n======\n{{ questions|trim }}\n======\n\nResponse to the PR Questions:\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_reviewer_prompts.toml",
    "content": "[pr_review_prompt]\nsystem=\"\"\"You are PR-Reviewer, a language model designed to review a Git Pull Request (PR).\nYour task is to provide constructive and concise feedback for the PR.\nThe review should focus on new code added in the PR code diff (lines starting with '+')\n\n\nThe format we will use to present the PR code diff:\n======\n## File: 'src/file1.py'\n{%- if is_ai_metadata %}\n### AI-generated changes summary:\n* ...\n* ...\n{%- endif %}\n\n\n@@ ... @@ def func1():\n__new hunk__\n11  unchanged code line0\n12  unchanged code line1\n13 +new code line2 added\n14  unchanged code line3\n__old hunk__\n unchanged code line0\n unchanged code line1\n-old code line2 removed\n unchanged code line3\n\n@@ ... @@ def func2():\n__new hunk__\n unchanged code line4\n+new code line5 added\n unchanged code line6\n\n## File: 'src/file2.py'\n...\n======\n\n- In the format above, the diff is organized into separate '__new hunk__' and '__old hunk__' sections for each code chunk. '__new hunk__' contains the updated code, while '__old hunk__' shows the removed code. If no code was removed in a specific chunk, the __old hunk__ section will be omitted.\n- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only be used for reference.\n- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \\\n The review should address new code added in the PR code diff (lines starting with '+').\n{%- if is_ai_metadata %}\n- If available, an AI-generated summary will appear and provide a high-level overview of the file changes. 
Note that this summary may not be fully accurate or complete.\n{%- endif %}\n- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote (').\n- Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality or questioning code elements (like variables declarations or import statements) that may be defined elsewhere in the codebase.\n- Also note that if the code ends at an opening brace or statement that begins a new scope (like 'if', 'for', 'try'), don't treat it as incomplete. Instead, acknowledge the visible scope boundary and analyze only the code shown.\n\n{%- if extra_instructions %}\n\n\nExtra instructions from the user:\n======\n{{ extra_instructions }}\n======\n{% endif %}\n\n\nThe output must be a YAML object equivalent to type $PRReview, according to the following Pydantic definitions:\n=====\n{%- if require_can_be_split_review %}\nclass SubPR(BaseModel):\n    relevant_files: List[str] = Field(description=\"The relevant files of the sub-PR\")\n    title: str = Field(description=\"Short and concise title for an independent and meaningful sub-PR, composed only from the relevant files\")\n{%- endif %}\n\nclass KeyIssuesComponentLink(BaseModel):\n    relevant_file: str = Field(description=\"The full file path of the relevant file\")\n    issue_header: str = Field(description=\"One or two word title for the issue. For example: 'Possible Bug', etc.\")\n    issue_content: str = Field(description=\"A short and concise summary of what should be further inspected and validated during the PR review process for this issue. 
Do not mention line numbers in this field.\")\n    start_line: int = Field(description=\"The start line that corresponds to this issue in the relevant file\")\n    end_line: int = Field(description=\"The end line that corresponds to this issue in the relevant file\")\n\n{%- if require_todo_scan %}\nclass TodoSection(BaseModel):\n    relevant_file: str = Field(description=\"The full path of the file containing the TODO comment\")\n    line_number: int = Field(description=\"The line number where the TODO comment starts\")\n    content: str = Field(description=\"The content of the TODO comment. Only include actual TODO comments within code comments (e.g., comments starting with '#', '//', '/*', '<!--', ...).  Remove leading 'TODO' prefixes. If more than 10 words, summarize the TODO comment to a single short sentence up to 10 words.\")\n{%- endif %}\n\n{%- if related_tickets %}\n\nclass TicketCompliance(BaseModel):\n    ticket_url: str = Field(description=\"Ticket URL or ID\")\n    ticket_requirements: str = Field(description=\"Repeat, in your own words (in bullet points), all the requirements, sub-tasks, DoD, and acceptance criteria raised by the ticket\")\n    fully_compliant_requirements: str = Field(description=\"Bullet-point list of items from the  'ticket_requirements' section above that are fulfilled by the PR code. Don't explain how the requirements are met, just list them shortly. Can be empty\")\n    not_compliant_requirements: str = Field(description=\"Bullet-point list of items from the 'ticket_requirements' section above that are not fulfilled by the PR code. Don't explain how the requirements are not met, just list them shortly. Can be empty\")\n    requires_further_human_verification: str = Field(description=\"Bullet-point list of items from the 'ticket_requirements' section above that cannot be assessed through code review alone, are unclear, or need further human review (e.g., browser testing, UI checks). 
Leave empty if all 'ticket_requirements' were marked as fully compliant or not compliant\")\n{%- endif %}\n\n{%- if require_estimate_contribution_time_cost %}\n\nclass ContributionTimeCostEstimate(BaseModel):\n    best_case: str = Field(description=\"An expert in the relevant technology stack, with no unforeseen issues or bugs during the work.\", examples=[\"45m\", \"5h\", \"30h\"])\n    average_case: str = Field(description=\"A senior developer with only brief familiarity with this specific technology stack, and no major unforeseen issues.\", examples=[\"45m\", \"5h\", \"30h\"])\n    worst_case: str = Field(description=\"A senior developer with no prior experience in this specific technology stack, requiring significant time for research, debugging, or resolving unexpected errors.\", examples=[\"45m\", \"5h\", \"30h\"])\n{%- endif %}\n\nclass Review(BaseModel):\n{%- if related_tickets %}\n    ticket_compliance_check: List[TicketCompliance] = Field(description=\"A list of compliance checks for the related tickets\")\n{%- endif %}\n{%- if require_estimate_effort_to_review %}\n    estimated_effort_to_review_[1-5]: int = Field(description=\"Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. 
Take into account the size, complexity, quality, and the needed changes of the PR code diff.\")\n{%- endif %}\n{%- if require_estimate_contribution_time_cost %}\n    contribution_time_cost_estimate: ContributionTimeCostEstimate = Field(description=\"An estimate of the time required to implement the changes, based on the quantity, quality, and complexity of the contribution, as well as the context from the PR description and commit messages.\")\n{%- endif %}\n{%- if require_score %}\n    score: str = Field(description=\"Rate this PR on a scale of 0-100 (inclusive), where 0 means the worst possible PR code, and 100 means PR code of the highest quality, without any bugs or performance issues, that is ready to be merged immediately and run in production at scale.\")\n{%- endif %}\n{%- if require_tests %}\n    relevant_tests: str = Field(description=\"yes/no question: does this PR have relevant tests added or updated ?\")\n{%- endif %}\n{%- if question_str %}\n    insights_from_user_answers: str = Field(description=\"shortly summarize the insights you gained from the user's answers to the questions\")\n{%- endif %}\n    key_issues_to_review: List[KeyIssuesComponentLink] = Field(description=\"A short and diverse list (0-{{ num_max_findings }} issues) of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.\")\n{%- if require_security_review %}\n    security_concerns: str = Field(description=\"Does this PR code introduce vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...', etc. Explain your answer. 
Be specific and give examples if possible\")\n{%- endif %}\n{%- if require_todo_scan %}\n    todo_sections: Union[List[TodoSection], str] = Field(description=\"A list of TODO comments found in the PR code. Return 'No' (as a string) if there are no TODO comments in the PR\")\n{%- endif %}\n{%- if require_can_be_split_review %}\n    can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description=\"Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represents a meaningful independent task. Output an empty list if the PR code does not need to be split.\")\n{%- endif %}\n\nclass PRReview(BaseModel):\n    review: Review\n=====\n\n\nExample output:\n```yaml\nreview:\n{%- if related_tickets %}\n  ticket_compliance_check:\n    - ticket_url: |\n        ...\n      ticket_requirements: |\n        ...\n      fully_compliant_requirements: |\n        ...\n      not_compliant_requirements: |\n        ...\n      requires_further_human_verification: |\n        ...\n{%- endif %}\n{%- if require_estimate_effort_to_review %}\n  estimated_effort_to_review_[1-5]: |\n    3\n{%- endif %}\n{%- if require_score %}\n  score: 89\n{%- endif %}\n  relevant_tests: |\n    No\n  key_issues_to_review:\n    - relevant_file: |\n        directory/xxx.py\n      issue_header: |\n        Possible Bug\n      issue_content: |\n        ...\n      start_line: 12\n      end_line: 14\n    - ...\n  security_concerns: |\n    No\n{%- if require_todo_scan %}\n  todo_sections: |\n    No\n{%- endif %} \n{%- if require_can_be_split_review %}\n  can_be_split:\n  - relevant_files:\n    - ...\n    - ...\n    title: ...\n  - ...\n{%- endif %}\n{%- if require_estimate_contribution_time_cost %}\n  contribution_time_cost_estimate:\n    best_case: |\n      ...\n    
average_case: |\n      ...\n    worst_case: |\n      ...\n{%- endif %}\n```\n\nAnswer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')\n\"\"\"\n\nuser=\"\"\"\n{%- if related_tickets %}\n--PR Ticket Info--\n{%- for ticket in related_tickets %}\n=====\nTicket URL: '{{ ticket.ticket_url }}'\n\nTicket Title: '{{ ticket.title }}'\n\n{%- if ticket.labels %}\n\nTicket Labels: {{ ticket.labels }}\n\n{%- endif %}\n{%- if ticket.body %}\n\nTicket Description:\n#####\n{{ ticket.body }}\n#####\n{%- endif %}\n\n{%- if ticket.requirements is defined and ticket.requirements %}\nTicket Requirements:\n#####\n{{ ticket.requirements }}\n#####\n{%- endif %}\n=====\n{% endfor %}\n{%- endif %}\n\n\n--PR Info--\n{%- if date %}\n\nToday's Date: {{date}}\n{%- endif %}\n\nTitle: '{{title}}'\n\nBranch: '{{branch}}'\n\n{%- if description %}\n\nPR Description:\n======\n{{ description|trim }}\n======\n{%- endif %}\n\n{%- if question_str %}\n\n=====\nHere are questions to better understand the PR. 
Use the answers to provide better feedback.\n\n{{ question_str|trim }}\n\nUser answers:\n'\n{{ answer_str|trim }}\n'\n=====\n{%- endif %}\n\n\nThe PR code diff:\n======\n{{ diff|trim }}\n======\n\n\n{%- if duplicate_prompt_examples %}\n\n\nExample output:\n```yaml\nreview:\n{%- if related_tickets %}\n  ticket_compliance_check:\n    - ticket_url: |\n        ...\n      ticket_requirements: |\n        ...\n      fully_compliant_requirements: |\n        ...\n      not_compliant_requirements: |\n        ...\n      requires_further_human_verification: |\n        ...\n{%- endif %}\n{%- if require_estimate_effort_to_review %}\n  estimated_effort_to_review_[1-5]: |\n    3\n{%- endif %}\n{%- if require_score %}\n  score: 89\n{%- endif %}\n  relevant_tests: |\n    No\n  key_issues_to_review:\n    - relevant_file: |\n        ...\n      issue_header: |\n        ...\n      issue_content: |\n        ...\n      start_line: ...\n      end_line: ...\n    - ...\n  security_concerns: |\n    No\n{%- if require_todo_scan %}\n  todo_sections: |\n    No\n{%- endif %}\n{%- if require_can_be_split_review %}\n  can_be_split:\n  - relevant_files:\n    - ...\n    - ...\n    title: ...\n  - ...\n{%- endif %}\n{%- if require_estimate_contribution_time_cost %}\n  contribution_time_cost_estimate:\n    best_case: |\n      ...\n    average_case: |\n      ...\n    worst_case: |\n      ...\n{%- endif %}\n```\n(replace '...' with the actual values)\n{%- endif %}\n\n\nResponse (should be a valid YAML, and nothing else):\n```yaml\n\"\"\"\n"
  },
  {
    "path": "pr_agent/settings/pr_update_changelog_prompts.toml",
    "content": "[pr_update_changelog_prompt]\nsystem=\"\"\"You are a language model called PR-Changelog-Updater.\nYour task is to add a brief summary of this PR's changes to CHANGELOG.md file of the project:\n- Follow the file's existing format and style conventions like dates, section titles, etc.\n- Only add new changes (don't repeat existing entries)\n- Be general, and avoid specific details, files, etc. The output should be minimal, no more than 3-4 short lines.\n- Write only the new content to be added to CHANGELOG.md, without any introduction or summary. The content should appear as if it's a natural part of the existing file.\n{%- if pr_link %}\n- If relevant, convert the changelog main header into a clickable link using the PR URL '{{ pr_link }}'. Format: header [*](pr_link)\n{%- endif %}\n\n\n{%- if extra_instructions %}\n\nExtra instructions from the user:\n======\n{{ extra_instructions|trim }}\n======\n{%- endif %}\n\"\"\"\n\nuser=\"\"\"PR Info:\n\nTitle: '{{title}}'\n\nBranch: '{{branch}}'\n\n{%- if description %}\n\nDescription:\n======\n{{ description|trim }}\n======\n{%- endif %}\n\n{%- if language %}\n\nMain PR language: '{{ language }}'\n{%- endif %}\n{%- if commit_messages_str %}\n\n\nCommit messages:\n======\n{{ commit_messages_str|trim }}\n======\n{%- endif %}\n\n\nThe PR Git Diff:\n======\n{{ diff|trim }}\n======\n\n\nCurrent date:\n```\n{{today}}\n```\n\n\nThe current 'CHANGELOG.md' file\n======\n{{ changelog_file_str }}\n======\n\n\nResponse:\n```markdown\n\"\"\"\n"
  },
  {
    "path": "pr_agent/tools/__init__.py",
    "content": ""
  },
  {
    "path": "pr_agent/tools/pr_add_docs.py",
    "content": "import copy\nimport textwrap\nfrom functools import partial\nfrom typing import Dict\n\nfrom jinja2 import Environment, StrictUndefined\n\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import load_yaml\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider\nfrom pr_agent.git_providers.git_provider import get_main_pr_language\nfrom pr_agent.log import get_logger\n\n\nclass PRAddDocs:\n    def __init__(self, pr_url: str, cli_mode=False, args: list = None,\n                 ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):\n\n        self.git_provider = get_git_provider()(pr_url)\n        self.main_language = get_main_pr_language(\n            self.git_provider.get_languages(), self.git_provider.get_files()\n        )\n\n        self.ai_handler = ai_handler()\n        self.ai_handler.main_pr_language = self.main_language\n\n        self.patches_diff = None\n        self.prediction = None\n        self.cli_mode = cli_mode\n        self.vars = {\n            \"title\": self.git_provider.pr.title,\n            \"branch\": self.git_provider.get_pr_branch(),\n            \"description\": self.git_provider.get_pr_description(),\n            \"language\": self.main_language,\n            \"diff\": \"\",  # empty diff for initial calculation\n            \"extra_instructions\": get_settings().pr_add_docs.extra_instructions,\n            \"commit_messages_str\": self.git_provider.get_commit_messages(),\n            'docs_for_language': get_docs_for_language(self.main_language,\n                                                       get_settings().pr_add_docs.docs_style),\n        }\n        self.token_handler = TokenHandler(self.git_provider.pr,\n         
                                 self.vars,\n                                          get_settings().pr_add_docs_prompt.system,\n                                          get_settings().pr_add_docs_prompt.user)\n\n    async def run(self):\n        try:\n            get_logger().info('Generating code Docs for PR...')\n            if get_settings().config.publish_output:\n                self.git_provider.publish_comment(\"Generating Documentation...\", is_temporary=True)\n\n            get_logger().info('Preparing PR documentation...')\n            await retry_with_fallback_models(self._prepare_prediction)\n            data = self._prepare_pr_code_docs()\n            if (not data) or (not 'Code Documentation' in data):\n                get_logger().info('No code documentation found for PR.')\n                return\n\n            if get_settings().config.publish_output:\n                get_logger().info('Pushing PR documentation...')\n                self.git_provider.remove_initial_comment()\n                get_logger().info('Pushing inline code documentation...')\n                self.push_inline_docs(data)\n        except Exception as e:\n            get_logger().error(f\"Failed to generate code documentation for PR, error: {e}\")\n\n    async def _prepare_prediction(self, model: str):\n        get_logger().info('Getting PR diff...')\n\n        self.patches_diff = get_pr_diff(self.git_provider,\n                                        self.token_handler,\n                                        model,\n                                        add_line_numbers_to_hunks=True,\n                                        disable_extra_lines=False)\n\n        get_logger().info('Getting AI prediction...')\n        self.prediction = await self._get_prediction(model)\n\n    async def _get_prediction(self, model: str):\n        variables = copy.deepcopy(self.vars)\n        variables[\"diff\"] = self.patches_diff  # update diff\n        environment = 
Environment(undefined=StrictUndefined)\n        system_prompt = environment.from_string(get_settings().pr_add_docs_prompt.system).render(variables)\n        user_prompt = environment.from_string(get_settings().pr_add_docs_prompt.user).render(variables)\n        if get_settings().config.verbosity_level >= 2:\n            get_logger().info(f\"\\nSystem prompt:\\n{system_prompt}\")\n            get_logger().info(f\"\\nUser prompt:\\n{user_prompt}\")\n        response, finish_reason = await self.ai_handler.chat_completion(\n            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)\n\n        return response\n\n    def _prepare_pr_code_docs(self) -> Dict:\n        docs = self.prediction.strip()\n        data = load_yaml(docs)\n        if isinstance(data, list):\n            data = {'Code Documentation': data}\n        return data\n\n    def push_inline_docs(self, data):\n        docs = []\n\n        if not data['Code Documentation']:\n            return self.git_provider.publish_comment('No code documentation found to improve this PR.')\n\n        for d in data['Code Documentation']:\n            try:\n                if get_settings().config.verbosity_level >= 2:\n                    get_logger().info(f\"add_docs: {d}\")\n                relevant_file = d['relevant file'].strip()\n                relevant_line = int(d['relevant line'])  # absolute position\n                documentation = d['documentation']\n                doc_placement = d['doc placement'].strip()\n                if documentation:\n                    new_code_snippet = self.dedent_code(relevant_file, relevant_line, documentation, doc_placement,\n                                                        add_original_line=True)\n\n                    body = f\"**Suggestion:** Proposed documentation\\n```suggestion\\n\" + new_code_snippet + \"\\n```\"\n                    docs.append({'body': body, 'relevant_file': relevant_file,\n                    
                         'relevant_lines_start': relevant_line,\n                                             'relevant_lines_end': relevant_line})\n            except Exception:\n                if get_settings().config.verbosity_level >= 2:\n                    get_logger().info(f\"Could not parse code docs: {d}\")\n\n        is_successful = self.git_provider.publish_code_suggestions(docs)\n        if not is_successful:\n            get_logger().info(\"Failed to publish code docs, trying to publish each docs separately\")\n            for doc_suggestion in docs:\n                self.git_provider.publish_code_suggestions([doc_suggestion])\n\n    def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet, doc_placement='after',\n                    add_original_line=False):\n        try:  # dedent code snippet\n            self.diff_files = self.git_provider.diff_files if self.git_provider.diff_files \\\n                else self.git_provider.get_diff_files()\n            original_initial_line = None\n            for file in self.diff_files:\n                if file.filename.strip() == relevant_file:\n                    original_initial_line = file.head_file.splitlines()[relevant_lines_start - 1]\n                    break\n            if original_initial_line:\n                if doc_placement == 'after':\n                    line = file.head_file.splitlines()[relevant_lines_start]\n                else:\n                    line = original_initial_line\n                suggested_initial_line = new_code_snippet.splitlines()[0]\n                original_initial_spaces = len(line) - len(line.lstrip())\n                suggested_initial_spaces = len(suggested_initial_line) - len(suggested_initial_line.lstrip())\n                delta_spaces = original_initial_spaces - suggested_initial_spaces\n                if delta_spaces > 0:\n                    new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * \" \").rstrip('\\n')\n          
      if add_original_line:\n                    if doc_placement == 'after':\n                        new_code_snippet = original_initial_line + \"\\n\" + new_code_snippet\n                    else:\n                        new_code_snippet = new_code_snippet.rstrip() + \"\\n\" + original_initial_line\n        except Exception as e:\n            if get_settings().config.verbosity_level >= 2:\n                get_logger().info(f\"Could not dedent code snippet for file {relevant_file}, error: {e}\")\n\n        return new_code_snippet\n\n\ndef get_docs_for_language(language, style):\n    language = language.lower()\n    if language == 'java':\n        return \"Javadocs\"\n    elif language in ['python', 'lisp', 'clojure']:\n        return f\"Docstring ({style})\"\n    elif language in ['javascript', 'typescript']:\n        return \"JSdocs\"\n    elif language == 'c++':\n        return \"Doxygen\"\n    else:\n        return \"Docs\"\n"
  },
  {
    "path": "pr_agent/tools/pr_code_suggestions.py",
    "content": "import asyncio\nimport copy\nimport difflib\nimport re\nimport textwrap\nimport traceback\nfrom datetime import datetime\nfrom functools import partial\nfrom typing import Dict, List\n\nfrom jinja2 import Environment, StrictUndefined\n\nfrom pr_agent.algo import MAX_TOKENS\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.git_patch_processing import decouple_and_convert_to_hunks_with_lines_numbers\nfrom pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files,\n                                         get_pr_diff, get_pr_multi_diffs,\n                                         retry_with_fallback_models)\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import (ModelType, load_yaml, replace_code_tags,\n                                 show_relevant_configurations, get_max_tokens, clip_tokens, get_model)\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import (AzureDevopsProvider, GithubProvider,\n                                    GitLabProvider, get_git_provider,\n                                    get_git_provider_with_context)\nfrom pr_agent.git_providers.git_provider import get_main_pr_language, GitProvider\nfrom pr_agent.log import get_logger\nfrom pr_agent.servers.help import HelpMessage\nfrom pr_agent.tools.pr_description import insert_br_after_x_chars\n\n\nclass PRCodeSuggestions:\n    def __init__(self, pr_url: str, cli_mode=False, args: list = None,\n                 ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):\n\n        self.git_provider = get_git_provider_with_context(pr_url)\n        self.main_language = get_main_pr_language(\n            self.git_provider.get_languages(), self.git_provider.get_files()\n        )\n\n        num_code_suggestions = int(get_settings().pr_code_suggestions.num_code_suggestions_per_chunk)\n\n        self.ai_handler = 
ai_handler()\n        self.ai_handler.main_pr_language = self.main_language\n        self.patches_diff = None\n        self.prediction = None\n        self.pr_url = pr_url\n        self.cli_mode = cli_mode\n        self.pr_description, self.pr_description_files = (\n            self.git_provider.get_pr_description(split_changes_walkthrough=True))\n        if (self.pr_description_files and get_settings().get(\"config.is_auto_command\", False) and\n                get_settings().get(\"config.enable_ai_metadata\", False)):\n            add_ai_metadata_to_diff_files(self.git_provider, self.pr_description_files)\n            get_logger().debug(f\"AI metadata added to the this command\")\n        else:\n            get_settings().set(\"config.enable_ai_metadata\", False)\n            get_logger().debug(f\"AI metadata is disabled for this command\")\n\n        self.vars = {\n            \"title\": self.git_provider.pr.title,\n            \"branch\": self.git_provider.get_pr_branch(),\n            \"description\": self.pr_description,\n            \"language\": self.main_language,\n            \"diff\": \"\",  # empty diff for initial calculation\n            \"diff_no_line_numbers\": \"\",  # empty diff for initial calculation\n            \"num_code_suggestions\": num_code_suggestions,\n            \"extra_instructions\": get_settings().pr_code_suggestions.extra_instructions,\n            \"commit_messages_str\": self.git_provider.get_commit_messages(),\n            \"relevant_best_practices\": \"\",\n            \"is_ai_metadata\": get_settings().get(\"config.enable_ai_metadata\", False),\n            \"focus_only_on_problems\": get_settings().get(\"pr_code_suggestions.focus_only_on_problems\", False),\n            \"date\": datetime.now().strftime('%Y-%m-%d'),\n            'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False),\n        }\n\n        if get_settings().pr_code_suggestions.get(\"decouple_hunks\", True):\n            
self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt.system\n            self.pr_code_suggestions_prompt_user = get_settings().pr_code_suggestions_prompt.user\n        else:\n            self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt_not_decoupled.system\n            self.pr_code_suggestions_prompt_user = get_settings().pr_code_suggestions_prompt_not_decoupled.user\n\n        self.token_handler = TokenHandler(self.git_provider.pr,\n                                          self.vars,\n                                          self.pr_code_suggestions_prompt_system,\n                                          self.pr_code_suggestions_prompt_user)\n\n        self.progress = f\"## Generating PR code suggestions\\n\\n\"\n        self.progress += f\"\"\"\\nWork in progress ...<br>\\n<img src=\"https://codium.ai/images/pr_agent/dual_ball_loading-crop.gif\" width=48>\"\"\"\n        self.progress_response = None\n\n    async def run(self):\n        try:\n            if not self.git_provider.get_files():\n                get_logger().info(f\"PR has no files: {self.pr_url}, skipping code suggestions\")\n                return None\n\n            get_logger().info('Generating code suggestions for PR...')\n            relevant_configs = {'pr_code_suggestions': dict(get_settings().pr_code_suggestions),\n                                'config': dict(get_settings().config)}\n            get_logger().debug(\"Relevant configs\", artifacts=relevant_configs)\n\n            # publish \"Preparing suggestions...\" comments\n            if (get_settings().config.publish_output and get_settings().config.publish_output_progress and\n                    not get_settings().config.get('is_auto_command', False)):\n                if self.git_provider.is_supported(\"gfm_markdown\"):\n                    self.progress_response = self.git_provider.publish_comment(self.progress)\n                else:\n                    
self.git_provider.publish_comment(\"Preparing suggestions...\", is_temporary=True)\n\n            # # call the model to get the suggestions, and self-reflect on them\n            # if not self.is_extended:\n            #     data = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)\n            # else:\n            data = await retry_with_fallback_models(self.prepare_prediction_main, model_type=ModelType.REGULAR)\n            if not data:\n                data = {\"code_suggestions\": []}\n            self.data = data\n\n            # Handle the case where the PR has no suggestions\n            if (data is None or 'code_suggestions' not in data or not data['code_suggestions']):\n                await self.publish_no_suggestions()\n                return\n\n            # publish the suggestions\n            if get_settings().config.publish_output:\n                # If a temporary comment was published, remove it\n                self.git_provider.remove_initial_comment()\n\n                # Publish table summarized suggestions\n                if ((not get_settings().pr_code_suggestions.commitable_code_suggestions) and\n                        self.git_provider.is_supported(\"gfm_markdown\")):\n\n                    # generate summarized suggestions\n                    pr_body = self.generate_summarized_suggestions(data)\n                    get_logger().debug(f\"PR output\", artifact=pr_body)\n\n                    # require self-review\n                    if get_settings().pr_code_suggestions.demand_code_suggestions_self_review:\n                        pr_body = await self.add_self_review_text(pr_body)\n\n                    # add usage guide\n                    if (get_settings().pr_code_suggestions.enable_chat_text and get_settings().config.is_auto_command\n                            and isinstance(self.git_provider, GithubProvider)):\n                        pr_body += \"\\n\\n>💡 Need additional feedback ? 
start a [PR chat](https://chromewebstore.google.com/detail/ephlnjeghhogofkifjloamocljapahnl) \\n\\n\"\n                    if get_settings().pr_code_suggestions.enable_help_text:\n                        pr_body += \"<hr>\\n\\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \\n\\n\"\n                        pr_body += HelpMessage.get_improve_usage_guide()\n                        pr_body += \"\\n</details>\\n\"\n\n                    # Output the relevant configurations if enabled\n                    if get_settings().get('config', {}).get('output_relevant_configurations', False):\n                        pr_body += show_relevant_configurations(relevant_section='pr_code_suggestions')\n\n                    # publish the PR comment\n                    if get_settings().pr_code_suggestions.persistent_comment: # true by default\n                        self.publish_persistent_comment_with_history(self.git_provider,\n                                                                     pr_body,\n                                                                     initial_header=\"## PR Code Suggestions ✨\",\n                                                                     update_header=True,\n                                                                     name=\"suggestions\",\n                                                                     final_update_message=False,\n                                                                     max_previous_comments=get_settings().pr_code_suggestions.max_history_len,\n                                                                     progress_response=self.progress_response)\n                    else:\n                        if self.progress_response:\n                            self.git_provider.edit_comment(self.progress_response, body=pr_body)\n                        else:\n                            self.git_provider.publish_comment(pr_body)\n\n                    # dual 
publishing mode\n                    if int(get_settings().pr_code_suggestions.dual_publishing_score_threshold) > 0:\n                        await self.dual_publishing(data)\n                else:\n                    await self.push_inline_code_suggestions(data)\n                    if self.progress_response:\n                        self.git_provider.remove_comment(self.progress_response)\n            else:\n                get_logger().info('Code suggestions generated for PR, but not published since publish_output is False.')\n                pr_body = self.generate_summarized_suggestions(data)\n                get_settings().data = {\"artifact\": pr_body}\n                return\n        except Exception as e:\n            get_logger().error(f\"Failed to generate code suggestions for PR, error: {e}\",\n                               artifact={\"traceback\": traceback.format_exc()})\n            if get_settings().config.publish_output:\n                if self.progress_response:\n                    self.git_provider.remove_comment(self.progress_response)\n                else:\n                    try:\n                        self.git_provider.remove_initial_comment()\n                        self.git_provider.publish_comment(f\"Failed to generate code suggestions for PR\")\n                    except Exception as e:\n                        get_logger().exception(f\"Failed to update persistent review, error: {e}\")\n\n    async def add_self_review_text(self, pr_body):\n        text = get_settings().pr_code_suggestions.code_suggestions_self_review_text\n        pr_body += f\"\\n\\n- [ ]  {text}\"\n        approve_pr_on_self_review = get_settings().pr_code_suggestions.approve_pr_on_self_review\n        fold_suggestions_on_self_review = get_settings().pr_code_suggestions.fold_suggestions_on_self_review\n        if approve_pr_on_self_review and not fold_suggestions_on_self_review:\n            pr_body += ' <!-- approve pr self-review -->'\n        elif 
fold_suggestions_on_self_review and not approve_pr_on_self_review:\n            pr_body += ' <!-- fold suggestions self-review -->'\n        else:\n            pr_body += ' <!-- approve and fold suggestions self-review -->'\n        return pr_body\n\n    async def publish_no_suggestions(self):\n        pr_body = \"## PR Code Suggestions ✨\\n\\nNo code suggestions found for the PR.\"\n        if (get_settings().config.publish_output and\n                get_settings().pr_code_suggestions.get('publish_output_no_suggestions', True)):\n            get_logger().warning('No code suggestions found for the PR.')\n            get_logger().debug(f\"PR output\", artifact=pr_body)\n            if self.progress_response:\n                self.git_provider.edit_comment(self.progress_response, body=pr_body)\n            else:\n                self.git_provider.publish_comment(pr_body)\n        else:\n            get_settings().data = {\"artifact\": \"\"}\n\n    async def dual_publishing(self, data):\n        data_above_threshold = {'code_suggestions': []}\n        try:\n            for suggestion in data['code_suggestions']:\n                if int(suggestion.get('score', 0)) >= int(\n                        get_settings().pr_code_suggestions.dual_publishing_score_threshold) \\\n                        and suggestion.get('improved_code'):\n                    data_above_threshold['code_suggestions'].append(suggestion)\n                    if not data_above_threshold['code_suggestions'][-1]['existing_code']:\n                        get_logger().info(f'Identical existing and improved code for dual publishing found')\n                        data_above_threshold['code_suggestions'][-1]['existing_code'] = suggestion[\n                            'improved_code']\n            if data_above_threshold['code_suggestions']:\n                get_logger().info(\n                    f\"Publishing {len(data_above_threshold['code_suggestions'])} suggestions in dual publishing mode\")\n        
        await self.push_inline_code_suggestions(data_above_threshold)\n        except Exception as e:\n            get_logger().error(f\"Failed to publish dual publishing suggestions, error: {e}\")\n\n    @staticmethod\n    def publish_persistent_comment_with_history(git_provider: GitProvider,\n                                                pr_comment: str,\n                                                initial_header: str,\n                                                update_header: bool = True,\n                                                name='review',\n                                                final_update_message=True,\n                                                max_previous_comments=4,\n                                                progress_response=None,\n                                                only_fold=False):\n\n        def _extract_link(comment_text: str):\n            r = re.compile(r\"<!--.*?-->\")\n            match = r.search(comment_text)\n\n            up_to_commit_txt = \"\"\n            if match:\n                up_to_commit_txt = f\" up to commit {match.group(0)[4:-3].strip()}\"\n            return up_to_commit_txt\n\n        history_header = f\"#### Previous suggestions\\n\"\n        last_commit_num = git_provider.get_latest_commit_url().split('/')[-1][:7]\n        if only_fold: # A user clicked on the 'self-review' checkbox\n            text = get_settings().pr_code_suggestions.code_suggestions_self_review_text\n            latest_suggestion_header = f\"\\n\\n- [x]  {text}\"\n        else:\n            latest_suggestion_header = f\"Latest suggestions up to {last_commit_num}\"\n        latest_commit_html_comment = f\"<!-- {last_commit_num} -->\"\n        found_comment = None\n\n        if max_previous_comments > 0:\n            try:\n                prev_comments = list(git_provider.get_issue_comments())\n                for comment in prev_comments:\n                    if 
comment.body.startswith(initial_header):\n                        prev_suggestions = comment.body\n                        found_comment = comment\n                        comment_url = git_provider.get_comment_url(comment)\n\n                        if history_header.strip() not in comment.body:\n                            # no history section\n                            # extract everything between <table> and </table> in comment.body including <table> and </table>\n                            table_index = comment.body.find(\"<table>\")\n                            if table_index == -1:\n                                git_provider.edit_comment(comment, pr_comment)\n                                continue\n                            # find http link from comment.body[:table_index]\n                            up_to_commit_txt = _extract_link(comment.body[:table_index])\n                            prev_suggestion_table = comment.body[\n                                                    table_index:comment.body.rfind(\"</table>\") + len(\"</table>\")]\n\n                            tick = \"✅ \" if \"✅\" in prev_suggestion_table else \"\"\n                            # surround with details tag\n                            prev_suggestion_table = f\"<details><summary>{tick}{name.capitalize()}{up_to_commit_txt}</summary>\\n<br>{prev_suggestion_table}\\n\\n</details>\"\n\n                            new_suggestion_table = pr_comment.replace(initial_header, \"\").strip()\n\n                            pr_comment_updated = f\"{initial_header}\\n{latest_commit_html_comment}\\n\\n\"\n                            pr_comment_updated += f\"{latest_suggestion_header}\\n{new_suggestion_table}\\n\\n___\\n\\n\"\n                            pr_comment_updated += f\"{history_header}{prev_suggestion_table}\\n\"\n                        else:\n                            # get the text of the previous suggestions until the latest commit\n                            sections = 
prev_suggestions.split(history_header.strip())\n                            latest_table = sections[0].strip()\n                            prev_suggestion_table = sections[1].replace(history_header, \"\").strip()\n\n                            # get text after the latest_suggestion_header in comment.body\n                            table_ind = latest_table.find(\"<table>\")\n                            up_to_commit_txt = _extract_link(latest_table[:table_ind])\n\n                            latest_table = latest_table[table_ind:latest_table.rfind(\"</table>\") + len(\"</table>\")]\n                            # enforce max_previous_comments\n                            count = prev_suggestions.count(f\"\\n<details><summary>{name.capitalize()}\")\n                            count += prev_suggestions.count(f\"\\n<details><summary>✅ {name.capitalize()}\")\n                            if count >= max_previous_comments:\n                                # remove the oldest suggestion\n                                prev_suggestion_table = prev_suggestion_table[:prev_suggestion_table.rfind(\n                                    f\"<details><summary>{name.capitalize()} up to commit\")]\n\n                            tick = \"✅ \" if \"✅\" in latest_table else \"\"\n                            # Add to the prev_suggestions section\n                            last_prev_table = f\"\\n<details><summary>{tick}{name.capitalize()}{up_to_commit_txt}</summary>\\n<br>{latest_table}\\n\\n</details>\"\n                            prev_suggestion_table = last_prev_table + \"\\n\" + prev_suggestion_table\n\n                            new_suggestion_table = pr_comment.replace(initial_header, \"\").strip()\n\n                            pr_comment_updated = f\"{initial_header}\\n\"\n                            pr_comment_updated += f\"{latest_commit_html_comment}\\n\\n\"\n                            pr_comment_updated += 
f\"{latest_suggestion_header}\\n\\n{new_suggestion_table}\\n\\n\"\n                            pr_comment_updated += \"___\\n\\n\"\n                            pr_comment_updated += f\"{history_header}\\n\"\n                            pr_comment_updated += f\"{prev_suggestion_table}\\n\"\n\n                        get_logger().info(f\"Persistent mode - updating comment {comment_url} to latest {name} message\")\n                        if progress_response:  # publish to 'progress_response' comment, because it refreshes immediately\n                            git_provider.edit_comment(progress_response, pr_comment_updated)\n                            git_provider.remove_comment(comment)\n                            comment = progress_response\n                        else:\n                            git_provider.edit_comment(comment, pr_comment_updated)\n                        return comment\n            except Exception as e:\n                get_logger().exception(f\"Failed to update persistent review, error: {e}\")\n                pass\n\n        # if we are here, we did not find a previous comment to update\n        body = pr_comment.replace(initial_header, \"\").strip()\n        pr_comment = f\"{initial_header}\\n\\n{latest_commit_html_comment}\\n\\n{body}\\n\\n\"\n        if progress_response:\n            git_provider.edit_comment(progress_response, pr_comment)\n            new_comment = progress_response\n        else:\n            new_comment = git_provider.publish_comment(pr_comment)\n        return new_comment\n\n\n    def extract_link(self, s):\n        r = re.compile(r\"<!--.*?-->\")\n        match = r.search(s)\n\n        up_to_commit_txt = \"\"\n        if match:\n            up_to_commit_txt = f\" up to commit {match.group(0)[4:-3].strip()}\"\n        return up_to_commit_txt\n\n    async def _prepare_prediction(self, model: str) -> dict:\n        self.patches_diff = get_pr_diff(self.git_provider,\n                                        
self.token_handler,\n                                        model,\n                                        add_line_numbers_to_hunks=True,\n                                        disable_extra_lines=False)\n        self.patches_diff_list = [self.patches_diff]\n        self.patches_diff_no_line_number = self.remove_line_numbers([self.patches_diff])[0]\n\n        if self.patches_diff:\n            get_logger().debug(f\"PR diff\", artifact=self.patches_diff)\n            self.prediction = await self._get_prediction(model, self.patches_diff, self.patches_diff_no_line_number)\n        else:\n            get_logger().warning(f\"Empty PR diff\")\n            self.prediction = None\n\n        data = self.prediction\n        return data\n\n    async def _get_prediction(self, model: str, patches_diff: str, patches_diff_no_line_number: str) -> dict:\n        variables = copy.deepcopy(self.vars)\n        variables[\"diff\"] = patches_diff  # update diff\n        variables[\"diff_no_line_numbers\"] = patches_diff_no_line_number  # update diff\n        environment = Environment(undefined=StrictUndefined)\n        system_prompt = environment.from_string(self.pr_code_suggestions_prompt_system).render(variables)\n        user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables)\n        response, finish_reason = await self.ai_handler.chat_completion(\n            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)\n        if not get_settings().config.publish_output:\n            get_settings().system_prompt = system_prompt\n            get_settings().user_prompt = user_prompt\n\n        # load suggestions from the AI response\n        data = self._prepare_pr_code_suggestions(response)\n\n        # self-reflect on suggestions (mandatory, since line numbers are generated now here)\n        model_reflect_with_reasoning = get_model('model_reasoning')\n        fallbacks = 
get_settings().config.fallback_models\n        if model_reflect_with_reasoning == get_settings().config.model and model != get_settings().config.model and fallbacks and model == \\\n                fallbacks[0]:\n            # we are using a fallback model (should not happen on regular conditions)\n            get_logger().warning(f\"Using the same model for self-reflection as the one used for suggestions\")\n            model_reflect_with_reasoning = model\n        response_reflect = await self.self_reflect_on_suggestions(data[\"code_suggestions\"],\n                                                                  patches_diff, model=model_reflect_with_reasoning)\n        if response_reflect:\n            await self.analyze_self_reflection_response(data, response_reflect)\n        else:\n            # get_logger().error(f\"Could not self-reflect on suggestions. using default score 7\")\n            for i, suggestion in enumerate(data[\"code_suggestions\"]):\n                suggestion[\"score\"] = 7\n                suggestion[\"score_why\"] = \"\"\n\n        return data\n\n    async def analyze_self_reflection_response(self, data, response_reflect):\n        response_reflect_yaml = load_yaml(response_reflect)\n        code_suggestions_feedback = response_reflect_yaml.get(\"code_suggestions\", [])\n        if code_suggestions_feedback and len(code_suggestions_feedback) == len(data[\"code_suggestions\"]):\n            for i, suggestion in enumerate(data[\"code_suggestions\"]):\n                try:\n                    suggestion[\"score\"] = code_suggestions_feedback[i][\"suggestion_score\"]\n                    suggestion[\"score_why\"] = code_suggestions_feedback[i][\"why\"]\n\n                    if 'relevant_lines_start' not in suggestion:\n                        relevant_lines_start = code_suggestions_feedback[i].get('relevant_lines_start', -1)\n                        relevant_lines_end = code_suggestions_feedback[i].get('relevant_lines_end', -1)\n         
               suggestion['relevant_lines_start'] = relevant_lines_start\n                        suggestion['relevant_lines_end'] = relevant_lines_end\n                        if relevant_lines_start < 0 or relevant_lines_end < 0:\n                            suggestion[\"score\"] = 0\n\n                    try:\n                        if get_settings().config.publish_output:\n                            if not suggestion[\"score\"]:\n                                score = -1\n                            else:\n                                score = int(suggestion[\"score\"])\n                            label = suggestion[\"label\"].lower().strip()\n                            label = label.replace('<br>', ' ')\n                            suggestion_statistics_dict = {'score': score,\n                                                          'label': label}\n                            get_logger().info(f\"PR-Agent suggestions statistics\",\n                                              statistics=suggestion_statistics_dict, analytics=True)\n                    except Exception as e:\n                        get_logger().error(f\"Failed to log suggestion statistics, error: {e}\")\n                        pass\n\n                except Exception as e:  #\n                    get_logger().error(f\"Error processing suggestion score {i}\",\n                                       artifact={\"suggestion\": suggestion,\n                                                 \"code_suggestions_feedback\": code_suggestions_feedback[i]})\n                    suggestion[\"score\"] = 7\n                    suggestion[\"score_why\"] = \"\"\n\n                suggestion = self.validate_one_liner_suggestion_not_repeating_code(suggestion)\n\n                # if the before and after code is the same, clear one of them\n                try:\n                    if suggestion['existing_code'] == suggestion['improved_code']:\n                        get_logger().debug(\n              
              f\"edited improved suggestion {i + 1}, because equal to existing code: {suggestion['existing_code']}\")\n                        if get_settings().pr_code_suggestions.commitable_code_suggestions:\n                            suggestion['improved_code'] = \"\"  # we need 'existing_code' to locate the code in the PR\n                        else:\n                            suggestion['existing_code'] = \"\"\n                except Exception as e:\n                    get_logger().error(f\"Error processing suggestion {i + 1}, error: {e}\")\n\n    @staticmethod\n    def _truncate_if_needed(suggestion):\n        max_code_suggestion_length = get_settings().get(\"PR_CODE_SUGGESTIONS.MAX_CODE_SUGGESTION_LENGTH\", 0)\n        suggestion_truncation_message = get_settings().get(\"PR_CODE_SUGGESTIONS.SUGGESTION_TRUNCATION_MESSAGE\", \"\")\n        if max_code_suggestion_length > 0:\n            if len(suggestion['improved_code']) > max_code_suggestion_length:\n                get_logger().info(f\"Truncated suggestion from {len(suggestion['improved_code'])} \"\n                                  f\"characters to {max_code_suggestion_length} characters\")\n                suggestion['improved_code'] = suggestion['improved_code'][:max_code_suggestion_length]\n                suggestion['improved_code'] += f\"\\n{suggestion_truncation_message}\"\n        return suggestion\n\n    def _prepare_pr_code_suggestions(self, predictions: str) -> Dict:\n        data = load_yaml(predictions.strip(),\n                         keys_fix_yaml=[\"relevant_file\", \"suggestion_content\", \"existing_code\", \"improved_code\"],\n                         first_key=\"code_suggestions\", last_key=\"label\")\n        if isinstance(data, list):\n            data = {'code_suggestions': data}\n\n        # remove or edit invalid suggestions\n        suggestion_list = []\n        one_sentence_summary_list = []\n        for i, suggestion in enumerate(data['code_suggestions']):\n            
try:\n                needed_keys = ['one_sentence_summary', 'label', 'relevant_file']\n                is_valid_keys = True\n                for key in needed_keys:\n                    if key not in suggestion:\n                        is_valid_keys = False\n                        get_logger().debug(\n                            f\"Skipping suggestion {i + 1}, because it does not contain '{key}':\\n'{suggestion}\")\n                        break\n                if not is_valid_keys:\n                    continue\n\n                if get_settings().get(\"pr_code_suggestions.focus_only_on_problems\", False):\n                    CRITICAL_LABEL = 'critical'\n                    if CRITICAL_LABEL in suggestion['label'].lower(): # we want the published labels to be less declarative\n                        suggestion['label'] = 'possible issue'\n\n                if suggestion['one_sentence_summary'] in one_sentence_summary_list:\n                    get_logger().debug(f\"Skipping suggestion {i + 1}, because it is a duplicate: {suggestion}\")\n                    continue\n\n                if 'const' in suggestion['suggestion_content'] and 'instead' in suggestion[\n                    'suggestion_content'] and 'let' in suggestion['suggestion_content']:\n                    get_logger().debug(\n                        f\"Skipping suggestion {i + 1}, because it uses 'const instead let': {suggestion}\")\n                    continue\n\n                if ('existing_code' in suggestion) and ('improved_code' in suggestion):\n                    suggestion = self._truncate_if_needed(suggestion)\n                    one_sentence_summary_list.append(suggestion['one_sentence_summary'])\n                    suggestion_list.append(suggestion)\n                else:\n                    get_logger().info(\n                        f\"Skipping suggestion {i + 1}, because it does not contain 'existing_code' or 'improved_code': {suggestion}\")\n            except Exception as 
e:\n                get_logger().error(f\"Error processing suggestion {i + 1}: {suggestion}, error: {e}\")\n        data['code_suggestions'] = suggestion_list\n\n        return data\n\n    async def push_inline_code_suggestions(self, data):\n        code_suggestions = []\n\n        if not data['code_suggestions']:\n            get_logger().info('No suggestions found to improve this PR.')\n            if self.progress_response:\n                return self.git_provider.edit_comment(self.progress_response,\n                                                      body='No suggestions found to improve this PR.')\n            else:\n                return self.git_provider.publish_comment('No suggestions found to improve this PR.')\n\n        for d in data['code_suggestions']:\n            try:\n                if get_settings().config.verbosity_level >= 2:\n                    get_logger().info(f\"suggestion: {d}\")\n                relevant_file = d['relevant_file'].strip()\n                relevant_lines_start = int(d['relevant_lines_start'])  # absolute position\n                relevant_lines_end = int(d['relevant_lines_end'])\n                content = d['suggestion_content'].rstrip()\n                new_code_snippet = d['improved_code'].rstrip()\n                label = d['label'].strip()\n\n                if new_code_snippet:\n                    new_code_snippet = self.dedent_code(relevant_file, relevant_lines_start, new_code_snippet)\n\n                if d.get('score'):\n                    body = f\"**Suggestion:** {content} [{label}, importance: {d.get('score')}]\\n```suggestion\\n\" + new_code_snippet + \"\\n```\"\n                else:\n                    body = f\"**Suggestion:** {content} [{label}]\\n```suggestion\\n\" + new_code_snippet + \"\\n```\"\n                code_suggestions.append({'body': body, 'relevant_file': relevant_file,\n                                         'relevant_lines_start': relevant_lines_start,\n                             
            'relevant_lines_end': relevant_lines_end,\n                                         'original_suggestion': d})\n            except Exception:\n                get_logger().info(f\"Could not parse suggestion: {d}\")\n\n        is_successful = self.git_provider.publish_code_suggestions(code_suggestions)\n        if not is_successful:\n            get_logger().info(\"Failed to publish code suggestions, trying to publish each suggestion separately\")\n            for code_suggestion in code_suggestions:\n                self.git_provider.publish_code_suggestions([code_suggestion])\n\n    def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet):\n        try:  # dedent code snippet\n            self.diff_files = self.git_provider.diff_files if self.git_provider.diff_files \\\n                else self.git_provider.get_diff_files()\n            original_initial_line = None\n            for file in self.diff_files:\n                if file.filename.strip() == relevant_file:\n                    if file.head_file:\n                        file_lines = file.head_file.splitlines()\n                        if relevant_lines_start > len(file_lines):\n                            get_logger().warning(\n                                \"Could not dedent code snippet, because relevant_lines_start is out of range\",\n                                artifact={'filename': file.filename,\n                                          'file_content': file.head_file,\n                                          'relevant_lines_start': relevant_lines_start,\n                                          'new_code_snippet': new_code_snippet})\n                            return new_code_snippet\n                        else:\n                            original_initial_line = file_lines[relevant_lines_start - 1]\n                    else:\n                        get_logger().warning(\"Could not dedent code snippet, because head_file is missing\",\n                  
                           artifact={'filename': file.filename,\n                                                       'relevant_lines_start': relevant_lines_start,\n                                                       'new_code_snippet': new_code_snippet})\n                        return new_code_snippet\n                    break\n            if original_initial_line:\n                suggested_initial_line = new_code_snippet.splitlines()[0]\n                original_initial_spaces = len(original_initial_line) - len(original_initial_line.lstrip()) # lstrip works both for spaces and tabs\n                suggested_initial_spaces = len(suggested_initial_line) - len(suggested_initial_line.lstrip())\n                delta_spaces = original_initial_spaces - suggested_initial_spaces\n                if delta_spaces > 0:\n                    # Detect indentation character from original line\n                    indent_char = '\\t' if original_initial_line.startswith('\\t') else ' '\n                    new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * indent_char).rstrip('\\n')\n        except Exception as e:\n            get_logger().error(f\"Error when dedenting code snippet for file {relevant_file}, error: {e}\")\n\n        return new_code_snippet\n\n    def validate_one_liner_suggestion_not_repeating_code(self, suggestion):\n        try:\n            existing_code = suggestion.get('existing_code', '').strip()\n            if '...' 
in existing_code:\n                return suggestion\n            new_code = suggestion.get('improved_code', '').strip()\n\n            relevant_file = suggestion.get('relevant_file', '').strip()\n            diff_files = self.git_provider.get_diff_files()\n            for file in diff_files:\n                if file.filename.strip() == relevant_file:\n                    # protections\n                    if not file.head_file:\n                        get_logger().info(f\"head_file is empty\")\n                        return suggestion\n                    head_file = file.head_file\n                    base_file = file.base_file\n                    if existing_code in base_file and existing_code not in head_file and new_code in head_file:\n                        suggestion[\"score\"] = 0\n                        get_logger().warning(\n                            f\"existing_code is in the base file but not in the head file, setting score to 0\",\n                            artifact={\"suggestion\": suggestion})\n        except Exception as e:\n            get_logger().exception(f\"Error validating one-liner suggestion\", artifact={\"error\": e})\n\n        return suggestion\n\n    def remove_line_numbers(self, patches_diff_list: List[str]) -> List[str]:\n        # create a copy of the patches_diff_list, without line numbers for '__new hunk__' sections\n        try:\n            self.patches_diff_list_no_line_numbers = []\n            for patches_diff in self.patches_diff_list:\n                patches_diff_lines = patches_diff.splitlines()\n                for i, line in enumerate(patches_diff_lines):\n                    if line.strip():\n                        if line.isnumeric():\n                            patches_diff_lines[i] = ''\n                        elif line[0].isdigit():\n                            # find the first letter in the line that starts with a valid letter\n                            for j, char in enumerate(line):\n                 
               if not char.isdigit():\n                                    patches_diff_lines[i] = line[j + 1:]\n                                    break\n                self.patches_diff_list_no_line_numbers.append('\\n'.join(patches_diff_lines))\n            return self.patches_diff_list_no_line_numbers\n        except Exception as e:\n            get_logger().error(f\"Error removing line numbers from patches_diff_list, error: {e}\")\n            return patches_diff_list\n\n    async def prepare_prediction_main(self, model: str) -> dict:\n        # get PR diff\n        if get_settings().pr_code_suggestions.decouple_hunks:\n            self.patches_diff_list = get_pr_multi_diffs(self.git_provider,\n                                                        self.token_handler,\n                                                        model,\n                                                        max_calls=get_settings().pr_code_suggestions.max_number_of_calls,\n                                                        add_line_numbers=True)  # decouple hunk with line numbers\n            self.patches_diff_list_no_line_numbers = self.remove_line_numbers(self.patches_diff_list)  # decouple hunk\n\n        else:\n            # non-decoupled hunks\n            self.patches_diff_list_no_line_numbers = get_pr_multi_diffs(self.git_provider,\n                                                                        self.token_handler,\n                                                                        model,\n                                                                        max_calls=get_settings().pr_code_suggestions.max_number_of_calls,\n                                                                        add_line_numbers=False)\n            self.patches_diff_list = await self.convert_to_decoupled_with_line_numbers(\n                self.patches_diff_list_no_line_numbers, model)\n            if not self.patches_diff_list:\n                # fallback to decoupled 
hunks\n                self.patches_diff_list = get_pr_multi_diffs(self.git_provider,\n                                                            self.token_handler,\n                                                            model,\n                                                            max_calls=get_settings().pr_code_suggestions.max_number_of_calls,\n                                                            add_line_numbers=True)  # decouple hunk with line numbers\n\n        if self.patches_diff_list:\n            get_logger().info(f\"Number of PR chunk calls: {len(self.patches_diff_list)}\")\n            get_logger().debug(f\"PR diff:\", artifact=self.patches_diff_list)\n\n            # parallelize calls to AI:\n            if get_settings().pr_code_suggestions.parallel_calls:\n                prediction_list = await asyncio.gather(\n                    *[self._get_prediction(model, patches_diff, patches_diff_no_line_numbers) for\n                      patches_diff, patches_diff_no_line_numbers in\n                      zip(self.patches_diff_list, self.patches_diff_list_no_line_numbers)])\n                self.prediction_list = prediction_list\n            else:\n                prediction_list = []\n                for patches_diff, patches_diff_no_line_numbers in zip(self.patches_diff_list, self.patches_diff_list_no_line_numbers):\n                    prediction = await self._get_prediction(model, patches_diff, patches_diff_no_line_numbers)\n                    prediction_list.append(prediction)\n\n            data = {\"code_suggestions\": []}\n            for j, predictions in enumerate(prediction_list):  # each call adds an element to the list\n                if \"code_suggestions\" in predictions:\n                    score_threshold = max(1, int(get_settings().pr_code_suggestions.suggestions_score_threshold))\n                    for i, prediction in enumerate(predictions[\"code_suggestions\"]):\n                        try:\n                    
        score = int(prediction.get(\"score\", 1))\n                            if score >= score_threshold:\n                                data[\"code_suggestions\"].append(prediction)\n                            else:\n                                get_logger().info(\n                                    f\"Removing suggestions {i} from call {j}, because score is {score}, and score_threshold is {score_threshold}\",\n                                    artifact=prediction)\n                        except Exception as e:\n                            get_logger().error(f\"Error getting PR diff for suggestion {i} in call {j}, error: {e}\",\n                                               artifact={\"prediction\": prediction})\n            self.data = data\n        else:\n            get_logger().warning(f\"Empty PR diff list\")\n            self.data = data = None\n        return data\n\n    async def convert_to_decoupled_with_line_numbers(self, patches_diff_list_no_line_numbers, model) -> List[str]:\n        with get_logger().contextualize(sub_feature='convert_to_decoupled_with_line_numbers'):\n            try:\n                patches_diff_list = []\n                for patch_prompt in patches_diff_list_no_line_numbers:\n                    file_prefix = \"## File: \"\n                    patches = patch_prompt.strip().split(f\"\\n{file_prefix}\")\n                    patches_new = copy.deepcopy(patches)\n                    for i in range(len(patches_new)):\n                        if i == 0:\n                            prefix = patches_new[i].split(\"\\n@@\")[0].strip()\n                        else:\n                            prefix = file_prefix + patches_new[i].split(\"\\n@@\")[0][1:]\n                            prefix = prefix.strip()\n                        patches_new[i] = prefix + '\\n\\n' + decouple_and_convert_to_hunks_with_lines_numbers(patches_new[i],\n                                                                                               
           file=None).strip()\n                        patches_new[i] = patches_new[i].strip()\n                    patch_final = \"\\n\\n\\n\".join(patches_new)\n                    if model in MAX_TOKENS:\n                        max_tokens_full = MAX_TOKENS[\n                            model]  # note - here we take the actual max tokens, without any reductions. we do aim to get the full documentation website in the prompt\n                    else:\n                        max_tokens_full = get_max_tokens(model)\n                    delta_output = 2000\n                    token_count = self.token_handler.count_tokens(patch_final)\n                    if token_count > max_tokens_full - delta_output:\n                        get_logger().warning(\n                            f\"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. clipping the tokens\")\n                        patch_final = clip_tokens(patch_final, max_tokens_full - delta_output)\n                    patches_diff_list.append(patch_final)\n                return patches_diff_list\n            except Exception as e:\n                get_logger().exception(f\"Error converting to decoupled with line numbers\",\n                                       artifact={'patches_diff_list_no_line_numbers': patches_diff_list_no_line_numbers})\n                return []\n\n    def generate_summarized_suggestions(self, data: Dict) -> str:\n        try:\n            pr_body = \"## PR Code Suggestions ✨\\n\\n\"\n\n            if len(data.get('code_suggestions', [])) == 0:\n                pr_body += \"No suggestions found to improve this PR.\"\n                return pr_body\n\n            if get_settings().config.is_auto_command:\n                pr_body += \"Explore these optional code suggestions:\\n\\n\"\n\n            language_extension_map_org = get_settings().language_extension_map_org\n            extension_to_language = {}\n            for language, extensions in 
language_extension_map_org.items():\n                for ext in extensions:\n                    extension_to_language[ext] = language\n\n            pr_body += \"<table>\"\n            header = f\"Suggestion\"\n            delta = 66\n            header += \"&nbsp; \" * delta\n            pr_body += f\"\"\"<thead><tr><td><strong>Category</strong></td><td align=left><strong>{header}</strong></td><td align=center><strong>Impact</strong></td></tr>\"\"\"\n            pr_body += \"\"\"<tbody>\"\"\"\n            suggestions_labels = dict()\n            # add all suggestions related to each label\n            for suggestion in data['code_suggestions']:\n                label = suggestion['label'].strip().strip(\"'\").strip('\"')\n                if label not in suggestions_labels:\n                    suggestions_labels[label] = []\n                suggestions_labels[label].append(suggestion)\n\n            # sort suggestions_labels by the suggestion with the highest score\n            suggestions_labels = dict(\n                sorted(suggestions_labels.items(), key=lambda x: max([s['score'] for s in x[1]]), reverse=True))\n            # sort the suggestions inside each label group by score\n            for label, suggestions in suggestions_labels.items():\n                suggestions_labels[label] = sorted(suggestions, key=lambda x: x['score'], reverse=True)\n\n            counter_suggestions = 0\n            for label, suggestions in suggestions_labels.items():\n                num_suggestions = len(suggestions)\n                pr_body += f\"\"\"<tr><td rowspan={num_suggestions}>{label.capitalize()}</td>\\n\"\"\"\n                for i, suggestion in enumerate(suggestions):\n\n                    relevant_file = suggestion['relevant_file'].strip()\n                    relevant_lines_start = int(suggestion['relevant_lines_start'])\n                    relevant_lines_end = int(suggestion['relevant_lines_end'])\n                    range_str = \"\"\n                    
if relevant_lines_start == relevant_lines_end:\n                        range_str = f\"[{relevant_lines_start}]\"\n                    else:\n                        range_str = f\"[{relevant_lines_start}-{relevant_lines_end}]\"\n\n                    try:\n                        code_snippet_link = self.git_provider.get_line_link(relevant_file, relevant_lines_start,\n                                                                            relevant_lines_end)\n                    except:\n                        code_snippet_link = \"\"\n                    # add html table for each suggestion\n\n                    suggestion_content = suggestion['suggestion_content'].rstrip()\n                    CHAR_LIMIT_PER_LINE = 84\n                    suggestion_content = insert_br_after_x_chars(suggestion_content, CHAR_LIMIT_PER_LINE)\n                    # pr_body += f\"<tr><td><details><summary>{suggestion_content}</summary>\"\n                    existing_code = suggestion['existing_code'].rstrip() + \"\\n\"\n                    improved_code = suggestion['improved_code'].rstrip() + \"\\n\"\n\n                    diff = difflib.unified_diff(existing_code.split('\\n'),\n                                                improved_code.split('\\n'), n=999)\n                    patch_orig = \"\\n\".join(diff)\n                    patch = \"\\n\".join(patch_orig.splitlines()[5:]).strip('\\n')\n\n                    example_code = \"\"\n                    example_code += f\"```diff\\n{patch.rstrip()}\\n```\\n\"\n                    if i == 0:\n                        pr_body += f\"\"\"<td>\\n\\n\"\"\"\n                    else:\n                        pr_body += f\"\"\"<tr><td>\\n\\n\"\"\"\n                    suggestion_summary = suggestion['one_sentence_summary'].strip().rstrip('.')\n                    if \"'<\" in suggestion_summary and \">'\" in suggestion_summary:\n                        # escape the '<' and '>' characters, otherwise they are interpreted as html 
tags\n                        get_logger().info(f\"Escaped suggestion summary: {suggestion_summary}\")\n                        suggestion_summary = suggestion_summary.replace(\"'<\", \"`<\")\n                        suggestion_summary = suggestion_summary.replace(\">'\", \">`\")\n                    if '`' in suggestion_summary:\n                        suggestion_summary = replace_code_tags(suggestion_summary)\n\n                    pr_body += f\"\"\"\\n\\n<details><summary>{suggestion_summary}</summary>\\n\\n___\\n\\n\"\"\"\n                    pr_body += f\"\"\"\n**{suggestion_content}**\n\n[{relevant_file} {range_str}]({code_snippet_link})\n\n{example_code.rstrip()}\n\"\"\"\n                    if suggestion.get('score_why'):\n                        pr_body += f\"<details><summary>Suggestion importance[1-10]: {suggestion['score']}</summary>\\n\\n\"\n                        pr_body += f\"__\\n\\nWhy: {suggestion['score_why']}\\n\\n\"\n                        pr_body += f\"</details>\"\n\n                    pr_body += f\"</details>\"\n\n                    # # add another column for 'score'\n                    score_int = int(suggestion.get('score', 0))\n                    score_str = f\"{score_int}\"\n                    if get_settings().pr_code_suggestions.new_score_mechanism:\n                        score_str = self.get_score_str(score_int)\n                    pr_body += f\"</td><td align=center>{score_str}\\n\\n\"\n\n                    pr_body += f\"</td></tr>\"\n                    counter_suggestions += 1\n\n                # pr_body += \"</details>\"\n                # pr_body += \"\"\"</td></tr>\"\"\"\n            pr_body += \"\"\"</tr></tbody></table>\"\"\"\n            return pr_body\n        except Exception as e:\n            get_logger().info(f\"Failed to publish summarized code suggestions, error: {e}\")\n            return \"\"\n\n    def get_score_str(self, score: int) -> str:\n        th_high = 
get_settings().pr_code_suggestions.get('new_score_mechanism_th_high', 9)\n        th_medium = get_settings().pr_code_suggestions.get('new_score_mechanism_th_medium', 7)\n        if score >= th_high:\n            return \"High\"\n        elif score >= th_medium:\n            return \"Medium\"\n        else:  # score < 7\n            return \"Low\"\n\n    async def self_reflect_on_suggestions(self,\n                                          suggestion_list: List,\n                                          patches_diff: str,\n                                          model: str,\n                                          prev_suggestions_str: str = \"\",\n                                          dedicated_prompt: str = \"\") -> str:\n        if not suggestion_list:\n            return \"\"\n\n        try:\n            suggestion_str = \"\"\n            for i, suggestion in enumerate(suggestion_list):\n                suggestion_str += f\"suggestion {i + 1}: \" + str(suggestion) + '\\n\\n'\n\n            variables = {'suggestion_list': suggestion_list,\n                         'suggestion_str': suggestion_str,\n                         \"diff\": patches_diff,\n                         'num_code_suggestions': len(suggestion_list),\n                         'prev_suggestions_str': prev_suggestions_str,\n                         \"is_ai_metadata\": get_settings().get(\"config.enable_ai_metadata\", False),\n                         'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False)}\n            environment = Environment(undefined=StrictUndefined)\n\n            if dedicated_prompt:\n                system_prompt_reflect = environment.from_string(\n                    get_settings().get(dedicated_prompt).system).render(variables)\n                user_prompt_reflect = environment.from_string(\n                    get_settings().get(dedicated_prompt).user).render(variables)\n            else:\n                system_prompt_reflect = 
environment.from_string(\n                    get_settings().pr_code_suggestions_reflect_prompt.system).render(variables)\n                user_prompt_reflect = environment.from_string(\n                    get_settings().pr_code_suggestions_reflect_prompt.user).render(variables)\n\n            with get_logger().contextualize(command=\"self_reflect_on_suggestions\"):\n                response_reflect, finish_reason_reflect = await self.ai_handler.chat_completion(model=model,\n                                                                                                system=system_prompt_reflect,\n                                                                                                temperature=get_settings().config.temperature,\n                                                                                                user=user_prompt_reflect)\n        except Exception as e:\n            get_logger().info(f\"Could not reflect on suggestions, error: {e}\")\n            return \"\"\n        return response_reflect"
  },
  {
    "path": "pr_agent/tools/pr_config.py",
    "content": "from dynaconf import Dynaconf\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider\nfrom pr_agent.log import get_logger\n\n\nclass PRConfig:\n    \"\"\"\n    The PRConfig class is responsible for listing all configuration options available for the user.\n    \"\"\"\n    def __init__(self, pr_url: str, args=None, ai_handler=None):\n        \"\"\"\n        Initialize the PRConfig object with the necessary attributes and objects to comment on a pull request.\n\n        Args:\n            pr_url (str): The URL of the pull request to be reviewed.\n            args (list, optional): List of arguments passed to the PRConfig class. Defaults to None.\n        \"\"\"\n        self.git_provider = get_git_provider()(pr_url)\n\n    async def run(self):\n        get_logger().info('Getting configuration settings...')\n        get_logger().info('Preparing configs...')\n        pr_comment = self._prepare_pr_configs()\n        if get_settings().config.publish_output:\n            get_logger().info('Pushing configs...')\n            self.git_provider.publish_comment(pr_comment)\n            self.git_provider.remove_initial_comment()\n        return \"\"\n\n    def _prepare_pr_configs(self) -> str:\n        try:\n            conf_file = get_settings().find_file(\"configuration.toml\")\n            dynconf_kwargs = {'core_loaders': [],  # DISABLE default loaders, otherwise will load toml files more than once.\n                 'loaders': ['pr_agent.custom_merge_loader'],\n                 # Use a custom loader to merge sections, but overwrite their overlapping values. 
Do not use ENV variables.\n                 'merge_enabled': True\n                 # Merge multiple TOML files; prevent full section overwrite—only overlapping keys in sections overwrite prior ones.\n             }\n            conf_settings = Dynaconf(settings_files=[conf_file],\n                                     # Security: Disable all dynamic loading features\n                                     load_dotenv=False,  # Don't load .env files\n                                     envvar_prefix=False,\n                                     **dynconf_kwargs\n                                     )\n        except Exception as e:\n            get_logger().error(\"Caught exception during Dynaconf loading. Returning empty dict\",\n                               artifact={\"exception\": e})\n            conf_settings = {}\n        configuration_headers = [header.lower() for header in conf_settings.keys()]\n        relevant_configs = {\n            header: configs for header, configs in get_settings().to_dict().items()\n            if (header.lower().startswith(\"pr_\") or header.lower().startswith(\"config\")) and header.lower() in configuration_headers\n        }\n\n        skip_keys = ['ai_disclaimer', 'ai_disclaimer_title', 'ANALYTICS_FOLDER', 'secret_provider', \"skip_keys\", \"app_id\", \"redirect\",\n                     'trial_prefix_message', 'no_eligible_message', 'identity_provider', 'ALLOWED_REPOS',\n                     'APP_NAME', 'PERSONAL_ACCESS_TOKEN', 'shared_secret', 'key', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'user_token',\n                     'private_key', 'private_key_id', 'client_id', 'client_secret', 'token', 'bearer_token', 'jira_api_token','webhook_secret']\n        partial_skip_keys = ['key', 'secret', 'token', 'private']\n        extra_skip_keys = get_settings().config.get('config.skip_keys', [])\n        if extra_skip_keys:\n            skip_keys.extend(extra_skip_keys)\n        skip_keys_lower = [key.lower() for key in 
skip_keys]\n\n\n        markdown_text = \"<details> <summary><strong>🛠️ PR-Agent Configurations:</strong></summary> \\n\\n\"\n        markdown_text += f\"\\n\\n```yaml\\n\\n\"\n        for header, configs in relevant_configs.items():\n            if configs:\n                markdown_text += \"\\n\\n\"\n                markdown_text += f\"==================== {header} ====================\"\n            for key, value in configs.items():\n                if key.lower() in skip_keys_lower:\n                    continue\n                if any(skip_key in key.lower() for skip_key in partial_skip_keys):\n                    continue\n                markdown_text += f\"\\n{header.lower()}.{key.lower()} = {repr(value) if isinstance(value, str) else value}\"\n                markdown_text += \"  \"\n        markdown_text += \"\\n```\"\n        markdown_text += \"\\n</details>\\n\"\n        get_logger().info(f\"Possible Configurations outputted to PR comment\", artifact=markdown_text)\n        return markdown_text\n"
  },
  {
    "path": "pr_agent/tools/pr_description.py",
    "content": "import asyncio\nimport copy\nimport re\nimport traceback\nfrom functools import partial\nfrom typing import List, Tuple\n\nimport yaml\nfrom jinja2 import Environment, StrictUndefined\n\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.pr_processing import (OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD,\n                                         get_pr_diff,\n                                         get_pr_diff_multiple_patchs,\n                                         retry_with_fallback_models)\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import (ModelType, PRDescriptionHeader, clip_tokens,\n                                 get_max_tokens, get_user_labels, load_yaml,\n                                 set_custom_labels,\n                                 show_relevant_configurations)\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import (GithubProvider, get_git_provider,\n                                    get_git_provider_with_context)\nfrom pr_agent.git_providers.git_provider import get_main_pr_language\nfrom pr_agent.log import get_logger\nfrom pr_agent.servers.help import HelpMessage\nfrom pr_agent.tools.ticket_pr_compliance_check import (\n    extract_and_cache_pr_tickets, extract_ticket_links_from_pr_description,\n    extract_tickets)\n\n\nclass PRDescription:\n    def __init__(self, pr_url: str, args: list = None,\n                 ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):\n        \"\"\"\n        Initialize the PRDescription object with the necessary attributes and objects for generating a PR description\n        using an AI model.\n        Args:\n            pr_url (str): The URL of the pull request.\n            args (list, optional): List of arguments passed to the PRDescription class. 
Defaults to None.\n        \"\"\"\n        # Initialize the git provider and main PR language\n        self.git_provider = get_git_provider_with_context(pr_url)\n        self.main_pr_language = get_main_pr_language(\n            self.git_provider.get_languages(), self.git_provider.get_files()\n        )\n        self.pr_id = self.git_provider.get_pr_id()\n        self.keys_fix = [\"filename:\", \"language:\", \"changes_summary:\", \"changes_title:\", \"description:\", \"title:\"]\n\n        if get_settings().pr_description.enable_semantic_files_types and not self.git_provider.is_supported(\n                \"gfm_markdown\"):\n            get_logger().debug(f\"Disabling semantic files types for {self.pr_id}, gfm_markdown not supported.\")\n            get_settings().pr_description.enable_semantic_files_types = False\n\n        # Initialize the AI handler\n        self.ai_handler = ai_handler()\n        self.ai_handler.main_pr_language = self.main_pr_language\n\n        # Initialize the variables dictionary\n        self.COLLAPSIBLE_FILE_LIST_THRESHOLD = get_settings().pr_description.get(\"collapsible_file_list_threshold\", 8)\n        enable_pr_diagram = get_settings().pr_description.get(\"enable_pr_diagram\", False) and self.git_provider.is_supported(\"gfm_markdown\") # github and gitlab support gfm_markdown\n        self.vars = {\n            \"title\": self.git_provider.pr.title,\n            \"branch\": self.git_provider.get_pr_branch(),\n            \"description\": self.git_provider.get_pr_description(full=False),\n            \"language\": self.main_pr_language,\n            \"diff\": \"\",  # empty diff for initial calculation\n            \"extra_instructions\": get_settings().pr_description.extra_instructions,\n            \"commit_messages_str\": self.git_provider.get_commit_messages(),\n            \"enable_custom_labels\": get_settings().config.enable_custom_labels,\n            \"custom_labels_class\": \"\",  # will be filled if necessary in 
'set_custom_labels' function\n            \"enable_semantic_files_types\": get_settings().pr_description.enable_semantic_files_types,\n            \"related_tickets\": \"\",\n            \"include_file_summary_changes\": len(self.git_provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD,\n            \"duplicate_prompt_examples\": get_settings().config.get(\"duplicate_prompt_examples\", False),\n            \"enable_pr_diagram\": enable_pr_diagram,\n        }\n\n        self.user_description = self.git_provider.get_user_description()\n\n        # Initialize the token handler\n        self.token_handler = TokenHandler(\n            self.git_provider.pr,\n            self.vars,\n            get_settings().pr_description_prompt.system,\n            get_settings().pr_description_prompt.user,\n        )\n\n        # Initialize patches_diff and prediction attributes\n        self.patches_diff = None\n        self.prediction = None\n        self.file_label_dict = None\n\n    async def run(self):\n        try:\n            get_logger().info(f\"Generating a PR description for pr_id: {self.pr_id}\")\n            relevant_configs = {'pr_description': dict(get_settings().pr_description),\n                                'config': dict(get_settings().config)}\n            get_logger().debug(\"Relevant configs\", artifact=relevant_configs)\n            if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False):\n                self.git_provider.publish_comment(\"Preparing PR description...\", is_temporary=True)\n\n            # ticket extraction if exists\n            await extract_and_cache_pr_tickets(self.git_provider, self.vars)\n\n            await retry_with_fallback_models(self._prepare_prediction, ModelType.WEAK)\n\n            if self.prediction:\n                self._prepare_data()\n            else:\n                get_logger().warning(f\"Empty prediction, PR: {self.pr_id}\")\n                
self.git_provider.remove_initial_comment()\n                return None\n\n            if get_settings().pr_description.enable_semantic_files_types:\n                self.file_label_dict = self._prepare_file_labels()\n\n            pr_labels, pr_file_changes = [], []\n            if get_settings().pr_description.publish_labels:\n                pr_labels = self._prepare_labels()\n            else:\n                get_logger().debug(f\"Publishing labels disabled\")\n\n            if get_settings().pr_description.use_description_markers:\n                pr_title, pr_body, changes_walkthrough, pr_file_changes = self._prepare_pr_answer_with_markers()\n            else:\n                pr_title, pr_body, changes_walkthrough, pr_file_changes = self._prepare_pr_answer()\n                if not self.git_provider.is_supported(\n                        \"publish_file_comments\") or not get_settings().pr_description.inline_file_summary:\n                    pr_body += \"\\n\\n\" + changes_walkthrough + \"___\\n\\n\"\n            get_logger().debug(\"PR output\", artifact={\"title\": pr_title, \"body\": pr_body})\n\n            # Add help text if gfm_markdown is supported\n            if self.git_provider.is_supported(\"gfm_markdown\") and get_settings().pr_description.enable_help_text:\n                pr_body += \"<hr>\\n\\n<details> <summary><strong>✨ Describe tool usage guide:</strong></summary><hr> \\n\\n\"\n                pr_body += HelpMessage.get_describe_usage_guide()\n                pr_body += \"\\n</details>\\n\"\n            elif get_settings().pr_description.enable_help_comment and self.git_provider.is_supported(\"gfm_markdown\"):\n                if isinstance(self.git_provider, GithubProvider):\n                    pr_body += ('\\n\\n___\\n\\n> <details> <summary>  Need help?</summary><li>Type <code>/help how to ...</code> '\n                                'in the comments thread for any questions about PR-Agent usage.</li><li>Check out the '\n             
                   '<a href=\"https://qodo-merge-docs.qodo.ai/usage-guide/\">documentation</a> '\n                                'for more information.</li></details>')\n                else: # gitlab\n                    pr_body += (\"\\n\\n___\\n\\n<details><summary>Need help?</summary>- Type <code>/help how to ...</code> in the comments \"\n                                \"thread for any questions about PR-Agent usage.<br>- Check out the \"\n                                \"<a href='https://qodo-merge-docs.qodo.ai/usage-guide/'>documentation</a> for more information.</details>\")\n            # elif get_settings().pr_description.enable_help_comment:\n            #     pr_body += '\\n\\n___\\n\\n> 💡 **PR-Agent usage**: Comment `/help \"your question\"` on any pull request to receive relevant information'\n\n            # Output the relevant configurations if enabled\n            if get_settings().get('config', {}).get('output_relevant_configurations', False):\n                pr_body += show_relevant_configurations(relevant_section='pr_description')\n\n            if get_settings().config.publish_output:\n\n                # publish labels\n                if get_settings().pr_description.publish_labels and pr_labels and self.git_provider.is_supported(\"get_labels\"):\n                    original_labels = self.git_provider.get_pr_labels(update=True)\n                    get_logger().debug(f\"original labels\", artifact=original_labels)\n                    user_labels = get_user_labels(original_labels)\n                    new_labels = pr_labels + user_labels\n                    get_logger().debug(f\"published labels\", artifact=new_labels)\n                    if set(new_labels) != set(original_labels):\n                        get_logger().info(f\"Setting describe labels:\\n{new_labels}\")\n                        self.git_provider.publish_labels(new_labels)\n                    else:\n                        get_logger().debug(f\"Labels are the same, not 
updating\")\n\n                # publish description\n                if get_settings().pr_description.publish_description_as_comment:\n                    full_markdown_description = f\"## Title\\n\\n{pr_title.strip()}\\n\\n___\\n{pr_body}\"\n                    if get_settings().pr_description.publish_description_as_comment_persistent:\n                        self.git_provider.publish_persistent_comment(full_markdown_description,\n                                                                     initial_header=\"## Title\",\n                                                                     update_header=True,\n                                                                     name=\"describe\",\n                                                                     final_update_message=False, )\n                    else:\n                        self.git_provider.publish_comment(full_markdown_description)\n                else:\n                    self.git_provider.publish_description(pr_title.strip(), pr_body)\n\n                    # publish final update message\n                    if (get_settings().pr_description.final_update_message and not get_settings().config.get('is_auto_command', False)):\n                        latest_commit_url = self.git_provider.get_latest_commit_url()\n                        if latest_commit_url:\n                            pr_url = self.git_provider.get_pr_url()\n                            update_comment = f\"**[PR Description]({pr_url})** updated to latest commit ({latest_commit_url})\"\n                            self.git_provider.publish_comment(update_comment)\n                self.git_provider.remove_initial_comment()\n            else:\n                get_logger().info('PR description, but not published since publish_output is False.')\n                get_settings().data = {\"artifact\": pr_body}\n                return\n        except Exception as e:\n            get_logger().error(f\"Error generating PR 
description {self.pr_id}: {e}\",\n                               artifact={\"traceback\": traceback.format_exc()})\n\n        return \"\"\n\n    async def _prepare_prediction(self, model: str) -> None:\n        if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description:\n            get_logger().info(\"Markers were enabled, but user description does not contain markers. Skipping AI prediction\")\n            return None\n\n        large_pr_handling = get_settings().pr_description.enable_large_pr_handling and \"pr_description_only_files_prompts\" in get_settings()\n        output = get_pr_diff(self.git_provider, self.token_handler, model, large_pr_handling=large_pr_handling, return_remaining_files=True)\n        if isinstance(output, tuple):\n            patches_diff, remaining_files_list = output\n        else:\n            patches_diff = output\n            remaining_files_list = []\n\n        if not large_pr_handling or patches_diff:\n            self.patches_diff = patches_diff\n            if patches_diff:\n                # generate the prediction\n                get_logger().debug(f\"PR diff\", artifact=self.patches_diff)\n                self.prediction = await self._get_prediction(model, patches_diff, prompt=\"pr_description_prompt\")\n\n                # extend the prediction with additional files not shown\n                if get_settings().pr_description.enable_semantic_files_types:\n                    self.prediction = await self.extend_uncovered_files(self.prediction)\n            else:\n                get_logger().error(f\"Error getting PR diff {self.pr_id}\",\n                                   artifact={\"traceback\": traceback.format_exc()})\n                self.prediction = None\n        else:\n            # get the diff in multiple patches, with the token handler only for the files prompt\n            get_logger().debug('large_pr_handling for describe')\n            token_handler_only_files_prompt = 
TokenHandler(\n                self.git_provider.pr,\n                self.vars,\n                get_settings().pr_description_only_files_prompts.system,\n                get_settings().pr_description_only_files_prompts.user,\n            )\n            (patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict,\n             files_in_patches_list) = get_pr_diff_multiple_patchs(\n                self.git_provider, token_handler_only_files_prompt, model)\n\n            # get the files prediction for each patch\n            if not get_settings().pr_description.async_ai_calls:\n                results = []\n                for i, patches in enumerate(patches_compressed_list):  # sync calls\n                    patches_diff = \"\\n\".join(patches)\n                    get_logger().debug(f\"PR diff number {i + 1} for describe files\")\n                    prediction_files = await self._get_prediction(model, patches_diff,\n                                                                  prompt=\"pr_description_only_files_prompts\")\n                    results.append(prediction_files)\n            else:  # async calls\n                tasks = []\n                for i, patches in enumerate(patches_compressed_list):\n                    if patches:\n                        patches_diff = \"\\n\".join(patches)\n                        get_logger().debug(f\"PR diff number {i + 1} for describe files\")\n                        task = asyncio.create_task(\n                            self._get_prediction(model, patches_diff, prompt=\"pr_description_only_files_prompts\"))\n                        tasks.append(task)\n                # Wait for all tasks to complete\n                results = await asyncio.gather(*tasks)\n            file_description_str_list = []\n            for i, result in enumerate(results):\n                prediction_files = result.strip().removeprefix('```yaml').strip('`').strip()\n                if 
load_yaml(prediction_files, keys_fix_yaml=self.keys_fix) and prediction_files.startswith('pr_files'):\n                    prediction_files = prediction_files.removeprefix('pr_files:').strip()\n                    file_description_str_list.append(prediction_files)\n                else:\n                    get_logger().debug(f\"failed to generate predictions in iteration {i + 1} for describe files\")\n\n            # generate files_walkthrough string, with proper token handling\n            token_handler_only_description_prompt = TokenHandler(\n                self.git_provider.pr,\n                self.vars,\n                get_settings().pr_description_only_description_prompts.system,\n                get_settings().pr_description_only_description_prompts.user)\n            files_walkthrough = \"\\n\".join(file_description_str_list)\n            files_walkthrough_prompt = copy.deepcopy(files_walkthrough)\n            MAX_EXTRA_FILES_TO_PROMPT = 50\n            if remaining_files_list:\n                files_walkthrough_prompt += \"\\n\\nNo more token budget. Additional unprocessed files:\"\n                for i, file in enumerate(remaining_files_list):\n                    files_walkthrough_prompt += f\"\\n- {file}\"\n                    if i >= MAX_EXTRA_FILES_TO_PROMPT:\n                        get_logger().debug(f\"Too many remaining files, clipping to {MAX_EXTRA_FILES_TO_PROMPT}\")\n                        files_walkthrough_prompt += f\"\\n... 
and {len(remaining_files_list) - MAX_EXTRA_FILES_TO_PROMPT} more\"\n                        break\n            if deleted_files_list:\n                files_walkthrough_prompt += \"\\n\\nAdditional deleted files:\"\n                for i, file in enumerate(deleted_files_list):\n                    files_walkthrough_prompt += f\"\\n- {file}\"\n                    if i >= MAX_EXTRA_FILES_TO_PROMPT:\n                        get_logger().debug(f\"Too many deleted files, clipping to {MAX_EXTRA_FILES_TO_PROMPT}\")\n                        files_walkthrough_prompt += f\"\\n... and {len(deleted_files_list) - MAX_EXTRA_FILES_TO_PROMPT} more\"\n                        break\n            tokens_files_walkthrough = len(\n                token_handler_only_description_prompt.encoder.encode(files_walkthrough_prompt))\n            total_tokens = token_handler_only_description_prompt.prompt_tokens + tokens_files_walkthrough\n            max_tokens_model = get_max_tokens(model)\n            if total_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:\n                # clip files_walkthrough to git the tokens within the limit\n                files_walkthrough_prompt = clip_tokens(files_walkthrough_prompt,\n                                                       max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD - token_handler_only_description_prompt.prompt_tokens,\n                                                       num_input_tokens=tokens_files_walkthrough)\n\n            # PR header inference\n            get_logger().debug(f\"PR diff only description\", artifact=files_walkthrough_prompt)\n            prediction_headers = await self._get_prediction(model, patches_diff=files_walkthrough_prompt,\n                                                            prompt=\"pr_description_only_description_prompts\")\n            prediction_headers = prediction_headers.strip().removeprefix('```yaml').strip('`').strip()\n\n            # extend the tables with the files not 
shown\n            files_walkthrough_extended = await self.extend_uncovered_files(files_walkthrough)\n\n            # final processing\n            self.prediction = prediction_headers + \"\\n\" + \"pr_files:\\n\" + files_walkthrough_extended\n            if not load_yaml(self.prediction, keys_fix_yaml=self.keys_fix):\n                get_logger().error(f\"Error getting valid YAML in large PR handling for describe {self.pr_id}\")\n                if load_yaml(prediction_headers, keys_fix_yaml=self.keys_fix):\n                    get_logger().debug(f\"Using only headers for describe {self.pr_id}\")\n                    self.prediction = prediction_headers\n\n    async def extend_uncovered_files(self, original_prediction: str) -> str:\n        try:\n            prediction = original_prediction\n\n            # get the original prediction filenames\n            original_prediction_loaded = load_yaml(original_prediction, keys_fix_yaml=self.keys_fix)\n            if isinstance(original_prediction_loaded, list):\n                original_prediction_dict = {\"pr_files\": original_prediction_loaded}\n            else:\n                original_prediction_dict = original_prediction_loaded\n            if original_prediction_dict:\n                files = original_prediction_dict.get('pr_files', [])\n                filenames_predicted = [file.get('filename', '').strip() for file in files if isinstance(file, dict)]\n            else:\n                filenames_predicted = []\n\n            # extend the prediction with additional files not included in the original prediction\n            pr_files = self.git_provider.get_diff_files()\n            prediction_extra = \"pr_files:\"\n            MAX_EXTRA_FILES_TO_OUTPUT = 100\n            counter_extra_files = 0\n            for file in pr_files:\n                if file.filename in filenames_predicted:\n                    continue\n\n                # add up to MAX_EXTRA_FILES_TO_OUTPUT files\n                
counter_extra_files += 1\n                if counter_extra_files > MAX_EXTRA_FILES_TO_OUTPUT:\n                    extra_file_yaml = f\"\"\"\\\n- filename: |\n    Additional files not shown\n  changes_title: |\n    ...\n  label: |\n    additional files\n\"\"\"\n                    prediction_extra = prediction_extra + \"\\n\" + extra_file_yaml.strip()\n                    get_logger().debug(f\"Too many remaining files, clipping to {MAX_EXTRA_FILES_TO_OUTPUT}\")\n                    break\n\n                extra_file_yaml = f\"\"\"\\\n- filename: |\n    {file.filename}\n  changes_title: |\n    ...\n  label: |\n    additional files\n\"\"\"\n                prediction_extra = prediction_extra + \"\\n\" + extra_file_yaml.strip()\n\n            # merge the two dictionaries\n            if counter_extra_files > 0:\n                get_logger().info(f\"Adding {counter_extra_files} unprocessed extra files to table prediction\")\n                prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix)\n                if original_prediction_dict and isinstance(original_prediction_dict, dict) and \\\n                        isinstance(prediction_extra_dict, dict) and \"pr_files\" in prediction_extra_dict:\n                    if \"pr_files\" in original_prediction_dict:\n                        original_prediction_dict[\"pr_files\"].extend(prediction_extra_dict[\"pr_files\"])\n                    else:\n                        original_prediction_dict[\"pr_files\"] = prediction_extra_dict[\"pr_files\"]\n                    new_yaml = yaml.dump(original_prediction_dict)\n                    if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix):\n                        prediction = new_yaml\n                if isinstance(original_prediction, list):\n                    prediction = yaml.dump(original_prediction_dict[\"pr_files\"])\n\n            return prediction\n        except Exception as e:\n            get_logger().exception(f\"Error extending 
uncovered files {self.pr_id}\", artifact={\"error\": e})\n            return original_prediction\n\n\n    async def extend_additional_files(self, remaining_files_list) -> str:\n        prediction = self.prediction\n        try:\n            original_prediction_dict = load_yaml(self.prediction, keys_fix_yaml=self.keys_fix)\n            prediction_extra = \"pr_files:\"\n            for file in remaining_files_list:\n                extra_file_yaml = f\"\"\"\\\n- filename: |\n    {file}\n  changes_summary: |\n    ...\n  changes_title: |\n    ...\n  label: |\n    additional files (token-limit)\n\"\"\"\n                prediction_extra = prediction_extra + \"\\n\" + extra_file_yaml.strip()\n            prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix)\n            # merge the two dictionaries\n            if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict):\n                original_prediction_dict[\"pr_files\"].extend(prediction_extra_dict[\"pr_files\"])\n                new_yaml = yaml.dump(original_prediction_dict)\n                if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix):\n                    prediction = new_yaml\n            return prediction\n        except Exception as e:\n            get_logger().error(f\"Error extending additional files {self.pr_id}: {e}\")\n            return self.prediction\n\n    async def _get_prediction(self, model: str, patches_diff: str, prompt=\"pr_description_prompt\") -> str:\n        variables = copy.deepcopy(self.vars)\n        variables[\"diff\"] = patches_diff  # update diff\n\n        environment = Environment(undefined=StrictUndefined)\n        set_custom_labels(variables, self.git_provider)\n        self.variables = variables\n\n        system_prompt = environment.from_string(get_settings().get(prompt, {}).get(\"system\", \"\")).render(self.variables)\n        user_prompt = environment.from_string(get_settings().get(prompt, {}).get(\"user\", 
\"\")).render(self.variables)\n\n        response, finish_reason = await self.ai_handler.chat_completion(\n            model=model,\n            temperature=get_settings().config.temperature,\n            system=system_prompt,\n            user=user_prompt\n        )\n\n        return response\n\n    def _prepare_data(self):\n        # Load the AI prediction data into a dictionary\n        self.data = load_yaml(self.prediction.strip(), keys_fix_yaml=self.keys_fix)\n\n        if get_settings().pr_description.add_original_user_description and self.user_description:\n            self.data[\"User Description\"] = self.user_description\n\n        # re-order keys\n        if 'User Description' in self.data:\n            self.data['User Description'] = self.data.pop('User Description')\n        if 'title' in self.data:\n            self.data['title'] = self.data.pop('title')\n        if 'type' in self.data:\n            self.data['type'] = self.data.pop('type')\n        if 'labels' in self.data:\n            self.data['labels'] = self.data.pop('labels')\n        if 'description' in self.data:\n            self.data['description'] = self.data.pop('description')\n        if 'changes_diagram' in self.data:\n            changes_diagram = self.data.pop('changes_diagram').strip()\n            if changes_diagram.startswith('```'):\n                if not changes_diagram.endswith('```'):  # fallback for missing closing\n                    changes_diagram += '\\n```'\n                self.data['changes_diagram'] = '\\n'+ changes_diagram\n        if 'pr_files' in self.data:\n            self.data['pr_files'] = self.data.pop('pr_files')\n\n    def _prepare_labels(self) -> List[str]:\n        pr_labels = []\n\n        # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'\n        if 'labels' in self.data and self.data['labels']:\n            if type(self.data['labels']) == list:\n                pr_labels = self.data['labels']\n    
        elif type(self.data['labels']) == str:\n                pr_labels = self.data['labels'].split(',')\n        elif 'type' in self.data and self.data['type'] and get_settings().pr_description.publish_labels:\n            if type(self.data['type']) == list:\n                pr_labels = self.data['type']\n            elif type(self.data['type']) == str:\n                pr_labels = self.data['type'].split(',')\n        pr_labels = [label.strip() for label in pr_labels]\n\n        # convert lowercase labels to original case\n        try:\n            if \"labels_minimal_to_labels_dict\" in self.variables:\n                d: dict = self.variables[\"labels_minimal_to_labels_dict\"]\n                for i, label_i in enumerate(pr_labels):\n                    if label_i in d:\n                        pr_labels[i] = d[label_i]\n        except Exception as e:\n            get_logger().error(f\"Error converting labels to original case {self.pr_id}: {e}\")\n        return pr_labels\n\n    def _prepare_pr_answer_with_markers(self) -> Tuple[str, str, str, List[dict]]:\n        get_logger().info(f\"Using description marker replacements {self.pr_id}\")\n\n        # Remove the 'PR Title' key from the dictionary\n        ai_title = self.data.pop('title', self.vars[\"title\"])\n        if (not get_settings().pr_description.generate_ai_title):\n            # Assign the original PR title to the 'title' variable\n            title = self.vars[\"title\"]\n        else:\n            # Assign the value of the 'PR Title' key to 'title' variable\n            title = ai_title\n\n        body = self.user_description\n        if get_settings().pr_description.include_generated_by_header:\n            ai_header = f\"### 🤖 Generated by PR Agent at {self.git_provider.last_commit_id.sha}\\n\\n\"\n        else:\n            ai_header = \"\"\n\n        ai_type = self.data.get('type')\n        if ai_type and not re.search(r'<!--\\s*pr_agent:type\\s*-->', body):\n            if 
isinstance(ai_type, list):\n                pr_type = ', '.join(str(t) for t in ai_type)\n            else:\n                pr_type = ai_type\n            pr_type = f\"{ai_header}{pr_type}\"\n            body = body.replace('pr_agent:type', pr_type)\n\n        ai_summary = self.data.get('description')\n        if ai_summary and not re.search(r'<!--\\s*pr_agent:summary\\s*-->', body):\n            summary = f\"{ai_header}{ai_summary}\"\n            body = body.replace('pr_agent:summary', summary)\n\n        ai_walkthrough = self.data.get('pr_files')\n        walkthrough_gfm = \"\"\n        pr_file_changes = []\n        if ai_walkthrough and not re.search(r'<!--\\s*pr_agent:walkthrough\\s*-->', body):\n            try:\n                walkthrough_gfm, pr_file_changes = self.process_pr_files_prediction(walkthrough_gfm,\n                                                                                    self.file_label_dict)\n                body = body.replace('pr_agent:walkthrough', walkthrough_gfm)\n            except Exception as e:\n                get_logger().error(f\"Failing to process walkthrough {self.pr_id}: {e}\")\n                body = body.replace('pr_agent:walkthrough', \"\")\n\n        # Add support for pr_agent:diagram marker (plain and HTML comment formats)\n        ai_diagram = self.data.get('changes_diagram')\n        if ai_diagram:\n            body = re.sub(r'<!--\\s*pr_agent:diagram\\s*-->|pr_agent:diagram', ai_diagram, body)\n\n        return title, body, walkthrough_gfm, pr_file_changes\n\n    def _prepare_pr_answer(self) -> Tuple[str, str, str, List[dict]]:\n        \"\"\"\n        Prepare the PR description based on the AI prediction data.\n\n        Returns:\n        - title: a string containing the PR title.\n        - pr_body: a string containing the PR description body in a markdown format.\n        \"\"\"\n\n        # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format\n        # 
Don't display 'PR Labels'\n        if 'labels' in self.data and self.git_provider.is_supported(\"get_labels\"):\n            self.data.pop('labels')\n        if not get_settings().pr_description.enable_pr_type:\n            self.data.pop('type')\n\n        # Remove the 'PR Title' key from the dictionary\n        ai_title = self.data.pop('title', self.vars[\"title\"])\n        if (not get_settings().pr_description.generate_ai_title):\n            # Assign the original PR title to the 'title' variable\n            title = self.vars[\"title\"]\n        else:\n            # Assign the value of the 'PR Title' key to 'title' variable\n            title = ai_title\n\n        # Iterate over the remaining dictionary items and append the key and value to 'pr_body' in a markdown format,\n        # except for the items containing the word 'walkthrough'\n        pr_body, changes_walkthrough = \"\", \"\"\n        pr_file_changes = []\n        for idx, (key, value) in enumerate(self.data.items()):\n            if key == 'changes_diagram':\n                pr_body += f\"### {PRDescriptionHeader.DIAGRAM_WALKTHROUGH.value}\\n\\n\"\n                pr_body += f\"{value}\\n\\n\"\n                continue\n            if key == 'pr_files':\n                value = self.file_label_dict\n            else:\n                key_publish = key.rstrip(':').replace(\"_\", \" \").capitalize()\n                if key_publish == \"Type\":\n                    key_publish = \"PR Type\"\n                # elif key_publish == \"Description\":\n                #     key_publish = \"PR Description\"\n                pr_body += f\"### **{key_publish}**\\n\"\n            if 'walkthrough' in key.lower():\n                if self.git_provider.is_supported(\"gfm_markdown\"):\n                    pr_body += \"<details> <summary>files:</summary>\\n\\n\"\n                for file in value:\n                    filename = file['filename'].replace(\"'\", \"`\")\n                    description = 
file['changes_in_file']\n                    pr_body += f'- `{filename}`: {description}\\n'\n                if self.git_provider.is_supported(\"gfm_markdown\"):\n                    pr_body += \"</details>\\n\"\n            elif 'pr_files' in key.lower() and get_settings().pr_description.enable_semantic_files_types: # 'File Walkthrough' section\n                changes_walkthrough_table, pr_file_changes = self.process_pr_files_prediction(changes_walkthrough, value)\n                if get_settings().pr_description.get('file_table_collapsible_open_by_default', False):\n                    initial_status = \" open\"\n                else:\n                    initial_status = \"\"\n                changes_walkthrough = f\"<details{initial_status}> <summary><h3> {PRDescriptionHeader.FILE_WALKTHROUGH.value}</h3></summary>\\n\\n\"\n                changes_walkthrough += f\"{changes_walkthrough_table}\\n\\n\"\n                changes_walkthrough += \"</details>\\n\\n\"\n            elif key.lower().strip() == 'description':\n                if isinstance(value, list):\n                    value = ', '.join(v.rstrip() for v in value)\n                value = value.replace('\\n-', '\\n\\n-').strip() # makes the bullet points more readable by adding double space\n                pr_body += f\"{value}\\n\"\n            else:\n                # if the value is a list, join its items by comma\n                if isinstance(value, list):\n                    value = ', '.join(v.rstrip() for v in value)\n                pr_body += f\"{value}\\n\"\n            if idx < len(self.data) - 1:\n                pr_body += \"\\n\\n___\\n\\n\"\n\n        return title, pr_body, changes_walkthrough, pr_file_changes,\n\n    def _prepare_file_labels(self):\n        file_label_dict = {}\n        if (not self.data or not isinstance(self.data, dict) or\n                'pr_files' not in self.data or not self.data['pr_files']):\n            return file_label_dict\n        for file in 
self.data['pr_files']:\n            try:\n                required_fields = ['changes_title', 'filename', 'label']\n                if not all(field in file for field in required_fields):\n                    # can happen for example if a YAML generation was interrupted in the middle (no more tokens)\n                    get_logger().warning(f\"Missing required fields in file label dict {self.pr_id}, skipping file\",\n                                         artifact={\"file\": file})\n                    continue\n                if not file.get('changes_title'):\n                    get_logger().warning(f\"Empty changes title or summary in file label dict {self.pr_id}, skipping file\",\n                                         artifact={\"file\": file})\n                    continue\n                filename = file['filename'].replace(\"'\", \"`\").replace('\"', '`')\n                changes_summary = file.get('changes_summary', \"\")\n                if not changes_summary and self.vars.get('include_file_summary_changes', True):\n                    get_logger().warning(f\"Empty changes summary in file label dict, skipping file\",\n                                         artifact={\"file\": file})\n                    continue\n                changes_summary = changes_summary.strip()\n                changes_title = file['changes_title'].strip()\n                label = file.get('label').strip().lower()\n                if label not in file_label_dict:\n                    file_label_dict[label] = []\n                file_label_dict[label].append((filename, changes_title, changes_summary))\n            except Exception as e:\n                get_logger().exception(f\"Error preparing file label dict {self.pr_id}\")\n                pass\n        return file_label_dict\n\n    def process_pr_files_prediction(self, pr_body, value):\n        pr_comments = []\n        # logic for using collapsible file list\n        use_collapsible_file_list = 
get_settings().pr_description.collapsible_file_list\n        num_files = 0\n        if value:\n            for semantic_label in value.keys():\n                num_files += len(value[semantic_label])\n        if use_collapsible_file_list == \"adaptive\":\n            use_collapsible_file_list = num_files > self.COLLAPSIBLE_FILE_LIST_THRESHOLD\n\n        if not self.git_provider.is_supported(\"gfm_markdown\"):\n            return pr_body, pr_comments\n        try:\n            pr_body += \"<table>\"\n            header = f\"Relevant files\"\n            delta = 75\n            # header += \"&nbsp; \" * delta\n            pr_body += f\"\"\"<thead><tr><th></th><th align=\"left\">{header}</th></tr></thead>\"\"\"\n            pr_body += \"\"\"<tbody>\"\"\"\n            for semantic_label in value.keys():\n                s_label = semantic_label.strip(\"'\").strip('\"')\n                pr_body += f\"\"\"<tr><td><strong>{s_label.capitalize()}</strong></td>\"\"\"\n                list_tuples = value[semantic_label]\n\n                if use_collapsible_file_list:\n                    pr_body += f\"\"\"<td><details><summary>{len(list_tuples)} files</summary><table>\"\"\"\n                else:\n                    pr_body += f\"\"\"<td><table>\"\"\"\n                for filename, file_changes_title, file_change_description in list_tuples:\n                    filename = filename.replace(\"'\", \"`\").rstrip()\n                    filename_publish = filename.split(\"/\")[-1]\n                    if file_changes_title and file_changes_title.strip() != \"...\":\n                        file_changes_title_code = f\"<code>{file_changes_title}</code>\"\n                        file_changes_title_code_br = insert_br_after_x_chars(file_changes_title_code, x=(delta - 5)).strip()\n                        if len(file_changes_title_code_br) < (delta - 5):\n                            file_changes_title_code_br += \"&nbsp; \" * ((delta - 5) - len(file_changes_title_code_br))\n         
               filename_publish = f\"<strong>{filename_publish}</strong><dd>{file_changes_title_code_br}</dd>\"\n                    else:\n                        filename_publish = f\"<strong>{filename_publish}</strong>\"\n                    diff_plus_minus = \"\"\n                    delta_nbsp = \"\"\n                    diff_files = self.git_provider.get_diff_files()\n                    for f in diff_files:\n                        if f.filename.lower().strip('/') == filename.lower().strip('/'):\n                            num_plus_lines = f.num_plus_lines\n                            num_minus_lines = f.num_minus_lines\n                            diff_plus_minus += f\"+{num_plus_lines}/-{num_minus_lines}\"\n                            if len(diff_plus_minus) > 12 or diff_plus_minus == \"+0/-0\":\n                                diff_plus_minus = \"[link]\"\n                            delta_nbsp = \"&nbsp; \" * max(0, (8 - len(diff_plus_minus)))\n                            break\n\n                    # try to add line numbers link to code suggestions\n                    link = \"\"\n                    if hasattr(self.git_provider, 'get_line_link'):\n                        filename = filename.strip()\n                        link = self.git_provider.get_line_link(filename, relevant_line_start=-1)\n                    if (not link or not diff_plus_minus) and ('additional files' not in filename.lower()):\n                        # get_logger().warning(f\"Error getting line link for '{filename}'\")\n                        link = \"\"\n                        # continue\n\n                    # Add file data to the PR body\n                    file_change_description_br = insert_br_after_x_chars(file_change_description, x=(delta - 5))\n                    pr_body = self.add_file_data(delta_nbsp, diff_plus_minus, file_change_description_br, filename,\n                                                 filename_publish, link, pr_body)\n\n                # 
Close the collapsible file list\n                if use_collapsible_file_list:\n                    pr_body += \"\"\"</table></details></td></tr>\"\"\"\n                else:\n                    pr_body += \"\"\"</table></td></tr>\"\"\"\n            pr_body += \"\"\"</tr></tbody></table>\"\"\"\n\n        except Exception as e:\n            get_logger().error(f\"Error processing pr files to markdown {self.pr_id}: {str(e)}\")\n            pass\n        return pr_body, pr_comments\n\n    def add_file_data(self, delta_nbsp, diff_plus_minus, file_change_description_br, filename, filename_publish, link,\n                      pr_body) -> str:\n\n        if not file_change_description_br:\n            pr_body += f\"\"\"\n<tr>\n  <td>{filename_publish}</td>\n  <td><a href=\"{link}\">{diff_plus_minus}</a>{delta_nbsp}</td>\n\n</tr>\n\"\"\"\n        else:\n            pr_body += f\"\"\"\n<tr>\n  <td>\n    <details>\n      <summary>{filename_publish}</summary>\n<hr>\n\n{filename}\n\n{file_change_description_br}\n\n\n</details>\n\n\n  </td>\n  <td><a href=\"{link}\">{diff_plus_minus}</a>{delta_nbsp}</td>\n\n</tr>\n\"\"\"\n        return pr_body\n\ndef count_chars_without_html(string):\n    if '<' not in string:\n        return len(string)\n    no_html_string = re.sub('<[^>]+>', '', string)\n    return len(no_html_string)\n\n\ndef insert_br_after_x_chars(text: str, x=70):\n    \"\"\"\n    Insert <br> into a string after a word that increases its length above x characters.\n    Use proper HTML tags for code and new lines.\n    \"\"\"\n\n    if not text:\n        return \"\"\n    if count_chars_without_html(text) < x:\n        return text\n\n    is_list = text.lstrip().startswith((\"- \", \"* \"))\n\n    # replace odd instances of ` with <code> and even instances of ` with </code>\n    text = replace_code_tags(text)\n\n    # convert list items to <li> only if the text is identified as a list\n    if is_list:\n        # To handle lists that start with indentation\n        
leading_whitespace = text[:len(text) - len(text.lstrip())]\n        body = text.lstrip()\n        body = \"<li>\" + body[2:]\n        text = leading_whitespace + body\n\n        text = text.replace(\"\\n- \", '<br><li> ').replace(\"\\n - \", '<br><li> ')\n        text = text.replace(\"\\n* \", '<br><li> ').replace(\"\\n * \", '<br><li> ')\n\n    # convert new lines to <br>\n    text = text.replace(\"\\n\", '<br>')\n\n    # split text into lines\n    lines = text.split('<br>')\n    words = []\n    for i, line in enumerate(lines):\n        words += line.split(' ')\n        if i < len(lines) - 1:\n            words[-1] += \"<br>\"\n\n    new_text = []\n    is_inside_code = False\n    current_length = 0\n    for word in words:\n        is_saved_word = False\n        if word == \"<code>\" or word == \"</code>\" or word == \"<li>\" or word == \"<br>\":\n            is_saved_word = True\n\n        len_word = count_chars_without_html(word)\n        if not is_saved_word and (current_length + len_word > x):\n            if is_inside_code:\n                new_text.append(\"</code><br><code>\")\n            else:\n                new_text.append(\"<br>\")\n            current_length = 0  # Reset counter\n        new_text.append(word + \" \")\n\n        if not is_saved_word:\n            current_length += len_word + 1  # Add 1 for the space\n\n        if word == \"<li>\" or word == \"<br>\":\n            current_length = 0\n\n        if \"<code>\" in word:\n            is_inside_code = True\n        if \"</code>\" in word:\n            is_inside_code = False\n\n    processed_text = ''.join(new_text).strip()\n\n    if is_list:\n        processed_text = f\"<ul>{processed_text}</ul>\"\n\n    return processed_text\n\n\ndef replace_code_tags(text):\n    \"\"\"\n    Replace odd instances of ` with <code> and even instances of ` with </code>\n    \"\"\"\n    parts = text.split('`')\n    for i in range(1, len(parts), 2):\n        parts[i] = '<code>' + parts[i] + '</code>'\n    return 
''.join(parts)\n"
  },
  {
    "path": "pr_agent/tools/pr_generate_labels.py",
    "content": "import copy\nimport re\nfrom functools import partial\nfrom typing import List, Tuple\n\nfrom jinja2 import Environment, StrictUndefined\n\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import get_user_labels, load_yaml, set_custom_labels\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider\nfrom pr_agent.git_providers.git_provider import get_main_pr_language\nfrom pr_agent.log import get_logger\n\n\nclass PRGenerateLabels:\n    def __init__(self, pr_url: str, args: list = None,\n                 ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):\n        \"\"\"\n        Initialize the PRGenerateLabels object with the necessary attributes and objects for generating labels\n        corresponding to the PR using an AI model.\n        Args:\n            pr_url (str): The URL of the pull request.\n            args (list, optional): List of arguments passed to the PRGenerateLabels class. 
Defaults to None.\n        \"\"\"\n        # Initialize the git provider and main PR language\n        self.git_provider = get_git_provider()(pr_url)\n        self.main_pr_language = get_main_pr_language(\n            self.git_provider.get_languages(), self.git_provider.get_files()\n        )\n        self.pr_id = self.git_provider.get_pr_id()\n\n        # Initialize the AI handler\n        self.ai_handler = ai_handler()\n        self.ai_handler.main_pr_language = self.main_pr_language\n\n        # Initialize the variables dictionary\n        self.vars = {\n            \"title\": self.git_provider.pr.title,\n            \"branch\": self.git_provider.get_pr_branch(),\n            \"description\": self.git_provider.get_pr_description(full=False),\n            \"language\": self.main_pr_language,\n            \"diff\": \"\",  # empty diff for initial calculation\n            \"extra_instructions\": get_settings().pr_description.extra_instructions,\n            \"commit_messages_str\": self.git_provider.get_commit_messages(),\n            \"enable_custom_labels\": get_settings().config.enable_custom_labels,\n            \"custom_labels_class\": \"\",  # will be filled if necessary in 'set_custom_labels' function\n        }\n\n        # Initialize the token handler\n        self.token_handler = TokenHandler(\n            self.git_provider.pr,\n            self.vars,\n            get_settings().pr_custom_labels_prompt.system,\n            get_settings().pr_custom_labels_prompt.user,\n        )\n\n        # Initialize patches_diff and prediction attributes\n        self.patches_diff = None\n        self.prediction = None\n\n    async def run(self):\n        \"\"\"\n        Generates a PR labels using an AI model and publishes it to the PR.\n        \"\"\"\n\n        try:\n            get_logger().info(f\"Generating a PR labels {self.pr_id}\")\n            if get_settings().config.publish_output:\n                self.git_provider.publish_comment(\"Preparing PR 
labels...\", is_temporary=True)\n\n            await retry_with_fallback_models(self._prepare_prediction)\n\n            get_logger().info(f\"Preparing answer {self.pr_id}\")\n            if self.prediction:\n                self._prepare_data()\n            else:\n                return None\n\n            pr_labels = self._prepare_labels()\n\n            if get_settings().config.publish_output:\n                get_logger().info(f\"Pushing labels {self.pr_id}\")\n\n                current_labels = self.git_provider.get_pr_labels()\n                user_labels = get_user_labels(current_labels)\n                pr_labels = pr_labels + user_labels\n\n                if self.git_provider.is_supported(\"get_labels\"):\n                    self.git_provider.publish_labels(pr_labels)\n                elif pr_labels:\n                    value = ', '.join(v for v in pr_labels)\n                    pr_labels_text = f\"## PR Labels:\\n{value}\\n\"\n                    self.git_provider.publish_comment(pr_labels_text, is_temporary=False)\n                self.git_provider.remove_initial_comment()\n        except Exception as e:\n            get_logger().error(f\"Error generating PR labels {self.pr_id}: {e}\")\n\n        return \"\"\n\n    async def _prepare_prediction(self, model: str) -> None:\n        \"\"\"\n        Prepare the AI prediction for the PR labels based on the provided model.\n\n        Args:\n            model (str): The name of the model to be used for generating the prediction.\n\n        Returns:\n            None\n\n        Raises:\n            Any exceptions raised by the 'get_pr_diff' and '_get_prediction' functions.\n\n        \"\"\"\n\n        get_logger().info(f\"Getting PR diff {self.pr_id}\")\n        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)\n        get_logger().info(f\"Getting AI prediction {self.pr_id}\")\n        self.prediction = await self._get_prediction(model)\n\n    async def _get_prediction(self, 
model: str) -> str:\n        \"\"\"\n        Generate an AI prediction for the PR labels based on the provided model.\n\n        Args:\n            model (str): The name of the model to be used for generating the prediction.\n\n        Returns:\n            str: The generated AI prediction.\n        \"\"\"\n        variables = copy.deepcopy(self.vars)\n        variables[\"diff\"] = self.patches_diff  # update diff\n\n        environment = Environment(undefined=StrictUndefined)\n        set_custom_labels(variables, self.git_provider)\n        self.variables = variables\n\n        system_prompt = environment.from_string(get_settings().pr_custom_labels_prompt.system).render(self.variables)\n        user_prompt = environment.from_string(get_settings().pr_custom_labels_prompt.user).render(self.variables)\n\n        response, finish_reason = await self.ai_handler.chat_completion(\n            model=model,\n            temperature=get_settings().config.temperature,\n            system=system_prompt,\n            user=user_prompt\n        )\n\n        return response\n\n    def _prepare_data(self):\n        # Load the AI prediction data into a dictionary\n        self.data = load_yaml(self.prediction.strip())\n\n\n\n    def _prepare_labels(self) -> List[str]:\n        pr_types = []\n\n        # If the 'labels' key is present in the dictionary, split its value by comma and assign it to 'pr_types'\n        if 'labels' in self.data:\n            if type(self.data['labels']) == list:\n                pr_types = self.data['labels']\n            elif type(self.data['labels']) == str:\n                pr_types = self.data['labels'].split(',')\n        pr_types = [label.strip() for label in pr_types]\n\n        # convert lowercase labels to original case\n        try:\n            if \"labels_minimal_to_labels_dict\" in self.variables:\n                d: dict = self.variables[\"labels_minimal_to_labels_dict\"]\n                for i, label_i in enumerate(pr_types):\n              
      if label_i in d:\n                        pr_types[i] = d[label_i]\n        except Exception as e:\n            get_logger().error(f\"Error converting labels to original case {self.pr_id}: {e}\")\n\n        return pr_types\n"
  },
  {
    "path": "pr_agent/tools/pr_help_docs.py",
    "content": "import copy\nfrom functools import partial\n\nfrom jinja2 import Environment, StrictUndefined\nimport math\nimport os\nimport re\nfrom tempfile import TemporaryDirectory\n\nfrom pr_agent.algo import MAX_TOKENS\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.pr_processing import retry_with_fallback_models\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import clip_tokens, get_max_tokens, load_yaml, ModelType\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider_with_context\nfrom pr_agent.log import get_logger\nfrom pr_agent.servers.help import HelpMessage\n\n\n#Common code that can be called from similar tools:\ndef modify_answer_section(ai_response: str) -> str | None:\n    # Gets the model's answer and relevant sources section, replacing the heading of the answer section with:\n    # :bulb: Auto-generated documentation-based answer:\n    \"\"\"\n    For example: The following input:\n\n    ### Question: \\nThe following general issue was asked by a user: Title: How does one request to re-review a PR? 
More Info: I cannot seem to find to do this.\n    ### Answer:\\nAccording to the documentation, one needs to invoke the command: /review\n    #### Relevant Sources...\n\n    Should become:\n\n    ### :bulb: Auto-generated documentation-based answer:\\n\n    According to the documentation, one needs to invoke the command: /review\n    #### Relevant Sources...\n    \"\"\"\n    model_answer_and_relevant_sections_in_response \\\n        = extract_model_answer_and_relevant_sources(ai_response)\n    if model_answer_and_relevant_sections_in_response is not None:\n        cleaned_question_with_answer = \"### :bulb: Auto-generated documentation-based answer:\\n\"\n        cleaned_question_with_answer += model_answer_and_relevant_sections_in_response\n        return cleaned_question_with_answer\n    get_logger().warning(f\"Either no answer section found, or that section is malformed: {ai_response}\")\n    return None\n\ndef extract_model_answer_and_relevant_sources(ai_response: str) -> str | None:\n    # It is assumed that the input contains several sections with leading \"### \",\n    # where the answer is the last one of them having the format: \"### Answer:\\n\"), since the model returns the answer\n    # AFTER the user question. 
By splitting using the string: \"### Answer:\\n\" and grabbing the last part,\n    # the model answer is guaranteed to be in that last part, provided it is followed by a \"#### Relevant Sources:\\n\\n\".\n    # (for more details, see here: https://github.com/Codium-ai/pr-agent-pro/blob/main/pr_agent/tools/pr_help_message.py#L173)\n    \"\"\"\n    For example:\n    ### Question: \\nHow does one request to re-review a PR?\\n\\n\n    ### Answer:\\nAccording to the documentation, one needs to invoke the command: /review\\n\\n\n    #### Relevant Sources:\\n\\n...\n\n    The answer part is: \"According to the documentation, one needs to invoke the command: /review\\n\\n\"\n    followed by \"Relevant Sources:\\n\\n\".\n    \"\"\"\n    if \"### Answer:\\n\" in ai_response:\n        model_answer_and_relevant_sources_sections_in_response = ai_response.split(\"### Answer:\\n\")[-1]\n        # Split such part by \"Relevant Sources\" section to contain only the model answer:\n        if \"#### Relevant Sources:\\n\\n\" in model_answer_and_relevant_sources_sections_in_response:\n            model_answer_section_in_response \\\n                = model_answer_and_relevant_sources_sections_in_response.split(\"#### Relevant Sources:\\n\\n\")[0]\n            get_logger().info(f\"Found model answer: {model_answer_section_in_response}\")\n            return model_answer_and_relevant_sources_sections_in_response \\\n                if len(model_answer_section_in_response) > 0 else None\n    get_logger().warning(f\"Either no answer section found, or that section is malformed: {ai_response}\")\n    return None\n\ndef get_maximal_text_input_length_for_token_count_estimation():\n    model = get_settings().config.model\n    if 'claude-3-7-sonnet' in model.lower():\n        return 900000 #Claude API for token estimation allows maximal text input of 900K chars\n    return math.inf #Otherwise, no known limitation on input text just for token estimation\n\ndef return_document_headings(text: str, 
ext: str) -> str:\n    try:\n        lines = text.split('\\n')\n        headings = set()\n\n        if not text or not re.search(r'[a-zA-Z]', text):\n            get_logger().error(f\"Empty or non text content found in text: {text}.\")\n            return \"\"\n\n        if ext in ['.md', '.mdx']:\n            # Extract Markdown headings (lines starting with #)\n            headings = {line.strip() for line in lines if line.strip().startswith('#')}\n        elif ext == '.rst':\n            # Find indices of lines that have all same character:\n            #Allowed characters according to list from: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#sections\n            section_chars = set('!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_`{|}~')\n\n            # Find potential section marker lines (underlines/overlines): They have to be the same character\n            marker_lines = []\n            for i, line in enumerate(lines):\n                line = line.rstrip()\n                if line and all(c == line[0] for c in line) and line[0] in section_chars:\n                    marker_lines.append((i, len(line)))\n\n            # Check for headings adjacent to marker lines (below + text must be in length equal or less)\n            for idx, length in marker_lines:\n                # Check if it's an underline (heading is above it)\n                if idx > 0 and lines[idx - 1].rstrip() and len(lines[idx - 1].rstrip()) <= length:\n                    headings.add(lines[idx - 1].rstrip())\n        else:\n            get_logger().error(f\"Unsupported file extension: {ext}\")\n            return \"\"\n\n        return '\\n'.join(headings)\n    except Exception as e:\n        get_logger().exception(f\"Unexpected exception thrown. 
Returning empty result.\")\n        return \"\"\n\n# Load documentation files to memory: full file path (as will be given as prompt) -> doc contents\ndef map_documentation_files_to_contents(base_path: str, doc_files: list[str], max_allowed_file_len=5000) -> dict[str, str]:\n    try:\n        returned_dict = {}\n        for file in doc_files:\n            try:\n                with open(file, 'r', encoding='utf-8') as f:\n                    content = f.read()\n                    # Skip files with no text content\n                    if not re.search(r'[a-zA-Z]', content):\n                        continue\n                    if len(content) > max_allowed_file_len:\n                        get_logger().warning(f\"File {file} length: {len(content)} exceeds limit: {max_allowed_file_len}, so it will be trimmed.\")\n                        content = content[:max_allowed_file_len]\n                    file_path = str(file).replace(str(base_path), '')\n                    returned_dict[file_path] = content.strip()\n            except Exception as e:\n                get_logger().warning(f\"Error while reading the file {file}: {e}\")\n                continue\n        if not returned_dict:\n            get_logger().error(\"Couldn't find any usable documentation files. Returning empty dict.\")\n        return returned_dict\n    except Exception as e:\n        get_logger().exception(f\"Unexpected exception thrown. 
Returning empty dict.\")\n        return {}\n\n# Goes over files' contents, generating payload for prompt while decorating them with a header to mark where each file begins,\n# as to help the LLM to give a better answer.\ndef aggregate_documentation_files_for_prompt_contents(file_path_to_contents: dict[str, str], return_just_headings=False) -> str:\n    try:\n        docs_prompt = \"\"\n        for idx, file_path in enumerate(file_path_to_contents):\n            file_contents = file_path_to_contents[file_path].strip()\n            if not file_contents:\n                get_logger().error(f\"Got empty file contents for: {file_path}. Skipping this file.\")\n                continue\n            if return_just_headings:\n                file_headings = return_document_headings(file_contents, os.path.splitext(file_path)[-1]).strip()\n                if file_headings:\n                    docs_prompt += f\"\\n==file name==\\n\\n{file_path}\\n\\n==index==\\n\\n{idx}\\n\\n==file headings==\\n\\n{file_headings}\\n=========\\n\\n\"\n                else:\n                    get_logger().warning(f\"No headers for: {file_path}. Will only use filename\")\n                    docs_prompt += f\"\\n==file name==\\n\\n{file_path}\\n\\n==index==\\n\\n{idx}\\n\\n\"\n            else:\n                docs_prompt += f\"\\n==file name==\\n\\n{file_path}\\n\\n==file content==\\n\\n{file_contents}\\n=========\\n\\n\"\n        return docs_prompt\n    except Exception as e:\n        get_logger().exception(f\"Unexpected exception thrown. 
Returning empty result.\")\n        return \"\"\n\ndef format_markdown_q_and_a_response(question_str: str, response_str: str, relevant_sections: list[dict[str, str]],\n                                     supported_suffixes: list[str], base_url_prefix: str, base_url_suffix: str=\"\") -> str:\n    try:\n        base_url_prefix = base_url_prefix.strip('/') #Sanitize base_url_prefix\n        answer_str = \"\"\n        answer_str += f\"### Question: \\n{question_str}\\n\\n\"\n        answer_str += f\"### Answer:\\n{response_str.strip()}\\n\\n\"\n        answer_str += f\"#### Relevant Sources:\\n\\n\"\n        for section in relevant_sections:\n            file = section.get('file_name').lstrip('/').strip() #Remove any '/' in the beginning, since some models do it anyway\n            ext = [suffix for suffix in supported_suffixes if file.endswith(suffix)]\n            if not ext:\n                get_logger().warning(f\"Unsupported file extension: {file}\")\n                continue\n            if str(section['relevant_section_header_string']).strip():\n                markdown_header = format_markdown_header(section['relevant_section_header_string'])\n                if base_url_prefix:\n                    answer_str += f\"> - {base_url_prefix}/{file}{base_url_suffix}#{markdown_header}\\n\"\n            else:\n                answer_str += f\"> - {base_url_prefix}/{file}{base_url_suffix}\\n\"\n        return answer_str\n    except Exception as e:\n        get_logger().exception(f\"Unexpected exception thrown. 
Returning empty result.\")\n        return \"\"\n\ndef format_markdown_header(header: str) -> str:\n    try:\n        # First, strip common characters from both ends\n        cleaned = header.strip('# 💎\\n')\n\n        # Define all characters to be removed/replaced in a single pass\n        replacements = {\n            \"'\": '',\n            \"`\": '',\n            '(': '',\n            ')': '',\n            ',': '',\n            '.': '',\n            '?': '',\n            '!': '',\n            ' ': '-'\n        }\n\n        # Compile regex pattern for characters to remove\n        pattern = re.compile('|'.join(map(re.escape, replacements.keys())))\n\n        # Perform replacements in a single pass and convert to lowercase\n        return pattern.sub(lambda m: replacements[m.group()], cleaned).lower()\n    except Exception:\n        get_logger().exception(f\"Error while formatting markdown header\", artifacts={'header': header})\n        return \"\"\n\ndef clean_markdown_content(content: str) -> str:\n    \"\"\"\n    Remove hidden comments and unnecessary elements from markdown content to reduce size.\n\n    Args:\n        content: The original markdown content\n\n    Returns:\n        Cleaned markdown content\n    \"\"\"\n    try:\n        # Remove HTML comments\n        content = re.sub(r'<!--.*?-->', '', content, flags=re.DOTALL)\n\n        # Remove frontmatter (YAML between --- or +++ delimiters)\n        content = re.sub(r'^---\\s*\\n.*?\\n---\\s*\\n', '', content, flags=re.DOTALL)\n        content = re.sub(r'^\\+\\+\\+\\s*\\n.*?\\n\\+\\+\\+\\s*\\n', '', content, flags=re.DOTALL)\n\n        # Remove excessive blank lines (more than 2 consecutive)\n        content = re.sub(r'\\n{3,}', '\\n\\n', content)\n\n        # Remove HTML tags that are often used for styling only\n        content = re.sub(r'<div.*?>|</div>|<span.*?>|</span>', '', content, flags=re.DOTALL)\n\n        # Remove image alt text which can be verbose\n        content = re.sub(r'!\\[(.*?)\\]', 
'![]', content)\n\n        # Remove images completely\n        content = re.sub(r'!\\[.*?\\]\\(.*?\\)', '', content)\n\n        # Remove simple HTML tags but preserve content between them\n        content = re.sub(r'<(?!table|tr|td|th|thead|tbody)([a-zA-Z][a-zA-Z0-9]*)[^>]*>(.*?)</\\1>',\n                         r'\\2', content, flags=re.DOTALL)\n        return content.strip()\n    except Exception as e:\n        get_logger().exception(f\"Unexpected exception thrown. Returning empty result.\")\n        return \"\"\n\nclass PredictionPreparator:\n    def __init__(self, ai_handler, vars, system_prompt, user_prompt):\n        try:\n            self.ai_handler = ai_handler\n            variables = copy.deepcopy(vars)\n            environment = Environment(undefined=StrictUndefined)\n            self.system_prompt = environment.from_string(system_prompt).render(variables)\n            self.user_prompt = environment.from_string(user_prompt).render(variables)\n        except Exception as e:\n            get_logger().exception(f\"Caught exception during init. Setting ai_handler to None to prevent __call__.\")\n            self.ai_handler = None\n\n    #Called by retry_with_fallback_models and therefore, on any failure must throw an exception:\n    async def __call__(self, model: str) -> str:\n        if not self.ai_handler:\n            get_logger().error(\"ai handler not set. 
Cannot invoke model!\")\n            raise ValueError(\"PredictionPreparator not initialized\")\n        try:\n            response, finish_reason = await self.ai_handler.chat_completion(\n                model=model, temperature=get_settings().config.temperature, system=self.system_prompt, user=self.user_prompt)\n            return response\n        except Exception as e:\n            get_logger().exception(\"Caught exception during prediction.\", artifacts={'system': self.system_prompt, 'user': self.user_prompt})\n            raise e\n\n\nclass PRHelpDocs(object):\n    def __init__(self, ctx_url, ai_handler:partial[BaseAiHandler,] = LiteLLMAIHandler, args: tuple[str]=None, return_as_string: bool=False):\n        try:\n            self.ctx_url = ctx_url\n            self.question = args[0] if args else None\n            self.return_as_string = return_as_string\n            self.repo_url_given_explicitly = True\n            self.repo_url = get_settings().get('PR_HELP_DOCS.REPO_URL', '')\n            self.repo_desired_branch = get_settings().get('PR_HELP_DOCS.REPO_DEFAULT_BRANCH', 'main') #Ignored if self.repo_url is empty\n            self.include_root_readme_file = not(get_settings()['PR_HELP_DOCS.EXCLUDE_ROOT_README'])\n            self.supported_doc_exts = get_settings()['PR_HELP_DOCS.SUPPORTED_DOC_EXTS']\n            self.docs_path = get_settings()['PR_HELP_DOCS.DOCS_PATH']\n\n            retrieved_settings = [self.include_root_readme_file, self.supported_doc_exts, self.docs_path]\n            if any([setting is None for setting in retrieved_settings]):\n                raise Exception(f\"One of the settings is invalid: {retrieved_settings}\")\n\n            self.git_provider = get_git_provider_with_context(ctx_url)\n            if not self.git_provider:\n                raise Exception(f\"No git provider found at {ctx_url}\")\n            if not self.repo_url:\n                self.repo_url_given_explicitly = False\n                get_logger().debug(f\"No 
explicit repo url provided, deducing it from type: {self.git_provider.__class__.__name__} \"\n                                  f\"context url: {self.ctx_url}\")\n                self.repo_url = self.git_provider.get_git_repo_url(self.ctx_url)\n                if not self.repo_url:\n                    raise Exception(f\"Unable to deduce repo url from type: {self.git_provider.__class__.__name__} url: {self.ctx_url}\")\n                get_logger().debug(f\"deduced repo url: {self.repo_url}\")\n                self.repo_desired_branch = None #Inferred from the repo provider.\n\n            self.ai_handler = ai_handler()\n            self.vars = {\n                \"docs_url\": self.repo_url,\n                \"question\": self.question,\n                \"snippets\": \"\",\n            }\n            self.token_handler = TokenHandler(None,\n                                                  self.vars,\n                                                  get_settings().pr_help_docs_prompts.system,\n                                                  get_settings().pr_help_docs_prompts.user)\n        except Exception as e:\n            get_logger().exception(f\"Caught exception during init. Setting self.question to None to prevent run() to do anything.\")\n            self.question = None\n\n    async def run(self):\n        if not self.question:\n            get_logger().warning('No question provided. Will do nothing.')\n            return None\n\n        try:\n            # Clone the repository and gather relevant documentation files.\n            docs_filepath_to_contents = self._gen_filenames_to_contents_map_from_repo()\n\n            #Generate prompt for the AI model. 
This will be the full text of all the documentation files combined.\n            docs_prompt = aggregate_documentation_files_for_prompt_contents(docs_filepath_to_contents)\n            if not docs_filepath_to_contents or not docs_prompt:\n                get_logger().warning(f\"Could not find any usable documentation. Returning with no result...\")\n                return None\n            docs_prompt_to_send_to_model = docs_prompt\n\n            # Estimate how many tokens will be needed.\n            # In case the expected number of tokens exceeds LLM limits, retry with just headings, asking the LLM to rank according to relevance to the question.\n            # Based on returned ranking, rerun but sort the documents accordingly, this time, trim in case of exceeding limit.\n\n            #First, check if the text is not too long to even query the LLM provider:\n            max_allowed_txt_input = get_maximal_text_input_length_for_token_count_estimation()\n            invoke_llm_just_with_headings = self._trim_docs_input(docs_prompt_to_send_to_model, max_allowed_txt_input,\n                                                                  only_return_if_trim_needed=True)\n            if invoke_llm_just_with_headings:\n                #Entire docs is too long. Rank and return according to relevance.\n                docs_prompt_to_send_to_model = await self._rank_docs_and_return_them_as_prompt(docs_filepath_to_contents,\n                                                                                         max_allowed_txt_input)\n\n            if not docs_prompt_to_send_to_model:\n                get_logger().error(\"Failed to generate docs prompt for model. 
Returning with no result...\")\n                return\n            # At this point, either all original documents be used (if their total length doesn't exceed limits), or only those selected.\n            self.vars['snippets'] = docs_prompt_to_send_to_model.strip()\n            # Run the AI model and extract sections from its response\n            response = await retry_with_fallback_models(PredictionPreparator(self.ai_handler, self.vars,\n                                                                             get_settings().pr_help_docs_prompts.system,\n                                                                             get_settings().pr_help_docs_prompts.user),\n                                                        model_type=ModelType.REGULAR)\n            response_yaml = load_yaml(response)\n            if not response_yaml:\n                get_logger().error(\"Failed to parse the AI response.\", artifacts={'response': response})\n                return\n            response_str = response_yaml.get('response')\n            relevant_sections = response_yaml.get('relevant_sections')\n            if not response_str or not relevant_sections:\n                get_logger().error(\"Failed to extract response/relevant sections.\",\n                                       artifacts={'raw_response': response, 'response_str': response_str, 'relevant_sections': relevant_sections})\n                return\n            if int(response_yaml.get('question_is_relevant', '1')) == 0:\n                get_logger().warning(f\"Question is not relevant. 
Returning without an answer...\",\n                                         artifacts={'raw_response': response})\n                return\n\n            # Format the response as markdown\n            answer_str = self._format_model_answer(response_str, relevant_sections)\n            if self.return_as_string: #Skip publishing\n                return answer_str\n            #Otherwise, publish the answer if answer is non empty and publish is not turned off:\n            if answer_str and get_settings().config.publish_output:\n                self.git_provider.publish_comment(answer_str)\n            else:\n                get_logger().info(\"Answer:\", artifacts={'answer_str': answer_str})\n            return answer_str\n        except Exception as e:\n            get_logger().exception('failed to provide answer to given user question as a result of a thrown exception (see above)')\n\n    def _find_all_document_files_matching_exts(self, abs_docs_path: str, ignore_readme=False, max_allowed_files=5000) -> list[str]:\n        try:\n            matching_files = []\n\n            # Ensure extensions don't have leading dots and are lowercase\n            dotless_extensions = [ext.lower().lstrip('.') for ext in self.supported_doc_exts]\n\n            # Walk through directory and subdirectories\n            file_cntr = 0\n            for root, _, files in os.walk(abs_docs_path):\n                for file in files:\n                    if ignore_readme and root == abs_docs_path and file.lower() in [f\"readme.{ext}\" for ext in dotless_extensions]:\n                        continue\n                    # Check if file has one of the specified extensions\n                    if any(file.lower().endswith(f'.{ext}') for ext in dotless_extensions):\n                        file_cntr+=1\n                        matching_files.append(os.path.join(root, file))\n                        if file_cntr >= max_allowed_files:\n                            get_logger().warning(f\"Found at 
least {max_allowed_files} files in {abs_docs_path}, skipping the rest.\")\n                            return matching_files\n            return matching_files\n        except Exception as e:\n            get_logger().exception(f\"Unexpected exception thrown. Returning empty list.\")\n            return []\n\n    def _gen_filenames_to_contents_map_from_repo(self) -> dict[str, str]:\n        try:\n            with TemporaryDirectory() as tmp_dir:\n                get_logger().debug(f\"About to clone repository: {self.repo_url} to temporary directory: {tmp_dir}...\")\n                returned_cloned_repo_root = self.git_provider.clone(self.repo_url, tmp_dir, remove_dest_folder=False)\n                if not returned_cloned_repo_root:\n                    raise Exception(f\"Failed to clone {self.repo_url} to {tmp_dir}\")\n\n                get_logger().debug(f\"About to gather relevant documentation files...\")\n                doc_files = []\n                if self.include_root_readme_file:\n                    for root, _, files in os.walk(returned_cloned_repo_root.path):\n                        # Only look at files in the root directory, not subdirectories\n                        if root == returned_cloned_repo_root.path:\n                            for file in files:\n                                if file.lower().startswith(\"readme.\"):\n                                    doc_files.append(os.path.join(root, file))\n                abs_docs_path = os.path.join(returned_cloned_repo_root.path, self.docs_path)\n                if os.path.exists(abs_docs_path):\n                    doc_files.extend(self._find_all_document_files_matching_exts(abs_docs_path,\n                                                                                 ignore_readme=(self.docs_path=='.')))\n                    if not doc_files:\n                        get_logger().warning(f\"No documentation files found matching file extensions: \"\n                                            
 f\"{self.supported_doc_exts} under repo: {self.repo_url} \"\n                                             f\"path: {self.docs_path}. Returning empty dict.\")\n                        return {}\n\n                get_logger().info(f'For context {self.ctx_url} and repo: {self.repo_url}'\n                                  f' will be using the following documentation files: ',\n                                  artifacts={'doc_files': doc_files})\n\n                return map_documentation_files_to_contents(returned_cloned_repo_root.path, doc_files)\n        except Exception as e:\n            get_logger().exception(f\"Unexpected exception thrown. Returning empty dict.\")\n            return {}\n\n    def _trim_docs_input(self, docs_input: str, max_allowed_txt_input: int, only_return_if_trim_needed=False) -> bool|str:\n        try:\n            if len(docs_input) >= max_allowed_txt_input:\n                get_logger().warning(\n                    f\"Text length: {len(docs_input)} exceeds the current returned limit of {max_allowed_txt_input} just for token count estimation. Trimming the text...\")\n                if only_return_if_trim_needed:\n                    return True\n                docs_input = docs_input[:max_allowed_txt_input]\n            # Then, count the tokens in the prompt. If the count exceeds the limit, trim the text.\n            token_count = self.token_handler.count_tokens(docs_input, force_accurate=True)\n            get_logger().debug(f\"Estimated token count of documentation to send to model: {token_count}\")\n            model = get_settings().config.model\n            if model in MAX_TOKENS:\n                max_tokens_full = MAX_TOKENS[\n                    model]  # note - here we take the actual max tokens, without any reductions. 
we do aim to get the full documentation website in the prompt\n            else:\n                max_tokens_full = get_max_tokens(model)\n            delta_output = 5000  # Elbow room to reduce chance of exceeding token limit or model paying less attention to prompt guidelines.\n            if token_count > max_tokens_full - delta_output:\n                if only_return_if_trim_needed:\n                    return True\n                docs_input = clean_markdown_content(\n                    docs_input)  # Reduce unnecessary text/images/etc.\n                get_logger().info(\n                    f\"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. Attempting to clip text to fit within the limit...\")\n                docs_input = clip_tokens(docs_input, max_tokens_full - delta_output,\n                                                           num_input_tokens=token_count)\n            if only_return_if_trim_needed:\n                return False\n            return docs_input\n        except Exception as e:\n            # Unexpected exception. Rethrowing it since:\n            # 1. This is an internal function.\n            # 2. An empty str/False result is a valid one - would require now checking also for None.\n            get_logger().exception(f\"Unexpected exception thrown. 
Rethrowing it...\")\n            raise e\n\n    async def _rank_docs_and_return_them_as_prompt(self, docs_filepath_to_contents: dict[str, str], max_allowed_txt_input: int) -> str:\n        try:\n            #Return just file name and their headings (if exist):\n            docs_prompt_to_send_to_model = (\n                aggregate_documentation_files_for_prompt_contents(docs_filepath_to_contents,\n                                                                  return_just_headings=True))\n            # Verify list of headings does not exceed limits - trim it if it does.\n            docs_prompt_to_send_to_model = self._trim_docs_input(docs_prompt_to_send_to_model, max_allowed_txt_input,\n                                                                 only_return_if_trim_needed=False)\n            if not docs_prompt_to_send_to_model:\n                get_logger().error(\"_trim_docs_input returned an empty result.\")\n                return \"\"\n\n            self.vars['snippets'] = docs_prompt_to_send_to_model.strip()\n            # Run the AI model and extract sections from its response\n            response = await retry_with_fallback_models(PredictionPreparator(self.ai_handler, self.vars,\n                                                                             get_settings().pr_help_docs_headings_prompts.system,\n                                                                             get_settings().pr_help_docs_headings_prompts.user),\n                                                        model_type=ModelType.REGULAR)\n            response_yaml = load_yaml(response)\n            if not response_yaml:\n                get_logger().error(\"Failed to parse the AI response.\", artifacts={'response': response})\n                return \"\"\n            # else: Sanitize the output so that the file names match 1:1 dictionary keys. 
Do this via the file index and not its name, which may be altered by the model.\n            valid_indices = [int(entry['idx']) for entry in response_yaml.get('relevant_files_ranking')\n                             if int(entry['idx']) >= 0 and int(entry['idx']) < len(docs_filepath_to_contents)]\n            valid_file_paths = [list(docs_filepath_to_contents.keys())[idx] for idx in valid_indices]\n            selected_docs_dict = {file_path: docs_filepath_to_contents[file_path] for file_path in valid_file_paths}\n            docs_prompt = aggregate_documentation_files_for_prompt_contents(selected_docs_dict)\n            docs_prompt_to_send_to_model = docs_prompt\n\n            # Check if the updated list of documents does not exceed limits and trim if it does:\n            docs_prompt_to_send_to_model = self._trim_docs_input(docs_prompt_to_send_to_model, max_allowed_txt_input,\n                                                                 only_return_if_trim_needed=False)\n            if not docs_prompt_to_send_to_model:\n                get_logger().error(\"_trim_docs_input returned an empty result.\")\n                return \"\"\n            return docs_prompt_to_send_to_model\n        except Exception as e:\n            get_logger().exception(f\"Unexpected exception thrown. 
Returning empty result.\")\n            return \"\"\n\n    def _format_model_answer(self, response_str: str, relevant_sections: list[dict[str, str]]) -> str:\n        try:\n            canonical_url_prefix, canonical_url_suffix = (\n                self.git_provider.get_canonical_url_parts(repo_git_url=self.repo_url if self.repo_url_given_explicitly else None,\n                                                          desired_branch=self.repo_desired_branch))\n            answer_str = format_markdown_q_and_a_response(self.question, response_str, relevant_sections,\n                                                          self.supported_doc_exts, canonical_url_prefix, canonical_url_suffix)\n            if answer_str:\n                #Remove the question phrase and replace with light bulb and a heading mentioning this is an automated answer:\n                answer_str = modify_answer_section(answer_str)\n            #In case the response should not be published and returned as string, stop here:\n            if answer_str and self.return_as_string:\n                get_logger().info(f\"Chat help docs answer\", artifacts={'answer_str': answer_str})\n                return answer_str\n            if not answer_str:\n                get_logger().info(f\"No answer found\")\n                return \"\"\n            if self.git_provider.is_supported(\"gfm_markdown\") and get_settings().pr_help_docs.enable_help_text:\n                answer_str += \"<hr>\\n\\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \\n\\n\"\n                answer_str += HelpMessage.get_help_docs_usage_guide()\n                answer_str += \"\\n</details>\\n\"\n            return answer_str\n        except Exception as e:\n            get_logger().exception(f\"Unexpected exception thrown. Returning empty result.\")\n            return \"\"\n"
  },
  {
    "path": "pr_agent/tools/pr_help_message.py",
    "content": "import copy\nimport re\nfrom functools import partial\nfrom pathlib import Path\n\nfrom jinja2 import Environment, StrictUndefined\n\nfrom pr_agent.algo import MAX_TOKENS\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.pr_processing import retry_with_fallback_models\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import ModelType, clip_tokens, load_yaml, get_max_tokens\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import BitbucketServerProvider, GithubProvider, get_git_provider_with_context\nfrom pr_agent.log import get_logger\n\n\ndef extract_header(snippet):\n    res = ''\n    lines = snippet.split('===Snippet content===')[0].split('\\n')\n    highest_header = ''\n    highest_level = float('inf')\n    for line in lines[::-1]:\n        line = line.strip()\n        if line.startswith('Header '):\n            highest_header = line.split(': ')[1]\n    if highest_header:\n        res = f\"#{highest_header.lower().replace(' ', '-')}\"\n    return res\n\nclass PRHelpMessage:\n    def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler, return_as_string=False):\n        self.git_provider = get_git_provider_with_context(pr_url)\n        self.ai_handler = ai_handler()\n        self.question_str = self.parse_args(args)\n        self.return_as_string = return_as_string\n        if self.question_str:\n            self.vars = {\n                \"question\": self.question_str,\n                \"snippets\": \"\",\n            }\n            self.token_handler = TokenHandler(None,\n                                              self.vars,\n                                              get_settings().pr_help_prompts.system,\n                                              get_settings().pr_help_prompts.user)\n\n    async def 
_prepare_prediction(self, model: str):\n        try:\n            variables = copy.deepcopy(self.vars)\n            environment = Environment(undefined=StrictUndefined)\n            system_prompt = environment.from_string(get_settings().pr_help_prompts.system).render(variables)\n            user_prompt = environment.from_string(get_settings().pr_help_prompts.user).render(variables)\n            response, finish_reason = await self.ai_handler.chat_completion(\n                model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)\n            return response\n        except Exception as e:\n            get_logger().error(f\"Error while preparing prediction: {e}\")\n            return \"\"\n\n    def parse_args(self, args):\n        if args and len(args) > 0:\n            question_str = \" \".join(args)\n        else:\n            question_str = \"\"\n        return question_str\n\n    def format_markdown_header(self, header: str) -> str:\n        try:\n            # First, strip common characters from both ends\n            cleaned = header.strip('# 💎\\n')\n\n            # Define all characters to be removed/replaced in a single pass\n            replacements = {\n                \"'\": '',\n                \"`\": '',\n                '(': '',\n                ')': '',\n                ',': '',\n                '.': '',\n                '?': '',\n                '!': '',\n                ' ': '-'\n            }\n\n            # Compile regex pattern for characters to remove\n            pattern = re.compile('|'.join(map(re.escape, replacements.keys())))\n\n            # Perform replacements in a single pass and convert to lowercase\n            return pattern.sub(lambda m: replacements[m.group()], cleaned).lower()\n        except Exception:\n            get_logger().exception(f\"Error while formatting markdown header\", artifacts={'header': header})\n            return \"\"\n\n\n    async def run(self):\n        try:\n    
        if self.question_str:\n                get_logger().info(f'Answering a PR question about the PR {self.git_provider.pr_url} ')\n\n                if not get_settings().get('openai.key'):\n                    if get_settings().config.publish_output:\n                        self.git_provider.publish_comment(\n                            \"The `Help` tool chat feature requires an OpenAI API key for calculating embeddings\")\n                    else:\n                        get_logger().error(\"The `Help` tool chat feature requires an OpenAI API key for calculating embeddings\")\n                    return\n\n                # current path\n                docs_path= Path(__file__).parent.parent.parent / 'docs' / 'docs'\n                # get all the 'md' files inside docs_path and its subdirectories\n                md_files = list(docs_path.glob('**/*.md'))\n                folders_to_exclude = ['/finetuning_benchmark/']\n                files_to_exclude = {'EXAMPLE_BEST_PRACTICE.md', 'compression_strategy.md', '/docs/overview/index.md'}\n                md_files = [file for file in md_files if not any(folder in str(file) for folder in folders_to_exclude) and not any(file.name == file_to_exclude for file_to_exclude in files_to_exclude)]\n\n                # sort the 'md_files' so that 'priority_files' will be at the top\n                priority_files_strings = ['/docs/index.md', '/usage-guide', 'tools/describe.md', 'tools/review.md',\n                                          'tools/improve.md', '/faq']\n                md_files_priority = [file for file in md_files if\n                                     any(priority_string in str(file) for priority_string in priority_files_strings)]\n                md_files_not_priority = [file for file in md_files if file not in md_files_priority]\n                md_files = md_files_priority + md_files_not_priority\n\n                docs_prompt = \"\"\n                for file in md_files:\n                    
try:\n                        with open(file, 'r') as f:\n                            file_path = str(file).replace(str(docs_path), '')\n                            docs_prompt += f\"\\n==file name==\\n\\n{file_path}\\n\\n==file content==\\n\\n{f.read().strip()}\\n=========\\n\\n\"\n                    except Exception as e:\n                        get_logger().error(f\"Error while reading the file {file}: {e}\")\n                token_count = self.token_handler.count_tokens(docs_prompt)\n                get_logger().debug(f\"Token count of full documentation website: {token_count}\")\n\n                model = get_settings().config.model\n                if model in MAX_TOKENS:\n                    max_tokens_full = MAX_TOKENS[model] # note - here we take the actual max tokens, without any reductions. we do aim to get the full documentation website in the prompt\n                else:\n                    max_tokens_full = get_max_tokens(model)\n                delta_output = 2000\n                if token_count > max_tokens_full - delta_output:\n                    get_logger().info(f\"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. 
Skipping the PR Help message.\")\n                    docs_prompt = clip_tokens(docs_prompt, max_tokens_full - delta_output)\n                self.vars['snippets'] = docs_prompt.strip()\n\n                # run the AI model\n                response = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)\n                response_yaml = load_yaml(response)\n                if isinstance(response_yaml, str):\n                    get_logger().warning(f\"failing to parse response: {response_yaml}, publishing the response as is\")\n                    if get_settings().config.publish_output:\n                        answer_str = f\"### Question: \\n{self.question_str}\\n\\n\"\n                        answer_str += f\"### Answer:\\n\\n\"\n                        answer_str += response_yaml\n                        self.git_provider.publish_comment(answer_str)\n                    return \"\"\n                response_str = response_yaml.get('response')\n                relevant_sections = response_yaml.get('relevant_sections')\n\n                if not relevant_sections:\n                    get_logger().info(f\"Could not find relevant answer for the question: {self.question_str}\")\n                    if get_settings().config.publish_output:\n                        answer_str = f\"### Question: \\n{self.question_str}\\n\\n\"\n                        answer_str += f\"### Answer:\\n\\n\"\n                        answer_str += f\"Could not find relevant information to answer the question. 
Please provide more details and try again.\"\n                        self.git_provider.publish_comment(answer_str)\n                    return \"\"\n\n                # prepare the answer\n                answer_str = \"\"\n                if response_str:\n                    answer_str += f\"### Question: \\n{self.question_str}\\n\\n\"\n                    answer_str += f\"### Answer:\\n{response_str.strip()}\\n\\n\"\n                    answer_str += f\"#### Relevant Sources:\\n\\n\"\n                    base_path = \"https://qodo-merge-docs.qodo.ai/\"\n                    for section in relevant_sections:\n                        file = section.get('file_name').strip().removesuffix('.md')\n                        if str(section['relevant_section_header_string']).strip():\n                            markdown_header = self.format_markdown_header(section['relevant_section_header_string'])\n                            answer_str += f\"> - {base_path}{file}#{markdown_header}\\n\"\n                        else:\n                            answer_str += f\"> - {base_path}{file}\\n\"\n\n\n                # publish the answer\n                if get_settings().config.publish_output:\n                    self.git_provider.publish_comment(answer_str)\n                else:\n                    get_logger().info(f\"Answer:\\n{answer_str}\")\n            else:\n                if not isinstance(self.git_provider, BitbucketServerProvider) and not self.git_provider.is_supported(\"gfm_markdown\"):\n                    self.git_provider.publish_comment(\n                        \"The `Help` tool requires gfm markdown, which is not supported by your code platform.\")\n                    return\n\n                get_logger().info('Getting PR Help Message...')\n                relevant_configs = {'pr_help': dict(get_settings().pr_help),\n                                    'config': dict(get_settings().config)}\n                get_logger().debug(\"Relevant configs\", 
artifacts=relevant_configs)\n                pr_comment = \"## PR Agent Walkthrough 🤖\\n\\n\"\n                pr_comment += \"Welcome to the PR Agent, an AI-powered tool for automated pull request analysis, feedback, suggestions and more.\"\"\"\n                pr_comment += \"\\n\\nHere is a list of tools you can use to interact with the PR Agent:\\n\"\n                base_path = \"https://pr-agent-docs.codium.ai/tools\"\n\n                tool_names = []\n                tool_names.append(f\"[DESCRIBE]({base_path}/describe/)\")\n                tool_names.append(f\"[REVIEW]({base_path}/review/)\")\n                tool_names.append(f\"[IMPROVE]({base_path}/improve/)\")\n                tool_names.append(f\"[UPDATE CHANGELOG]({base_path}/update_changelog/)\")\n                tool_names.append(f\"[HELP DOCS]({base_path}/help_docs/)\")\n                tool_names.append(f\"[ADD DOCS]({base_path}/add_docs/)\")\n                tool_names.append(f\"[ASK]({base_path}/ask/)\")\n                tool_names.append(f\"[GENERATE CUSTOM LABELS]({base_path}/generate_labels/)\")\n\n                descriptions = []\n                descriptions.append(\"Generates PR description - title, type, summary, code walkthrough and labels\")\n                descriptions.append(\"Adjustable feedback about the PR, possible issues, security concerns, review effort and more\")\n                descriptions.append(\"Code suggestions for improving the PR\")\n                descriptions.append(\"Automatically updates the changelog\")\n                descriptions.append(\"Answers a question regarding this repository, or a given one, based on given documentation path\")\n                descriptions.append(\"Generates documentation to methods/functions/classes that changed in the PR\")\n                descriptions.append(\"Answering free-text questions about the PR\")\n                descriptions.append(\"Generates custom labels for the PR, based on specific guidelines defined by the 
user\")\n\n                commands  =[]\n                commands.append(\"`/describe`\")\n                commands.append(\"`/review`\")\n                commands.append(\"`/improve`\")\n                commands.append(\"`/update_changelog`\")\n                commands.append(\"`/help_docs`\")\n                commands.append(\"`/add_docs`\")\n                commands.append(\"`/ask`\")\n                commands.append(\"`/generate_labels`\")\n\n                checkbox_list = []\n                checkbox_list.append(\" - [ ] Run <!-- /describe -->\")\n                checkbox_list.append(\" - [ ] Run <!-- /review -->\")\n                checkbox_list.append(\" - [ ] Run <!-- /improve -->\")\n                checkbox_list.append(\" - [ ] Run <!-- /update_changelog -->\")\n                checkbox_list.append(\" - [ ] Run <!-- /help_docs -->\")\n                checkbox_list.append(\" - [ ] Run <!-- /add_docs -->\")\n                checkbox_list.append(\"[*]\")\n                checkbox_list.append(\"[*]\")\n                checkbox_list.append(\"[*]\")\n                checkbox_list.append(\"[*]\")\n                checkbox_list.append(\"[*]\")\n\n                if isinstance(self.git_provider, GithubProvider) and not get_settings().config.get('disable_checkboxes', False):\n                    pr_comment += f\"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Description</th><th align='left'>Trigger Interactively :gem:</th></tr>\"\n                    for i in range(len(tool_names)):\n                        pr_comment += f\"\\n<tr><td align='left'>\\n\\n<strong>{tool_names[i]}</strong></td>\\n<td>{descriptions[i]}</td>\\n<td>\\n\\n{checkbox_list[i]}\\n</td></tr>\"\n                    pr_comment += \"</table>\\n\\n\"\n                    pr_comment += f\"\"\"\\n\\n(1) Note that each tool can be [triggered automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) when a 
new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage).\"\"\"\n                    pr_comment += f\"\"\"\\n\\n(2) Tools marked with [*] require additional parameters to be passed. For example, to invoke the `/ask` tool, you need to comment on a PR: `/ask \"<question content>\"`. See the relevant documentation for each tool for more details.\"\"\"\n                elif isinstance(self.git_provider, BitbucketServerProvider):\n                    # only support basic commands in BBDC\n                    pr_comment = generate_bbdc_table(tool_names[:4], descriptions[:4])\n                else:\n                    pr_comment += f\"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Command</th><th align='left'>Description</th></tr>\"\n                    for i in range(len(tool_names)):\n                        pr_comment += f\"\\n<tr><td align='left'>\\n\\n<strong>{tool_names[i]}</strong></td><td>{commands[i]}</td><td>{descriptions[i]}</td></tr>\"\n                    pr_comment += \"</table>\\n\\n\"\n                    pr_comment += f\"\"\"\\n\\nNote that each tool can be [invoked automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage).\"\"\"\n\n                if get_settings().config.publish_output:\n                    self.git_provider.publish_comment(pr_comment)\n        except Exception as e:\n            get_logger().exception(f\"Error while running PRHelpMessage: {e}\")\n        return \"\"\n\n    async def prepare_relevant_snippets(self, sim_results):\n        # Get relevant snippets\n        relevant_snippets_full = []\n        relevant_pages_full = []\n        relevant_snippets_full_header = []\n        th = 0.75\n        for s in sim_results:\n            page = 
s[0].metadata['source']\n            content = s[0].page_content\n            score = s[1]\n            relevant_snippets_full.append(content)\n            relevant_snippets_full_header.append(extract_header(content))\n            relevant_pages_full.append(page)\n        # build the snippets string\n        relevant_snippets_str = \"\"\n        for i, s in enumerate(relevant_snippets_full):\n            relevant_snippets_str += f\"Snippet {i+1}:\\n\\n{s}\\n\\n\"\n            relevant_snippets_str += \"-------------------\\n\\n\"\n        return relevant_pages_full, relevant_snippets_full_header, relevant_snippets_str\n\n\ndef generate_bbdc_table(column_arr_1, column_arr_2):\n    # Generating header row\n    header_row = \"| Tool  | Description | \\n\"\n\n    # Generating separator row\n    separator_row = \"|--|--|\\n\"\n\n    # Generating data rows\n    data_rows = \"\"\n    max_len = max(len(column_arr_1), len(column_arr_2))\n    for i in range(max_len):\n        col1 = column_arr_1[i] if i < len(column_arr_1) else \"\"\n        col2 = column_arr_2[i] if i < len(column_arr_2) else \"\"\n        data_rows += f\"| {col1} | {col2} |\\n\"\n\n    # Combine all parts to form the complete table\n    markdown_table = header_row + separator_row + data_rows\n    return markdown_table\n"
  },
  {
    "path": "pr_agent/tools/pr_line_questions.py",
    "content": "import argparse\nimport copy\nfrom functools import partial\n\nfrom jinja2 import Environment, StrictUndefined\n\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.git_patch_processing import (\n    decouple_and_convert_to_hunks_with_lines_numbers, extract_hunk_lines_from_patch)\nfrom pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import ModelType\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider\nfrom pr_agent.git_providers.git_provider import get_main_pr_language\nfrom pr_agent.git_providers.github_provider import GithubProvider\nfrom pr_agent.log import get_logger\nfrom pr_agent.servers.help import HelpMessage\n\nclass PR_LineQuestions:\n    def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):\n        self.question_str = self.parse_args(args)\n        self.git_provider = get_git_provider()(pr_url)\n        self.main_pr_language = get_main_pr_language(\n            self.git_provider.get_languages(), self.git_provider.get_files()\n        )\n        self.ai_handler = ai_handler()\n        self.ai_handler.main_pr_language = self.main_pr_language\n\n        self.vars = {\n            \"title\": self.git_provider.pr.title,\n            \"branch\": self.git_provider.get_pr_branch(),\n            \"diff\": \"\",  # empty diff for initial calculation\n            \"question\": self.question_str,\n            \"full_hunk\": \"\",\n            \"selected_lines\": \"\",\n            \"conversation_history\": \"\",  \n        }\n        self.token_handler = TokenHandler(self.git_provider.pr,\n                                          self.vars,\n                                          get_settings().pr_line_questions_prompt.system,\n         
                                 get_settings().pr_line_questions_prompt.user)\n        self.patches_diff = None\n        self.prediction = None\n\n    def parse_args(self, args):\n        if args and len(args) > 0:\n            question_str = \" \".join(args)\n        else:\n            question_str = \"\"\n        return question_str\n\n\n    async def run(self):\n        get_logger().info('Answering a PR lines question...')\n        # if get_settings().config.publish_output:\n        #     self.git_provider.publish_comment(\"Preparing answer...\", is_temporary=True)\n\n        # set conversation history if enabled\n        # currently only supports GitHub provider\n        if get_settings().pr_questions.use_conversation_history and isinstance(self.git_provider, GithubProvider):\n            conversation_history = self._load_conversation_history()\n            self.vars[\"conversation_history\"] = conversation_history\n\n        self.patch_with_lines = \"\"\n        ask_diff = get_settings().get('ask_diff_hunk', \"\")\n        line_start = get_settings().get('line_start', '')\n        line_end = get_settings().get('line_end', '')\n        side = get_settings().get('side', 'RIGHT')\n        file_name = get_settings().get('file_name', '')\n        comment_id = get_settings().get('comment_id', '')\n        if ask_diff:\n            self.patch_with_lines, self.selected_lines = extract_hunk_lines_from_patch(ask_diff,\n                                                                                       file_name,\n                                                                                       line_start=line_start,\n                                                                                       line_end=line_end,\n                                                                                       side=side\n                                                                                       )\n        else:\n            diff_files = 
self.git_provider.get_diff_files()\n            for file in diff_files:\n                if file.filename == file_name:\n                    self.patch_with_lines, self.selected_lines = extract_hunk_lines_from_patch(file.patch, file.filename,\n                                                                                               line_start=line_start,\n                                                                                               line_end=line_end,\n                                                                                               side=side)\n        if self.patch_with_lines:\n            model_answer = await retry_with_fallback_models(self._get_prediction, model_type=ModelType.WEAK)\n            # sanitize the answer so that no line will start with \"/\"\n            model_answer_sanitized = model_answer.strip().replace(\"\\n/\", \"\\n /\")\n            if model_answer_sanitized.startswith(\"/\"):\n                model_answer_sanitized = \" \" + model_answer_sanitized\n\n            get_logger().info('Preparing answer...')\n            if comment_id:\n                self.git_provider.reply_to_comment_from_comment_id(comment_id, model_answer_sanitized)\n            else:\n                self.git_provider.publish_comment(model_answer_sanitized)\n\n        return \"\"\n        \n    def _load_conversation_history(self) -> str:\n        \"\"\"Generate conversation history from the code review thread\n        \n        Returns:\n            str: The formatted conversation history\n        \"\"\"\n        comment_id = get_settings().get('comment_id', '')\n        file_path = get_settings().get('file_name', '')\n        line_number = get_settings().get('line_end', '')\n        \n        # early return if any required parameter is missing\n        if not all([comment_id, file_path, line_number]):\n            get_logger().error(\"Missing required parameters for conversation history\")\n            return \"\"\n        \n        
try:\n            # retrieve thread comments\n            thread_comments = self.git_provider.get_review_thread_comments(comment_id)\n            \n            # filter and prepare comments\n            filtered_comments = []\n            for comment in thread_comments:\n                body = getattr(comment, 'body', '')\n\n                # skip empty comments, current comment(will be added as a question at prompt)\n                if not body or not body.strip() or comment_id == comment.id:\n                    continue\n                \n                user = comment.user\n                author = user.login if hasattr(user, 'login') else 'Unknown'\n                filtered_comments.append((author, body))\n            \n            # transform conversation history to string using the same pattern as get_commit_messages\n            if filtered_comments:\n                comment_count = len(filtered_comments)\n                get_logger().info(f\"Loaded {comment_count} comments from the code review thread\")\n                \n                # Format as numbered list, similar to get_commit_messages\n                conversation_history_str = \"\\n\".join([f\"{i + 1}. 
{author}: {body}\" \n                                                   for i, (author, body) in enumerate(filtered_comments)])\n                return conversation_history_str\n            \n            return \"\"\n        \n        except Exception as e:\n            get_logger().error(f\"Error processing conversation history, error: {e}\")\n            return \"\"\n\n    async def _get_prediction(self, model: str):\n        variables = copy.deepcopy(self.vars)\n        variables[\"full_hunk\"] = self.patch_with_lines  # update diff\n        variables[\"selected_lines\"] = self.selected_lines\n        environment = Environment(undefined=StrictUndefined)\n        system_prompt = environment.from_string(get_settings().pr_line_questions_prompt.system).render(variables)\n        user_prompt = environment.from_string(get_settings().pr_line_questions_prompt.user).render(variables)\n        if get_settings().config.verbosity_level >= 2:\n            # get_logger().info(f\"\\nSystem prompt:\\n{system_prompt}\")\n            # get_logger().info(f\"\\nUser prompt:\\n{user_prompt}\")\n            print(f\"\\nSystem prompt:\\n{system_prompt}\")\n            print(f\"\\nUser prompt:\\n{user_prompt}\")\n\n        response, finish_reason = await self.ai_handler.chat_completion(\n            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)\n        return response\n"
  },
  {
    "path": "pr_agent/tools/pr_questions.py",
    "content": "import copy\nfrom functools import partial\n\nfrom jinja2 import Environment, StrictUndefined\n\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import ModelType\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider, GitLabProvider\nfrom pr_agent.git_providers.git_provider import get_main_pr_language\nfrom pr_agent.log import get_logger\nfrom pr_agent.servers.help import HelpMessage\n\n\nclass PRQuestions:\n    def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):\n        question_str = self.parse_args(args)\n        self.pr_url = pr_url\n        self.git_provider = get_git_provider()(pr_url)\n        self.main_pr_language = get_main_pr_language(\n            self.git_provider.get_languages(), self.git_provider.get_files()\n        )\n        self.ai_handler = ai_handler()\n        self.ai_handler.main_pr_language = self.main_pr_language\n\n        self.question_str = question_str\n        self.vars = {\n            \"title\": self.git_provider.pr.title,\n            \"branch\": self.git_provider.get_pr_branch(),\n            \"description\": self.git_provider.get_pr_description(),\n            \"language\": self.main_pr_language,\n            \"diff\": \"\",  # empty diff for initial calculation\n            \"questions\": self.question_str,\n            \"commit_messages_str\": self.git_provider.get_commit_messages(),\n        }\n        self.token_handler = TokenHandler(self.git_provider.pr,\n                                          self.vars,\n                                          get_settings().pr_questions_prompt.system,\n                                          
get_settings().pr_questions_prompt.user)\n        self.patches_diff = None\n        self.prediction = None\n\n    def parse_args(self, args):\n        if args and len(args) > 0:\n            question_str = \" \".join(args)\n        else:\n            question_str = \"\"\n        return question_str\n\n    async def run(self):\n        get_logger().info(f'Answering a PR question about the PR {self.pr_url} ')\n        relevant_configs = {'pr_questions': dict(get_settings().pr_questions),\n                            'config': dict(get_settings().config)}\n        get_logger().debug(\"Relevant configs\", artifacts=relevant_configs)\n        if get_settings().config.publish_output:\n            self.git_provider.publish_comment(\"Preparing answer...\", is_temporary=True)\n\n        # identify image\n        img_path = self.identify_image_in_comment()\n        if img_path:\n            get_logger().debug(f\"Image path identified\", artifact=img_path)\n\n        await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.WEAK)\n\n        pr_comment = self._prepare_pr_answer()\n        get_logger().debug(f\"PR output\", artifact=pr_comment)\n\n        if self.git_provider.is_supported(\"gfm_markdown\") and get_settings().pr_questions.enable_help_text:\n            pr_comment += \"<hr>\\n\\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \\n\\n\"\n            pr_comment += HelpMessage.get_ask_usage_guide()\n            pr_comment += \"\\n</details>\\n\"\n\n        if get_settings().config.publish_output:\n            self.git_provider.publish_comment(pr_comment)\n            self.git_provider.remove_initial_comment()\n        return \"\"\n\n    def identify_image_in_comment(self):\n        img_path = ''\n        if '![image]' in self.question_str:\n            # assuming structure:\n            # /ask question ...  
> ![image](img_path)\n            img_path = self.question_str.split('![image]')[1].strip().strip('()')\n            self.vars['img_path'] = img_path\n        elif 'https://' in self.question_str and ('.png' in self.question_str or 'jpg' in self.question_str): # direct image link\n            # include https:// in the image path\n            img_path = 'https://' + self.question_str.split('https://')[1]\n            self.vars['img_path'] = img_path\n        return img_path\n\n    async def _prepare_prediction(self, model: str):\n        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)\n        if self.patches_diff:\n            get_logger().debug(f\"PR diff\", artifact=self.patches_diff)\n            self.prediction = await self._get_prediction(model)\n        else:\n            get_logger().error(f\"Error getting PR diff\")\n            self.prediction = \"\"\n\n    async def _get_prediction(self, model: str):\n        variables = copy.deepcopy(self.vars)\n        variables[\"diff\"] = self.patches_diff  # update diff\n        environment = Environment(undefined=StrictUndefined)\n        system_prompt = environment.from_string(get_settings().pr_questions_prompt.system).render(variables)\n        user_prompt = environment.from_string(get_settings().pr_questions_prompt.user).render(variables)\n        if 'img_path' in variables:\n            img_path = self.vars['img_path']\n            response, finish_reason = await (self.ai_handler.chat_completion\n                                             (model=model, temperature=get_settings().config.temperature,\n                                              system=system_prompt, user=user_prompt, img_path=img_path))\n        else:\n            response, finish_reason = await self.ai_handler.chat_completion(\n                model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)\n        return response\n\n    def gitlab_protections(self, 
model_answer: str) -> str:\n        github_quick_actions_MR = [\"/approve\", \"/close\", \"/merge\", \"/reopen\", \"/unapprove\", \"/title\", \"/assign\",\n                                \"/copy_metadata\", \"/target_branch\"]\n        if any(action in model_answer for action in github_quick_actions_MR):\n            str_err = \"Model answer contains GitHub quick actions, which are not supported in GitLab\"\n            get_logger().error(str_err)\n            return str_err\n        return model_answer\n\n    def _prepare_pr_answer(self) -> str:\n        model_answer = self.prediction.strip()\n        # sanitize the answer so that no line will start with \"/\"\n        model_answer_sanitized = model_answer.replace(\"\\n/\", \"\\n /\")\n        model_answer_sanitized = model_answer_sanitized.replace(\"\\r/\", \"\\r /\")\n        if isinstance(self.git_provider, GitLabProvider):\n            model_answer_sanitized = self.gitlab_protections(model_answer_sanitized)\n        if model_answer_sanitized.startswith(\"/\"):\n            model_answer_sanitized = \" \" + model_answer_sanitized\n        if model_answer_sanitized != model_answer:\n            get_logger().debug(f\"Sanitized model answer\",\n                               artifact={\"model_answer\": model_answer, \"sanitized_answer\": model_answer_sanitized})\n\n\n        answer_str = f\"### **Ask**❓\\n{self.question_str}\\n\\n\"\n        answer_str += f\"### **Answer:**\\n{model_answer_sanitized}\\n\\n\"\n        return answer_str\n"
  },
  {
    "path": "pr_agent/tools/pr_reviewer.py",
    "content": "import copy\nimport datetime\nimport traceback\nfrom collections import OrderedDict\nfrom functools import partial\nfrom typing import List, Tuple\n\nfrom jinja2 import Environment, StrictUndefined\n\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files,\n                                         get_pr_diff,\n                                         retry_with_fallback_models)\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import (ModelType, PRReviewHeader,\n                                 convert_to_markdown_v2, github_action_output,\n                                 load_yaml, show_relevant_configurations)\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import (get_git_provider,\n                                    get_git_provider_with_context)\nfrom pr_agent.git_providers.git_provider import (IncrementalPR,\n                                                 get_main_pr_language)\nfrom pr_agent.log import get_logger\nfrom pr_agent.servers.help import HelpMessage\nfrom pr_agent.tools.ticket_pr_compliance_check import (\n    extract_and_cache_pr_tickets, extract_tickets)\n\n\nclass PRReviewer:\n    \"\"\"\n    The PRReviewer class is responsible for reviewing a pull request and generating feedback using an AI model.\n    \"\"\"\n\n    def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False, args: list = None,\n                 ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):\n        \"\"\"\n        Initialize the PRReviewer object with the necessary attributes and objects to review a pull request.\n\n        Args:\n            pr_url (str): The URL of the pull request to be reviewed.\n            is_answer (bool, optional): Indicates whether the review is being done in answer mode. 
Defaults to False.\n            is_auto (bool, optional): Indicates whether the review is being done in automatic mode. Defaults to False.\n            ai_handler (BaseAiHandler): The AI handler to be used for the review. Defaults to None.\n            args (list, optional): List of arguments passed to the PRReviewer class. Defaults to None.\n        \"\"\"\n        self.git_provider = get_git_provider_with_context(pr_url)\n        self.args = args\n        self.incremental = self.parse_incremental(args)  # -i command\n        if self.incremental and self.incremental.is_incremental:\n            self.git_provider.get_incremental_commits(self.incremental)\n\n        self.main_language = get_main_pr_language(\n            self.git_provider.get_languages(), self.git_provider.get_files()\n        )\n        self.pr_url = pr_url\n        self.is_answer = is_answer\n        self.is_auto = is_auto\n\n        if self.is_answer and not self.git_provider.is_supported(\"get_issue_comments\"):\n            raise Exception(f\"Answer mode is not supported for {get_settings().config.git_provider} for now\")\n        self.ai_handler = ai_handler()\n        self.ai_handler.main_pr_language = self.main_language\n        self.patches_diff = None\n        self.prediction = None\n        answer_str, question_str = self._get_user_answers()\n        self.pr_description, self.pr_description_files = (\n            self.git_provider.get_pr_description(split_changes_walkthrough=True))\n        if (self.pr_description_files and get_settings().get(\"config.is_auto_command\", False) and\n                get_settings().get(\"config.enable_ai_metadata\", False)):\n            add_ai_metadata_to_diff_files(self.git_provider, self.pr_description_files)\n            get_logger().debug(f\"AI metadata added to the this command\")\n        else:\n            get_settings().set(\"config.enable_ai_metadata\", False)\n            get_logger().debug(f\"AI metadata is disabled for this command\")\n\n        
self.vars = {\n            \"title\": self.git_provider.pr.title,\n            \"branch\": self.git_provider.get_pr_branch(),\n            \"description\": self.pr_description,\n            \"language\": self.main_language,\n            \"diff\": \"\",  # empty diff for initial calculation\n            \"num_pr_files\": self.git_provider.get_num_of_files(),\n            \"num_max_findings\": get_settings().pr_reviewer.num_max_findings,\n            \"require_score\": get_settings().pr_reviewer.require_score_review,\n            \"require_tests\": get_settings().pr_reviewer.require_tests_review,\n            \"require_estimate_effort_to_review\": get_settings().pr_reviewer.require_estimate_effort_to_review,\n            \"require_estimate_contribution_time_cost\": get_settings().pr_reviewer.require_estimate_contribution_time_cost,\n            'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,\n            'require_security_review': get_settings().pr_reviewer.require_security_review,\n            'require_todo_scan': get_settings().pr_reviewer.get(\"require_todo_scan\", False),\n            'question_str': question_str,\n            'answer_str': answer_str,\n            \"extra_instructions\": get_settings().pr_reviewer.extra_instructions,\n            \"commit_messages_str\": self.git_provider.get_commit_messages(),\n            \"custom_labels\": \"\",\n            \"enable_custom_labels\": get_settings().config.enable_custom_labels,\n            \"is_ai_metadata\":  get_settings().get(\"config.enable_ai_metadata\", False),\n            \"related_tickets\": get_settings().get('related_tickets', []),\n            'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False),\n            \"date\": datetime.datetime.now().strftime('%Y-%m-%d'),\n        }\n\n        self.token_handler = TokenHandler(\n            self.git_provider.pr,\n            self.vars,\n            
get_settings().pr_review_prompt.system,\n            get_settings().pr_review_prompt.user\n        )\n\n    def parse_incremental(self, args: List[str]):\n        is_incremental = False\n        if args and len(args) >= 1:\n            arg = args[0]\n            if arg == \"-i\":\n                is_incremental = True\n        incremental = IncrementalPR(is_incremental)\n        return incremental\n\n    async def run(self) -> None:\n        try:\n            if not self.git_provider.get_files():\n                get_logger().info(f\"PR has no files: {self.pr_url}, skipping review\")\n                return None\n\n            if self.incremental.is_incremental and not self._can_run_incremental_review():\n                return None\n\n            # if isinstance(self.args, list) and self.args and self.args[0] == 'auto_approve':\n            #     get_logger().info(f'Auto approve flow PR: {self.pr_url} ...')\n            #     self.auto_approve_logic()\n            #     return None\n\n            get_logger().info(f'Reviewing PR: {self.pr_url} ...')\n            relevant_configs = {'pr_reviewer': dict(get_settings().pr_reviewer),\n                                'config': dict(get_settings().config)}\n            get_logger().debug(\"Relevant configs\", artifacts=relevant_configs)\n\n            # ticket extraction if exists\n            await extract_and_cache_pr_tickets(self.git_provider, self.vars)\n\n            if self.incremental.is_incremental and hasattr(self.git_provider, \"unreviewed_files_set\") and not self.git_provider.unreviewed_files_set:\n                get_logger().info(f\"Incremental review is enabled for {self.pr_url} but there are no new files\")\n                previous_review_url = \"\"\n                if hasattr(self.git_provider, \"previous_review\"):\n                    previous_review_url = self.git_provider.previous_review.html_url\n                if get_settings().config.publish_output:\n                    
self.git_provider.publish_comment(f\"Incremental Review Skipped\\n\"\n                                    f\"No files were changed since the [previous PR Review]({previous_review_url})\")\n                return None\n\n            if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False):\n                self.git_provider.publish_comment(\"Preparing review...\", is_temporary=True)\n\n            await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)\n            if not self.prediction:\n                self.git_provider.remove_initial_comment()\n                return None\n\n            pr_review = self._prepare_pr_review()\n            get_logger().debug(f\"PR output\", artifact=pr_review)\n\n            should_publish = get_settings().config.publish_output and self._should_publish_review_no_suggestions(pr_review)\n            if not should_publish:\n                reason = \"Review output is not published\"\n                if get_settings().config.publish_output:\n                    reason += \": no major issues detected.\"\n                get_logger().info(reason)\n                get_settings().data = {\"artifact\": pr_review}\n                return\n\n            # publish the review\n            if get_settings().pr_reviewer.persistent_comment and not self.incremental.is_incremental:\n                final_update_message = get_settings().pr_reviewer.final_update_message\n                self.git_provider.publish_persistent_comment(pr_review,\n                                                            initial_header=f\"{PRReviewHeader.REGULAR.value} 🔍\",\n                                                            update_header=True,\n                                                            final_update_message=final_update_message, )\n            else:\n                self.git_provider.publish_comment(pr_review)\n\n            self.git_provider.remove_initial_comment()\n  
      except Exception as e:\n            get_logger().error(f\"Failed to review PR: {e}\")\n\n    def _should_publish_review_no_suggestions(self, pr_review: str) -> bool:\n        return get_settings().pr_reviewer.get('publish_output_no_suggestions', True) or \"No major issues detected\" not in pr_review\n\n    async def _prepare_prediction(self, model: str) -> None:\n        self.patches_diff = get_pr_diff(self.git_provider,\n                                        self.token_handler,\n                                        model,\n                                        add_line_numbers_to_hunks=True,\n                                        disable_extra_lines=False,)\n\n        if self.patches_diff:\n            get_logger().debug(f\"PR diff\", diff=self.patches_diff)\n            self.prediction = await self._get_prediction(model)\n        else:\n            get_logger().warning(f\"Empty diff for PR: {self.pr_url}\")\n            self.prediction = None\n\n    async def _get_prediction(self, model: str) -> str:\n        \"\"\"\n        Generate an AI prediction for the pull request review.\n\n        Args:\n            model: A string representing the AI model to be used for the prediction.\n\n        Returns:\n            A string representing the AI prediction for the pull request review.\n        \"\"\"\n        variables = copy.deepcopy(self.vars)\n        variables[\"diff\"] = self.patches_diff  # update diff\n\n        environment = Environment(undefined=StrictUndefined)\n        system_prompt = environment.from_string(get_settings().pr_review_prompt.system).render(variables)\n        user_prompt = environment.from_string(get_settings().pr_review_prompt.user).render(variables)\n\n        response, finish_reason = await self.ai_handler.chat_completion(\n            model=model,\n            temperature=get_settings().config.temperature,\n            system=system_prompt,\n            user=user_prompt\n        )\n\n        return response\n\n    def 
_prepare_pr_review(self) -> str:\n        \"\"\"\n        Prepare the PR review by processing the AI prediction and generating a markdown-formatted text that summarizes\n        the feedback.\n        \"\"\"\n        first_key = 'review'\n        last_key = 'security_concerns'\n        data = load_yaml(self.prediction.strip(),\n                         keys_fix_yaml=[\"ticket_compliance_check\", \"estimated_effort_to_review_[1-5]:\", \"security_concerns:\", \"key_issues_to_review:\",\n                                        \"relevant_file:\", \"relevant_line:\", \"suggestion:\"],\n                         first_key=first_key, last_key=last_key)\n        github_action_output(data, 'review')\n\n        if 'review' not in data:\n            get_logger().exception(\"Failed to parse review data\", artifact={\"data\": data})\n            return \"\"\n\n        # move data['review'] 'key_issues_to_review' key to the end of the dictionary\n        if 'key_issues_to_review' in data['review']:\n            key_issues_to_review = data['review'].pop('key_issues_to_review')\n            data['review']['key_issues_to_review'] = key_issues_to_review\n\n        incremental_review_markdown_text = None\n        # Add incremental review section\n        if self.incremental.is_incremental:\n            last_commit_url = f\"{self.git_provider.get_pr_url()}/commits/\" \\\n                              f\"{self.git_provider.incremental.first_new_commit_sha}\"\n            incremental_review_markdown_text = f\"Starting from commit {last_commit_url}\"\n\n        markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported(\"gfm_markdown\"),\n                                            incremental_review_markdown_text,\n                                               git_provider=self.git_provider,\n                                               files=self.git_provider.get_diff_files())\n\n        # Add help text if gfm_markdown is supported\n        if 
self.git_provider.is_supported(\"gfm_markdown\") and get_settings().pr_reviewer.enable_help_text:\n            markdown_text += \"<hr>\\n\\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \\n\\n\"\n            markdown_text += HelpMessage.get_review_usage_guide()\n            markdown_text += \"\\n</details>\\n\"\n\n        # Output the relevant configurations if enabled\n        if get_settings().get('config', {}).get('output_relevant_configurations', False):\n            markdown_text += show_relevant_configurations(relevant_section='pr_reviewer')\n\n        # Add custom labels from the review prediction (effort, security)\n        self.set_review_labels(data)\n\n        if markdown_text == None or len(markdown_text) == 0:\n            markdown_text = \"\"\n\n        return markdown_text\n\n    def _get_user_answers(self) -> Tuple[str, str]:\n        \"\"\"\n        Retrieves the question and answer strings from the discussion messages related to a pull request.\n\n        Returns:\n            A tuple containing the question and answer strings.\n        \"\"\"\n        question_str = \"\"\n        answer_str = \"\"\n\n        if self.is_answer:\n            discussion_messages = self.git_provider.get_issue_comments()\n\n            for message in discussion_messages.reversed:\n                if \"Questions to better understand the PR:\" in message.body:\n                    question_str = message.body\n                elif '/answer' in message.body:\n                    answer_str = message.body\n\n                if answer_str and question_str:\n                    break\n\n        return question_str, answer_str\n\n    def _get_previous_review_comment(self):\n        \"\"\"\n        Get the previous review comment if it exists.\n        \"\"\"\n        try:\n            if hasattr(self.git_provider, \"get_previous_review\"):\n                return self.git_provider.get_previous_review(\n                    full=not 
self.incremental.is_incremental,\n                    incremental=self.incremental.is_incremental,\n                )\n        except Exception as e:\n            get_logger().exception(f\"Failed to get previous review comment, error: {e}\")\n\n    def _remove_previous_review_comment(self, comment):\n        \"\"\"\n        Remove the previous review comment if it exists.\n        \"\"\"\n        try:\n            if comment:\n                self.git_provider.remove_comment(comment)\n        except Exception as e:\n            get_logger().exception(f\"Failed to remove previous review comment, error: {e}\")\n\n    def _can_run_incremental_review(self) -> bool:\n        \"\"\"\n        Checks if we can run incremental review according the various configurations and previous review.\n        \"\"\"\n        # checking if running is auto mode but there are no new commits\n        if self.is_auto and not self.incremental.first_new_commit_sha:\n            get_logger().info(f\"Incremental review is enabled for {self.pr_url} but there are no new commits\")\n            return False\n\n        if not hasattr(self.git_provider, \"get_incremental_commits\"):\n            get_logger().info(f\"Incremental review is not supported for {get_settings().config.git_provider}\")\n            return False\n        # checking if there are enough commits to start the review\n        num_new_commits = len(self.incremental.commits_range)\n        num_commits_threshold = get_settings().pr_reviewer.minimal_commits_for_incremental_review\n        not_enough_commits = num_new_commits < num_commits_threshold\n        # checking if the commits are not too recent to start the review\n        recent_commits_threshold = datetime.datetime.now() - datetime.timedelta(\n            minutes=get_settings().pr_reviewer.minimal_minutes_for_incremental_review\n        )\n        last_seen_commit_date = (\n            self.incremental.last_seen_commit.commit.author.date if 
self.incremental.last_seen_commit else None\n        )\n        all_commits_too_recent = (\n            last_seen_commit_date > recent_commits_threshold if self.incremental.last_seen_commit else False\n        )\n        # check all the thresholds or just one to start the review\n        condition = any if get_settings().pr_reviewer.require_all_thresholds_for_incremental_review else all\n        if condition((not_enough_commits, all_commits_too_recent)):\n            get_logger().info(\n                f\"Incremental review is enabled for {self.pr_url} but didn't pass the threshold check to run:\"\n                f\"\\n* Number of new commits = {num_new_commits} (threshold is {num_commits_threshold})\"\n                f\"\\n* Last seen commit date = {last_seen_commit_date} (threshold is {recent_commits_threshold})\"\n            )\n            return False\n        return True\n\n    def set_review_labels(self, data):\n        if not get_settings().config.publish_output:\n            return\n\n        if not get_settings().pr_reviewer.require_estimate_effort_to_review:\n            get_settings().pr_reviewer.enable_review_labels_effort = False # we did not generate this output\n        if not get_settings().pr_reviewer.require_security_review:\n            get_settings().pr_reviewer.enable_review_labels_security = False # we did not generate this output\n\n        if (get_settings().pr_reviewer.enable_review_labels_security or\n                get_settings().pr_reviewer.enable_review_labels_effort):\n            try:\n                review_labels = []\n                if get_settings().pr_reviewer.enable_review_labels_effort:\n                    estimated_effort = data['review']['estimated_effort_to_review_[1-5]']\n                    estimated_effort_number = 0\n                    if isinstance(estimated_effort, str):\n                        try:\n                            estimated_effort_number = int(estimated_effort.split(',')[0])\n                      
  except ValueError:\n                            get_logger().warning(f\"Invalid estimated_effort value: {estimated_effort}\")\n                    elif isinstance(estimated_effort, int):\n                        estimated_effort_number = estimated_effort\n                    else:\n                        get_logger().warning(f\"Unexpected type for estimated_effort: {type(estimated_effort)}\")\n                    if 1 <= estimated_effort_number <= 5:  # 1, because ...\n                        review_labels.append(f'Review effort {estimated_effort_number}/5')\n                if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review:\n                    security_concerns = data['review']['security_concerns']  # yes, because ...\n                    security_concerns_bool = 'yes' in security_concerns.lower() or 'true' in security_concerns.lower()\n                    if security_concerns_bool:\n                        review_labels.append('Possible security concern')\n\n                current_labels = self.git_provider.get_pr_labels(update=True)\n                if not current_labels:\n                    current_labels = []\n                get_logger().debug(f\"Current labels:\\n{current_labels}\")\n                if current_labels:\n                    current_labels_filtered = [label for label in current_labels if\n                                               not label.lower().startswith('review effort') and not label.lower().startswith(\n                                                   'possible security concern')]\n                else:\n                    current_labels_filtered = []\n                new_labels = review_labels + current_labels_filtered\n                if (current_labels or review_labels) and sorted(new_labels) != sorted(current_labels):\n                    get_logger().info(f\"Setting review labels:\\n{review_labels + current_labels_filtered}\")\n                    
self.git_provider.publish_labels(new_labels)\n                else:\n                    get_logger().info(f\"Review labels are already set:\\n{review_labels + current_labels_filtered}\")\n            except Exception as e:\n                get_logger().error(f\"Failed to set review labels, error: {e}\")\n\n    def auto_approve_logic(self):\n        \"\"\"\n        Auto-approve a pull request if it meets the conditions for auto-approval.\n        \"\"\"\n        if get_settings().config.enable_auto_approval:\n            is_auto_approved = self.git_provider.auto_approve()\n            if is_auto_approved:\n                get_logger().info(\"Auto-approved PR\")\n                self.git_provider.publish_comment(\"Auto-approved PR\")\n        else:\n            get_logger().info(\"Auto-approval option is disabled\")\n            self.git_provider.publish_comment(\"Auto-approval option for PR-Agent is disabled. \"\n                                              \"You can enable it via a [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/docs/REVIEW.md#auto-approval-1)\")\n"
  },
  {
    "path": "pr_agent/tools/pr_similar_issue.py",
    "content": "import time\nfrom enum import Enum\nfrom typing import List\n\nimport openai\nfrom pydantic import BaseModel, Field\n\nfrom pr_agent.algo import MAX_TOKENS\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import get_max_tokens\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider\nfrom pr_agent.log import get_logger\n\nMODEL = \"text-embedding-ada-002\"\n\n\nclass PRSimilarIssue:\n    def __init__(self, issue_url: str, ai_handler, args: list = None):\n        self.issue_url = issue_url\n        self.supported = get_settings().config.git_provider == \"github\"\n        if not self.supported:\n            return\n\n        self.cli_mode = get_settings().CONFIG.CLI_MODE\n        self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan\n        self.git_provider = get_git_provider()()\n        repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1])\n        self.git_provider.repo = repo_name\n        self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name)\n        self.token_handler = TokenHandler()\n        repo_obj = self.git_provider.repo_obj\n        repo_name_for_index = self.repo_name_for_index = repo_obj.full_name.lower().replace('/', '-').replace('_/', '-')\n        index_name = self.index_name = \"codium-ai-pr-agent-issues\"\n\n        if get_settings().pr_similar_issue.vectordb == \"pinecone\":\n            try:\n                import pandas as pd\n                import pinecone\n                from pinecone_datasets import Dataset, DatasetMetadata\n            except:\n                raise Exception(\"Please install 'pinecone' and 'pinecone_datasets' to use pinecone as vectordb\")\n            # assuming pinecone api key and environment are set in secrets file\n            try:\n                api_key = get_settings().pinecone.api_key\n                environment = 
get_settings().pinecone.environment\n            except Exception:\n                if not self.cli_mode:\n                    repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])\n                    issue_main = self.git_provider.repo_obj.get_issue(original_issue_number)\n                    issue_main.create_comment(\"Please set pinecone api key and environment in secrets file\")\n                raise Exception(\"Please set pinecone api key and environment in secrets file\")\n\n            # check if index exists, and if repo is already indexed\n            run_from_scratch = False\n            if run_from_scratch:  # for debugging\n                pinecone.init(api_key=api_key, environment=environment)\n                if index_name in pinecone.list_indexes():\n                    get_logger().info('Removing index...')\n                    pinecone.delete_index(index_name)\n                    get_logger().info('Done')\n\n            upsert = True\n            pinecone.init(api_key=api_key, environment=environment)\n            if not index_name in pinecone.list_indexes():\n                run_from_scratch = True\n                upsert = False\n            else:\n                if get_settings().pr_similar_issue.force_update_dataset:\n                    upsert = True\n                else:\n                    pinecone_index = pinecone.Index(index_name=index_name)\n                    res = pinecone_index.fetch([f\"example_issue_{repo_name_for_index}\"]).to_dict()\n                    if res[\"vectors\"]:\n                        upsert = False\n\n            if run_from_scratch or upsert:  # index the entire repo\n                get_logger().info('Indexing the entire repo...')\n\n                get_logger().info('Getting issues...')\n                issues = list(repo_obj.get_issues(state='all'))\n                get_logger().info('Done')\n                self._update_index_with_issues(issues, 
repo_name_for_index, upsert=upsert)\n            else:  # update index if needed\n                pinecone_index = pinecone.Index(index_name=index_name)\n                issues_to_update = []\n                issues_paginated_list = repo_obj.get_issues(state='all')\n                counter = 1\n                for issue in issues_paginated_list:\n                    if issue.pull_request:\n                        continue\n                    issue_str, comments, number = self._process_issue(issue)\n                    issue_key = f\"issue_{number}\"\n                    id = issue_key + \".\" + \"issue\"\n                    res = pinecone_index.fetch([id]).to_dict()\n                    is_new_issue = True\n                    for vector in res[\"vectors\"].values():\n                        if vector['metadata']['repo'] == repo_name_for_index:\n                            is_new_issue = False\n                            break\n                    if is_new_issue:\n                        counter += 1\n                        issues_to_update.append(issue)\n                    else:\n                        break\n\n                if issues_to_update:\n                    get_logger().info(f'Updating index with {counter} new issues...')\n                    self._update_index_with_issues(issues_to_update, repo_name_for_index, upsert=True)\n                else:\n                    get_logger().info('No new issues to update')\n\n        elif get_settings().pr_similar_issue.vectordb == \"lancedb\":\n            try:\n                import lancedb  # import lancedb only if needed\n            except:\n                raise Exception(\"Please install lancedb to use lancedb as vectordb\")\n            self.db = lancedb.connect(get_settings().lancedb.uri)\n            self.table = None\n\n            run_from_scratch = False\n            if run_from_scratch:  # for debugging\n                if index_name in self.db.table_names():\n                    
get_logger().info('Removing Table...')\n                    self.db.drop_table(index_name)\n                    get_logger().info('Done')\n\n            ingest = True\n            if index_name not in self.db.table_names():\n                run_from_scratch = True\n                ingest = False\n            else:\n                if get_settings().pr_similar_issue.force_update_dataset:\n                    ingest = True\n                else:\n                    self.table = self.db[index_name]\n                    res = self.table.search().limit(len(self.table)).where(f\"id='example_issue_{repo_name_for_index}'\").to_list()\n                    get_logger().info(\"result: \", res)\n                    if res[0].get(\"vector\"):\n                        ingest = False\n\n            if run_from_scratch or ingest:  # indexing the entire repo\n                get_logger().info('Indexing the entire repo...')\n\n                get_logger().info('Getting issues...')\n                issues = list(repo_obj.get_issues(state='all'))\n                get_logger().info('Done')\n\n                self._update_table_with_issues(issues, repo_name_for_index, ingest=ingest)\n            else:  # update table if needed\n                issues_to_update = []\n                issues_paginated_list = repo_obj.get_issues(state='all')\n                counter = 1\n                for issue in issues_paginated_list:\n                    if issue.pull_request:\n                        continue\n                    issue_str, comments, number = self._process_issue(issue)\n                    issue_key = f\"issue_{number}\"\n                    issue_id = issue_key + \".\" + \"issue\"\n                    res = self.table.search().limit(len(self.table)).where(f\"id='{issue_id}'\").to_list()\n                    is_new_issue = True\n                    for r in res:\n                        if r['metadata']['repo'] == repo_name_for_index:\n                            is_new_issue = 
False\n                            break\n                    if is_new_issue:\n                        counter += 1\n                        issues_to_update.append(issue)\n                    else:\n                        break\n\n                if issues_to_update:\n                    get_logger().info(f'Updating index with {counter} new issues...')\n                    self._update_table_with_issues(issues_to_update, repo_name_for_index, ingest=True)\n                else:\n                    get_logger().info('No new issues to update')\n\n        elif get_settings().pr_similar_issue.vectordb == \"qdrant\":\n            try:\n                import qdrant_client\n                from qdrant_client.models import (Distance, FieldCondition,\n                                                  Filter, MatchValue,\n                                                  PointStruct, VectorParams)\n            except Exception:\n                raise Exception(\"Please install qdrant-client to use qdrant as vectordb\")\n\n            api_key = None\n            url = None\n            try:\n                api_key = get_settings().qdrant.api_key\n                url = get_settings().qdrant.url\n            except Exception:\n                if not self.cli_mode:\n                    repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])\n                    issue_main = self.git_provider.repo_obj.get_issue(original_issue_number)\n                    issue_main.create_comment(\"Please set qdrant url and api key in secrets file\")\n                raise Exception(\"Please set qdrant url and api key in secrets file\")\n\n            self.qdrant = qdrant_client.QdrantClient(url=url, api_key=api_key)\n\n            run_from_scratch = False\n            ingest = True\n\n            if not self.qdrant.collection_exists(collection_name=self.index_name):\n                run_from_scratch = True\n                ingest = False\n        
        self.qdrant.create_collection(\n                    collection_name=self.index_name,\n                    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),\n                )\n            else:\n                if get_settings().pr_similar_issue.force_update_dataset:\n                    ingest = True\n                else:\n                    response = self.qdrant.count(\n                        collection_name=self.index_name,\n                        count_filter=Filter(must=[\n                            FieldCondition(key=\"metadata.repo\", match=MatchValue(value=repo_name_for_index)),\n                            FieldCondition(key=\"id\", match=MatchValue(value=f\"example_issue_{repo_name_for_index}\")),\n                        ]),\n                    )\n                    ingest = True if response.count == 0 else False\n\n            if run_from_scratch or ingest:\n                get_logger().info('Indexing the entire repo...')\n                get_logger().info('Getting issues...')\n                issues = list(repo_obj.get_issues(state='all'))\n                get_logger().info('Done')\n                self._update_qdrant_with_issues(issues, repo_name_for_index, ingest=ingest)\n            else:\n                issues_to_update = []\n                issues_paginated_list = repo_obj.get_issues(state='all')\n                counter = 1\n                for issue in issues_paginated_list:\n                    if issue.pull_request:\n                        continue\n                    issue_str, comments, number = self._process_issue(issue)\n                    issue_key = f\"issue_{number}\"\n                    point_id = issue_key + \".\" + \"issue\"\n                    response = self.qdrant.count(\n                        collection_name=self.index_name,\n                        count_filter=Filter(must=[\n                            FieldCondition(key=\"id\", match=MatchValue(value=point_id)),\n                         
   FieldCondition(key=\"metadata.repo\", match=MatchValue(value=repo_name_for_index)),\n                        ]),\n                    )\n                    if response.count == 0:\n                        counter += 1\n                        issues_to_update.append(issue)\n                    else:\n                        break\n\n                if issues_to_update:\n                    get_logger().info(f'Updating index with {counter} new issues...')\n                    self._update_qdrant_with_issues(issues_to_update, repo_name_for_index, ingest=True)\n                else:\n                    get_logger().info('No new issues to update')\n\n\n    async def run(self):\n        if not self.supported:\n            message = \"The /similar_issue tool is currently supported only for GitHub.\"\n            if get_settings().config.publish_output:\n                try:\n                    from pr_agent.git_providers import get_git_provider_with_context\n\n                    provider = get_git_provider_with_context(self.issue_url)\n                    provider.publish_comment(message)\n                except Exception as e:\n                    get_logger().warning(\n                        \"Failed to publish /similar_issue unsupported message\",\n                        artifact={\"error\": str(e)},\n                    )\n            return \"\"\n        get_logger().info('Getting issue...')\n        repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])\n        issue_main = self.git_provider.repo_obj.get_issue(original_issue_number)\n        issue_str, comments, number = self._process_issue(issue_main)\n        openai.api_key = get_settings().openai.key\n        get_logger().info('Done')\n\n        get_logger().info('Querying...')\n        res = openai.Embedding.create(input=[issue_str], engine=MODEL)\n        embeds = [record['embedding'] for record in res['data']]\n\n        relevant_issues_number_list = 
[]\n        relevant_comment_number_list = []\n        score_list = []\n\n        if get_settings().pr_similar_issue.vectordb == \"pinecone\":\n            pinecone_index = pinecone.Index(index_name=self.index_name)\n            res = pinecone_index.query(embeds[0],\n                                    top_k=5,\n                                    filter={\"repo\": self.repo_name_for_index},\n                                    include_metadata=True).to_dict()\n\n            for r in res['matches']:\n                # skip example issue\n                if 'example_issue_' in r[\"id\"]:\n                    continue\n\n                try:\n                    issue_number = int(r[\"id\"].split('.')[0].split('_')[-1])\n                except:\n                    get_logger().debug(f\"Failed to parse issue number from {r['id']}\")\n                    continue\n\n                if original_issue_number == issue_number:\n                    continue\n                if issue_number not in relevant_issues_number_list:\n                    relevant_issues_number_list.append(issue_number)\n                if 'comment' in r[\"id\"]:\n                    relevant_comment_number_list.append(int(r[\"id\"].split('.')[1].split('_')[-1]))\n                else:\n                    relevant_comment_number_list.append(-1)\n                score_list.append(str(\"{:.2f}\".format(r['score'])))\n            get_logger().info('Done')\n\n        elif get_settings().pr_similar_issue.vectordb == \"lancedb\":\n            res = self.table.search(embeds[0]).where(f\"metadata.repo='{self.repo_name_for_index}'\", prefilter=True).to_list()\n\n            for r in res:\n                # skip example issue\n                if 'example_issue_' in r[\"id\"]:\n                    continue\n\n                try:\n                    issue_number = int(r[\"id\"].split('.')[0].split('_')[-1])\n                except:\n                    get_logger().debug(f\"Failed to parse issue number from 
{r['id']}\")\n                    continue\n\n                if original_issue_number == issue_number:\n                    continue\n                if issue_number not in relevant_issues_number_list:\n                    relevant_issues_number_list.append(issue_number)\n\n                if 'comment' in r[\"id\"]:\n                    relevant_comment_number_list.append(int(r[\"id\"].split('.')[1].split('_')[-1]))\n                else:\n                    relevant_comment_number_list.append(-1)\n                score_list.append(str(\"{:.2f}\".format(1-r['_distance'])))\n            get_logger().info('Done')\n\n        elif get_settings().pr_similar_issue.vectordb == \"qdrant\":\n            from qdrant_client.models import FieldCondition, Filter, MatchValue\n            res = self.qdrant.search(\n                collection_name=self.index_name,\n                query_vector=embeds[0],\n                limit=5,\n                query_filter=Filter(must=[FieldCondition(key=\"metadata.repo\", match=MatchValue(value=self.repo_name_for_index))]),\n                with_payload=True,\n            )\n\n            for r in res:\n                rid = r.payload.get(\"id\", \"\")\n                if 'example_issue_' in rid:\n                    continue\n                try:\n                    issue_number = int(rid.split('.')[0].split('_')[-1])\n                except Exception:\n                    get_logger().debug(f\"Failed to parse issue number from {rid}\")\n                    continue\n                if original_issue_number == issue_number:\n                    continue\n                if issue_number not in relevant_issues_number_list:\n                    relevant_issues_number_list.append(issue_number)\n                if 'comment' in rid:\n                    relevant_comment_number_list.append(int(rid.split('.')[1].split('_')[-1]))\n                else:\n                    relevant_comment_number_list.append(-1)\n                
score_list.append(str(\"{:.2f}\".format(r.score)))\n            get_logger().info('Done')\n\n        get_logger().info('Publishing response...')\n        similar_issues_str = \"### Similar Issues\\n___\\n\\n\"\n\n        for i, issue_number_similar in enumerate(relevant_issues_number_list):\n            issue = self.git_provider.repo_obj.get_issue(issue_number_similar)\n            title = issue.title\n            url = issue.html_url\n            if relevant_comment_number_list[i] != -1:\n                url = list(issue.get_comments())[relevant_comment_number_list[i]].html_url\n            similar_issues_str += f\"{i + 1}. **[{title}]({url})** (score={score_list[i]})\\n\\n\"\n        if get_settings().config.publish_output:\n            response = issue_main.create_comment(similar_issues_str)\n        get_logger().info(similar_issues_str)\n        get_logger().info('Done')\n\n    def _process_issue(self, issue):\n        header = issue.title\n        body = issue.body\n        number = issue.number\n        if get_settings().pr_similar_issue.skip_comments:\n            comments = []\n        else:\n            comments = list(issue.get_comments())\n        issue_str = f\"Issue Header: \\\"{header}\\\"\\n\\nIssue Body:\\n{body}\"\n        return issue_str, comments, number\n\n    def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=False):\n        get_logger().info('Processing issues...')\n        corpus = Corpus()\n        example_issue_record = Record(\n            id=f\"example_issue_{repo_name_for_index}\",\n            text=\"example_issue\",\n            metadata=Metadata(repo=repo_name_for_index)\n        )\n        corpus.append(example_issue_record)\n\n        counter = 0\n        for issue in issues_list:\n            if issue.pull_request:\n                continue\n\n            counter += 1\n            if counter % 100 == 0:\n                get_logger().info(f\"Scanned {counter} issues\")\n            if counter >= 
self.max_issues_to_scan:\n                get_logger().info(f\"Scanned {self.max_issues_to_scan} issues, stopping\")\n                break\n\n            issue_str, comments, number = self._process_issue(issue)\n            issue_key = f\"issue_{number}\"\n            username = issue.user.login\n            created_at = str(issue.created_at)\n            if len(issue_str) < 8000 or \\\n                    self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL):  # fast reject first\n                issue_record = Record(\n                    id=issue_key + \".\" + \"issue\",\n                    text=issue_str,\n                    metadata=Metadata(repo=repo_name_for_index,\n                                      username=username,\n                                      created_at=created_at,\n                                      level=IssueLevel.ISSUE)\n                )\n                corpus.append(issue_record)\n                if comments:\n                    for j, comment in enumerate(comments):\n                        comment_body = comment.body\n                        num_words_comment = len(comment_body.split())\n                        if num_words_comment < 10 or not isinstance(comment_body, str):\n                            continue\n\n                        if len(comment_body) < 8000 or \\\n                                self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]:\n                            comment_record = Record(\n                                id=issue_key + \".comment_\" + str(j + 1),\n                                text=comment_body,\n                                metadata=Metadata(repo=repo_name_for_index,\n                                                  username=username,  # use issue username for all comments\n                                                  created_at=created_at,\n                                                  level=IssueLevel.COMMENT)\n                            )\n   
                         corpus.append(comment_record)\n        df = pd.DataFrame(corpus.model_dump()[\"documents\"])\n        get_logger().info('Done')\n\n        get_logger().info('Embedding...')\n        openai.api_key = get_settings().openai.key\n        list_to_encode = list(df[\"text\"].values)\n        try:\n            res = openai.Embedding.create(input=list_to_encode, engine=MODEL)\n            embeds = [record['embedding'] for record in res['data']]\n        except:\n            embeds = []\n            get_logger().error('Failed to embed entire list, embedding one by one...')\n            for i, text in enumerate(list_to_encode):\n                try:\n                    res = openai.Embedding.create(input=[text], engine=MODEL)\n                    embeds.append(res['data'][0]['embedding'])\n                except:\n                    embeds.append([0] * 1536)\n        df[\"values\"] = embeds\n        meta = DatasetMetadata.empty()\n        meta.dense_model.dimension = len(embeds[0])\n        ds = Dataset.from_pandas(df, meta)\n        get_logger().info('Done')\n\n        api_key = get_settings().pinecone.api_key\n        environment = get_settings().pinecone.environment\n        if not upsert:\n            get_logger().info('Creating index from scratch...')\n            ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment)\n            time.sleep(15)  # wait for pinecone to finalize indexing before querying\n        else:\n            get_logger().info('Upserting index...')\n            namespace = \"\"\n            batch_size: int = 100\n            concurrency: int = 10\n            pinecone.init(api_key=api_key, environment=environment)\n            ds._upsert_to_index(self.index_name, namespace, batch_size, concurrency)\n            time.sleep(5)  # wait for pinecone to finalize upserting before querying\n        get_logger().info('Done')\n\n    def _update_table_with_issues(self, issues_list, repo_name_for_index, 
ingest=False):\n        get_logger().info('Processing issues...')\n\n        corpus = Corpus()\n        example_issue_record = Record(\n            id=f\"example_issue_{repo_name_for_index}\",\n            text=\"example_issue\",\n            metadata=Metadata(repo=repo_name_for_index)\n        )\n        corpus.append(example_issue_record)\n\n        counter = 0\n        for issue in issues_list:\n            if issue.pull_request:\n                continue\n\n            counter += 1\n            if counter % 100 == 0:\n                get_logger().info(f\"Scanned {counter} issues\")\n            if counter >= self.max_issues_to_scan:\n                get_logger().info(f\"Scanned {self.max_issues_to_scan} issues, stopping\")\n                break\n\n            issue_str, comments, number = self._process_issue(issue)\n            issue_key = f\"issue_{number}\"\n            username = issue.user.login\n            created_at = str(issue.created_at)\n            if len(issue_str) < 8000 or \\\n                    self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL):  # fast reject first\n                issue_record = Record(\n                    id=issue_key + \".\" + \"issue\",\n                    text=issue_str,\n                    metadata=Metadata(repo=repo_name_for_index,\n                                        username=username,\n                                        created_at=created_at,\n                                        level=IssueLevel.ISSUE)\n                )\n                corpus.append(issue_record)\n                if comments:\n                    for j, comment in enumerate(comments):\n                        comment_body = comment.body\n                        num_words_comment = len(comment_body.split())\n                        if num_words_comment < 10 or not isinstance(comment_body, str):\n                            continue\n\n                        if len(comment_body) < 8000 or \\\n                           
     self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]:\n                            comment_record = Record(\n                                id=issue_key + \".comment_\" + str(j + 1),\n                                text=comment_body,\n                                metadata=Metadata(repo=repo_name_for_index,\n                                                    username=username,  # use issue username for all comments\n                                                    created_at=created_at,\n                                                    level=IssueLevel.COMMENT)\n                            )\n                            corpus.append(comment_record)\n        df = pd.DataFrame(corpus.model_dump()[\"documents\"])\n        get_logger().info('Done')\n\n        get_logger().info('Embedding...')\n        openai.api_key = get_settings().openai.key\n        list_to_encode = list(df[\"text\"].values)\n        try:\n            res = openai.Embedding.create(input=list_to_encode, engine=MODEL)\n            embeds = [record['embedding'] for record in res['data']]\n        except:\n            embeds = []\n            get_logger().error('Failed to embed entire list, embedding one by one...')\n            for i, text in enumerate(list_to_encode):\n                try:\n                    res = openai.Embedding.create(input=[text], engine=MODEL)\n                    embeds.append(res['data'][0]['embedding'])\n                except:\n                    embeds.append([0] * 1536)\n        df[\"vector\"] = embeds\n        get_logger().info('Done')\n\n        if not ingest:\n            get_logger().info('Creating table from scratch...')\n            self.table = self.db.create_table(self.index_name, data=df, mode=\"overwrite\")\n            time.sleep(15)\n        else:\n            get_logger().info('Ingesting in Table...')\n            if self.index_name not in self.db.table_names():\n                self.table.add(df)\n            else:\n            
    get_logger().info(f\"Table {self.index_name} doesn't exists!\")\n            time.sleep(5)\n        get_logger().info('Done')\n\n\n    def _update_qdrant_with_issues(self, issues_list, repo_name_for_index, ingest=False):\n        try:\n            import uuid\n\n            import pandas as pd\n            from qdrant_client.models import PointStruct\n        except Exception:\n            raise\n\n        get_logger().info('Processing issues...')\n        corpus = Corpus()\n        example_issue_record = Record(\n            id=f\"example_issue_{repo_name_for_index}\",\n            text=\"example_issue\",\n            metadata=Metadata(repo=repo_name_for_index)\n        )\n        corpus.append(example_issue_record)\n\n        counter = 0\n        for issue in issues_list:\n            if issue.pull_request:\n                continue\n\n            counter += 1\n            if counter % 100 == 0:\n                get_logger().info(f\"Scanned {counter} issues\")\n            if counter >= self.max_issues_to_scan:\n                get_logger().info(f\"Scanned {self.max_issues_to_scan} issues, stopping\")\n                break\n\n            issue_str, comments, number = self._process_issue(issue)\n            issue_key = f\"issue_{number}\"\n            username = issue.user.login\n            created_at = str(issue.created_at)\n            if len(issue_str) < 8000 or \\\n                    self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL):\n                issue_record = Record(\n                    id=issue_key + \".\" + \"issue\",\n                    text=issue_str,\n                    metadata=Metadata(repo=repo_name_for_index,\n                                      username=username,\n                                      created_at=created_at,\n                                      level=IssueLevel.ISSUE)\n                )\n                corpus.append(issue_record)\n                if comments:\n                    for j, comment in 
enumerate(comments):\n                        comment_body = comment.body\n                        num_words_comment = len(comment_body.split())\n                        if num_words_comment < 10 or not isinstance(comment_body, str):\n                            continue\n\n                        if len(comment_body) < 8000 or \\\n                                self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]:\n                            comment_record = Record(\n                                id=issue_key + \".comment_\" + str(j + 1),\n                                text=comment_body,\n                                metadata=Metadata(repo=repo_name_for_index,\n                                                  username=username,\n                                                  created_at=created_at,\n                                                  level=IssueLevel.COMMENT)\n                            )\n                            corpus.append(comment_record)\n\n        df = pd.DataFrame(corpus.model_dump()[\"documents\"])\n        get_logger().info('Done')\n\n        get_logger().info('Embedding...')\n        openai.api_key = get_settings().openai.key\n        list_to_encode = list(df[\"text\"].values)\n        try:\n            res = openai.Embedding.create(input=list_to_encode, engine=MODEL)\n            embeds = [record['embedding'] for record in res['data']]\n        except Exception:\n            embeds = []\n            get_logger().error('Failed to embed entire list, embedding one by one...')\n            for i, text in enumerate(list_to_encode):\n                try:\n                    res = openai.Embedding.create(input=[text], engine=MODEL)\n                    embeds.append(res['data'][0]['embedding'])\n                except Exception:\n                    embeds.append([0] * 1536)\n        df[\"vector\"] = embeds\n        get_logger().info('Done')\n\n        get_logger().info('Upserting into Qdrant...')\n        points = 
[]\n        for row in df.to_dict(orient=\"records\"):\n            points.append(\n                PointStruct(id=uuid.uuid5(uuid.NAMESPACE_DNS, row[\"id\"]).hex, vector=row[\"vector\"], payload={\"id\": row[\"id\"], \"text\": row[\"text\"], \"metadata\": row[\"metadata\"]})\n            )\n        self.qdrant.upsert(collection_name=self.index_name, points=points)\n        get_logger().info('Done')\n\n\nclass IssueLevel(str, Enum):\n    ISSUE = \"issue\"\n    COMMENT = \"comment\"\n\n\nclass Metadata(BaseModel):\n    repo: str\n    username: str = Field(default=\"@codium\")\n    created_at: str = Field(default=\"01-01-1970 00:00:00.00000\")\n    level: IssueLevel = Field(default=IssueLevel.ISSUE)\n\n    class Config:\n        use_enum_values = True\n\n\nclass Record(BaseModel):\n    id: str\n    text: str\n    metadata: Metadata\n\n\nclass Corpus(BaseModel):\n    documents: List[Record] = Field(default=[])\n\n    def append(self, r: Record):\n        self.documents.append(r)\n"
  },
  {
    "path": "pr_agent/tools/pr_update_changelog.py",
    "content": "import copy\nfrom datetime import date\nfrom functools import partial\nfrom time import sleep\nfrom typing import Tuple\n\nfrom jinja2 import Environment, StrictUndefined\n\nfrom pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\nfrom pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import ModelType, show_relevant_configurations\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import GithubProvider, get_git_provider\nfrom pr_agent.git_providers.git_provider import get_main_pr_language\nfrom pr_agent.log import get_logger\n\nCHANGELOG_LINES = 50\n\n\nclass PRUpdateChangelog:\n    def __init__(self, pr_url: str, cli_mode=False, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):\n\n        self.git_provider = get_git_provider()(pr_url)\n        self.main_language = get_main_pr_language(\n            self.git_provider.get_languages(), self.git_provider.get_files()\n        )\n        self.commit_changelog = get_settings().pr_update_changelog.push_changelog_changes\n        self._get_changelog_file()  # self.changelog_file_str\n\n        self.ai_handler = ai_handler()\n        self.ai_handler.main_pr_language = self.main_language\n\n        self.patches_diff = None\n        self.prediction = None\n        self.cli_mode = cli_mode\n        self.vars = {\n            \"title\": self.git_provider.pr.title,\n            \"branch\": self.git_provider.get_pr_branch(),\n            \"description\": self.git_provider.get_pr_description(),\n            \"language\": self.main_language,\n            \"diff\": \"\",  # empty diff for initial calculation\n            \"pr_link\": \"\",\n            \"changelog_file_str\": self.changelog_file_str,\n            \"today\": date.today(),\n            \"extra_instructions\": 
get_settings().pr_update_changelog.extra_instructions,\n            \"commit_messages_str\": self.git_provider.get_commit_messages(),\n        }\n        self.token_handler = TokenHandler(self.git_provider.pr,\n                                          self.vars,\n                                          get_settings().pr_update_changelog_prompt.system,\n                                          get_settings().pr_update_changelog_prompt.user)\n\n    async def run(self):\n        get_logger().info('Updating the changelog...')\n        relevant_configs = {'pr_update_changelog': dict(get_settings().pr_update_changelog),\n                            'config': dict(get_settings().config)}\n        get_logger().debug(\"Relevant configs\", artifacts=relevant_configs)\n\n        # check if the git provider supports pushing changelog changes\n        if get_settings().pr_update_changelog.push_changelog_changes and not hasattr(\n            self.git_provider, \"create_or_update_pr_file\"\n        ):\n            get_logger().error(\n                \"Pushing changelog changes is not currently supported for this code platform\"\n            )\n            if get_settings().config.publish_output:\n                self.git_provider.publish_comment(\n                    \"Pushing changelog changes is not currently supported for this code platform\"\n                )\n            return\n\n        if get_settings().config.publish_output:\n            self.git_provider.publish_comment(\"Preparing changelog updates...\", is_temporary=True)\n\n        await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.WEAK)\n\n        new_file_content, answer = self._prepare_changelog_update()\n\n        # Output the relevant configurations if enabled\n        if get_settings().get('config', {}).get('output_relevant_configurations', False):\n            answer += show_relevant_configurations(relevant_section='pr_update_changelog')\n\n        get_logger().debug(f\"PR 
output\", artifact=answer)\n\n        if get_settings().config.publish_output:\n            self.git_provider.remove_initial_comment()\n            if self.commit_changelog:\n                self._push_changelog_update(new_file_content, answer)\n            else:\n                self.git_provider.publish_comment(f\"**Changelog updates:** 🔄\\n\\n{answer}\")\n\n    async def _prepare_prediction(self, model: str):\n        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)\n        if self.patches_diff:\n            get_logger().debug(f\"PR diff\", artifact=self.patches_diff)\n            self.prediction = await self._get_prediction(model)\n        else:\n            get_logger().error(f\"Error getting PR diff\")\n            self.prediction = \"\"\n\n    async def _get_prediction(self, model: str):\n        variables = copy.deepcopy(self.vars)\n        variables[\"diff\"] = self.patches_diff  # update diff\n        if get_settings().pr_update_changelog.add_pr_link:\n            variables[\"pr_link\"] = self.git_provider.get_pr_url()\n        environment = Environment(undefined=StrictUndefined)\n        system_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.system).render(variables)\n        user_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.user).render(variables)\n        response, finish_reason = await self.ai_handler.chat_completion(\n            model=model, system=system_prompt, user=user_prompt, temperature=get_settings().config.temperature)\n\n        # post-process the response\n        response = response.strip()\n        if not response:\n            return \"\"\n        if response.startswith(\"```\"):\n            response_lines = response.splitlines()\n            response_lines = response_lines[1:]\n            response = \"\\n\".join(response_lines)\n        response = response.strip(\"`\")\n        return response\n\n    def _prepare_changelog_update(self) -> 
Tuple[str, str]:\n        answer = self.prediction.strip().strip(\"```\").strip()  # noqa B005\n        if hasattr(self, \"changelog_file\"):\n            existing_content = self.changelog_file\n        else:\n            existing_content = \"\"\n        \n        if existing_content:\n            new_file_content = answer + \"\\n\\n\" + self.changelog_file\n        else:\n            new_file_content = answer\n\n        if not self.commit_changelog:\n            answer += \"\\n\\n\\n>to commit the new content to the CHANGELOG.md file, please type:\" \\\n                      \"\\n>'/update_changelog --pr_update_changelog.push_changelog_changes=true'\\n\"\n\n        return new_file_content, answer\n\n    def _push_changelog_update(self, new_file_content, answer):\n        if get_settings().pr_update_changelog.get(\"skip_ci_on_push\", True):\n            commit_message = \"[skip ci] Update CHANGELOG.md\"\n        else:\n            commit_message = \"Update CHANGELOG.md\"\n        self.git_provider.create_or_update_pr_file(\n            file_path=\"CHANGELOG.md\",\n            branch=self.git_provider.get_pr_branch(),\n            contents=new_file_content,\n            message=commit_message,\n        )\n\n        sleep(5)  # wait for the file to be updated\n        try:\n            if get_settings().config.git_provider == \"github\":\n                last_commit_id = list(self.git_provider.pr.get_commits())[-1]\n                d = dict(\n                    body=\"CHANGELOG.md update\",\n                    path=\"CHANGELOG.md\",\n                    line=max(2, len(answer.splitlines())),\n                    start_line=1,\n                )\n                self.git_provider.pr.create_review(commit=last_commit_id, comments=[d])\n        except Exception:\n            # we can't create a review for some reason, let's just publish a comment\n            self.git_provider.publish_comment(f\"**Changelog updates: 🔄**\\n\\n{answer}\")\n\n    def 
_get_default_changelog(self):\n        example_changelog = \\\n\"\"\"\nExample:\n## <current_date>\n\n### Added\n...\n### Changed\n...\n### Fixed\n...\n\"\"\"\n        return example_changelog\n\n    def _get_changelog_file(self):\n        try:\n            self.changelog_file = self.git_provider.get_pr_file_content(\n                \"CHANGELOG.md\", self.git_provider.get_pr_branch()\n            )\n            \n            if isinstance(self.changelog_file, bytes):\n                self.changelog_file = self.changelog_file.decode('utf-8')\n            \n            changelog_file_lines = self.changelog_file.splitlines()\n            changelog_file_lines = changelog_file_lines[:CHANGELOG_LINES]\n            self.changelog_file_str = \"\\n\".join(changelog_file_lines)\n        except Exception as e:\n            get_logger().warning(f\"Error getting changelog file: {e}\")\n            self.changelog_file_str = \"\"\n            self.changelog_file = \"\"\n            return\n\n        if not self.changelog_file_str:\n            self.changelog_file_str = self._get_default_changelog()\n"
  },
  {
    "path": "pr_agent/tools/ticket_pr_compliance_check.py",
    "content": "import re\nimport traceback\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import GithubProvider\nfrom pr_agent.git_providers import AzureDevopsProvider\nfrom pr_agent.log import get_logger\n\n# Compile the regex pattern once, outside the function\nGITHUB_TICKET_PATTERN = re.compile(\n     r'(https://github[^/]+/[^/]+/[^/]+/issues/\\d+)|(\\b(\\w+)/(\\w+)#(\\d+)\\b)|(#\\d+)'\n)\n# Option A: issue number at start of branch or after /, followed by - or end (e.g. feature/1-test-issue, 123-fix)\nBRANCH_ISSUE_PATTERN = re.compile(r\"(?:^|/)(\\d{1,6})(?=-|$)\")\n\ndef find_jira_tickets(text):\n    # Regular expression patterns for JIRA tickets\n    patterns = [\n        r'\\b[A-Z]{2,10}-\\d{1,7}\\b',  # Standard JIRA ticket format (e.g., PROJ-123)\n        r'(?:https?://[^\\s/]+/browse/)?([A-Z]{2,10}-\\d{1,7})\\b'  # JIRA URL or just the ticket\n    ]\n\n    tickets = set()\n    for pattern in patterns:\n        matches = re.findall(pattern, text)\n        for match in matches:\n            if isinstance(match, tuple):\n                # If it's a tuple (from the URL pattern), take the last non-empty group\n                ticket = next((m for m in reversed(match) if m), None)\n            else:\n                ticket = match\n            if ticket:\n                tickets.add(ticket)\n\n    return list(tickets)\n\n\ndef extract_ticket_links_from_pr_description(pr_description, repo_path, base_url_html='https://github.com'):\n    \"\"\"\n    Extract all ticket links from PR description\n    \"\"\"\n    github_tickets = set()\n    try:\n        # Use the updated pattern to find matches\n        matches = GITHUB_TICKET_PATTERN.findall(pr_description)\n\n        for match in matches:\n            if match[0]:  # Full URL match\n                github_tickets.add(match[0])\n            elif match[1]:  # Shorthand notation match: owner/repo#issue_number\n                owner, repo, issue_number = match[2], match[3], match[4]\n 
               github_tickets.add(f'{base_url_html.strip(\"/\")}/{owner}/{repo}/issues/{issue_number}')\n            else:  # #123 format\n                issue_number = match[5][1:]  # remove #\n                if issue_number.isdigit() and len(issue_number) < 5 and repo_path:\n                    github_tickets.add(f'{base_url_html.strip(\"/\")}/{repo_path}/issues/{issue_number}')\n\n        if len(github_tickets) > 3:\n            get_logger().info(f\"Too many tickets found in PR description: {len(github_tickets)}\")\n            # Limit the number of tickets to 3\n            github_tickets = set(list(github_tickets)[:3])\n    except Exception as e:\n        get_logger().error(f\"Error extracting tickets error= {e}\",\n                           artifact={\"traceback\": traceback.format_exc()})\n\n    return list(github_tickets)\n\ndef extract_ticket_links_from_branch_name(branch_name, repo_path, base_url_html=\"https://github.com\"):\n    \"\"\"\n    Extract GitHub issue URLs from branch name. Numbers are matched at start of branch or after /,\n    followed by - or end (e.g. feature/1-test-issue -> #1). 
Respects extract_issue_from_branch\n    and optional branch_issue_regex (may be under [config] in TOML).\n    \"\"\"\n    if not branch_name or not repo_path:\n        return []\n    if not isinstance(branch_name, str):\n        return []\n    settings = get_settings()\n    if not settings.get(\"extract_issue_from_branch\", settings.get(\"config.extract_issue_from_branch\", True)):\n        return []\n    github_tickets = set()\n    custom_regex_str = settings.get(\"branch_issue_regex\") or settings.get(\"config.branch_issue_regex\", \"\") or \"\"\n    if custom_regex_str:\n        try:\n            pattern = re.compile(custom_regex_str)\n            if pattern.groups < 1:\n                get_logger().error(\n                    \"branch_issue_regex must contain at least one capturing group for the issue number; using default pattern.\"\n                )\n                pattern = BRANCH_ISSUE_PATTERN\n        except re.error as e:\n            get_logger().error(f\"Invalid custom regex for branch issue extraction: {e}\")\n            return []\n    else:\n        pattern = BRANCH_ISSUE_PATTERN\n    for match in pattern.finditer(branch_name):\n        try:\n            issue_number = match.group(1)\n        except IndexError:\n            continue\n        if issue_number and issue_number.isdigit():\n            github_tickets.add(\n                f\"{base_url_html.strip('/')}/{repo_path}/issues/{issue_number}\"\n            )\n    return list(github_tickets)\n\n\nasync def extract_tickets(git_provider):\n    MAX_TICKET_CHARACTERS = 10000\n    try:\n        if isinstance(git_provider, GithubProvider):\n            user_description = git_provider.get_user_description()\n            description_tickets = extract_ticket_links_from_pr_description(\n                user_description, git_provider.repo, git_provider.base_url_html\n            )\n            branch_name = git_provider.get_pr_branch()\n            branch_tickets = extract_ticket_links_from_branch_name(\n 
               branch_name, git_provider.repo, git_provider.base_url_html\n            )\n            seen = set()\n            merged = []\n            for link in description_tickets + branch_tickets:\n                if link not in seen:\n                    seen.add(link)\n                    merged.append(link)\n            if len(merged) > 3:\n                get_logger().info(f\"Too many tickets (description + branch): {len(merged)}\")\n                tickets = merged[:3]\n            else:\n                tickets = merged\n            tickets_content = []\n\n            if tickets:\n\n                for ticket in tickets:\n                    repo_name, original_issue_number = git_provider._parse_issue_url(ticket)\n\n                    try:\n                        issue_main = git_provider.repo_obj.get_issue(original_issue_number)\n                    except Exception as e:\n                        get_logger().error(f\"Error getting main issue: {e}\",\n                                           artifact={\"traceback\": traceback.format_exc()})\n                        continue\n\n                    issue_body_str = issue_main.body or \"\"\n                    if len(issue_body_str) > MAX_TICKET_CHARACTERS:\n                        issue_body_str = issue_body_str[:MAX_TICKET_CHARACTERS] + \"...\"\n\n                    # Extract sub-issues\n                    sub_issues_content = []\n                    try:\n                        sub_issues = git_provider.fetch_sub_issues(ticket)\n                        for sub_issue_url in sub_issues:\n                            try:\n                                sub_repo, sub_issue_number = git_provider._parse_issue_url(sub_issue_url)\n                                sub_issue = git_provider.repo_obj.get_issue(sub_issue_number)\n\n                                sub_body = sub_issue.body or \"\"\n                                if len(sub_body) > MAX_TICKET_CHARACTERS:\n                                    
sub_body = sub_body[:MAX_TICKET_CHARACTERS] + \"...\"\n\n                                sub_issues_content.append({\n                                    'ticket_url': sub_issue_url,\n                                    'title': sub_issue.title,\n                                    'body': sub_body\n                                })\n                            except Exception as e:\n                                get_logger().warning(f\"Failed to fetch sub-issue content for {sub_issue_url}: {e}\")\n\n                    except Exception as e:\n                        get_logger().warning(f\"Failed to fetch sub-issues for {ticket}: {e}\")\n\n                    # Extract labels\n                    labels = []\n                    try:\n                        for label in issue_main.labels:\n                            labels.append(label.name if hasattr(label, 'name') else label)\n                    except Exception as e:\n                        get_logger().error(f\"Error extracting labels error= {e}\",\n                                           artifact={\"traceback\": traceback.format_exc()})\n\n                    tickets_content.append({\n                        'ticket_id': issue_main.number,\n                        'ticket_url': ticket,\n                        'title': issue_main.title,\n                        'body': issue_body_str,\n                        'labels': \", \".join(labels),\n                        'sub_issues': sub_issues_content  # Store sub-issues content\n                    })\n\n                return tickets_content\n\n        elif isinstance(git_provider, AzureDevopsProvider):\n            tickets_info = git_provider.get_linked_work_items()\n            tickets_content = []\n            for ticket in tickets_info:\n                try:\n                    ticket_body_str = ticket.get(\"body\", \"\")\n                    if len(ticket_body_str) > MAX_TICKET_CHARACTERS:\n                        ticket_body_str = 
ticket_body_str[:MAX_TICKET_CHARACTERS] + \"...\"\n\n                    tickets_content.append(\n                        {\n                            \"ticket_id\": ticket.get(\"id\"),\n                            \"ticket_url\": ticket.get(\"url\"),\n                            \"title\": ticket.get(\"title\"),\n                            \"body\": ticket_body_str,\n                            \"requirements\": ticket.get(\"acceptance_criteria\", \"\"),\n                            \"labels\": \", \".join(ticket.get(\"labels\", [])),\n                        }\n                    )\n                except Exception as e:\n                    get_logger().error(\n                        f\"Error processing Azure DevOps ticket: {e}\",\n                        artifact={\"traceback\": traceback.format_exc()},\n                    )\n            return tickets_content\n\n    except Exception as e:\n        get_logger().error(f\"Error extracting tickets error= {e}\",\n                           artifact={\"traceback\": traceback.format_exc()})\n\n\nasync def extract_and_cache_pr_tickets(git_provider, vars):\n    if not get_settings().get('pr_reviewer.require_ticket_analysis_review', False):\n        return\n\n    related_tickets = get_settings().get('related_tickets', [])\n\n    if not related_tickets:\n        tickets_content = await extract_tickets(git_provider)\n\n        if tickets_content:\n            # Store sub-issues along with main issues\n            for ticket in tickets_content:\n                if \"sub_issues\" in ticket and ticket[\"sub_issues\"]:\n                    for sub_issue in ticket[\"sub_issues\"]:\n                        related_tickets.append(sub_issue)  # Add sub-issues content\n\n                related_tickets.append(ticket)\n\n            get_logger().info(\"Extracted tickets and sub-issues from PR description\",\n                              artifact={\"tickets\": related_tickets})\n\n            vars['related_tickets'] = 
related_tickets\n            get_settings().set('related_tickets', related_tickets)\n    else:\n        get_logger().info(\"Using cached tickets\", artifact={\"tickets\": related_tickets})\n        vars['related_tickets'] = related_tickets\n\n\ndef check_tickets_relevancy():\n    return True\n"
  },
  {
    "path": "pr_compliance_checklist.yaml",
    "content": "pr_compliances:\n  - title: \"Consistent Naming Conventions\"\n    compliance_label: false\n    objective: \"All new variables, functions, and classes must follow the project's established naming standards\"\n    success_criteria: \"All identifiers follow the established naming patterns (camelCase, snake_case, etc.)\"\n    failure_criteria: \"Inconsistent or non-standard naming that deviates from project conventions\"\n\n  - title: \"No Dead or Commented-Out Code\"\n    compliance_label: false\n    objective: \"Keep the codebase clean by ensuring all submitted code is active and necessary\"\n    success_criteria: \"All code in the PR is active and serves a purpose; no commented-out blocks\"\n    failure_criteria: \"Presence of unused, dead, or commented-out code sections\"\n\n  - title: \"Robust Error Handling\"\n    compliance_label: false\n    objective: \"Ensure potential errors and edge cases are anticipated and handled gracefully throughout the code\"\n    success_criteria: \"All error scenarios are properly caught and handled with appropriate responses\"\n    failure_criteria: \"Unhandled exceptions, ignored errors, or missing edge case handling\"\n\n  - title: \"Single Responsibility for Functions\"\n    compliance_label: false\n    objective: \"Each function should have a single, well-defined responsibility\"\n    success_criteria: \"Functions perform one cohesive task with a single purpose\"\n    failure_criteria: \"Functions that combine multiple unrelated operations or handle several distinct concerns\"\n\n  - title: \"When relevant, utilize early return\"\n    compliance_label: false\n    objective: \"In a code snippet containing multiple logic conditions (such as 'if-else'), prefer an early return on edge cases than deep nesting\"\n    success_criteria: \"When relevant, utilize early return that reduces nesting\"\n    failure_criteria: \"Unjustified deep nesting that can be simplified by early return\"\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[build-system]\nrequires = [\"setuptools>=61.0\", \"wheel\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname = \"pr-agent\"\nversion = \"0.3.1\"\n\nauthors = [{ name = \"QodoAI\", email = \"ofir.f@qodo.ai\" }]\n\nmaintainers = [\n  { name = \"Ofir Friedman\", email = \"ofir.f@qodo.ai\" },\n]\n\ndescription = \"QodoAI PR-Agent aims to help efficiently review and handle pull requests, by providing AI feedbacks and suggestions.\"\nreadme = \"README.md\"\nrequires-python = \">=3.12\"\nkeywords = [\"AI\", \"Agents\", \"Pull Request\", \"Automation\", \"Code Review\"]\nlicense = { file = \"LICENSE\" }\n\nclassifiers = [\n  \"Intended Audience :: Developers\",\n  \"Programming Language :: Python :: 3\",\n]\ndynamic = [\"dependencies\"]\n\n\n[tool.setuptools.dynamic]\ndependencies = { file = [\"requirements.txt\"] }\n\n[project.urls]\n\"Homepage\" = \"https://github.com/qodo-ai/pr-agent\"\n\"Documentation\" = \"https://qodo-merge-docs.qodo.ai/\"\n\n[tool.setuptools]\ninclude-package-data = true\n\n[tool.setuptools.packages.find]\nwhere = [\".\"]\ninclude = [\n  \"pr_agent*\",\n] # include pr_agent and any sub-packages it finds under it.\n\n[project.scripts]\npr-agent = \"pr_agent.cli:run\"\n\n[tool.ruff]\nline-length = 120\n\nlint.select = [\n  \"E\",    # Pyflakes\n  \"F\",    # Pyflakes\n  \"B\",    # flake8-bugbear\n  \"I001\", # isort basic checks\n  \"I002\", # isort missing-required-import\n]\n\n# First commit - only fixing isort\nlint.fixable = [\n  \"I001\", # isort basic checks\n]\n\nlint.unfixable = [\n  \"B\", # Avoid trying to fix flake8-bugbear (`B`) violations.\n]\n\nlint.exclude = [\"api/code_completions\"]\n\nlint.ignore = [\"E999\", \"B008\"]\n\n[tool.ruff.lint.per-file-ignores]\n\"__init__.py\" = [\n  \"E402\",\n] # Ignore `E402` (import violations) in all `__init__.py` files, and in `path/to/file.py`.\n\n[tool.bandit]\nexclude_dirs = [\"tests\"]\nskips = [\"B101\"]\ntests = []\n\n[tool.pytest.ini_options]\nasyncio_mode = 
\"auto\"\ntestpaths = [\"tests\"]\npython_files = [\"test_*.py\"]\npython_classes = [\"Test*\"]\npython_functions = [\"test_*\"]\naddopts = \"--color=yes\"\nconsole_output_style = \"progress\"\n"
  },
  {
    "path": "requirements-dev.txt",
    "content": "pytest==9.0.2\npytest-asyncio>=1.3.0\npoetry\ntwine\npre-commit>=4,<5\n"
  },
  {
    "path": "requirements.txt",
    "content": "aiohttp==3.12.15\nanthropic>=0.69.0\n#anthropic[vertex]==0.47.1\natlassian-python-api==3.41.4\nazure-devops==7.1.0b4\nazure-identity==1.25.0\nboto3==1.40.45\ncertifi==2024.8.30\ndynaconf==3.2.4\nfastapi==0.118.0\nGitPython==3.1.41\ngoogle-cloud-aiplatform==1.38.0\ngoogle-generativeai==0.8.3\ngoogle-cloud-storage==2.10.0\nJinja2==3.1.6\nlitellm==1.81.12\nloguru==0.7.2\nmsrest==0.7.1\nopenai>=1.55.3\npytest==9.0.2\npytest-asyncio>=1.3.0\nPyGithub==1.59.*\nPyJWT==2.10.1\nPyYAML==6.0.1\npython-gitlab==3.15.0\nretry==0.9.2\nstarlette-context==0.3.6\ntiktoken==0.8.0\nujson==5.8.0\nuvicorn==0.22.0\ntenacity==8.2.3\ngunicorn==23.0.0\npytest-cov==7.0.0\npydantic==2.8.2\nhtml2text==2024.2.26\ngiteapy==1.0.8\n# Uncomment the following lines to enable the 'similar issue' tool\n# pinecone-client\n# pinecone-datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main\n# lancedb==0.5.1\n# qdrant-client==1.15.1\n# uncomment this to support language LangChainOpenAIHandler\n# langchain==0.2.0\n# langchain-core==0.2.28\n# langchain-openai==0.1.20\n"
  },
  {
    "path": "setup.py",
    "content": "# for compatibility with legacy tools\n# see: https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html\nfrom setuptools import setup\n\nsetup()\n"
  },
  {
    "path": "tests/e2e_tests/e2e_utils.py",
    "content": "FILE_PATH = \"pr_agent/cli_pip.py\"\n\nPR_HEADER_START_WITH = '### **User description**\\nupdate cli_pip.py\\n\\n\\n___\\n\\n### **PR Type**'\nREVIEW_START_WITH = '## PR Reviewer Guide 🔍\\n\\n<table>\\n<tr><td>⏱️&nbsp;<strong>Estimated effort to review</strong>:'\nIMPROVE_START_WITH_REGEX_PATTERN = r'^## PR Code Suggestions ✨\\n\\n<!-- [a-z0-9]+ -->\\n\\n<table><thead><tr><td>Category</td>'\n\nNUM_MINUTES = 5\n\nNEW_FILE_CONTENT = \"\"\"\\\nfrom pr_agent import cli\nfrom pr_agent.config_loader import get_settings\n\n\ndef main():\n    # Fill in the following values\n    provider = \"github\"  # GitHub provider\n    user_token = \"...\"  # GitHub user token\n    openai_key = \"ghs_afsdfasdfsdf\"  # Example OpenAI key\n    pr_url = \"...\"  # PR URL, for example 'https://github.com/Codium-ai/pr-agent/pull/809'\n    command = \"/improve\"  # Command to run (e.g. '/review', '/describe', 'improve', '/ask=\"What is the purpose of this PR?\"')\n\n    # Setting the configurations\n    get_settings().set(\"CONFIG.git_provider\", provider)\n    get_settings().set(\"openai.key\", openai_key)\n    get_settings().set(\"github.user_token\", user_token)\n\n    # Run the command. Feedback will appear in GitHub PR comments\n    output = cli.run_command(pr_url, command)\n\n    print(output)\n\nif __name__ == '__main__':\n    main()\n\"\"\"\n"
  },
  {
    "path": "tests/e2e_tests/langchain_ai_handler.py",
    "content": "import asyncio\nimport os\nimport time\n\nfrom pr_agent.algo.ai_handlers.langchain_ai_handler import LangChainOpenAIHandler\nfrom pr_agent.config_loader import get_settings\n\n\ndef check_settings():\n    print('Checking settings...')\n    settings = get_settings()\n    \n    # Check OpenAI settings\n    if not hasattr(settings, 'openai'):\n        print('OpenAI settings not found')\n        return False\n    \n    if not hasattr(settings.openai, 'key'):\n        print('OpenAI API key not found')\n        return False\n    \n    print('OpenAI API key found')\n    return True\n\nasync def measure_performance(handler, num_requests=3):\n    print(f'\\nRunning performance test with {num_requests} requests...')\n    start_time = time.time()\n    \n    # Create multiple requests\n    tasks = [\n        handler.chat_completion(\n            model='gpt-3.5-turbo',\n            system='You are a helpful assistant',\n            user=f'Test message {i}',\n            temperature=0.2\n        ) for i in range(num_requests)\n    ]\n    \n    # Execute requests concurrently\n    responses = await asyncio.gather(*tasks)\n    \n    end_time = time.time()\n    total_time = end_time - start_time\n    avg_time = total_time / num_requests\n    \n    print(f'Performance results:')\n    print(f'Total time: {total_time:.2f} seconds')\n    print(f'Average time per request: {avg_time:.2f} seconds')\n    print(f'Requests per second: {num_requests/total_time:.2f}')\n    \n    return responses\n\nasync def test():\n    print('Starting test...')\n    \n    # Check settings first\n    if not check_settings():\n        print('Please set up your environment variables or configuration file')\n        print('Required: OPENAI_API_KEY')\n        return\n    \n    try:\n        handler = LangChainOpenAIHandler()\n        print('Handler created')\n        \n        # Basic functionality test\n        response = await handler.chat_completion(\n            model='gpt-3.5-turbo',\n        
    system='You are a helpful assistant',\n            user='Hello',\n            temperature=0.2,\n            img_path='test.jpg'\n        )\n        print('Response:', response)\n        \n        # Performance test\n        await measure_performance(handler)\n        \n    except Exception as e:\n        print('Error:', str(e))\n        print('Error type:', type(e))\n        print('Error details:', e.__dict__ if hasattr(e, '__dict__') else 'No additional details')\n\nif __name__ == '__main__':\n    print('Environment variables:')\n    print('OPENAI_API_KEY:', 'Set' if os.getenv('OPENAI_API_KEY') else 'Not set')\n    print('OPENAI_API_TYPE:', os.getenv('OPENAI_API_TYPE', 'Not set'))\n    print('OPENAI_API_BASE:', os.getenv('OPENAI_API_BASE', 'Not set'))\n    \n    asyncio.run(test()) \n  \n    "
  },
  {
    "path": "tests/e2e_tests/test_bitbucket_app.py",
    "content": "import hashlib\nimport os\nimport re\nimport time\nfrom datetime import datetime\n\nimport jwt\nimport requests\nfrom atlassian.bitbucket import Cloud\nfrom requests.auth import HTTPBasicAuth\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger, setup_logger\nfrom tests.e2e_tests.e2e_utils import (\n    FILE_PATH,\n    IMPROVE_START_WITH_REGEX_PATTERN,\n    NEW_FILE_CONTENT,\n    NUM_MINUTES,\n    PR_HEADER_START_WITH,\n    REVIEW_START_WITH,\n)\n\nlog_level = os.environ.get(\"LOG_LEVEL\", \"INFO\")\nsetup_logger(log_level)\nlogger = get_logger()\n\ndef test_e2e_run_bitbucket_app():\n    repo_slug = 'pr-agent-tests'\n    project_key = 'codiumai'\n    base_branch = \"main\"  # or any base branch you want\n    new_branch = f\"bitbucket_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}\"\n    get_settings().config.git_provider = \"bitbucket\"\n\n    try:\n        # Add username and password for authentication\n        username = get_settings().get(\"BITBUCKET.USERNAME\", None)\n        password = get_settings().get(\"BITBUCKET.PASSWORD\", None)\n        s = requests.Session()\n        s.auth = (username, password)  # Use HTTP Basic Auth\n        bitbucket_client = Cloud(session=s)\n        repo = bitbucket_client.workspaces.get(workspace=project_key).repositories.get(repo_slug)\n\n        # Create a new branch from the base branch\n        logger.info(f\"Creating a new branch {new_branch} from {base_branch}\")\n        source_branch = repo.branches.get(base_branch)\n        target_repo = repo.branches.create(new_branch,source_branch.hash)\n\n        # Update the file content\n        url = f\"https://api.bitbucket.org/2.0/repositories/{project_key}/{repo_slug}/src\"\n        files={FILE_PATH: NEW_FILE_CONTENT}\n        data={\n            \"message\": \"update cli_pip.py\",\n            \"branch\": new_branch,\n        }\n        requests.request(\"POST\", url, auth=HTTPBasicAuth(username, password), 
data=data, files=files)\n\n\n        # Create a pull request\n        logger.info(f\"Creating a pull request from {new_branch} to {base_branch}\")\n        pr = repo.pullrequests.create(\n            title=f'{new_branch}',\n            description=\"update cli_pip.py\",\n            source_branch=new_branch,\n            destination_branch=base_branch\n        )\n\n        # check every 1 minute, for 5 minutes if the PR has all the tool results\n        for i in range(NUM_MINUTES):\n            logger.info(f\"Waiting for the PR to get all the tool results...\")\n            time.sleep(60)\n            comments = list(pr.comments())\n            comments_raw = [c.raw for c in comments]\n            if len(comments) >= 5: # header, 3 suggestions, 1 review\n                valid_review = False\n                for comment_raw in comments_raw:\n                    if comment_raw.startswith('## PR Reviewer Guide 🔍'):\n                        valid_review = True\n                        break\n                if valid_review:\n                    break\n                else:\n                    logger.error(f\"REVIEW feedback is invalid\")\n                    raise Exception(\"REVIEW feedback is invalid\")\n            else:\n                logger.info(f\"Waiting for the PR to get all the tool results. {i + 1} minute(s) passed\")\n        else:\n            assert False, f\"After {NUM_MINUTES} minutes, the PR did not get all the tool results\"\n\n        # cleanup - delete the branch\n        pr.decline()\n        repo.branches.delete(new_branch)\n\n        # If we reach here, the test is successful\n        logger.info(f\"Succeeded in running e2e test for Bitbucket app on the PR\")\n    except Exception as e:\n        logger.error(f\"Failed to run e2e test for Bitbucket app: {e}\")\n        # delete the branch\n        pr.decline()\n        repo.branches.delete(new_branch)\n        assert False\n\n\nif __name__ == '__main__':\n    test_e2e_run_bitbucket_app()\n"
  },
  {
    "path": "tests/e2e_tests/test_gitea_app.py",
    "content": "import os\nimport time\nfrom datetime import datetime\n\nimport requests\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.log import get_logger, setup_logger\nfrom tests.e2e_tests.e2e_utils import (\n    FILE_PATH,\n    IMPROVE_START_WITH_REGEX_PATTERN,\n    NEW_FILE_CONTENT,\n    NUM_MINUTES,\n    PR_HEADER_START_WITH,\n    REVIEW_START_WITH,\n)\n\nlog_level = os.environ.get(\"LOG_LEVEL\", \"INFO\")\nsetup_logger(log_level)\nlogger = get_logger()\n\ndef test_e2e_run_gitea_app():\n    repo_name = 'pr-agent-tests'\n    owner = 'codiumai'\n    base_branch = \"main\"\n    new_branch = f\"gitea_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}\"\n    get_settings().config.git_provider = \"gitea\"\n\n    headers = None\n    pr_number = None\n    \n    try:\n        gitea_url = get_settings().get(\"GITEA.URL\", None)\n        gitea_token = get_settings().get(\"GITEA.TOKEN\", None)\n        \n        if not gitea_url:\n            logger.error(\"GITEA.URL is not set in the configuration\")\n            logger.info(\"Please set GITEA.URL in .env file or environment variables\")\n            assert False, \"GITEA.URL is not set in the configuration\"\n        \n        if not gitea_token:\n            logger.error(\"GITEA.TOKEN is not set in the configuration\")\n            logger.info(\"Please set GITEA.TOKEN in .env file or environment variables\")\n            assert False, \"GITEA.TOKEN is not set in the configuration\"\n        \n        headers = {\n            'Authorization': f'token {gitea_token}',\n            'Content-Type': 'application/json',\n            'Accept': 'application/json'\n        }\n        \n        logger.info(f\"Creating a new branch {new_branch} from {base_branch}\")\n        \n        response = requests.get(\n            f\"{gitea_url}/api/v1/repos/{owner}/{repo_name}/branches/{base_branch}\",\n            headers=headers\n        )\n        response.raise_for_status()\n        base_branch_data = 
response.json()\n        base_commit_sha = base_branch_data['commit']['id']\n        \n        branch_data = {\n            'ref': f\"refs/heads/{new_branch}\",\n            'sha': base_commit_sha\n        }\n        response = requests.post(\n            f\"{gitea_url}/api/v1/repos/{owner}/{repo_name}/git/refs\",\n            headers=headers,\n            json=branch_data\n        )\n        response.raise_for_status()\n        \n        logger.info(f\"Updating file {FILE_PATH} in branch {new_branch}\")\n        \n        import base64\n        file_content_encoded = base64.b64encode(NEW_FILE_CONTENT.encode()).decode()\n        \n        try:\n            response = requests.get(\n                f\"{gitea_url}/api/v1/repos/{owner}/{repo_name}/contents/{FILE_PATH}?ref={new_branch}\",\n                headers=headers\n            )\n            response.raise_for_status()\n            existing_file = response.json()\n            file_sha = existing_file.get('sha')\n            \n            file_data = {\n                'message': 'Update cli_pip.py',\n                'content': file_content_encoded,\n                'sha': file_sha,\n                'branch': new_branch\n            }\n        except:\n            file_data = {\n                'message': 'Add cli_pip.py',\n                'content': file_content_encoded,\n                'branch': new_branch\n            }\n        \n        response = requests.put(\n            f\"{gitea_url}/api/v1/repos/{owner}/{repo_name}/contents/{FILE_PATH}\",\n            headers=headers,\n            json=file_data\n        )\n        response.raise_for_status()\n        \n        logger.info(f\"Creating a pull request from {new_branch} to {base_branch}\")\n        pr_data = {\n            'title': f'Test PR from {new_branch}',\n            'body': 'update cli_pip.py',\n            'head': new_branch,\n            'base': base_branch\n        }\n        response = requests.post(\n            
f\"{gitea_url}/api/v1/repos/{owner}/{repo_name}/pulls\",\n            headers=headers,\n            json=pr_data\n        )\n        response.raise_for_status()\n        pr = response.json()\n        pr_number = pr['number']\n        \n        for i in range(NUM_MINUTES):\n            logger.info(f\"Waiting for the PR to get all the tool results...\")\n            time.sleep(60)\n            \n            response = requests.get(\n                f\"{gitea_url}/api/v1/repos/{owner}/{repo_name}/issues/{pr_number}/comments\",\n                headers=headers\n            )\n            response.raise_for_status()\n            comments = response.json()\n            \n            if len(comments) >= 5:\n                valid_review = False\n                for comment in comments:\n                    if comment['body'].startswith('## PR Reviewer Guide 🔍'):\n                        valid_review = True\n                        break\n                if valid_review:\n                    break\n                else:\n                    logger.error(\"REVIEW feedback is invalid\")\n                    raise Exception(\"REVIEW feedback is invalid\")\n            else:\n                logger.info(f\"Waiting for the PR to get all the tool results. 
{i + 1} minute(s) passed\")\n        else:\n            assert False, f\"After {NUM_MINUTES} minutes, the PR did not get all the tool results\"\n        \n        logger.info(f\"Cleaning up: closing PR and deleting branch {new_branch}\")\n        \n        close_data = {'state': 'closed'}\n        response = requests.patch(\n            f\"{gitea_url}/api/v1/repos/{owner}/{repo_name}/pulls/{pr_number}\",\n            headers=headers,\n            json=close_data\n        )\n        response.raise_for_status()\n        \n        response = requests.delete(\n            f\"{gitea_url}/api/v1/repos/{owner}/{repo_name}/git/refs/heads/{new_branch}\",\n            headers=headers\n        )\n        response.raise_for_status()\n        \n        logger.info(f\"Succeeded in running e2e test for Gitea app on the PR\")\n    except Exception as e:\n        logger.error(f\"Failed to run e2e test for Gitea app: {e}\")\n        raise\n    finally:\n        try:\n            if headers is None or gitea_url is None:\n                return\n                \n            if pr_number is not None:\n                requests.patch(\n                    f\"{gitea_url}/api/v1/repos/{owner}/{repo_name}/pulls/{pr_number}\",\n                    headers=headers,\n                    json={'state': 'closed'}\n                )\n            \n            requests.delete(\n                f\"{gitea_url}/api/v1/repos/{owner}/{repo_name}/git/refs/heads/{new_branch}\",\n                headers=headers\n            )\n        except Exception as cleanup_error:\n            logger.error(f\"Failed to clean up after test: {cleanup_error}\")\n\nif __name__ == '__main__':\n    test_e2e_run_gitea_app() "
  },
  {
    "path": "tests/e2e_tests/test_github_app.py",
    "content": "import os\nimport re\nimport time\nfrom datetime import datetime\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider\nfrom pr_agent.log import get_logger, setup_logger\nfrom tests.e2e_tests.e2e_utils import (\n    FILE_PATH,\n    IMPROVE_START_WITH_REGEX_PATTERN,\n    NEW_FILE_CONTENT,\n    NUM_MINUTES,\n    PR_HEADER_START_WITH,\n    REVIEW_START_WITH,\n)\n\nlog_level = os.environ.get(\"LOG_LEVEL\", \"INFO\")\nsetup_logger(log_level)\nlogger = get_logger()\n\n\ndef test_e2e_run_github_app():\n    \"\"\"\n    What we want to do:\n    (1) open a PR in a repo 'https://github.com/Codium-ai/pr-agent-tests'\n    (2) wait for 5 minutes until the PR is processed by the GitHub app\n    (3) check that the relevant tools have been executed\n    \"\"\"\n    base_branch = \"main\"  # or any base branch you want\n    new_branch = f\"github_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}\"\n    repo_url = 'Codium-ai/pr-agent-tests'\n    get_settings().config.git_provider = \"github\"\n    git_provider = get_git_provider()()\n    github_client = git_provider.github_client\n    repo = github_client.get_repo(repo_url)\n\n    try:\n        # Create a new branch from the base branch\n        source = repo.get_branch(base_branch)\n        logger.info(f\"Creating a new branch {new_branch} from {base_branch}\")\n        repo.create_git_ref(ref=f\"refs/heads/{new_branch}\", sha=source.commit.sha)\n\n        # Get the file you want to edit\n        file = repo.get_contents(FILE_PATH, ref=base_branch)\n        # content = file.decoded_content.decode()\n\n        # Update the file content\n        logger.info(f\"Updating the file {FILE_PATH}\")\n        commit_message = \"update cli_pip.py\"\n        repo.update_file(\n            file.path,\n            commit_message,\n            NEW_FILE_CONTENT,\n            file.sha,\n            branch=new_branch\n        )\n\n        # Create a pull request\n        
logger.info(f\"Creating a pull request from {new_branch} to {base_branch}\")\n        pr = repo.create_pull(\n            title=new_branch,\n            body=\"update cli_pip.py\",\n            head=new_branch,\n            base=base_branch\n        )\n\n        # check every 1 minute, for 5, minutes if the PR has all the tool results\n        for i in range(NUM_MINUTES):\n            logger.info(f\"Waiting for the PR to get all the tool results...\")\n            time.sleep(60)\n            logger.info(f\"Checking the PR {pr.html_url} after {i + 1} minute(s)\")\n            pr.update()\n            pr_header_body = pr.body\n            comments = list(pr.get_issue_comments())\n            if len(comments) == 2:\n                comments_body = [comment.body for comment in comments]\n                assert pr_header_body.startswith(PR_HEADER_START_WITH), \"DESCRIBE feedback is invalid\"\n                assert comments_body[0].startswith(REVIEW_START_WITH), \"REVIEW feedback is invalid\"\n                assert re.match(IMPROVE_START_WITH_REGEX_PATTERN, comments_body[1]), \"IMPROVE feedback is invalid\"\n                break\n            else:\n                logger.info(f\"Waiting for the PR to get all the tool results. 
{i + 1} minute(s) passed\")\n        else:\n            assert False, f\"After {NUM_MINUTES} minutes, the PR did not get all the tool results\"\n\n        # cleanup - delete the branch\n        logger.info(f\"Deleting the branch {new_branch}\")\n        repo.get_git_ref(f\"heads/{new_branch}\").delete()\n\n        # If we reach here, the test is successful\n        logger.info(f\"Succeeded in running e2e test for GitHub app on the PR {pr.html_url}\")\n    except Exception as e:\n        logger.error(f\"Failed to run e2e test for GitHub app: {e}\")\n        # delete the branch\n        logger.info(f\"Deleting the branch {new_branch}\")\n        repo.get_git_ref(f\"heads/{new_branch}\").delete()\n        assert False\n\n\nif __name__ == '__main__':\n    test_e2e_run_github_app()\n"
  },
  {
    "path": "tests/e2e_tests/test_gitlab_webhook.py",
    "content": "import os\nimport re\nimport time\nfrom datetime import datetime\n\nimport gitlab\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import get_git_provider\nfrom pr_agent.log import get_logger, setup_logger\nfrom tests.e2e_tests.e2e_utils import (\n    FILE_PATH,\n    IMPROVE_START_WITH_REGEX_PATTERN,\n    NEW_FILE_CONTENT,\n    NUM_MINUTES,\n    PR_HEADER_START_WITH,\n    REVIEW_START_WITH,\n)\n\nlog_level = os.environ.get(\"LOG_LEVEL\", \"INFO\")\nsetup_logger(log_level)\nlogger = get_logger()\n\ndef test_e2e_run_github_app():\n    # GitLab setup\n    GITLAB_URL = \"https://gitlab.com\"\n    GITLAB_TOKEN = get_settings().gitlab.PERSONAL_ACCESS_TOKEN\n    gl = gitlab.Gitlab(GITLAB_URL, private_token=GITLAB_TOKEN)\n    repo_url = 'codiumai/pr-agent-tests'\n    project = gl.projects.get(repo_url)\n\n    base_branch = \"main\"  # or any base branch you want\n    new_branch = f\"github_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}\"\n\n    try:\n        # Create a new branch from the base branch\n        logger.info(f\"Creating a new branch {new_branch} from {base_branch}\")\n        project.branches.create({'branch': new_branch, 'ref': base_branch})\n\n        # Get the file you want to edit\n        file = project.files.get(file_path=FILE_PATH, ref=base_branch)\n        # content = file.decode()\n\n        # Update the file content\n        logger.info(f\"Updating the file {FILE_PATH}\")\n        commit_message = \"update cli_pip.py\"\n        file.content = NEW_FILE_CONTENT\n        file.save(branch=new_branch, commit_message=commit_message)\n\n        # Create a merge request\n        logger.info(f\"Creating a merge request from {new_branch} to {base_branch}\")\n        mr = project.mergerequests.create({\n            'source_branch': new_branch,\n            'target_branch': base_branch,\n            'title': new_branch,\n            'description': \"update cli_pip.py\"\n        })\n        
logger.info(f\"Merge request created: {mr.web_url}\")\n\n        # check every 1 minute, for 5, minutes if the PR has all the tool results\n        for i in range(NUM_MINUTES):\n            logger.info(f\"Waiting for the MR to get all the tool results...\")\n            time.sleep(60)\n            logger.info(f\"Checking the MR {mr.web_url} after {i + 1} minute(s)\")\n            mr = project.mergerequests.get(mr.iid)\n            mr_header_body = mr.description\n            comments = mr.notes.list()[::-1]\n            # clean all system comments\n            comments = [comment for comment in comments if comment.system is False]\n            if len(comments) == 2: # \"changed the description\" is received as the first comment\n                comments_body = [comment.body for comment in comments]\n                if 'Work in progress' in comments_body[1]:\n                    continue\n                assert mr_header_body.startswith(PR_HEADER_START_WITH), \"DESCRIBE feedback is invalid\"\n                assert comments_body[0].startswith(REVIEW_START_WITH), \"REVIEW feedback is invalid\"\n                assert re.match(IMPROVE_START_WITH_REGEX_PATTERN, comments_body[1]), \"IMPROVE feedback is invalid\"\n                break\n            else:\n                logger.info(f\"Waiting for the MR to get all the tool results. 
{i + 1} minute(s) passed\")\n        else:\n            assert False, f\"After {NUM_MINUTES} minutes, the MR did not get all the tool results\"\n\n        # cleanup - delete the branch\n        logger.info(f\"Deleting the branch {new_branch}\")\n        project.branches.delete(new_branch)\n\n        # If we reach here, the test is successful\n        logger.info(f\"Succeeded in running e2e test for GitLab app on the MR {mr.web_url}\")\n    except Exception as e:\n        logger.error(f\"Failed to run e2e test for GitHub app: {e}\")\n        logger.info(f\"Deleting the branch {new_branch}\")\n        project.branches.delete(new_branch)\n        assert False\n\n\nif __name__ == '__main__':\n    test_e2e_run_github_app()\n"
  },
  {
    "path": "tests/health_test/main.py",
    "content": "import argparse\nimport asyncio\nimport copy\nimport os\nfrom pathlib import Path\n\nfrom starlette_context import context, request_cycle_context\n\nfrom pr_agent.agent.pr_agent import PRAgent, commands\nfrom pr_agent.cli import run_command\nfrom pr_agent.config_loader import get_settings, global_settings\nfrom pr_agent.log import get_logger, setup_logger\nfrom tests.e2e_tests import e2e_utils\n\nlog_level = os.environ.get(\"LOG_LEVEL\", \"INFO\")\nsetup_logger(log_level)\n\n\nasync def run_async():\n    pr_url = os.getenv('TEST_PR_URL', 'https://github.com/Codium-ai/pr-agent/pull/1385')\n\n    get_settings().set(\"config.git_provider\", \"github\")\n    get_settings().set(\"config.publish_output\", False)\n    get_settings().set(\"config.fallback_models\", [])\n\n    agent = PRAgent()\n    try:\n        # Run the 'describe' command\n        get_logger().info(f\"\\nSanity check for the 'describe' command...\")\n        original_settings = copy.deepcopy(get_settings())\n        await agent.handle_request(pr_url, ['describe'])\n        pr_header_body = dict(get_settings().data)['artifact']\n        assert pr_header_body.startswith('###') and 'PR Type' in pr_header_body and 'Description' in pr_header_body\n        context['settings'] = copy.deepcopy(original_settings) # Restore settings state after each test to prevent test interference\n        get_logger().info(\"PR description generated successfully\\n\")\n\n        # Run the 'review' command\n        get_logger().info(f\"\\nSanity check for the 'review' command...\")\n        original_settings = copy.deepcopy(get_settings())\n        await agent.handle_request(pr_url, ['review'])\n        pr_review_body = dict(get_settings().data)['artifact']\n        assert pr_review_body.startswith('##') and 'PR Reviewer Guide' in pr_review_body\n        context['settings'] = copy.deepcopy(original_settings)  # Restore settings state after each test to prevent test interference\n        get_logger().info(\"PR 
review generated successfully\\n\")\n\n        # Run the 'improve' command\n        get_logger().info(f\"\\nSanity check for the 'improve' command...\")\n        original_settings = copy.deepcopy(get_settings())\n        await agent.handle_request(pr_url, ['improve'])\n        pr_improve_body = dict(get_settings().data)['artifact']\n        assert pr_improve_body.startswith('##') and 'PR Code Suggestions' in pr_improve_body\n        context['settings'] = copy.deepcopy(original_settings)  # Restore settings state after each test to prevent test interference\n        get_logger().info(\"PR improvements generated successfully\\n\")\n\n        get_logger().info(f\"\\n\\n========\\nHealth test passed successfully\\n========\")\n\n    except Exception as e:\n        get_logger().exception(f\"\\n\\n========\\nHealth test failed\\n========\")\n        raise e\n\n\ndef run():\n    with request_cycle_context({}):\n        context['settings'] = copy.deepcopy(global_settings)\n        asyncio.run(run_async())\n\n\nif __name__ == '__main__':\n    run()\n"
  },
  {
    "path": "tests/unittest/test_add_docs_trigger.py",
    "content": "import pytest\n\nfrom pr_agent.agent.pr_agent import PRAgent\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.identity_providers import get_identity_provider\nfrom pr_agent.identity_providers.identity_provider import Eligibility\nfrom pr_agent.servers.github_app import handle_new_pr_opened\nfrom pr_agent.tools.pr_add_docs import PRAddDocs\n\n\n@pytest.mark.asyncio\n@pytest.mark.parametrize(\n    \"action,draft,state,should_run\",\n    [\n        (\"opened\", False, \"open\", True),\n        (\"edited\", False, \"open\", False),\n        (\"opened\", True, \"open\", False),\n        (\"opened\", False, \"closed\", False),\n    ],\n)\nasync def test_add_docs_trigger(monkeypatch, action, draft, state, should_run):\n    # Mock settings to enable the \"/add_docs\" auto-command on PR opened\n    settings = get_settings()\n    settings.github_app.pr_commands = [\"/add_docs\"]\n    settings.github_app.handle_pr_actions = [\"opened\"]\n\n    # Define a FakeGitProvider for both apply_repo_settings and PRAddDocs\n    class FakeGitProvider:\n        def __init__(self, pr_url, *args, **kwargs):\n            self.pr = type(\"pr\", (), {\"title\": \"Test PR\"})()\n            self.get_pr_branch = lambda: \"test-branch\"\n            self.get_pr_description = lambda: \"desc\"\n            self.get_languages = lambda: [\"Python\"]\n            self.get_files = lambda: []\n            self.get_commit_messages = lambda: \"msg\"\n            self.publish_comment = lambda *args, **kwargs: None\n            self.remove_initial_comment = lambda: None\n            self.publish_code_suggestions = lambda suggestions: True\n            self.diff_files = []\n            self.get_repo_settings = lambda: {}\n\n    # Patch Git provider lookups\n    monkeypatch.setattr(\n        \"pr_agent.git_providers.utils.get_git_provider_with_context\",\n        lambda pr_url: FakeGitProvider(pr_url),\n    )\n    monkeypatch.setattr(\n        
\"pr_agent.tools.pr_add_docs.get_git_provider\",\n        lambda: FakeGitProvider,\n    )\n\n    # Ensure identity provider always eligible\n    monkeypatch.setattr(\n        get_identity_provider().__class__,\n        \"verify_eligibility\",\n        lambda *args, **kwargs: Eligibility.ELIGIBLE,\n    )\n\n    # Spy on PRAddDocs.run()\n    ran = {\"flag\": False}\n\n    async def fake_run(self):\n        ran[\"flag\"] = True\n\n    monkeypatch.setattr(PRAddDocs, \"run\", fake_run)\n\n    # Build minimal PR payload\n    body = {\n        \"action\": action,\n        \"pull_request\": {\n            \"url\": \"https://example.com/fake/pr\",\n            \"state\": state,\n            \"draft\": draft,\n        },\n    }\n    log_context = {}\n\n    # Invoke the PR-open handler\n    agent = PRAgent()\n    await handle_new_pr_opened(\n        body=body,\n        event=\"pull_request\",\n        sender=\"tester\",\n        sender_id=\"123\",\n        action=action,\n        log_context=log_context,\n        agent=agent,\n    )\n\n    assert ran[\"flag\"] is should_run, (\n        f\"Expected run() to be {'called' if should_run else 'skipped'}\"\n        f\" for action={action!r}, draft={draft}, state={state!r}\"\n    )\n"
  },
  {
    "path": "tests/unittest/test_aws_secrets_manager_provider.py",
    "content": "import json\nfrom unittest.mock import MagicMock, patch\n\nimport pytest\nfrom botocore.exceptions import ClientError\n\nfrom pr_agent.secret_providers.aws_secrets_manager_provider import AWSSecretsManagerProvider\n\n\nclass TestAWSSecretsManagerProvider:\n\n    def _provider(self):\n        \"\"\"Create provider following existing pattern\"\"\"\n        with patch('pr_agent.secret_providers.aws_secrets_manager_provider.get_settings') as mock_get_settings, \\\n             patch('pr_agent.secret_providers.aws_secrets_manager_provider.boto3.client') as mock_boto3_client:\n\n            settings = MagicMock()\n            settings.get.side_effect = lambda k, d=None: {\n                'aws_secrets_manager.secret_arn': 'arn:aws:secretsmanager:us-east-1:123456789012:secret:test-secret',\n                'aws_secrets_manager.region_name': 'us-east-1',\n                'aws.AWS_REGION_NAME': 'us-east-1'\n            }.get(k, d)\n            settings.aws_secrets_manager.secret_arn = 'arn:aws:secretsmanager:us-east-1:123456789012:secret:test-secret'\n            mock_get_settings.return_value = settings\n\n            # Mock boto3 client\n            mock_client = MagicMock()\n            mock_boto3_client.return_value = mock_client\n\n            provider = AWSSecretsManagerProvider()\n            provider.client = mock_client  # Set client directly for testing\n            return provider, mock_client\n\n    # Positive test cases\n    def test_get_secret_success(self):\n        provider, mock_client = self._provider()\n        mock_client.get_secret_value.return_value = {'SecretString': 'test-secret-value'}\n\n        result = provider.get_secret('test-secret-name')\n        assert result == 'test-secret-value'\n        mock_client.get_secret_value.assert_called_once_with(SecretId='test-secret-name')\n\n    def test_get_all_secrets_success(self):\n        provider, mock_client = self._provider()\n        secret_data = {'openai.key': 'sk-test', 
'github.webhook_secret': 'webhook-secret'}\n        mock_client.get_secret_value.return_value = {'SecretString': json.dumps(secret_data)}\n\n        result = provider.get_all_secrets()\n        assert result == secret_data\n\n    # Negative test cases (following Google Cloud Storage pattern)\n    def test_get_secret_failure(self):\n        provider, mock_client = self._provider()\n        mock_client.get_secret_value.side_effect = Exception(\"AWS error\")\n\n        result = provider.get_secret('nonexistent-secret')\n        assert result == \"\"  # Confirm empty string is returned\n\n    def test_get_all_secrets_failure(self):\n        provider, mock_client = self._provider()\n        mock_client.get_secret_value.side_effect = Exception(\"AWS error\")\n\n        result = provider.get_all_secrets()\n        assert result == {}  # Confirm empty dictionary is returned\n\n    def test_store_secret_update_existing(self):\n        provider, mock_client = self._provider()\n        mock_client.update_secret.return_value = {}\n\n        provider.store_secret('test-secret', 'test-value')\n        mock_client.put_secret_value.assert_called_once_with(\n            SecretId='test-secret',\n            SecretString='test-value'\n        )\n\n    def test_init_failure_invalid_config(self):\n        with patch('pr_agent.secret_providers.aws_secrets_manager_provider.get_settings') as mock_get_settings:\n            settings = MagicMock()\n            settings.aws_secrets_manager.secret_arn = None  # Configuration error\n            mock_get_settings.return_value = settings\n\n            with pytest.raises(Exception):\n                AWSSecretsManagerProvider()\n\n    def test_store_secret_failure(self):\n        provider, mock_client = self._provider()\n        mock_client.put_secret_value.side_effect = Exception(\"AWS error\")\n\n        with pytest.raises(Exception):\n            provider.store_secret('test-secret', 'test-value') \n"
  },
  {
    "path": "tests/unittest/test_azure_devops_comment.py",
    "content": "import unittest\nfrom unittest.mock import MagicMock, patch\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.git_providers import AzureDevopsProvider\n\n\nclass TestAzureDevopsProviderPublishComment(unittest.TestCase):\n    @patch(\"pr_agent.git_providers.azuredevops_provider.get_settings\")\n    def test_publish_comment_default_closed(self, mock_get_settings):\n        # Simulate config with no default_comment_status\n        mock_settings = MagicMock()\n        mock_settings.azure_devops.get.return_value = \"closed\"\n        mock_settings.config.publish_output_progress = True\n        mock_get_settings.return_value = mock_settings\n\n        with patch.object(AzureDevopsProvider, \"_get_azure_devops_client\", return_value=(MagicMock(), MagicMock())):\n            provider = AzureDevopsProvider()\n            provider.workspace_slug = \"ws\"\n            provider.repo_slug = \"repo\"\n            provider.pr_num = 1\n\n            # Patch CommentThread and create_thread\n            with patch(\"pr_agent.git_providers.azuredevops_provider.CommentThread\") as MockThread:\n                provider.azure_devops_client.create_thread.return_value.comments = [MagicMock()]\n                provider.azure_devops_client.create_thread.return_value.comments[0].thread_id = 123\n                provider.azure_devops_client.create_thread.return_value.id = 123\n\n                provider.publish_comment(\"test comment\")\n                args, kwargs = MockThread.call_args\n                assert kwargs.get(\"status\") == \"closed\"\n\n    @patch(\"pr_agent.git_providers.azuredevops_provider.get_settings\")\n    def test_publish_comment_active(self, mock_get_settings):\n        # Simulate config with default_comment_status = \"active\"\n        mock_settings = MagicMock()\n        mock_settings.azure_devops.get.return_value = \"active\"\n        mock_settings.config.publish_output_progress = True\n        mock_get_settings.return_value = 
mock_settings\n\n        with patch.object(AzureDevopsProvider, \"_get_azure_devops_client\", return_value=(MagicMock(), MagicMock())):\n            provider = AzureDevopsProvider()\n            provider.workspace_slug = \"ws\"\n            provider.repo_slug = \"repo\"\n            provider.pr_num = 1\n\n            # Patch CommentThread and create_thread\n            with patch(\"pr_agent.git_providers.azuredevops_provider.CommentThread\") as MockThread:\n                provider.azure_devops_client.create_thread.return_value.comments = [MagicMock()]\n                provider.azure_devops_client.create_thread.return_value.comments[0].thread_id = 123\n                provider.azure_devops_client.create_thread.return_value.id = 123\n\n                provider.publish_comment(\"test comment\")\n                args, kwargs = MockThread.call_args\n                assert kwargs.get(\"status\") == \"active\"\n\n    def test_default_comment_status_from_config_file(self):\n        # Import get_settings directly to read from configuration.toml\n        status = get_settings().azure_devops.default_comment_status\n        # The expected value should match what's in your configuration.toml\n        self.assertEqual(status, \"closed\")        "
  },
  {
    "path": "tests/unittest/test_azure_devops_parsing.py",
    "content": "from pr_agent.git_providers import AzureDevopsProvider\n\n\nclass TestAzureDevOpsParsing:\n    def test_regular_address(self):\n        pr_url = \"https://dev.azure.com/organization/project/_git/repo/pullrequest/1\"\n\n        # workspace_slug, repo_slug, pr_number\n        assert AzureDevopsProvider._parse_pr_url(pr_url) == (\"project\", \"repo\", 1)\n\n    def test_visualstudio_address(self):\n        pr_url = \"https://organization.visualstudio.com/project/_git/repo/pullrequest/1\"\n\n        # workspace_slug, repo_slug, pr_number\n        assert AzureDevopsProvider._parse_pr_url(pr_url) == (\"project\", \"repo\", 1)\n        \n    def test_self_hosted_address(self):\n        pr_url = \"http://server.be:8080/tfs/department/project/_git/repo/pullrequest/1\"\n\n        # workspace_slug, repo_slug, pr_number\n        assert AzureDevopsProvider._parse_pr_url(pr_url) == (\"project\", \"repo\", 1)\n\n"
  },
  {
    "path": "tests/unittest/test_bitbucket_provider.py",
    "content": "from unittest.mock import MagicMock\n\nfrom atlassian.bitbucket import Bitbucket\n\nfrom pr_agent.algo.types import EDIT_TYPE, FilePatchInfo\nfrom pr_agent.git_providers import BitbucketServerProvider\nfrom pr_agent.git_providers.bitbucket_provider import BitbucketProvider\n\n\nclass TestBitbucketProvider:\n    def test_parse_pr_url(self):\n        url = \"https://bitbucket.org/WORKSPACE_XYZ/MY_TEST_REPO/pull-requests/321\"\n        workspace_slug, repo_slug, pr_number = BitbucketProvider._parse_pr_url(url)\n        assert workspace_slug == \"WORKSPACE_XYZ\"\n        assert repo_slug == \"MY_TEST_REPO\"\n        assert pr_number == 321\n\n\nclass TestBitbucketServerProvider:\n    def test_parse_pr_url(self):\n        url = \"https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1\"\n        workspace_slug, repo_slug, pr_number = BitbucketServerProvider._parse_pr_url(url)\n        assert workspace_slug == \"AAA\"\n        assert repo_slug == \"my-repo\"\n        assert pr_number == 1\n\n    def test_parse_pr_url_with_users(self):\n        url = \"https://bitbucket.company-server.url/users/username/repos/my-repo/pull-requests/1\"\n        workspace_slug, repo_slug, pr_number = BitbucketServerProvider._parse_pr_url(url)\n        assert workspace_slug == \"~username\"\n        assert repo_slug == \"my-repo\"\n        assert pr_number == 1\n\n    def mock_get_content_of_file(self, project_key, repository_slug, filename, at=None, markup=None):\n        content_map = {\n            '9c1cffdd9f276074bfb6fb3b70fbee62d298b058': 'file\\nwith\\nsome\\nlines\\nto\\nemulate\\na\\nreal\\nfile\\n',\n            '2a1165446bdf991caf114d01f7c88d84ae7399cf': 'file\\nwith\\nmultiple \\nlines\\nto\\nemulate\\na\\nfake\\nfile\\n',\n            'f617708826cdd0b40abb5245eda71630192a17e3': 'file\\nwith\\nmultiple \\nlines\\nto\\nemulate\\na\\nreal\\nfile\\n',\n            'cb68a3027d6dda065a7692ebf2c90bed1bcdec28': 
'file\\nwith\\nsome\\nchanges\\nto\\nemulate\\na\\nreal\\nfile\\n',\n            '1905dcf16c0aac6ac24f7ab617ad09c73dc1d23b': 'file\\nwith\\nsome\\nlines\\nto\\nemulate\\na\\nfake\\ntest\\n',\n            'ae4eca7f222c96d396927d48ab7538e2ee13ca63': 'readme\\nwithout\\nsome\\nlines\\nto\\nsimulate\\na\\nreal\\nfile',\n            '548f8ba15abc30875a082156314426806c3f4d97': 'file\\nwith\\nsome\\nlines\\nto\\nemulate\\na\\nreal\\nfile',\n            '0e898cb355a5170d8c8771b25d43fcaa1d2d9489': 'file\\nwith\\nmultiple\\nlines\\nto\\nemulate\\na\\nreal\\nfile'\n        }\n        return content_map.get(at, '')\n\n    def mock_get_from_bitbucket_60(self, url):\n        response_map = {\n            \"rest/api/1.0/application-properties\": {\n                \"version\": \"6.0\"\n            }\n        }\n        return response_map.get(url, '')\n\n    def mock_get_from_bitbucket_70(self, url):\n        response_map = {\n            \"rest/api/1.0/application-properties\": {\n                \"version\": \"7.0\"\n            }\n        }\n        return response_map.get(url, '')\n\n    def mock_get_from_bitbucket_816(self, url):\n        response_map = {\n            \"rest/api/1.0/application-properties\": {\n                \"version\": \"8.16\"\n            },\n            \"rest/api/latest/projects/AAA/repos/my-repo/pull-requests/1/merge-base\": {\n                'id': '548f8ba15abc30875a082156314426806c3f4d97'\n            }\n        }\n        return response_map.get(url, '')\n\n\n    '''\n    tests the 2-way diff functionality where the diff should be between the HEAD of branch b and node c\n    NOT between the HEAD of main and the HEAD of branch b\n\n          - o  branch b\n         /\n    o - o - o  main\n        ^ node c\n    '''\n    def test_get_diff_files_simple_diverge_70(self):\n        bitbucket_client = MagicMock(Bitbucket)\n        bitbucket_client.get_pull_request.return_value = {\n            'toRef': {'latestCommit': 
'9c1cffdd9f276074bfb6fb3b70fbee62d298b058'},\n            'fromRef': {'latestCommit': '2a1165446bdf991caf114d01f7c88d84ae7399cf'}\n        }\n        bitbucket_client.get_pull_requests_commits.return_value = [\n            {'id': '2a1165446bdf991caf114d01f7c88d84ae7399cf',\n             'parents': [{'id': 'f617708826cdd0b40abb5245eda71630192a17e3'}]}\n        ]\n        bitbucket_client.get_commits.return_value = [\n            {'id': '9c1cffdd9f276074bfb6fb3b70fbee62d298b058'},\n            {'id': 'dbca09554567d2e4bee7f07993390153280ee450'}\n        ]\n        bitbucket_client.get_pull_requests_changes.return_value = [\n            {\n                'path': {'toString': 'Readme.md'},\n                'type': 'MODIFY',\n            }\n        ]\n\n        bitbucket_client.get.side_effect = self.mock_get_from_bitbucket_70\n        bitbucket_client.get_content_of_file.side_effect = self.mock_get_content_of_file\n\n        provider = BitbucketServerProvider(\n            \"https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1\",\n            bitbucket_client=bitbucket_client\n        )\n\n        expected = [\n            FilePatchInfo(\n                'file\\nwith\\nmultiple \\nlines\\nto\\nemulate\\na\\nreal\\nfile\\n',\n                'file\\nwith\\nmultiple \\nlines\\nto\\nemulate\\na\\nfake\\nfile\\n',\n                '--- \\n+++ \\n@@ -5,5 +5,5 @@\\n to\\n emulate\\n a\\n-real\\n+fake\\n file\\n',\n                'Readme.md',\n                edit_type=EDIT_TYPE.MODIFIED,\n            )\n        ]\n\n        actual = provider.get_diff_files()\n\n        assert actual == expected\n\n\n    '''\n    tests the 2-way diff functionality where the diff should be between the HEAD of branch b and node c\n    NOT between the HEAD of main and the HEAD of branch b\n\n          - o - o - o  branch b\n         /     /\n    o - o -- o - o     main\n             ^ node c\n    '''\n    def test_get_diff_files_diverge_with_merge_commit_70(self):\n        
bitbucket_client = MagicMock(Bitbucket)\n        bitbucket_client.get_pull_request.return_value = {\n            'toRef': {'latestCommit': 'cb68a3027d6dda065a7692ebf2c90bed1bcdec28'},\n            'fromRef': {'latestCommit': '1905dcf16c0aac6ac24f7ab617ad09c73dc1d23b'}\n        }\n        bitbucket_client.get_pull_requests_commits.return_value = [\n            {'id': '1905dcf16c0aac6ac24f7ab617ad09c73dc1d23b',\n             'parents': [{'id': '692772f456c3db77a90b11ce39ea516f8c2bad93'}]},\n            {'id': '692772f456c3db77a90b11ce39ea516f8c2bad93', 'parents': [\n                {'id': '2a1165446bdf991caf114d01f7c88d84ae7399cf'},\n                {'id': '9c1cffdd9f276074bfb6fb3b70fbee62d298b058'},\n            ]},\n            {'id': '2a1165446bdf991caf114d01f7c88d84ae7399cf',\n             'parents': [{'id': 'f617708826cdd0b40abb5245eda71630192a17e3'}]}\n        ]\n        bitbucket_client.get_commits.return_value = [\n            {'id': 'cb68a3027d6dda065a7692ebf2c90bed1bcdec28'},\n            {'id': '9c1cffdd9f276074bfb6fb3b70fbee62d298b058'},\n            {'id': 'dbca09554567d2e4bee7f07993390153280ee450'}\n        ]\n        bitbucket_client.get_pull_requests_changes.return_value = [\n            {\n                'path': {'toString': 'Readme.md'},\n                'type': 'MODIFY',\n            }\n        ]\n\n        bitbucket_client.get.side_effect = self.mock_get_from_bitbucket_70\n        bitbucket_client.get_content_of_file.side_effect = self.mock_get_content_of_file\n\n        provider = BitbucketServerProvider(\n            \"https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1\",\n            bitbucket_client=bitbucket_client\n        )\n\n        expected = [\n            FilePatchInfo(\n                'file\\nwith\\nsome\\nlines\\nto\\nemulate\\na\\nreal\\nfile\\n',\n                'file\\nwith\\nsome\\nlines\\nto\\nemulate\\na\\nfake\\ntest\\n',\n                '--- \\n+++ \\n@@ -5,5 +5,5 @@\\n to\\n emulate\\n 
a\\n-real\\n-file\\n+fake\\n+test\\n',\n                'Readme.md',\n                edit_type=EDIT_TYPE.MODIFIED,\n            )\n        ]\n\n        actual = provider.get_diff_files()\n\n        assert actual == expected\n\n\n    '''\n    tests the 2-way diff functionality where the diff should be between the HEAD of branch c and node d\n    NOT between the HEAD of main and the HEAD of branch c\n\n            ---- o - o branch c\n           /    /\n          ---- o       branch b\n         /    /\n        o - o - o      main\n            ^ node d\n    '''\n    def get_multi_merge_diverge_mock_client(self, api_version):\n        bitbucket_client = MagicMock(Bitbucket)\n        bitbucket_client.get_pull_request.return_value = {\n            'toRef': {'latestCommit': '9569922b22fe4fd0968be6a50ed99f71efcd0504'},\n            'fromRef': {'latestCommit': 'ae4eca7f222c96d396927d48ab7538e2ee13ca63'}\n        }\n        bitbucket_client.get_pull_requests_commits.return_value = [\n            {'id': 'ae4eca7f222c96d396927d48ab7538e2ee13ca63',\n             'parents': [{'id': 'bbf300fb3af5129af8c44659f8cc7a526a6a6f31'}]},\n            {'id': 'bbf300fb3af5129af8c44659f8cc7a526a6a6f31', 'parents': [\n                {'id': '10b7b8e41cb370b48ceda8da4e7e6ad033182213'},\n                {'id': 'd1bb183c706a3ebe4c2b1158c25878201a27ad8c'},\n            ]},\n            {'id': 'd1bb183c706a3ebe4c2b1158c25878201a27ad8c', 'parents': [\n                {'id': '5bd76251866cb415fc5ff232f63a581e89223bda'},\n                {'id': '548f8ba15abc30875a082156314426806c3f4d97'}\n            ]},\n            {'id': '5bd76251866cb415fc5ff232f63a581e89223bda',\n             'parents': [{'id': '0e898cb355a5170d8c8771b25d43fcaa1d2d9489'}]},\n            {'id': '10b7b8e41cb370b48ceda8da4e7e6ad033182213',\n             'parents': [{'id': '0e898cb355a5170d8c8771b25d43fcaa1d2d9489'}]}\n        ]\n        bitbucket_client.get_commits.return_value = [\n            {'id': 
'9569922b22fe4fd0968be6a50ed99f71efcd0504'},\n            {'id': '548f8ba15abc30875a082156314426806c3f4d97'}\n        ]\n        bitbucket_client.get_pull_requests_changes.return_value = [\n            {\n                'path': {'toString': 'Readme.md'},\n                'type': 'MODIFY',\n            }\n        ]\n\n        bitbucket_client.get_content_of_file.side_effect = self.mock_get_content_of_file\n        if api_version == 60:\n            bitbucket_client.get.side_effect = self.mock_get_from_bitbucket_60\n        elif api_version == 70:\n            bitbucket_client.get.side_effect = self.mock_get_from_bitbucket_70\n        elif api_version == 816:\n            bitbucket_client.get.side_effect = self.mock_get_from_bitbucket_816\n\n        return bitbucket_client\n\n    def test_get_diff_files_multi_merge_diverge_60(self):\n        bitbucket_client = self.get_multi_merge_diverge_mock_client(60)\n\n        provider = BitbucketServerProvider(\n            \"https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1\",\n            bitbucket_client=bitbucket_client\n        )\n\n        expected = [\n            FilePatchInfo(\n                'file\\nwith\\nmultiple\\nlines\\nto\\nemulate\\na\\nreal\\nfile',\n                'readme\\nwithout\\nsome\\nlines\\nto\\nsimulate\\na\\nreal\\nfile',\n                '--- \\n+++ \\n@@ -1,9 +1,9 @@\\n-file\\n-with\\n-multiple\\n+readme\\n+without\\n+some\\n lines\\n to\\n-emulate\\n+simulate\\n a\\n real\\n file\\n',\n                'Readme.md',\n                edit_type=EDIT_TYPE.MODIFIED,\n            )\n        ]\n\n        actual = provider.get_diff_files()\n\n        assert actual == expected\n\n    def test_get_diff_files_multi_merge_diverge_70(self):\n        bitbucket_client = self.get_multi_merge_diverge_mock_client(70)\n\n        provider = BitbucketServerProvider(\n            \"https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1\",\n            
bitbucket_client=bitbucket_client\n        )\n\n        expected = [\n            FilePatchInfo(\n                'file\\nwith\\nsome\\nlines\\nto\\nemulate\\na\\nreal\\nfile',\n                'readme\\nwithout\\nsome\\nlines\\nto\\nsimulate\\na\\nreal\\nfile',\n                '--- \\n+++ \\n@@ -1,9 +1,9 @@\\n-file\\n-with\\n+readme\\n+without\\n some\\n lines\\n to\\n-emulate\\n+simulate\\n a\\n real\\n file\\n',\n                'Readme.md',\n                edit_type=EDIT_TYPE.MODIFIED,\n            )\n        ]\n\n        actual = provider.get_diff_files()\n\n        assert actual == expected\n\n    def test_get_diff_files_multi_merge_diverge_816(self):\n        bitbucket_client = self.get_multi_merge_diverge_mock_client(816)\n\n        provider = BitbucketServerProvider(\n            \"https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1\",\n            bitbucket_client=bitbucket_client\n        )\n\n        expected = [\n            FilePatchInfo(\n                'file\\nwith\\nsome\\nlines\\nto\\nemulate\\na\\nreal\\nfile',\n                'readme\\nwithout\\nsome\\nlines\\nto\\nsimulate\\na\\nreal\\nfile',\n                '--- \\n+++ \\n@@ -1,9 +1,9 @@\\n-file\\n-with\\n+readme\\n+without\\n some\\n lines\\n to\\n-emulate\\n+simulate\\n a\\n real\\n file\\n',\n                'Readme.md',\n                edit_type=EDIT_TYPE.MODIFIED,\n            )\n        ]\n\n        actual = provider.get_diff_files()\n\n        assert actual == expected\n"
  },
  {
    "path": "tests/unittest/test_clip_tokens.py",
    "content": "from unittest.mock import MagicMock, patch\n\nimport pytest\n\nfrom pr_agent.algo.token_handler import TokenEncoder\nfrom pr_agent.algo.utils import clip_tokens\n\n\nclass TestClipTokens:\n    \"\"\"Comprehensive test suite for the clip_tokens function.\"\"\"\n\n    def test_empty_input_text(self):\n        \"\"\"Test that empty input returns empty string.\"\"\"\n        assert clip_tokens(\"\", 10) == \"\"\n        assert clip_tokens(None, 10) is None\n\n    def test_text_under_token_limit(self):\n        \"\"\"Test that text under the token limit is returned unchanged.\"\"\"\n        text = \"Short text\"\n        max_tokens = 100\n        result = clip_tokens(text, max_tokens)\n        assert result == text\n\n    def test_text_exactly_at_token_limit(self):\n        \"\"\"Test text that is exactly at the token limit.\"\"\"\n        text = \"This is exactly at the limit\"\n        # Mock the token encoder to return exact limit\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 10  # Exactly 10 tokens\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, 10)\n            assert result == text\n\n    def test_text_over_token_limit_with_three_dots(self):\n        \"\"\"Test text over token limit with three dots addition.\"\"\"\n        text = \"This is a longer text that should be clipped when it exceeds the token limit\"\n        max_tokens = 5\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 20  # 20 tokens\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, max_tokens)\n            assert result.endswith(\"\\n...(truncated)\")\n            assert len(result) < len(text)\n\n    def 
test_text_over_token_limit_without_three_dots(self):\n        \"\"\"Test text over token limit without three dots addition.\"\"\"\n        text = \"This is a longer text that should be clipped\"\n        max_tokens = 5\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 20  # 20 tokens\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, max_tokens, add_three_dots=False)\n            assert not result.endswith(\"\\n...(truncated)\")\n            assert len(result) < len(text)\n\n    def test_negative_max_tokens(self):\n        \"\"\"Test that negative max_tokens returns empty string.\"\"\"\n        text = \"Some text\"\n        result = clip_tokens(text, -1)\n        assert result == \"\"\n\n        result = clip_tokens(text, -100)\n        assert result == \"\"\n\n    def test_zero_max_tokens(self):\n        \"\"\"Test that zero max_tokens returns empty string.\"\"\"\n        text = \"Some text\"\n        result = clip_tokens(text, 0)\n        assert result == \"\"\n\n    def test_delete_last_line_functionality(self):\n        \"\"\"Test the delete_last_line parameter functionality.\"\"\"\n        text = \"Line 1\\nLine 2\\nLine 3\\nLine 4\"\n        max_tokens = 5\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 20  # 20 tokens\n            mock_encoder.return_value = mock_tokenizer\n\n            # Without delete_last_line\n            result_normal = clip_tokens(text, max_tokens, delete_last_line=False)\n\n            # With delete_last_line\n            result_deleted = clip_tokens(text, max_tokens, delete_last_line=True)\n\n            # The result with delete_last_line should be shorter or equal\n            assert len(result_deleted) <= len(result_normal)\n\n   
 def test_pre_computed_num_input_tokens(self):\n        \"\"\"Test using pre-computed num_input_tokens parameter.\"\"\"\n        text = \"This is a test text\"\n        max_tokens = 10\n        num_input_tokens = 15\n\n        # Should not call the encoder when num_input_tokens is provided\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_encoder.return_value = None  # Should not be called\n\n            result = clip_tokens(text, max_tokens, num_input_tokens=num_input_tokens)\n            assert result.endswith(\"\\n...(truncated)\")\n            mock_encoder.assert_not_called()\n\n    def test_pre_computed_tokens_under_limit(self):\n        \"\"\"Test pre-computed tokens under the limit.\"\"\"\n        text = \"Short text\"\n        max_tokens = 20\n        num_input_tokens = 5\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_encoder.return_value = None  # Should not be called\n\n            result = clip_tokens(text, max_tokens, num_input_tokens=num_input_tokens)\n            assert result == text\n            mock_encoder.assert_not_called()\n\n    def test_special_characters_and_unicode(self):\n        \"\"\"Test text with special characters and Unicode content.\"\"\"\n        text = \"Special chars: @#$%^&*()_+ áéíóú 中문 🚀 emoji\"\n        max_tokens = 5\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 20  # 20 tokens\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, max_tokens)\n            assert isinstance(result, str)\n            assert len(result) < len(text)\n\n    def test_multiline_text_handling(self):\n        \"\"\"Test handling of multiline text.\"\"\"\n        text = \"Line 1\\nLine 2\\nLine 3\\nLine 4\\nLine 5\"\n        max_tokens = 5\n\n        with 
patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 20  # 20 tokens\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, max_tokens)\n            assert isinstance(result, str)\n\n    def test_very_long_text(self):\n        \"\"\"Test with very long text.\"\"\"\n        text = \"A\" * 10000  # Very long text\n        max_tokens = 10\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 5000  # Many tokens\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, max_tokens)\n            assert len(result) < len(text)\n            assert result.endswith(\"\\n...(truncated)\")\n\n    def test_encoder_exception_handling(self):\n        \"\"\"Test handling of encoder exceptions.\"\"\"\n        text = \"Test text\"\n        max_tokens = 10\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_encoder.side_effect = Exception(\"Encoder error\")\n\n            # Should return original text when encoder fails\n            result = clip_tokens(text, max_tokens)\n            assert result == text\n\n    def test_zero_division_scenario(self):\n        \"\"\"Test scenario that could lead to division by zero.\"\"\"\n        text = \"Test\"\n        max_tokens = 10\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = []  # Empty tokens (could cause division by zero)\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, max_tokens)\n            # Should handle gracefully and return original text\n            assert result == text\n\n    def 
test_various_edge_cases(self):\n        \"\"\"Test various edge cases.\"\"\"\n        # Single character\n        assert clip_tokens(\"A\", 1000) == \"A\"\n\n        # Only whitespace\n        text = \"   \\n  \\t  \"\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 10\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, 5)\n            assert isinstance(result, str)\n\n        # Text with only newlines\n        text = \"\\n\\n\\n\\n\"\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 10\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, 2, delete_last_line=True)\n            assert isinstance(result, str)\n\n    def test_parameter_combinations(self):\n        \"\"\"Test different parameter combinations.\"\"\"\n        text = \"Multi\\nline\\ntext\\nfor\\ntesting\"\n        max_tokens = 5\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 20\n            mock_encoder.return_value = mock_tokenizer\n\n            # Test all combinations\n            combinations = [\n                (True, True),   # add_three_dots=True, delete_last_line=True\n                (True, False),  # add_three_dots=True, delete_last_line=False\n                (False, True),  # add_three_dots=False, delete_last_line=True\n                (False, False), # add_three_dots=False, delete_last_line=False\n            ]\n\n            for add_dots, delete_line in combinations:\n                result = clip_tokens(text, max_tokens,\n                                     add_three_dots=add_dots,\n                                     
delete_last_line=delete_line)\n                assert isinstance(result, str)\n                if add_dots and len(result) > 0:\n                    assert result.endswith(\"\\n...(truncated)\") or result == text\n\n    def test_num_output_chars_zero_scenario(self):\n        \"\"\"Test scenario where num_output_chars becomes zero or negative.\"\"\"\n        text = \"Short\"\n        max_tokens = 1\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 1000  # Many tokens for short text\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, max_tokens)\n            # When num_output_chars is 0 or negative, should return empty string\n            assert result == \"\"\n\n    def test_logging_on_exception(self):\n        \"\"\"Test that exceptions are properly logged.\"\"\"\n        text = \"Test text\"\n        max_tokens = 10\n\n        # Patch the logger at the module level where it's imported\n        with patch('pr_agent.algo.utils.get_logger') as mock_logger:\n            mock_log_instance = MagicMock()\n            mock_logger.return_value = mock_log_instance\n\n            with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n                mock_encoder.side_effect = Exception(\"Test exception\")\n\n                result = clip_tokens(text, max_tokens)\n\n                # Should log the warning\n                mock_log_instance.warning.assert_called_once()\n                # Should return original text\n                assert result == text\n\n    def test_factor_safety_calculation(self):\n        \"\"\"Test that the 0.9 factor (10% reduction) works correctly.\"\"\"\n        text = \"Test text that should be reduced by 10 percent for safety\"\n        max_tokens = 10\n\n        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:\n            mock_tokenizer = 
MagicMock()\n            mock_tokenizer.encode.return_value = [1] * 20  # 20 tokens\n            mock_encoder.return_value = mock_tokenizer\n\n            result = clip_tokens(text, max_tokens)\n\n            # The result should be shorter due to the 0.9 factor\n            # Characters per token = len(text) / 20\n            # Expected chars = int(0.9 * (len(text) / 20) * 10)\n            expected_chars = int(0.9 * (len(text) / 20) * 10)\n\n            # Result should be around expected_chars length (plus truncation text)\n            if result.endswith(\"\\n...(truncated)\"):\n                actual_content = result[:-len(\"\\n...(truncated)\")]\n                assert len(actual_content) <= expected_chars + 5  # Some tolerance\n\n    # Test the original basic functionality to ensure backward compatibility\n    def test_clip_original_functionality(self):\n        \"\"\"Test original functionality from the existing test.\"\"\"\n        text = \"line1\\nline2\\nline3\\nline4\\nline5\\nline6\"\n        max_tokens = 25\n        result = clip_tokens(text, max_tokens)\n        assert result == text\n\n        max_tokens = 10\n        result = clip_tokens(text, max_tokens)\n        expected_results = 'line1\\nline2\\nline3\\n\\n...(truncated)'\n        assert result == expected_results"
  },
  {
    "path": "tests/unittest/test_codecommit_client.py",
    "content": "from unittest.mock import MagicMock\n\nfrom pr_agent.git_providers.codecommit_client import CodeCommitClient\n\n\nclass TestCodeCommitProvider:\n    def test_get_differences(self):\n        # Create a mock CodeCommitClient instance and codecommit_client member\n        api = CodeCommitClient()\n        api.boto_client = MagicMock()\n\n        # Mock the response from the AWS client for get_differences method\n        api.boto_client.get_paginator.return_value.paginate.return_value = [\n            {\n                \"differences\": [\n                    {\n                        \"beforeBlob\": {\n                            \"path\": \"file1.py\",\n                            \"blobId\": \"291b15c3ab4219e43a5f4f9091e5a97ee9d7400b\",\n                        },\n                        \"afterBlob\": {\n                            \"path\": \"file1.py\",\n                            \"blobId\": \"46ad86582da03cc34c804c24b17976571bca1eba\",\n                        },\n                        \"changeType\": \"M\",\n                    },\n                    {\n                        \"beforeBlob\": {\"path\": \"\", \"blobId\": \"\"},\n                        \"afterBlob\": {\n                            \"path\": \"file2.py\",\n                            \"blobId\": \"2404c7874fcbd684d6779c1420072f088647fd79\",\n                        },\n                        \"changeType\": \"A\",\n                    },\n                    {\n                        \"beforeBlob\": {\n                            \"path\": \"file3.py\",\n                            \"blobId\": \"9af7989045ce40e9478ebb8089dfbadac19a9cde\",\n                        },\n                        \"afterBlob\": {\"path\": \"\", \"blobId\": \"\"},\n                        \"changeType\": \"D\",\n                    },\n                    {\n                        \"beforeBlob\": {\n                            \"path\": \"file5.py\",\n                            \"blobId\": 
\"738e36eec120ef9d6393a149252698f49156d5b4\",\n                        },\n                        \"afterBlob\": {\n                            \"path\": \"file6.py\",\n                            \"blobId\": \"faecdb85f7ba199df927a783b261378a1baeca85\",\n                        },\n                        \"changeType\": \"R\",\n                    },\n                ]\n            }\n        ]\n\n        diffs = api.get_differences(\"my_test_repo\", \"commit1\", \"commit2\")\n\n        assert len(diffs) == 4\n        assert diffs[0].before_blob_path == \"file1.py\"\n        assert diffs[0].before_blob_id == \"291b15c3ab4219e43a5f4f9091e5a97ee9d7400b\"\n        assert diffs[0].after_blob_path == \"file1.py\"\n        assert diffs[0].after_blob_id == \"46ad86582da03cc34c804c24b17976571bca1eba\"\n        assert diffs[0].change_type == \"M\"\n        assert diffs[1].before_blob_path == \"\"\n        assert diffs[1].before_blob_id == \"\"\n        assert diffs[1].after_blob_path == \"file2.py\"\n        assert diffs[1].after_blob_id == \"2404c7874fcbd684d6779c1420072f088647fd79\"\n        assert diffs[1].change_type == \"A\"\n        assert diffs[2].before_blob_path == \"file3.py\"\n        assert diffs[2].before_blob_id == \"9af7989045ce40e9478ebb8089dfbadac19a9cde\"\n        assert diffs[2].after_blob_path == \"\"\n        assert diffs[2].after_blob_id == \"\"\n        assert diffs[2].change_type == \"D\"\n        assert diffs[3].before_blob_path == \"file5.py\"\n        assert diffs[3].before_blob_id == \"738e36eec120ef9d6393a149252698f49156d5b4\"\n        assert diffs[3].after_blob_path == \"file6.py\"\n        assert diffs[3].after_blob_id == \"faecdb85f7ba199df927a783b261378a1baeca85\"\n        assert diffs[3].change_type == \"R\"\n\n    def test_get_file(self):\n        # Create a mock CodeCommitClient instance and codecommit_client member\n        api = CodeCommitClient()\n        api.boto_client = MagicMock()\n\n        # Mock the response from the AWS 
client for get_pull_request method\n        # def get_file(self, repo_name: str, file_path: str, sha_hash: str):\n        api.boto_client.get_file.return_value = {\n            \"commitId\": \"6335d6d4496e8d50af559560997604bb03abc122\",\n            \"blobId\": \"c172209495d7968a8fdad76469564fb708460bc1\",\n            \"filePath\": \"requirements.txt\",\n            \"fileSize\": 65,\n            \"fileContent\": b\"boto3==1.28.25\\ndynaconf==3.1.12\\nfastapi==0.99.0\\nPyGithub==1.59.*\\n\",\n        }\n\n        repo_name = \"my_test_repo\"\n        file_path = \"requirements.txt\"\n        sha_hash = \"84114a356ece1e5b7637213c8e486fea7c254656\"\n        content = api.get_file(repo_name, file_path, sha_hash)\n\n        assert len(content) == 65\n        assert content == b\"boto3==1.28.25\\ndynaconf==3.1.12\\nfastapi==0.99.0\\nPyGithub==1.59.*\\n\"\n        assert content.decode(\"utf-8\") == \"boto3==1.28.25\\ndynaconf==3.1.12\\nfastapi==0.99.0\\nPyGithub==1.59.*\\n\"\n\n    def test_get_pr(self):\n        # Create a mock CodeCommitClient instance and codecommit_client member\n        api = CodeCommitClient()\n        api.boto_client = MagicMock()\n\n        # Mock the response from the AWS client for get_pull_request method\n        api.boto_client.get_pull_request.return_value = {\n            \"pullRequest\": {\n                \"pullRequestId\": \"321\",\n                \"title\": \"My PR\",\n                \"description\": \"My PR description\",\n                \"pullRequestTargets\": [\n                    {\n                        \"sourceCommit\": \"commit1\",\n                        \"sourceReference\": \"branch1\",\n                        \"destinationCommit\": \"commit2\",\n                        \"destinationReference\": \"branch2\",\n                        \"repositoryName\": \"my_test_repo\",\n                    }\n                ],\n            }\n        }\n\n        pr = api.get_pr(\"my_test_repo\", 321)\n\n        assert pr.title == 
\"My PR\"\n        assert pr.description == \"My PR description\"\n        assert len(pr.targets) == 1\n        assert pr.targets[0].source_commit == \"commit1\"\n        assert pr.targets[0].source_branch == \"branch1\"\n        assert pr.targets[0].destination_commit == \"commit2\"\n        assert pr.targets[0].destination_branch == \"branch2\"\n"
  },
  {
    "path": "tests/unittest/test_codecommit_provider.py",
    "content": "from unittest.mock import patch\n\nimport pytest\n\nfrom pr_agent.algo.types import EDIT_TYPE, FilePatchInfo\nfrom pr_agent.git_providers.codecommit_provider import CodeCommitFile, CodeCommitProvider, PullRequestCCMimic\n\n\nclass TestCodeCommitFile:\n    # Test that a CodeCommitFile object is created successfully with valid parameters.\n    # Generated by CodiumAI\n    def test_valid_parameters(self):\n        a_path = \"path/to/file_a\"\n        a_blob_id = \"12345\"\n        b_path = \"path/to/file_b\"\n        b_blob_id = \"67890\"\n        edit_type = EDIT_TYPE.ADDED\n\n        file = CodeCommitFile(a_path, a_blob_id, b_path, b_blob_id, edit_type)\n\n        assert file.a_path == a_path\n        assert file.a_blob_id == a_blob_id\n        assert file.b_path == b_path\n        assert file.b_blob_id == b_blob_id\n        assert file.edit_type == edit_type\n        assert file.filename == b_path\n\n\nclass TestCodeCommitProvider:\n    def test_get_title(self):\n        # Test that the get_title() function returns the PR title\n        with patch.object(CodeCommitProvider, \"__init__\", lambda x, y: None):\n            provider = CodeCommitProvider(None)\n            provider.pr = PullRequestCCMimic(\"My Test PR Title\", [])\n            assert provider.get_title() == \"My Test PR Title\"\n\n    def test_get_pr_id(self):\n        # Test that the get_pr_id() function returns the correct ID\n        with patch.object(CodeCommitProvider, \"__init__\", lambda x, y: None):\n            provider = CodeCommitProvider(None)\n            provider.repo_name = \"my_test_repo\"\n            provider.pr_num = 321\n            assert provider.get_pr_id() == \"my_test_repo/321\"\n\n    def test_parse_pr_url(self):\n        # Test that the _parse_pr_url() function can extract the repo name and PR number from a CodeCommit URL\n        url = \"https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/my_test_repo/pull-requests/321\"\n        
repo_name, pr_number = CodeCommitProvider._parse_pr_url(url)\n        assert repo_name == \"my_test_repo\"\n        assert pr_number == 321\n\n    def test_is_valid_codecommit_hostname(self):\n        # Test the various AWS regions\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"af-south-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ap-east-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ap-northeast-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ap-northeast-2.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ap-northeast-3.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ap-south-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ap-south-2.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ap-southeast-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ap-southeast-2.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ap-southeast-3.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ap-southeast-4.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"ca-central-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"eu-central-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"eu-central-2.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"eu-north-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"eu-south-1.console.aws.amazon.com\")\n        assert 
CodeCommitProvider._is_valid_codecommit_hostname(\"eu-south-2.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"eu-west-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"eu-west-2.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"eu-west-3.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"il-central-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"me-central-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"me-south-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"sa-east-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"us-east-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"us-east-2.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"us-gov-east-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"us-gov-west-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"us-west-1.console.aws.amazon.com\")\n        assert CodeCommitProvider._is_valid_codecommit_hostname(\"us-west-2.console.aws.amazon.com\")\n        # Test non-AWS regions\n        assert not CodeCommitProvider._is_valid_codecommit_hostname(\"no-such-region.console.aws.amazon.com\")\n        assert not CodeCommitProvider._is_valid_codecommit_hostname(\"console.aws.amazon.com\")\n\n    # Test that an error is raised when an invalid CodeCommit URL is provided to the set_pr() method of the CodeCommitProvider class.\n    # Generated by CodiumAI\n    def test_invalid_codecommit_url(self):\n        provider = CodeCommitProvider()\n        with pytest.raises(ValueError):\n            
provider.set_pr(\"https://example.com/codecommit/repositories/my_test_repo/pull-requests/4321\")\n\n    def test_get_file_extensions(self):\n        filenames = [\n            \"app.py\",\n            \"cli.py\",\n            \"composer.json\",\n            \"composer.lock\",\n            \"hello.py\",\n            \"image1.jpg\",\n            \"image2.JPG\",\n            \"index.js\",\n            \"provider.py\",\n            \"README\",\n            \"test.py\",\n        ]\n        expected_extensions = [\n            \".py\",\n            \".py\",\n            \".json\",\n            \".lock\",\n            \".py\",\n            \".jpg\",\n            \".jpg\",\n            \".js\",\n            \".py\",\n            \"\",\n            \".py\",\n        ]\n        extensions = CodeCommitProvider._get_file_extensions(filenames)\n        assert extensions == expected_extensions\n\n    def test_get_language_percentages(self):\n        extensions = [\n            \".py\",\n            \".py\",\n            \".json\",\n            \".lock\",\n            \".py\",\n            \".jpg\",\n            \".jpg\",\n            \".js\",\n            \".py\",\n            \"\",\n            \".py\",\n        ]\n        percentages = CodeCommitProvider._get_language_percentages(extensions)\n        assert percentages[\".py\"] == 45\n        assert percentages[\".json\"] == 9\n        assert percentages[\".lock\"] == 9\n        assert percentages[\".jpg\"] == 18\n        assert percentages[\".js\"] == 9\n        assert percentages[\"\"] == 9\n\n        # The _get_file_extensions function needs the \".\" prefix on the extension,\n        # but the _get_language_percentages function will work with or without the \".\" prefix\n        extensions = [\n            \"txt\",\n            \"py\",\n            \"py\",\n        ]\n        percentages = CodeCommitProvider._get_language_percentages(extensions)\n        assert percentages[\"py\"] == 67\n        assert percentages[\"txt\"] 
== 33\n\n        # test an empty list\n        percentages = CodeCommitProvider._get_language_percentages([])\n        assert percentages == {}\n\n    def test_get_edit_type(self):\n        # Test that the _get_edit_type() function can convert a CodeCommit letter to an EDIT_TYPE enum\n        assert CodeCommitProvider._get_edit_type(\"A\") == EDIT_TYPE.ADDED\n        assert CodeCommitProvider._get_edit_type(\"D\") == EDIT_TYPE.DELETED\n        assert CodeCommitProvider._get_edit_type(\"M\") == EDIT_TYPE.MODIFIED\n        assert CodeCommitProvider._get_edit_type(\"R\") == EDIT_TYPE.RENAMED\n\n        assert CodeCommitProvider._get_edit_type(\"a\") == EDIT_TYPE.ADDED\n        assert CodeCommitProvider._get_edit_type(\"d\") == EDIT_TYPE.DELETED\n        assert CodeCommitProvider._get_edit_type(\"m\") == EDIT_TYPE.MODIFIED\n        assert CodeCommitProvider._get_edit_type(\"r\") == EDIT_TYPE.RENAMED\n\n        assert CodeCommitProvider._get_edit_type(\"X\") is None\n\n    def test_add_additional_newlines(self):\n        # a short string to test adding double newlines\n        input = \"abc\\ndef\\n\\n___\\nghi\\njkl\\nmno\\n\\npqr\\n\"\n        expect = \"abc\\n\\ndef\\n\\n___\\n\\nghi\\n\\njkl\\n\\nmno\\n\\npqr\\n\\n\"\n        assert CodeCommitProvider._add_additional_newlines(input) == expect\n        # a test example from a real PR\n        input = \"## PR Type:\\nEnhancement\\n\\n___\\n## PR Description:\\nThis PR introduces a new feature to the script, allowing users to filter servers by name.\\n\\n___\\n## PR Main Files Walkthrough:\\n`foo`: The foo script has been updated to include a new command line option `-f` or `--filter`.\\n`bar`: The bar script has been updated to list stopped servers.\\n\"\n        expect = \"## PR Type:\\n\\nEnhancement\\n\\n___\\n\\n## PR Description:\\n\\nThis PR introduces a new feature to the script, allowing users to filter servers by name.\\n\\n___\\n\\n## PR Main Files Walkthrough:\\n\\n`foo`: The foo script has been updated to 
include a new command line option `-f` or `--filter`.\\n\\n`bar`: The bar script has been updated to list stopped servers.\\n\\n\"\n        assert CodeCommitProvider._add_additional_newlines(input) == expect\n\n    def test_remove_markdown_html(self):\n        input = \"## PR Feedback\\n<details><summary>Code feedback:</summary>\\nfile foo\\n</summary>\\n\"\n        expect = \"## PR Feedback\\nCode feedback:\\nfile foo\\n\\n\"\n        assert CodeCommitProvider._remove_markdown_html(input) == expect\n"
  },
  {
    "path": "tests/unittest/test_config_loader_secrets.py",
    "content": "from unittest.mock import MagicMock, patch\n\nfrom pr_agent.config_loader import apply_secrets_manager_config, apply_secrets_to_config\n\n\nclass TestConfigLoaderSecrets:\n\n    def test_apply_secrets_manager_config_success(self):\n        with patch('pr_agent.secret_providers.get_secret_provider') as mock_get_provider, \\\n             patch('pr_agent.config_loader.apply_secrets_to_config') as mock_apply_secrets, \\\n             patch('pr_agent.config_loader.get_settings') as mock_get_settings:\n\n            # Mock secret provider\n            mock_provider = MagicMock()\n            mock_provider.get_all_secrets.return_value = {'openai.key': 'sk-test'}\n            mock_get_provider.return_value = mock_provider\n\n            # Mock settings\n            settings = MagicMock()\n            settings.get.return_value = \"aws_secrets_manager\"\n            mock_get_settings.return_value = settings\n\n            apply_secrets_manager_config()\n\n            mock_apply_secrets.assert_called_once_with({'openai.key': 'sk-test'})\n\n    def test_apply_secrets_manager_config_no_provider(self):\n        with patch('pr_agent.secret_providers.get_secret_provider') as mock_get_provider:\n            mock_get_provider.return_value = None\n\n            # Confirm no exception is raised\n            apply_secrets_manager_config()\n\n    def test_apply_secrets_manager_config_not_aws(self):\n        with patch('pr_agent.secret_providers.get_secret_provider') as mock_get_provider, \\\n             patch('pr_agent.config_loader.get_settings') as mock_get_settings:\n\n            # Mock Google Cloud Storage provider\n            mock_provider = MagicMock()\n            mock_get_provider.return_value = mock_provider\n\n            # Mock settings (Google Cloud Storage)\n            settings = MagicMock()\n            settings.get.return_value = \"google_cloud_storage\"\n            mock_get_settings.return_value = settings\n\n            # Confirm execution is 
skipped for non-AWS Secrets Manager\n            apply_secrets_manager_config()\n            \n            # Confirm get_all_secrets is not called\n            assert not hasattr(mock_provider, 'get_all_secrets') or \\\n                   not mock_provider.get_all_secrets.called\n\n    def test_apply_secrets_to_config_nested_keys(self):\n        with patch('pr_agent.config_loader.get_settings') as mock_get_settings:\n            settings = MagicMock()\n            settings.get.return_value = None  # No existing value\n            settings.set = MagicMock()\n            mock_get_settings.return_value = settings\n\n            secrets = {\n                'openai.key': 'sk-test',\n                'github.webhook_secret': 'webhook-secret'\n            }\n\n            apply_secrets_to_config(secrets)\n\n            # Confirm settings are applied correctly\n            settings.set.assert_any_call('OPENAI.KEY', 'sk-test')\n            settings.set.assert_any_call('GITHUB.WEBHOOK_SECRET', 'webhook-secret')\n\n    def test_apply_secrets_to_config_existing_value_preserved(self):\n        with patch('pr_agent.config_loader.get_settings') as mock_get_settings:\n            settings = MagicMock()\n            settings.get.return_value = \"existing-value\"  # Existing value present\n            settings.set = MagicMock()\n            mock_get_settings.return_value = settings\n\n            secrets = {'openai.key': 'sk-test'}\n\n            apply_secrets_to_config(secrets)\n\n            # Confirm settings are not overridden when existing value present\n            settings.set.assert_not_called()\n\n    def test_apply_secrets_to_config_single_key(self):\n        with patch('pr_agent.config_loader.get_settings') as mock_get_settings:\n            settings = MagicMock()\n            settings.get.return_value = None\n            settings.set = MagicMock()\n            mock_get_settings.return_value = settings\n\n            secrets = {'simple_key': 'simple_value'}\n\n            
apply_secrets_to_config(secrets)\n\n            # Confirm non-dot notation keys are ignored\n            settings.set.assert_not_called()\n\n    def test_apply_secrets_to_config_multiple_dots(self):\n        with patch('pr_agent.config_loader.get_settings') as mock_get_settings:\n            settings = MagicMock()\n            settings.get.return_value = None\n            settings.set = MagicMock()\n            mock_get_settings.return_value = settings\n\n            secrets = {'section.subsection.key': 'value'}\n\n            apply_secrets_to_config(secrets)\n\n            # Confirm keys with multiple dots are ignored\n            settings.set.assert_not_called()\n\n    def test_apply_secrets_manager_config_exception_handling(self):\n        with patch('pr_agent.secret_providers.get_secret_provider') as mock_get_provider:\n            mock_get_provider.side_effect = Exception(\"Provider error\")\n\n            # Confirm processing continues even when exception occurs\n            apply_secrets_manager_config()  # Confirm no exception is raised \n"
  },
  {
    "path": "tests/unittest/test_convert_to_markdown.py",
    "content": "# Generated by CodiumAI\nimport textwrap\nfrom unittest.mock import Mock\n\nfrom pr_agent.algo.utils import PRReviewHeader, convert_to_markdown_v2\nfrom pr_agent.tools.pr_description import insert_br_after_x_chars\n\n\"\"\"\nCode Analysis\n\nObjective:\nThe objective of the 'convert_to_markdown' function is to convert a dictionary of data into a markdown-formatted text.\nThe function takes in a dictionary as input and recursively iterates through its keys and values to generate the\nmarkdown text.\n\nInputs:\n- A dictionary of data containing information about a pull request.\n\nFlow:\n- Initialize an empty string variable 'markdown_text'.\n- Create a dictionary 'emojis' containing emojis for each key in the input dictionary.\n- Iterate through the input dictionary:\n  - If the value is empty, continue to the next iteration.\n  - If the value is a dictionary, recursively call the 'convert_to_markdown' function with the value as input and\n  append the returned markdown text to 'markdown_text'.\n  - If the value is a list:\n    - If the key is 'code suggestions', add an additional line break to 'markdown_text'.\n    - Get the corresponding emoji for the key from the 'emojis' dictionary. If no emoji is found, use a dash.\n    - Append the emoji and key to 'markdown_text'.\n    - Iterate through the items in the list:\n      - If the item is a dictionary and the key is 'code suggestions', call the 'parse_code_suggestion' function with\n      the item as input and append the returned markdown text to 'markdown_text'.\n      - If the item is not empty, append it to 'markdown_text'.\n  - If the value is not 'n/a', get the corresponding emoji for the key from the 'emojis' dictionary. If no emoji is\n  found, use a dash. 
Append the emoji, key, and value to 'markdown_text'.\n- Return 'markdown_text'.\n\nOutputs:\n- A markdown-formatted string containing the information from the input dictionary.\n\nAdditional aspects:\n- The function uses recursion to handle nested dictionaries.\n- The 'parse_code_suggestion' function is called for items in the 'code suggestions' list.\n- The function uses emojis to add visual cues to the markdown text.\n\"\"\"\n\n\nclass TestConvertToMarkdown:\n    # Tests that the function works correctly with a simple dictionary input\n    def test_simple_dictionary_input(self):\n        input_data = {'review': {\n            'estimated_effort_to_review_[1-5]': '1, because the changes are minimal and straightforward, focusing on a single functionality addition.\\n',\n            'relevant_tests': 'No\\n', 'possible_issues': 'No\\n', 'security_concerns': 'No\\n'}}\n\n        expected_output = textwrap.dedent(f\"\"\"\\\n            {PRReviewHeader.REGULAR.value} 🔍\n\n            Here are some key observations to aid the review process:\n\n            <table>\n            <tr><td>⏱️&nbsp;<strong>Estimated effort to review</strong>: 1 🔵⚪⚪⚪⚪</td></tr>\n            <tr><td>🧪&nbsp;<strong>No relevant tests</strong></td></tr>\n            <tr><td>&nbsp;<strong>Possible issues</strong>: No\n            </td></tr>\n            <tr><td>🔒&nbsp;<strong>No security concerns identified</strong></td></tr>\n            </table>\n        \"\"\")\n\n        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()\n\n    def test_simple_dictionary_input_without_gfm_supported(self):\n        input_data = {'review': {\n            'estimated_effort_to_review_[1-5]': '1, because the changes are minimal and straightforward, focusing on a single functionality addition.\\n',\n            'relevant_tests': 'No\\n', 'possible_issues': 'No\\n', 'security_concerns': 'No\\n'}}\n\n        expected_output = textwrap.dedent(\"\"\"\\\n            ## PR Reviewer Guide 🔍\n\n      
      Here are some key observations to aid the review process:\n\n            ### ⏱️ Estimated effort to review: 1 🔵⚪⚪⚪⚪\n\n            ### 🧪 No relevant tests\n\n            ###  Possible issues: No\n\n\n            ### 🔒 No security concerns identified\n        \"\"\")\n\n        assert convert_to_markdown_v2(input_data, gfm_supported=False).strip() == expected_output.strip()\n\n    def test_key_issues_to_review(self):\n        input_data = {'review': {\n            'key_issues_to_review': [\n                {\n                    'relevant_file': 'src/utils.py',\n                    'issue_header': 'Code Smell',\n                    'issue_content': 'The function is too long and complex.',\n                    'start_line': 30,\n                    'end_line': 50,\n                }\n            ]\n        }}\n        mock_git_provider = Mock()\n        reference_link = 'https://github.com/qodo/pr-agent/pull/1/files#diff-hashvalue-R174'\n        mock_git_provider.get_line_link.return_value = reference_link\n\n        expected_output = textwrap.dedent(f\"\"\"\\\n            ## PR Reviewer Guide 🔍\n\n            Here are some key observations to aid the review process:\n\n            <table>\n            <tr><td>⚡&nbsp;<strong>Recommended focus areas for review</strong><br><br>\n\n            <a href='{reference_link}'><strong>Code Smell</strong></a><br>The function is too long and complex.\n\n            </td></tr>\n            </table>\n        \"\"\")\n\n        assert convert_to_markdown_v2(input_data, git_provider=mock_git_provider).strip() == expected_output.strip()\n        mock_git_provider.get_line_link.assert_called_with('src/utils.py', 30, 50)\n\n    def test_ticket_compliance(self):\n        input_data = {'review': {\n            'ticket_compliance_check': [\n                {\n                    'ticket_url': 'https://example.com/ticket/123',\n                    'ticket_requirements': '- Requirement 1\\n- Requirement 2\\n',\n                    
'fully_compliant_requirements': '- Requirement 1\\n- Requirement 2\\n',\n                    'not_compliant_requirements': '',\n                    'requires_further_human_verification': '',\n                }\n            ]\n        }}\n\n        expected_output = textwrap.dedent(\"\"\"\\\n            ## PR Reviewer Guide 🔍\n\n            Here are some key observations to aid the review process:\n\n            <table>\n            <tr><td>\n\n            **🎫 Ticket compliance analysis ✅**\n\n\n\n            **[123](https://example.com/ticket/123) - Fully compliant**\n\n            Compliant requirements:\n\n            - Requirement 1\n            - Requirement 2\n\n\n\n            </td></tr>\n            </table>\n        \"\"\")\n\n        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()\n\n    def test_can_be_split(self):\n        input_data = {'review': {\n            'can_be_split': [\n                {\n                    'relevant_files': [\n                        'src/file1.py',\n                        'src/file2.py'\n                    ],\n                    'title': 'Refactoring',\n                },\n                {\n                    'relevant_files': [\n                        'src/file3.py'\n                    ],\n                    'title': 'Bug Fix',\n                }\n            ]\n        }\n        }\n\n        expected_output = textwrap.dedent(\"\"\"\\\n            ## PR Reviewer Guide 🔍\n\n            Here are some key observations to aid the review process:\n\n            <table>\n            <tr><td>🔀 <strong>Multiple PR themes</strong><br><br>\n\n            <details><summary>\n            Sub-PR theme: <b>Refactoring</b></summary>\n\n            ___\n\n            Relevant files:\n\n            - src/file1.py\n            - src/file2.py\n            ___\n\n            </details>\n\n            <details><summary>\n            Sub-PR theme: <b>Bug Fix</b></summary>\n\n            ___\n\n            
Relevant files:\n\n            - src/file3.py\n            ___\n\n            </details>\n\n            </td></tr>\n            </table>\n        \"\"\")\n\n        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()\n\n    def test_contribution_time_cost_estimate(self):\n        input_data = {\n            'review': {\n                'contribution_time_cost_estimate': {\n                    'best_case': '1h',\n                    'average_case': '2h',\n                    'worst_case': '30m',\n                }\n            }\n        }\n\n        expected_output = textwrap.dedent(f\"\"\"\n            {PRReviewHeader.REGULAR.value} 🔍\n\n            Here are some key observations to aid the review process:\n\n            <table>\n            <tr><td>⏳&nbsp;<strong>Contribution time estimate</strong> (best, average, worst case): 1h | 2h | 30 minutes</td></tr>\n            </table>\n        \"\"\")\n        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()\n\n        # Non-GFM branch\n        expected_output_no_gfm = textwrap.dedent(f\"\"\"\n        {PRReviewHeader.REGULAR.value} 🔍\n\n        Here are some key observations to aid the review process:\n\n        ### ⏳ Contribution time estimate (best, average, worst case): 1h | 2h | 30 minutes\n\n        \"\"\")\n        assert convert_to_markdown_v2(input_data, gfm_supported=False).strip() == expected_output_no_gfm.strip()\n\n\n    # Tests that the function works correctly with an empty dictionary input\n    def test_empty_dictionary_input(self):\n        input_data = {}\n\n        expected_output = ''\n\n        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()\n\n    def test_dictionary_with_empty_dictionaries(self):\n        input_data = {'review': {}}\n\n        expected_output = ''\n\n        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()\n\n\nclass TestBR:\n    def test_br1(self):\n        
file_change_description = '- Imported `FilePatchInfo` and `EDIT_TYPE` from `pr_agent.algo.types` instead of `pr_agent.git_providers.git_provider`.'\n        file_change_description_br = insert_br_after_x_chars(file_change_description)\n        expected_output = ('<ul><li>Imported <code>FilePatchInfo</code> and <code>EDIT_TYPE</code> from '\n                           '<code>pr_agent.algo.types</code> instead <br>of '\n                           '<code>pr_agent.git_providers.git_provider</code>.</ul>')\n        assert file_change_description_br == expected_output\n        # print(\"-----\")\n        # print(file_change_description_br)\n\n    def test_br2(self):\n        file_change_description = (\n            '- Created a - new -class `ColorPaletteResourcesCollection ColorPaletteResourcesCollection '\n            'ColorPaletteResourcesCollection ColorPaletteResourcesCollection`')\n        file_change_description_br = insert_br_after_x_chars(file_change_description)\n        expected_output = ('<ul><li>Created a - new -class <code>ColorPaletteResourcesCollection </code><br><code>'\n                           'ColorPaletteResourcesCollection ColorPaletteResourcesCollection '\n                           '</code><br><code>ColorPaletteResourcesCollection</code></ul>')\n        assert file_change_description_br == expected_output\n        # print(\"-----\")\n        # print(file_change_description_br)\n\n    def test_br3(self):\n        file_change_description = 'Created a new class `ColorPaletteResourcesCollection` which extends `AvaloniaDictionary<ThemeVariant, ColorPaletteResources>` and implements aaa'\n        file_change_description_br = insert_br_after_x_chars(file_change_description)\n        assert file_change_description_br == ('Created a new class <code>ColorPaletteResourcesCollection</code> which '\n                                              'extends <br><code>AvaloniaDictionary<ThemeVariant, ColorPaletteResources>'\n                                        
      '</code> and implements <br>aaa')\n        # print(\"-----\")\n        # print(file_change_description_br)\n"
  },
  {
    "path": "tests/unittest/test_delete_hunks.py",
    "content": "# Generated by CodiumAI\n\nfrom pr_agent.algo.git_patch_processing import omit_deletion_hunks\n\n\"\"\"\nCode Analysis\n\nObjective:\nThe objective of the \"omit_deletion_hunks\" function is to remove deletion hunks from a patch file and return only the\nadded lines.\n\nInputs:\n- \"patch_lines\": a list of strings representing the lines of a patch file.\n\nFlow:\n- Initialize empty lists \"temp_hunk\" and \"added_patched\", and boolean variables \"add_hunk\" and \"inside_hunk\".\n- Compile a regular expression pattern to match hunk headers.\n- Iterate through each line in \"patch_lines\".\n- If the line starts with \"@@\", match the line with the hunk header pattern, finish the previous hunk if necessary,\nand append the line to \"temp_hunk\".\n- If the line does not start with \"@@\", append the line to \"temp_hunk\", check if it is an added line, and set\n\"add_hunk\" to True if it is.\n- If the function reaches the end of \"patch_lines\" and there is an unfinished hunk with added lines, append it to\n\"added_patched\".\n- Join the lines in \"added_patched\" with newline characters and return the resulting string.\n\nOutputs:\n- A string representing the added lines in the patch file.\n\nAdditional aspects:\n- The function only considers hunks with added lines and ignores hunks with deleted lines.\n- The function assumes that the input patch file is well-formed and follows the unified diff format.\n\"\"\"\n\n\nclass TestOmitDeletionHunks:\n    # Tests that the function correctly handles a simple patch containing only additions\n    def test_simple_patch_additions(self):\n        patch_lines = ['@@ -1,0 +1,1 @@\\n', '+added line\\n']\n        expected_output = '@@ -1,0 +1,1 @@\\n\\n+added line\\n'\n        assert omit_deletion_hunks(patch_lines) == expected_output\n\n    # Tests that the function correctly omits deletion hunks and concatenates multiple hunks in a patch.\n    def test_patch_multiple_hunks(self):\n        patch_lines = ['@@ -1,0 
+1,1 @@\\n', '-deleted line', '+added line\\n', '@@ -2,0 +3,1 @@\\n', '-deleted line\\n',\n                       '-another deleted line\\n']\n        expected_output = '@@ -1,0 +1,1 @@\\n\\n-deleted line\\n+added line\\n'\n        assert omit_deletion_hunks(patch_lines) == expected_output\n\n    # Tests that the function correctly omits deletion lines from the patch when there are no additions or context\n    # lines.\n    def test_patch_only_deletions(self):\n        patch_lines = ['@@ -1,1 +1,0 @@\\n', '-deleted line\\n']\n        expected_output = ''\n        assert omit_deletion_hunks(patch_lines) == expected_output\n\n        # Additional deletion lines\n        patch_lines = ['@@ -1,1 +1,0 @@\\n', '-deleted line\\n', '-another deleted line\\n']\n        expected_output = ''\n        assert omit_deletion_hunks(patch_lines) == expected_output\n\n        # Additional context lines\n        patch_lines = ['@@ -1,1 +1,0 @@\\n', '-deleted line\\n', '-another deleted line\\n', 'context line 1\\n',\n                       'context line 2\\n', 'context line 3\\n']\n        expected_output = ''\n        assert omit_deletion_hunks(patch_lines) == expected_output\n\n    # Tests that the function correctly handles an empty patch\n    def test_empty_patch(self):\n        patch_lines = []\n        expected_output = ''\n        assert omit_deletion_hunks(patch_lines) == expected_output\n\n    # Tests that the function correctly handles a patch containing only one hunk\n    def test_patch_one_hunk(self):\n        patch_lines = ['@@ -1,0 +1,1 @@\\n', '+added line\\n']\n        expected_output = '@@ -1,0 +1,1 @@\\n\\n+added line\\n'\n        assert omit_deletion_hunks(patch_lines) == expected_output\n\n    # Tests that the function correctly handles a patch containing only deletions and no additions\n    def test_patch_deletions_no_additions(self):\n        patch_lines = ['@@ -1,1 +1,0 @@\\n', '-deleted line\\n']\n        expected_output = ''\n        assert 
omit_deletion_hunks(patch_lines) == expected_output\n"
  },
  {
    "path": "tests/unittest/test_extend_patch.py",
    "content": "import pytest\n\nfrom pr_agent.algo.git_patch_processing import extend_patch\nfrom pr_agent.algo.pr_processing import pr_generate_extended_diff\nfrom pr_agent.algo.token_handler import TokenHandler\nfrom pr_agent.algo.utils import load_large_diff\nfrom pr_agent.config_loader import get_settings\n\nget_settings(use_context=False).set(\"CONFIG.CLI_MODE\", True)\nget_settings(use_context=False).config.allow_dynamic_context = False\n\n\nclass TestExtendPatch:\n    # Tests that the function works correctly with valid input\n    def test_happy_path(self):\n        original_file_str = 'line1\\nline2\\nline3\\nline4\\nline5'\n        patch_str = '@@ -2,2 +2,2 @@ init()\\n-line2\\n+new_line2\\n line3'\n        num_lines = 1\n        expected_output = '\\n@@ -1,4 +1,4 @@ init()\\n line1\\n-line2\\n+new_line2\\n line3\\n line4'\n        actual_output = extend_patch(original_file_str, patch_str,\n                                     patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines)\n        assert actual_output == expected_output\n\n    # Tests that the function returns an empty string when patch_str is empty\n    def test_empty_patch(self):\n        original_file_str = 'line1\\nline2\\nline3\\nline4\\nline5'\n        patch_str = ''\n        num_lines = 1\n        expected_output = ''\n        assert extend_patch(original_file_str, patch_str,\n                            patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) == expected_output\n\n    # Tests that the function returns the original patch when num_lines is 0\n    def test_zero_num_lines(self):\n        original_file_str = 'line1\\nline2\\nline3\\nline4\\nline5'\n        patch_str = '@@ -2,2 +2,2 @@ init()\\n-line2\\n+new_line2\\nline3'\n        num_lines = 0\n        assert extend_patch(original_file_str, patch_str,\n                            patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) == patch_str\n\n    # Tests that the function 
returns the original patch when patch_str contains no hunks\n    def test_no_hunks(self):\n        original_file_str = 'line1\\nline2\\nline3\\nline4\\nline5'\n        patch_str = 'no hunks here'\n        num_lines = 1\n        expected_output = 'no hunks here'\n        assert extend_patch(original_file_str, patch_str, num_lines) == expected_output\n\n    # Tests that the function extends a patch with a single hunk correctly\n    def test_single_hunk(self):\n        original_file_str = 'line1\\nline2\\nline3\\nline4\\nline5'\n        patch_str = '@@ -2,3 +2,3 @@ init()\\n-line2\\n+new_line2\\n line3\\n line4'\n\n        for num_lines in [1, 2, 3]: # check that even if we are over the number of lines in the file, the function still works\n            expected_output = '\\n@@ -1,5 +1,5 @@ init()\\n line1\\n-line2\\n+new_line2\\n line3\\n line4\\n line5'\n            actual_output = extend_patch(original_file_str, patch_str,\n                                         patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines)\n            assert actual_output == expected_output\n\n    # Tests the functionality of extending a patch with multiple hunks.\n    def test_multiple_hunks(self):\n        original_file_str = 'line1\\nline2\\nline3\\nline4\\nline5\\nline6'\n        patch_str = '@@ -2,3 +2,3 @@ init()\\n-line2\\n+new_line2\\n line3\\n line4\\n@@ -4,1 +4,1 @@ init2()\\n-line4\\n+new_line4'  # noqa: E501\n        num_lines = 1\n        original_allow_dynamic_context = get_settings(use_context=False).config.allow_dynamic_context\n\n        get_settings(use_context=False).config.allow_dynamic_context = False\n        expected_output = '\\n@@ -1,5 +1,5 @@ init()\\n line1\\n-line2\\n+new_line2\\n line3\\n line4\\n line5\\n\\n@@ -3,3 +3,3 @@ init2()\\n line3\\n-line4\\n+new_line4\\n line5' # noqa: E501\n        actual_output = extend_patch(original_file_str, patch_str,\n                                     patch_extra_lines_before=num_lines, 
patch_extra_lines_after=num_lines)\n        assert actual_output == expected_output\n\n        get_settings(use_context=False).config.allow_dynamic_context = True\n        expected_output = '\\n@@ -1,5 +1,5 @@ init()\\n line1\\n-line2\\n+new_line2\\n line3\\n line4\\n line5\\n\\n@@ -3,3 +3,3 @@ init2()\\n line3\\n-line4\\n+new_line4\\n line5' # noqa: E501\n        actual_output = extend_patch(original_file_str, patch_str,\n                                     patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines)\n        assert actual_output == expected_output\n        get_settings(use_context=False).config.allow_dynamic_context = original_allow_dynamic_context\n\n\n    def test_dynamic_context(self):\n        get_settings(use_context=False).config.max_extra_lines_before_dynamic_context = 10\n        original_file_str = \"def foo():\"\n        for i in range(9):\n            original_file_str += f\"\\n    line({i})\"\n        patch_str =\"@@ -10,1 +10,1 @@ def foo():\\n-    line(8)\\n+    new_line(8)\"\n        new_file_str = \"\\n\".join(original_file_str.splitlines()[:-1] + [\"    new_line(8)\"])\n        num_lines=1\n\n        get_settings(use_context=False).config.allow_dynamic_context = True\n        actual_output = extend_patch(original_file_str, patch_str,\n                                     patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines, new_file_str=new_file_str)\n        expected_output='\\n@@ -1,10 +1,10 @@ \\n def foo():\\n     line(0)\\n     line(1)\\n     line(2)\\n     line(3)\\n     line(4)\\n     line(5)\\n     line(6)\\n     line(7)\\n-    line(8)\\n+    new_line(8)'\n        assert actual_output == expected_output\n\n        get_settings(use_context=False).config.allow_dynamic_context = False\n        actual_output2 = extend_patch(original_file_str, patch_str,\n                                     patch_extra_lines_before=1, patch_extra_lines_after=1)\n        expected_output_no_dynamic_context = '\\n@@ 
-9,2 +9,2 @@ def foo():\\n     line(7)\\n-    line(8)\\n+    new_line(8)'\n        assert actual_output2 == expected_output_no_dynamic_context\n\n        get_settings(use_context=False).config.allow_dynamic_context = False\n        actual_output3 = extend_patch(original_file_str, patch_str,\n                                     patch_extra_lines_before=3, patch_extra_lines_after=3)\n        expected_output_no_dynamic_context = '\\n@@ -7,4 +7,4 @@ def foo():\\n     line(5)\\n     line(6)\\n     line(7)\\n-    line(8)\\n+    new_line(8)'\n        assert actual_output3 == expected_output_no_dynamic_context\n\n\n\n\n\nclass TestExtendedPatchMoreLines:\n    class File:\n        def __init__(self, base_file, patch, head_file, filename, ai_file_summary=None):\n            self.base_file = base_file\n            self.patch = patch\n            self.head_file = head_file\n            self.filename = filename\n            self.ai_file_summary = ai_file_summary\n\n    @pytest.fixture\n    def token_handler(self):\n        # Create a TokenHandler instance with dummy data\n        th = TokenHandler(system=\"System prompt\", user=\"User prompt\")\n        th.prompt_tokens = 100\n        return th\n\n    @pytest.fixture\n    def pr_languages(self):\n        # Create a list of languages with files containing base_file and patch data\n        return [\n            {\n                'files': [\n                    self.File(base_file=\"line000\\nline00\\nline0\\nline1\\noriginal content\\nline2\\nline3\\nline4\\nline5\\nline6\\nline7\\nline8\\nline9\\nline10\",\n                              patch=\"@@ -5,5 +5,5 @@\\n-original content\\n+modified content\\n line2\\n line3\\n line4\\n line5\",\n                              head_file=\"line000\\nline00\\nline0\\nline1\\nmodified content\\nline2\\nline3\\nline4\\nline5\\nline6\\nline7\\nline8\\nline9\\nline10\",\n                              filename=\"file1\"),\n                    self.File(base_file=\"original 
content\\nline2\\nline3\\nline4\\nline5\\nline6\\nline7\\nline8\\nline9\\nline10\",\n                              patch=\"@@ -6,5 +6,5 @@\\nline6\\nline7\\nline8\\n-line9\\n+modified line9\\nline10\",\n                              head_file=\"original content\\nline2\\nline3\\nline4\\nline5\\nline6\\nline7\\nline8\\nmodified line9\\nline10\",\n                              filename=\"file2\")\n                ]\n            }\n        ]\n\n    def test_extend_patches_with_extra_lines(self, token_handler, pr_languages):\n        patches_extended_no_extra_lines, total_tokens, patches_extended_tokens = pr_generate_extended_diff(\n            pr_languages, token_handler, add_line_numbers_to_hunks=False,\n            patch_extra_lines_before=0,\n            patch_extra_lines_after=0\n        )\n\n        # Check that with no extra lines, the patches are the same as the original patches\n        p0 = patches_extended_no_extra_lines[0].strip()\n        p1 = patches_extended_no_extra_lines[1].strip()\n        assert p0 == \"## File: 'file1'\\n\\n\" + pr_languages[0]['files'][0].patch.strip()\n        assert p1 == \"## File: 'file2'\\n\\n\" + pr_languages[0]['files'][1].patch.strip()\n\n        patches_extended_with_extra_lines, total_tokens, patches_extended_tokens = pr_generate_extended_diff(\n            pr_languages, token_handler, add_line_numbers_to_hunks=False,\n            patch_extra_lines_before=2,\n            patch_extra_lines_after=1\n        )\n\n        p0_extended = patches_extended_with_extra_lines[0].strip()\n        assert p0_extended == \"## File: 'file1'\\n\\n@@ -3,8 +3,8 @@ \\n line0\\n line1\\n-original content\\n+modified content\\n line2\\n line3\\n line4\\n line5\\n line6\"\n\nclass TestLoadLargeDiff:\n    def test_no_newline(self):\n        patch = load_large_diff(\"test.py\",\n                                \"\"\"\\\n                                old content 1\n                                some new content\n                                
another line\n                                \"\"\",\n                                \"\"\"\n                                old content 1\n                                old content 2\"\"\")\n\n        patch_expected=\"\"\"\\\n--- \n+++ \n@@ -1,3 +1,3 @@\n-\n                                 old content 1\n-                                old content 2\n+                                some new content\n+                                another line\n\"\"\"\n        assert patch == patch_expected\n\n    def test_empty_inputs(self):\n        assert load_large_diff(\"test.py\", \"\", \"\") == \"\"\n        assert load_large_diff(\"test.py\", None, None) == \"\"\n        assert (load_large_diff(\"test.py\", \"content\\n\", \"\") ==\n                '--- \\n+++ \\n@@ -1 +1 @@\\n-\\n+content\\n')"
  },
  {
    "path": "tests/unittest/test_extract_issue_from_branch.py",
    "content": "import pytest\n\nfrom pr_agent.tools.ticket_pr_compliance_check import extract_ticket_links_from_branch_name\n\n\nclass TestExtractTicketsLinkFromBranchName:\n    \"\"\"Unit tests for branch-name issue extraction (option A: number at start of segment).\"\"\"\n\n    def test_feature_slash_number_suffix(self):\n        \"\"\"feature/1-test-issue -> issue #1\"\"\"\n        result = extract_ticket_links_from_branch_name(\n            \"feature/1-test-issue\", \"org/repo\", \"https://github.com\"\n        )\n        assert result == [\"https://github.com/org/repo/issues/1\"]\n\n    def test_fix_slash_number_suffix(self):\n        \"\"\"fix/123-bug -> issue #123\"\"\"\n        result = extract_ticket_links_from_branch_name(\n            \"fix/123-bug\", \"owner/repo\", \"https://github.com\"\n        )\n        assert result == [\"https://github.com/owner/repo/issues/123\"]\n\n    def test_number_at_start_no_slash(self):\n        \"\"\"123-fix -> issue #123\"\"\"\n        result = extract_ticket_links_from_branch_name(\n            \"123-fix\", \"org/repo\", \"https://github.com\"\n        )\n        assert result == [\"https://github.com/org/repo/issues/123\"]\n\n    def test_empty_branch_returns_empty(self):\n        \"\"\"Empty branch name -> []\"\"\"\n        result = extract_ticket_links_from_branch_name(\"\", \"org/repo\")\n        assert result == []\n\n    def test_none_branch_returns_empty(self):\n        \"\"\"None branch name -> []\"\"\"\n        result = extract_ticket_links_from_branch_name(None, \"org/repo\")\n        assert result == []\n\n    def test_no_digits_in_segment_returns_empty(self):\n        \"\"\"feature/no-issue -> []\"\"\"\n        result = extract_ticket_links_from_branch_name(\n            \"feature/no-issue\", \"org/repo\", \"https://github.com\"\n        )\n        assert result == []\n\n    def test_base_url_no_trailing_slash(self):\n        \"\"\"base_url_html with a trailing slash is normalized\"\"\"\n        result = 
extract_ticket_links_from_branch_name(\n            \"feature/1-test\", \"org/repo\", \"https://github.com/\"\n        )\n        assert result == [\"https://github.com/org/repo/issues/1\"]\n\n    def test_disable_via_config_returns_empty(self, monkeypatch):\n        \"\"\"When extract_issue_from_branch is False, return []\"\"\"\n        fake_settings = type(\"Settings\", (), {})()\n        fake_settings.get = lambda key, default=None: (\n            False if key in (\"extract_issue_from_branch\", \"config.extract_issue_from_branch\") else (\n                \"\" if key in (\"branch_issue_regex\", \"config.branch_issue_regex\") else default\n            )\n        )\n        import pr_agent.tools.ticket_pr_compliance_check as m\n        monkeypatch.setattr(m, \"get_settings\", lambda: fake_settings)\n        result = extract_ticket_links_from_branch_name(\n            \"feature/1-test\", \"org/repo\", \"https://github.com\"\n        )\n        assert result == []\n\n    def test_invalid_custom_regex_returns_empty(self, monkeypatch):\n        \"\"\"When branch_issue_regex is invalid, log and return []\"\"\"\n        fake_settings = type(\"Settings\", (), {})()\n        fake_settings.get = lambda key, default=None: (\n            True if key in (\"extract_issue_from_branch\", \"config.extract_issue_from_branch\") else (\n                \"[\" if key in (\"branch_issue_regex\", \"config.branch_issue_regex\") else default\n            )\n        )\n        import pr_agent.tools.ticket_pr_compliance_check as m\n        monkeypatch.setattr(m, \"get_settings\", lambda: fake_settings)\n        result = extract_ticket_links_from_branch_name(\n            \"feature/1-test\", \"org/repo\", \"https://github.com\"\n        )\n        assert result == []\n\n    def test_custom_regex_without_capturing_group_falls_back_to_default(self, monkeypatch):\n        \"\"\"When branch_issue_regex has no capturing group, fall back to default pattern (no crash).\"\"\"\n        fake_settings 
= type(\"Settings\", (), {})()\n        fake_settings.get = lambda key, default=None: (\n            True if key in (\"extract_issue_from_branch\", \"config.extract_issue_from_branch\") else (\n                r\"\\d+\" if key in (\"branch_issue_regex\", \"config.branch_issue_regex\") else default\n            )\n        )\n        import pr_agent.tools.ticket_pr_compliance_check as m\n        monkeypatch.setattr(m, \"get_settings\", lambda: fake_settings)\n        result = extract_ticket_links_from_branch_name(\n            \"feature/1-test\", \"org/repo\", \"https://github.com\"\n        )\n        assert result == [\"https://github.com/org/repo/issues/1\"]\n\n    def test_empty_repo_path_returns_empty(self):\n        \"\"\"Empty repo_path -> [] (guard in function)\"\"\"\n        result = extract_ticket_links_from_branch_name(\"feature/1-test\", \"\", \"https://github.com\")\n        assert result == []\n\n    def test_multiple_matches_deduplicated(self):\n        \"\"\"Branch with multiple segments with numbers yields unique issue URLs\"\"\"\n        result = extract_ticket_links_from_branch_name(\n            \"feature/1-test/2-other\", \"org/repo\", \"https://github.com\"\n        )\n        assert set(result) == {\n            \"https://github.com/org/repo/issues/1\",\n            \"https://github.com/org/repo/issues/2\",\n        }\n"
  },
  {
    "path": "tests/unittest/test_fetching_sub_issues.py",
    "content": "# Currently doing API calls - wrong !\n\n\n# import unittest\n# import asyncio\n# from unittest.mock import AsyncMock, patch\n# from pr_agent.tools.ticket_pr_compliance_check import extract_tickets, extract_and_cache_pr_tickets\n# from pr_agent.git_providers.github_provider import GithubProvider\n#\n#\n# class TestTicketCompliance(unittest.TestCase):\n#\n#     @patch.object(GithubProvider, 'get_user_description', return_value=\"Fixes #1 and relates to #2\")\n#     @patch.object(GithubProvider, '_parse_issue_url', side_effect=lambda url: (\"WonOfAKind/KimchiBot\", int(url.split('#')[-1])))\n#     @patch.object(GithubProvider, 'repo_obj')\n#     async def test_extract_tickets(self, mock_repo, mock_parse_issue_url, mock_user_desc):\n#         \"\"\"\n#         Test extract_tickets() to ensure it extracts tickets correctly\n#         and fetches their content.\n#         \"\"\"\n#         github_provider = GithubProvider()\n#         github_provider.repo = \"WonOfAKind/KimchiBot\"\n#         github_provider.base_url_html = \"https://github.com\"\n#\n#         # Mock issue retrieval\n#         mock_issue = AsyncMock()\n#         mock_issue.number = 1\n#         mock_issue.title = \"Sample Issue\"\n#         mock_issue.body = \"This is a test issue body.\"\n#         mock_issue.labels = [\"bug\", \"high priority\"]\n#\n#         # Mock repo object\n#         mock_repo.get_issue.return_value = mock_issue\n#\n#         tickets = await extract_tickets(github_provider)\n#\n#         # Verify tickets were extracted correctly\n#         self.assertIsInstance(tickets, list)\n#         self.assertGreater(len(tickets), 0, \"Expected at least one ticket!\")\n#\n#         # Verify ticket structure\n#         first_ticket = tickets[0]\n#         self.assertIn(\"ticket_id\", first_ticket)\n#         self.assertIn(\"ticket_url\", first_ticket)\n#         self.assertIn(\"title\", first_ticket)\n#         self.assertIn(\"body\", first_ticket)\n#         
self.assertIn(\"labels\", first_ticket)\n#\n#         print(\"\\n Test Passed: extract_tickets() successfully retrieved ticket info!\")\n#\n#     @patch.object(GithubProvider, 'get_user_description', return_value=\"Fixes #1 and relates to #2\")\n#     @patch.object(GithubProvider, '_parse_issue_url', side_effect=lambda url: (\"WonOfAKind/KimchiBot\", int(url.split('#')[-1])))\n#     @patch.object(GithubProvider, 'repo_obj')\n#     async def test_extract_and_cache_pr_tickets(self, mock_repo, mock_parse_issue_url, mock_user_desc):\n#         \"\"\"\n#         Test extract_and_cache_pr_tickets() to ensure tickets are extracted and cached correctly.\n#         \"\"\"\n#         github_provider = GithubProvider()\n#         github_provider.repo = \"WonOfAKind/KimchiBot\"\n#         github_provider.base_url_html = \"https://github.com\"\n#\n#         vars = {}  # Simulate the dictionary to store results\n#\n#         # Mock issue retrieval\n#         mock_issue = AsyncMock()\n#         mock_issue.number = 1\n#         mock_issue.title = \"Sample Issue\"\n#         mock_issue.body = \"This is a test issue body.\"\n#         mock_issue.labels = [\"bug\", \"high priority\"]\n#\n#         # Mock repo object\n#         mock_repo.get_issue.return_value = mock_issue\n#\n#         # Run function\n#         await extract_and_cache_pr_tickets(github_provider, vars)\n#\n#         # Ensure tickets are cached\n#         self.assertIn(\"related_tickets\", vars)\n#         self.assertIsInstance(vars[\"related_tickets\"], list)\n#         self.assertGreater(len(vars[\"related_tickets\"]), 0, \"Expected at least one cached ticket!\")\n#\n#         print(\"\\n Test Passed: extract_and_cache_pr_tickets() successfully cached ticket data!\")\n#\n#     def test_fetch_sub_issues(self):\n#         \"\"\"\n#         Test fetch_sub_issues() to ensure sub-issues are correctly retrieved.\n#         \"\"\"\n#         github_provider = GithubProvider()\n#         issue_url = 
\"https://github.com/WonOfAKind/KimchiBot/issues/1\"  # Known issue with sub-issues\n#         result = github_provider.fetch_sub_issues(issue_url)\n#\n#         print(\"Fetched sub-issues:\", result)\n#\n#         self.assertIsInstance(result, set)  # Ensure result is a set\n#         self.assertGreater(len(result), 0, \"Expected at least one sub-issue but found none!\")\n#\n#         print(\"\\n Test Passed: fetch_sub_issues() retrieved sub-issues correctly!\")\n#\n#     def test_fetch_sub_issues_with_no_results(self):\n#         \"\"\"\n#         Test fetch_sub_issues() to ensure an empty set is returned for an issue with no sub-issues.\n#         \"\"\"\n#         github_provider = GithubProvider()\n#         issue_url = \"https://github.com/qodo-ai/pr-agent/issues/1499\"  # Likely non-existent issue\n#         result = github_provider.fetch_sub_issues(issue_url)\n#\n#         print(\"Fetched sub-issues for non-existent issue:\", result)\n#\n#         self.assertIsInstance(result, set)  # Ensure result is a set\n#         self.assertEqual(len(result), 0, \"Expected no sub-issues but some were found!\")\n#\n#         print(\"\\n Test Passed: fetch_sub_issues_with_no_results() correctly returned an empty set!\")\n#\n#\n# if __name__ == \"__main__\":\n#     asyncio.run(unittest.main())\n#\n#\n#\n#\n#\n"
  },
  {
    "path": "tests/unittest/test_file_filter.py",
    "content": "from pr_agent.algo.file_filter import filter_ignored\nfrom pr_agent.config_loader import global_settings\n\n\nclass TestIgnoreFilter:\n    def test_no_ignores(self):\n        \"\"\"\n        Test no files are ignored when no patterns are specified.\n        \"\"\"\n        files = [\n            type('', (object,), {'filename': 'file1.py'})(),\n            type('', (object,), {'filename': 'file2.java'})(),\n            type('', (object,), {'filename': 'file3.cpp'})(),\n            type('', (object,), {'filename': 'file4.py'})(),\n            type('', (object,), {'filename': 'file5.py'})()\n        ]\n        assert filter_ignored(files) == files, \"Expected all files to be returned when no ignore patterns are given.\"\n\n    def test_glob_ignores(self, monkeypatch):\n        \"\"\"\n        Test files are ignored when glob patterns are specified.\n        \"\"\"\n        monkeypatch.setattr(global_settings.ignore, 'glob', ['*.py'])\n\n        files = [\n            type('', (object,), {'filename': 'file1.py'})(),\n            type('', (object,), {'filename': 'file2.java'})(),\n            type('', (object,), {'filename': 'file3.cpp'})(),\n            type('', (object,), {'filename': 'file4.py'})(),\n            type('', (object,), {'filename': 'file5.py'})()\n        ]\n        expected = [\n            files[1],\n            files[2]\n        ]\n\n        filtered_files = filter_ignored(files)\n        assert filtered_files == expected, f\"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}.\"\n\n    def test_regex_ignores(self, monkeypatch):\n        \"\"\"\n        Test files are ignored when regex patterns are specified.\n        \"\"\"\n        monkeypatch.setattr(global_settings.ignore, 'regex', ['^file[2-4]\\..*$'])\n\n        files = [\n            type('', (object,), {'filename': 'file1.py'})(),\n            type('', (object,), {'filename': 'file2.java'})(),\n            type('', (object,), 
{'filename': 'file3.cpp'})(),\n            type('', (object,), {'filename': 'file4.py'})(),\n            type('', (object,), {'filename': 'file5.py'})()\n        ]\n        expected = [\n            files[0],\n            files[4]\n        ]\n\n        filtered_files = filter_ignored(files)\n        assert filtered_files == expected, f\"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}.\"\n\n    def test_invalid_regex(self, monkeypatch):\n        \"\"\"\n        Test invalid patterns are quietly ignored.\n        \"\"\"\n        monkeypatch.setattr(global_settings.ignore, 'regex', ['(((||', '^file[2-4]\\..*$'])\n\n        files = [\n            type('', (object,), {'filename': 'file1.py'})(),\n            type('', (object,), {'filename': 'file2.java'})(),\n            type('', (object,), {'filename': 'file3.cpp'})(),\n            type('', (object,), {'filename': 'file4.py'})(),\n            type('', (object,), {'filename': 'file5.py'})()\n        ]\n        expected = [\n            files[0],\n            files[4]\n        ]\n\n        filtered_files = filter_ignored(files)\n        assert filtered_files == expected, f\"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}.\"\n    \n    def test_language_framework_ignores(self, monkeypatch):\n        \"\"\"\n        Test files are ignored based on language/framework mapping (e.g., protobuf).\n        \"\"\"\n        monkeypatch.setattr(global_settings.config, 'ignore_language_framework', ['protobuf', 'go_gen'])\n\n        files = [\n            type('', (object,), {'filename': 'main.go'})(),\n            type('', (object,), {'filename': 'dir1/service.pb.go'})(),\n            type('', (object,), {'filename': 'dir1/dir/data_pb2.py'})(),\n            type('', (object,), {'filename': 'file.py'})(),\n            type('', (object,), {'filename': 'dir2/file_gen.go'})(),\n            type('', (object,), {'filename': 
'file.generated.go'})()\n        ]\n        expected = [\n            files[0],\n            files[3]\n        ]\n\n        filtered = filter_ignored(files)\n        assert filtered == expected, (\n            f\"Expected {[f.filename for f in expected]}, \"\n            f\"but got {[f.filename for f in filtered]}\"\n        )\n\n    def test_skip_invalid_ignore_language_framework(self, monkeypatch):\n        \"\"\"\n        Test skipping of generated code filtering when ignore_language_framework is not a list\n        \"\"\"\n        monkeypatch.setattr(global_settings.config, 'ignore_language_framework', 'protobuf')\n\n        files = [\n            type('', (object,), {'filename': 'main.go'})(),\n            type('', (object,), {'filename': 'file.py'})(),\n            type('', (object,), {'filename': 'dir1/service.pb.go'})(),\n            type('', (object,), {'filename': 'file_pb2.py'})()\n        ]\n        expected = [\n            files[0],\n            files[1],\n            files[2],\n            files[3]\n        ]\n\n        filtered = filter_ignored(files)\n        assert filtered == expected, (\n            f\"Expected {[f.filename for f in expected]}, \"\n            f\"but got {[f.filename for f in filtered]}\"\n        )\n"
  },
  {
    "path": "tests/unittest/test_find_line_number_of_relevant_line_in_file.py",
    "content": "# Generated by CodiumAI\n\nfrom pr_agent.algo.types import FilePatchInfo\nfrom pr_agent.algo.utils import find_line_number_of_relevant_line_in_file\n\n\nclass TestFindLineNumberOfRelevantLineInFile:\n    # Tests that the function returns the correct line number and absolute position when the relevant line is found in the patch\n    def test_relevant_line_found_in_patch(self):\n        diff_files = [\n            FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\\n-line1\\n+line2\\n+relevant_line\\n', filename='file1')\n        ]\n        relevant_file = 'file1'\n        relevant_line_in_file = 'relevant_line'\n        expected = (3, 2) # (position in patch, absolute_position in new file)\n        assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected\n\n    # Tests that the function returns the correct line number and absolute position when a similar line is found using difflib\n    def test_similar_line_found_using_difflib(self):\n        diff_files = [\n            FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\\n-line1\\n+relevant_line in file similar match\\n', filename='file1')\n        ]\n        relevant_file = 'file1'\n        relevant_line_in_file = '+relevant_line in file similar match ' # note the space at the end. 
This is to simulate a similar line found using difflib\n        expected = (2, 1)\n        assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected\n\n    # Tests that the function returns (-1, -1) when the relevant line is not found in the patch and no similar line is found using difflib\n    def test_relevant_line_not_found(self):\n        diff_files = [\n            FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\\n-line1\\n+relevant_line\\n', filename='file1')\n        ]\n        relevant_file = 'file1'\n        relevant_line_in_file = 'not_found'\n        expected = (-1, -1)\n        assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected\n\n    # Tests that the function returns (-1, -1) when the relevant file is not found in any of the patches\n    def test_relevant_file_not_found(self):\n        diff_files = [\n            FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\\n-line1\\n+relevant_line\\n', filename='file2')\n        ]\n        relevant_file = 'file1'\n        relevant_line_in_file = 'relevant_line'\n        expected = (-1, -1)\n        assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected\n\n    # Tests that the function returns (0, 0) when the relevant_line_in_file is an empty string\n    def test_empty_relevant_line(self):\n        diff_files = [\n            FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\\n-line1\\n+relevant_line\\n', filename='file1')\n        ]\n        relevant_file = 'file1'\n        relevant_line_in_file = ''\n        expected = (0, 0)\n        assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected\n\n    # Tests that the function returns (-1, -1) when the relevant_line_in_file is found in the patch but it is a deleted 
line\n    def test_relevant_line_found_but_deleted(self):\n        diff_files = [\n            FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,2 +1,1 @@\\n-line1\\n-relevant_line\\n', filename='file1')\n        ]\n        relevant_file = 'file1'\n        relevant_line_in_file = 'relevant_line'\n        expected = (-1, -1)\n        assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected\n"
  },
  {
    "path": "tests/unittest/test_fix_json_escape_char.py",
    "content": "from pr_agent.algo.utils import fix_json_escape_char\n\n\nclass TestFixJsonEscapeChar:\n    def test_valid_json(self):\n        \"\"\"Return the parsed data unchanged when input JSON is already valid\"\"\"\n        text = '{\"a\": 1, \"b\": \"ok\"}'\n        expected_output = {\"a\": 1, \"b\": \"ok\"}\n        assert fix_json_escape_char(text) == expected_output\n\n    def test_single_control_char(self):\n        \"\"\"Replace a single ASCII control character with a space\"\"\"\n        text = '{\"msg\": \"hel\\x01lo\"}'\n        expected_output = {\"msg\": \"hel lo\"}\n        assert fix_json_escape_char(text) == expected_output\n\n    def test_multiple_control_chars(self):\n        \"\"\"Replace multiple control characters with spaces, recursively\"\"\"\n        text = '{\"x\": \"A\\x02B\\x03C\"}'\n        expected_output = {\"x\": \"A B C\"}\n        assert fix_json_escape_char(text) == expected_output\n"
  },
  {
    "path": "tests/unittest/test_fix_output.py",
    "content": "# Generated by CodiumAI\n\nfrom pr_agent.algo.utils import try_fix_json\n\n\nclass TestTryFixJson:\n    # Tests that JSON with complete 'Code suggestions' section returns expected output\n    def test_incomplete_code_suggestions(self):\n        review = '{\"PR Analysis\": {\"Main theme\": \"xxx\", \"Type of PR\": \"Bug fix\"}, \"PR Feedback\": {\"General PR suggestions\": \"..., `xxx`...\", \"Code suggestions\": [{\"relevant file\": \"xxx.py\", \"suggestion content\": \"xxx [important]\"}, {\"suggestion number\": 2, \"relevant file\": \"yyy.py\", \"suggestion content\": \"yyy [incomp...'  # noqa: E501\n        expected_output = {\n            'PR Analysis': {\n                'Main theme': 'xxx',\n                'Type of PR': 'Bug fix'\n            },\n            'PR Feedback': {\n                'General PR suggestions': '..., `xxx`...',\n                'Code suggestions': [\n                    {\n                        'relevant file': 'xxx.py',\n                        'suggestion content': 'xxx [important]'\n                    }\n                ]\n            }\n        }\n        assert try_fix_json(review) == expected_output\n\n    def test_incomplete_code_suggestions_new_line(self):\n        review = '{\"PR Analysis\": {\"Main theme\": \"xxx\", \"Type of PR\": \"Bug fix\"}, \"PR Feedback\": {\"General PR suggestions\": \"..., `xxx`...\", \"Code suggestions\": [{\"relevant file\": \"xxx.py\", \"suggestion content\": \"xxx [important]\"} \\n\\t, {\"suggestion number\": 2, \"relevant file\": \"yyy.py\", \"suggestion content\": \"yyy [incomp...'  
# noqa: E501\n        expected_output = {\n            'PR Analysis': {\n                'Main theme': 'xxx',\n                'Type of PR': 'Bug fix'\n            },\n            'PR Feedback': {\n                'General PR suggestions': '..., `xxx`...',\n                'Code suggestions': [\n                    {\n                        'relevant file': 'xxx.py',\n                        'suggestion content': 'xxx [important]'\n                    }\n                ]\n            }\n        }\n        assert try_fix_json(review) == expected_output\n\n    def test_incomplete_code_suggestions_many_close_brackets(self):\n        review = '{\"PR Analysis\": {\"Main theme\": \"xxx\", \"Type of PR\": \"Bug fix\"}, \"PR Feedback\": {\"General PR suggestions\": \"..., `xxx`...\", \"Code suggestions\": [{\"relevant file\": \"xxx.py\", \"suggestion content\": \"xxx [important]\"} \\n, {\"suggestion number\": 2, \"relevant file\": \"yyy.py\", \"suggestion content\": \"yyy }, [}\\n ,incomp.}  ,..'  
# noqa: E501\n        expected_output = {\n            'PR Analysis': {\n                'Main theme': 'xxx',\n                'Type of PR': 'Bug fix'\n            },\n            'PR Feedback': {\n                'General PR suggestions': '..., `xxx`...',\n                'Code suggestions': [\n                    {\n                        'relevant file': 'xxx.py',\n                        'suggestion content': 'xxx [important]'\n                    }\n                ]\n            }\n        }\n        assert try_fix_json(review) == expected_output\n\n    def test_incomplete_code_suggestions_relevant_file(self):\n        review = '{\"PR Analysis\": {\"Main theme\": \"xxx\", \"Type of PR\": \"Bug fix\"}, \"PR Feedback\": {\"General PR suggestions\": \"..., `xxx`...\", \"Code suggestions\": [{\"relevant file\": \"xxx.py\", \"suggestion content\": \"xxx [important]\"}, {\"suggestion number\": 2, \"relevant file\": \"yyy.p'  # noqa: E501\n        expected_output = {\n            'PR Analysis': {\n                'Main theme': 'xxx',\n                'Type of PR': 'Bug fix'\n            },\n            'PR Feedback': {\n                'General PR suggestions': '..., `xxx`...',\n                'Code suggestions': [\n                    {\n                        'relevant file': 'xxx.py',\n                        'suggestion content': 'xxx [important]'\n                    }\n                ]\n            }\n        }\n        assert try_fix_json(review) == expected_output\n"
  },
  {
    "path": "tests/unittest/test_fresh_vars_functionality.py",
    "content": "\"\"\"\nComprehensive unit tests for Dynaconf fresh_vars functionality.\n\nThese tests verify that the fresh_vars feature works correctly with the custom_merge_loader,\nparticularly for the GitLab credentials use case where values should be reloaded from disk\non each access rather than being cached.\n\nThe tests are designed to detect if fresh_vars is broken due to custom loader changes,\nsuch as those introduced in https://github.com/qodo-ai/pr-agent/pull/2087.\n\"\"\"\n\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom unittest.mock import patch\n\nimport pytest\nfrom dynaconf import Dynaconf\n\n# Import get_settings at module level to complete the import chain and avoid circular import issues\n# This ensures pr_agent.config_loader is fully loaded before custom_merge_loader is used in tests\nfrom pr_agent.config_loader import get_settings  # noqa: F401\n\n\n# Module-level helper function\ndef create_dynaconf_with_custom_loader(temp_dir, secrets_file):\n    \"\"\"\n    Create a Dynaconf instance matching the production configuration.\n\n    This mimics the config_loader.py setup with:\n    - core_loaders disabled\n    - custom_merge_loader and env_loader enabled\n    - merge_enabled = True\n\n    Note: fresh_vars should be configured via FRESH_VARS_FOR_DYNACONF environment variable,\n    which is the only way to configure it in pr-agent.\n\n    Args:\n        temp_dir: Temporary directory path\n        secrets_file: Path to secrets file\n\n    Returns:\n        Dynaconf instance configured like production\n    \"\"\"\n    return Dynaconf(\n        core_loaders=[],\n        loaders=[\"pr_agent.custom_merge_loader\", \"dynaconf.loaders.env_loader\"],\n        root_path=temp_dir,\n        merge_enabled=True,\n        envvar_prefix=False,\n        load_dotenv=False,\n        settings_files=[str(secrets_file)],\n    )\n\n\nclass TestFreshVarsGitLabScenario:\n    \"\"\"\n    Test fresh_vars functionality for the GitLab credentials use 
case.\n\n    This class tests the specific scenario where:\n    - FRESH_VARS_FOR_DYNACONF='[\"GITLAB\"]' is set\n    - .secrets.toml contains gitlab.personal_access_token and gitlab.shared_secret\n    - Values should be reloaded from disk on each access (not cached)\n    \"\"\"\n\n    def setup_method(self):\n        \"\"\"Set up temporary directory and files for each test.\"\"\"\n        self.temp_dir = tempfile.mkdtemp()\n        self.secrets_file = Path(self.temp_dir) / \".secrets.toml\"\n\n    def teardown_method(self):\n        \"\"\"Clean up temporary files after each test.\"\"\"\n        import shutil\n\n        if hasattr(self, \"temp_dir\") and Path(self.temp_dir).exists():\n            shutil.rmtree(self.temp_dir)\n\n    def create_secrets_toml(self, personal_access_token=\"initial_token\", shared_secret=\"initial_secret\"):\n        \"\"\"\n        Create a .secrets.toml file with GitLab credentials.\n\n        Args:\n            personal_access_token: The GitLab personal access token value\n            shared_secret: The GitLab shared secret value\n        \"\"\"\n        content = f\"\"\"[gitlab]\npersonal_access_token = \"{personal_access_token}\"\nshared_secret = \"{shared_secret}\"\n\"\"\"\n        self.secrets_file.write_text(content)\n\n    def test_gitlab_personal_access_token_reload(self):\n        \"\"\"\n        Test that gitlab.personal_access_token is reloaded when marked as fresh.\n\n        This is the critical test for the user's use case. It verifies that:\n        1. Initial value is loaded correctly\n        2. After modifying the file, the new value is returned (not cached)\n        3. 
This works with the custom_merge_loader\n        \"\"\"\n        # Create initial secrets file\n        self.create_secrets_toml(personal_access_token=\"token_v1\", shared_secret=\"secret_v1\")\n\n        # Set FRESH_VARS_FOR_DYNACONF environment variable (the only way to configure fresh_vars in pr-agent)\n        with patch.dict(os.environ, {\"FRESH_VARS_FOR_DYNACONF\": '[\"GITLAB\"]'}):\n            # Create Dynaconf with GITLAB marked as fresh via env var\n            settings = create_dynaconf_with_custom_loader(self.temp_dir, self.secrets_file)\n\n            # First access - should return initial value\n            first_token = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n        assert first_token == \"token_v1\", \"Initial personal_access_token should be 'token_v1'\"\n\n        # Modify the secrets file\n        self.create_secrets_toml(personal_access_token=\"token_v2_updated\", shared_secret=\"secret_v1\")\n\n        # Second access - should return NEW value (not cached)\n        second_token = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n        assert second_token == \"token_v2_updated\", (\n            \"After file modification, personal_access_token should be reloaded to 'token_v2_updated'\"\n        )\n\n        # Verify the values are different (fresh_vars working)\n        assert first_token != second_token, \"fresh_vars should cause values to be reloaded, not cached\"\n\n    def test_gitlab_multiple_fields_reload(self):\n        \"\"\"\n        Test that both gitlab fields reload together when GITLAB is marked as fresh.\n\n        This verifies that fresh_vars works correctly when multiple fields\n        in the same section are modified simultaneously.\n        \"\"\"\n        # Create initial secrets file\n        self.create_secrets_toml(personal_access_token=\"token_v1\", shared_secret=\"secret_v1\")\n\n        # Set FRESH_VARS_FOR_DYNACONF environment variable\n        with patch.dict(os.environ, {\"FRESH_VARS_FOR_DYNACONF\": '[\"GITLAB\"]'}):\n         
   # Create Dynaconf with GITLAB marked as fresh via env var\n            settings = create_dynaconf_with_custom_loader(self.temp_dir, self.secrets_file)\n\n            # First access - both fields\n            first_token = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n            first_secret = settings.GITLAB.SHARED_SECRET\n            assert first_token == \"token_v1\"\n            assert first_secret == \"secret_v1\"\n\n            # Modify both fields in the secrets file\n            self.create_secrets_toml(\n                personal_access_token=\"token_v2_both_updated\", shared_secret=\"secret_v2_both_updated\"\n            )\n\n            # Second access - both fields should be updated\n            second_token = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n            second_secret = settings.GITLAB.SHARED_SECRET\n\n            assert second_token == \"token_v2_both_updated\", \"personal_access_token should be reloaded\"\n            assert second_secret == \"secret_v2_both_updated\", \"shared_secret should be reloaded\"\n\n            # Verify both fields were reloaded\n            assert first_token != second_token, \"personal_access_token should not be cached\"\n            assert first_secret != second_secret, \"shared_secret should not be cached\"\n\n\nclass TestFreshVarsCustomLoaderIntegration:\n    \"\"\"\n    Test fresh_vars integration with custom_merge_loader.\n\n    These tests verify that fresh_vars works correctly when using the\n    custom_merge_loader instead of Dynaconf's default core loaders.\n    \"\"\"\n\n    def setup_method(self):\n        \"\"\"Set up temporary directory and files for each test.\"\"\"\n        self.temp_dir = tempfile.mkdtemp()\n        self.secrets_file = Path(self.temp_dir) / \".secrets.toml\"\n\n    def teardown_method(self):\n        \"\"\"Clean up temporary files after each test.\"\"\"\n        import shutil\n\n        if hasattr(self, \"temp_dir\") and Path(self.temp_dir).exists():\n            
shutil.rmtree(self.temp_dir)\n\n    def create_secrets_toml(self, personal_access_token=\"initial_token\", shared_secret=\"initial_secret\"):\n        \"\"\"Create a .secrets.toml file with GitLab credentials.\"\"\"\n        content = f\"\"\"[gitlab]\npersonal_access_token = \"{personal_access_token}\"\nshared_secret = \"{shared_secret}\"\n\"\"\"\n        self.secrets_file.write_text(content)\n\n    def test_fresh_vars_without_core_loaders(self):\n        \"\"\"\n        Critical test: Verify fresh_vars works when core_loaders are disabled.\n\n        This test detects if the bug exists where fresh_vars stops working\n        when core_loaders=[] is set. This is the key issue that may have been\n        introduced by the custom_merge_loader changes.\n\n        Expected behavior:\n        - If fresh_vars works: second_value != first_value\n        - If fresh_vars is broken: second_value == first_value (cached)\n        \"\"\"\n        # Create initial secrets file\n        self.create_secrets_toml(personal_access_token=\"token_before_bug_test\")\n\n        # Set FRESH_VARS_FOR_DYNACONF environment variable\n        with patch.dict(os.environ, {\"FRESH_VARS_FOR_DYNACONF\": '[\"GITLAB\"]'}):\n            # Create Dynaconf WITHOUT core loaders but WITH fresh_vars via env var\n            settings = create_dynaconf_with_custom_loader(self.temp_dir, self.secrets_file)\n\n            # First access\n            first_value = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n        assert first_value == \"token_before_bug_test\", \"Initial value should be loaded correctly\"\n\n        # Modify the file\n        self.create_secrets_toml(personal_access_token=\"token_after_bug_test\")\n\n        # Second access - THIS IS THE CRITICAL CHECK\n        second_value = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n\n        # If this assertion fails, fresh_vars is broken with custom_merge_loader\n        assert second_value == \"token_after_bug_test\", (\n            \"CRITICAL: fresh_vars should 
reload the value even with core_loaders=[]\"\n        )\n\n        assert first_value != second_value, \"CRITICAL: Values should be different, indicating fresh_vars is working\"\n\n    def test_custom_loader_respects_fresh_vars(self):\n        \"\"\"\n        Test that custom_merge_loader respects the fresh_vars configuration.\n\n        Verifies that when a section is marked as fresh, the custom loader\n        doesn't cache values from that section.\n        \"\"\"\n        # Create initial secrets file with multiple sections\n        content = \"\"\"[gitlab]\npersonal_access_token = \"gitlab_token_v1\"\n\n[github]\nuser_token = \"github_token_v1\"\n\"\"\"\n        self.secrets_file.write_text(content)\n\n        # Set FRESH_VARS_FOR_DYNACONF environment variable (only GITLAB)\n        with patch.dict(os.environ, {\"FRESH_VARS_FOR_DYNACONF\": '[\"GITLAB\"]'}):\n            # Create Dynaconf with only GITLAB marked as fresh via env var\n            settings = create_dynaconf_with_custom_loader(self.temp_dir, self.secrets_file)\n\n            # Access both sections\n            gitlab_token_1 = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n            github_token_1 = settings.GITHUB.USER_TOKEN\n\n            # Modify both sections\n            content = \"\"\"[gitlab]\npersonal_access_token = \"gitlab_token_v2\"\n\n[github]\nuser_token = \"github_token_v2\"\n\"\"\"\n            self.secrets_file.write_text(content)\n\n            # Access again\n            gitlab_token_2 = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n            github_token_2 = settings.GITHUB.USER_TOKEN\n\n            # GITLAB should be reloaded (marked as fresh)\n            assert gitlab_token_2 == \"gitlab_token_v2\", \"GITLAB section should be reloaded (marked as fresh)\"\n            assert gitlab_token_1 != gitlab_token_2, \"GITLAB values should not be cached\"\n\n            # GITHUB should be cached (not marked as fresh)\n            assert github_token_2 == \"github_token_v1\", \"GITHUB section 
should be cached (not marked as fresh)\"\n            assert github_token_1 == github_token_2, \"GITHUB values should be cached\"\n\n\nclass TestFreshVarsBasicFunctionality:\n    \"\"\"\n    Test basic fresh_vars functionality and edge cases.\n\n    These tests verify fundamental fresh_vars behavior and ensure\n    the feature works as expected in various scenarios.\n    \"\"\"\n\n    def setup_method(self):\n        \"\"\"Set up temporary directory and files for each test.\"\"\"\n        self.temp_dir = tempfile.mkdtemp()\n        self.secrets_file = Path(self.temp_dir) / \".secrets.toml\"\n\n    def teardown_method(self):\n        \"\"\"Clean up temporary files after each test.\"\"\"\n        import shutil\n\n        if hasattr(self, \"temp_dir\") and Path(self.temp_dir).exists():\n            shutil.rmtree(self.temp_dir)\n\n    def create_secrets_toml(self, personal_access_token=\"initial_token\"):\n        \"\"\"Create a .secrets.toml file with GitLab credentials.\"\"\"\n        content = f\"\"\"[gitlab]\npersonal_access_token = \"{personal_access_token}\"\n\"\"\"\n        self.secrets_file.write_text(content)\n\n    def test_gitlab_credentials_not_cached_when_fresh(self):\n        \"\"\"\n        Test that GitLab credentials are not cached when marked as fresh.\n\n        This verifies the core requirement: when GITLAB is in fresh_vars,\n        accessing the credentials multiple times should reload from disk\n        each time, not return a cached value.\n        \"\"\"\n        # Create initial secrets file\n        self.create_secrets_toml(personal_access_token=\"no_cache_v1\")\n\n        # Set FRESH_VARS_FOR_DYNACONF environment variable\n        with patch.dict(os.environ, {\"FRESH_VARS_FOR_DYNACONF\": '[\"GITLAB\"]'}):\n            # Create Dynaconf with GITLAB marked as fresh via env var\n            settings = create_dynaconf_with_custom_loader(self.temp_dir, self.secrets_file)\n\n            # Access the token multiple times before modification\n      
      access_1 = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n            access_2 = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n            access_3 = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n\n        # All should return the same value (file hasn't changed)\n        assert access_1 == access_2 == access_3 == \"no_cache_v1\", (\n            \"Multiple accesses before modification should return same value\"\n        )\n\n        # Modify the file\n        self.create_secrets_toml(personal_access_token=\"no_cache_v2\")\n\n        # Access again - should get new value immediately\n        access_4 = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n        assert access_4 == \"no_cache_v2\", \"First access after modification should return new value\"\n\n        # Verify no caching occurred\n        assert access_1 != access_4, \"Value should change after file modification (no caching)\"\n\n        # Modify again\n        self.create_secrets_toml(personal_access_token=\"no_cache_v3\")\n\n        # Access again - should get newest value\n        access_5 = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n        assert access_5 == \"no_cache_v3\", \"Second modification should also be detected\"\n\n        # Verify the progression\n        assert access_1 != access_4 != access_5, \"Each modification should result in a different value (no caching)\"\n\n    def test_fresh_vars_works_with_default_loaders(self):\n        \"\"\"\n        Test that fresh_vars works correctly with Dynaconf's default core loaders.\n\n        This is a control test to prove that fresh_vars functionality works\n        as expected when using the standard Dynaconf configuration (with core_loaders).\n        This helps isolate the bug to the custom_merge_loader configuration.\n        \"\"\"\n        # Create initial secrets file\n        self.create_secrets_toml(personal_access_token=\"default_v1\")\n\n        # Create Dynaconf with DEFAULT loaders (not custom_merge_loader)\n        settings = Dynaconf(\n            # Use default 
core_loaders (don't disable them)\n            root_path=self.temp_dir,\n            merge_enabled=True,\n            envvar_prefix=False,\n            load_dotenv=False,\n            settings_files=[str(self.secrets_file)],\n            fresh_vars=[\"GITLAB\"],\n        )\n\n        # First access\n        first_value = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n        assert first_value == \"default_v1\"\n\n        # Modify file\n        self.create_secrets_toml(personal_access_token=\"default_v2\")\n\n        # Second access - should be reloaded with default loaders\n        second_value = settings.GITLAB.PERSONAL_ACCESS_TOKEN\n        assert second_value == \"default_v2\", (\n            \"With default loaders, fresh_vars SHOULD work correctly. \"\n            \"If this test fails, the issue is not specific to custom_merge_loader.\"\n        )\n\n        assert first_value != second_value, \"Values should be different when using default loaders with fresh_vars\"\n\n\nif __name__ == \"__main__\":\n    pytest.main([__file__, \"-v\"])\n"
  },
  {
    "path": "tests/unittest/test_get_max_tokens.py",
    "content": "import pytest\n\nimport pr_agent.algo.utils as utils\nfrom pr_agent.algo.utils import MAX_TOKENS, get_max_tokens\n\n\nclass TestGetMaxTokens:\n\n    # Test if the file is in MAX_TOKENS\n    def test_model_max_tokens(self, monkeypatch):\n        fake_settings = type('', (), {\n            'config': type('', (), {\n                'custom_model_max_tokens': 0,\n                'max_model_tokens': 0\n            })()\n        })()\n\n        monkeypatch.setattr(utils, \"get_settings\", lambda: fake_settings)\n\n        model = \"gpt-3.5-turbo\"\n        expected = MAX_TOKENS[model]\n\n        assert get_max_tokens(model) == expected\n\n    @pytest.mark.parametrize(\"model\", [\"gpt-5.4\", \"gpt-5.4-2026-03-05\"])\n    def test_gpt54_model_max_tokens(self, monkeypatch, model):\n        fake_settings = type('', (), {\n            'config': type('', (), {\n                'custom_model_max_tokens': 0,\n                'max_model_tokens': 0\n            })()\n        })()\n\n        monkeypatch.setattr(utils, \"get_settings\", lambda: fake_settings)\n\n        assert get_max_tokens(model) == 272000\n\n    # Test situations where the model is not registered and exists as a custom model\n    def test_model_has_custom(self, monkeypatch):\n        fake_settings = type('', (), {\n            'config': type('', (), {\n                'custom_model_max_tokens': 5000,\n                'max_model_tokens': 0  # 제한 없음\n            })()\n        })()\n\n        monkeypatch.setattr(utils, \"get_settings\", lambda: fake_settings)\n\n        model = \"custom-model\"\n        expected = 5000\n\n        assert get_max_tokens(model) == expected\n\n    @pytest.mark.parametrize(\"model\", [\n        \"gpt-5.1-codex\",\n        \"gpt-5.2-codex\",\n        \"gpt-5.3-codex\",\n    ])\n    def test_gpt_codex_models_max_tokens(self, monkeypatch, model):\n        fake_settings = type('', (), {\n            'config': type('', (), {\n                'custom_model_max_tokens': 0,\n    
            'max_model_tokens': 0\n            })()\n        })()\n\n        monkeypatch.setattr(utils, \"get_settings\", lambda: fake_settings)\n\n        expected = MAX_TOKENS[model]\n\n        assert get_max_tokens(model) == expected\n\n    def test_model_not_max_tokens_and_not_has_custom(self, monkeypatch):\n        fake_settings = type('', (), {\n            'config': type('', (), {\n                'custom_model_max_tokens': 0,\n                'max_model_tokens': 0\n            })()\n        })()\n\n        monkeypatch.setattr(utils, \"get_settings\", lambda: fake_settings)\n\n        model = \"custom-model\"\n\n        with pytest.raises(Exception):\n            get_max_tokens(model)\n\n    def test_model_max_tokens_with__limit(self, monkeypatch):\n        fake_settings = type('', (), {\n            'config': type('', (), {\n                'custom_model_max_tokens': 0,\n                'max_model_tokens': 10000\n            })()\n        })()\n\n        monkeypatch.setattr(utils, \"get_settings\", lambda: fake_settings)\n\n        model = \"gpt-3.5-turbo\"  # this model setting is 160000\n        expected = 10000\n\n        assert get_max_tokens(model) == expected\n\n    @pytest.mark.parametrize(\"model\", [\n        \"gemini/gemini-3-flash-preview\",\n        \"vertex_ai/gemini-3-flash-preview\",\n        \"gemini/gemini-3-pro-preview\",\n        \"vertex_ai/gemini-3-pro-preview\",\n        \"gemini/gemini-3.1-pro-preview\",\n        \"vertex_ai/gemini-3.1-pro-preview\",\n    ])\n    def test_gemini_3_and_3_1_pro_preview(self, monkeypatch, model):\n        fake_settings = type(\"\", (), {\n            \"config\": type(\"\", (), {\n                \"custom_model_max_tokens\": 0,\n                \"max_model_tokens\": 0,\n            })()\n        })()\n        monkeypatch.setattr(utils, \"get_settings\", lambda: fake_settings)\n        assert get_max_tokens(model) == 1048576\n\n    @pytest.mark.parametrize(\n        \"model\",\n        [\n            
\"anthropic/claude-opus-4-6\",\n            \"claude-opus-4-6\",\n            \"vertex_ai/claude-opus-4-6\",\n            \"bedrock/anthropic.claude-opus-4-6-v1:0\",\n            \"bedrock/global.anthropic.claude-opus-4-6-v1:0\",\n            \"bedrock/us.anthropic.claude-opus-4-6-v1:0\",\n        ],\n    )\n    def test_claude_opus_4_6_model_max_tokens(self, monkeypatch, model):\n        fake_settings = type('', (), {\n            'config': type('', (), {\n                'custom_model_max_tokens': 0,\n                'max_model_tokens': 0\n            })()\n        })()\n\n        monkeypatch.setattr(utils, \"get_settings\", lambda: fake_settings)\n\n        assert get_max_tokens(model) == 200000\n\n    @pytest.mark.parametrize(\n        \"model\",\n        [\n            \"anthropic/claude-sonnet-4-6\",\n            \"claude-sonnet-4-6\",\n            \"vertex_ai/claude-sonnet-4-6\",\n            \"bedrock/anthropic.claude-sonnet-4-6\",\n            \"bedrock/global.anthropic.claude-sonnet-4-6\",\n            \"bedrock/us.anthropic.claude-sonnet-4-6\",\n            \"bedrock/au.anthropic.claude-sonnet-4-6\",\n            \"bedrock/eu.anthropic.claude-sonnet-4-6\",\n            \"bedrock/jp.anthropic.claude-sonnet-4-6\",\n        ],\n    )\n    def test_claude_sonnet_4_6_model_max_tokens(self, monkeypatch, model):\n        fake_settings = type('', (), {\n            'config': type('', (), {\n                'custom_model_max_tokens': 0,\n                'max_model_tokens': 0\n            })()\n        })()\n\n        monkeypatch.setattr(utils, \"get_settings\", lambda: fake_settings)\n\n        assert get_max_tokens(model) == 200000\n"
  },
  {
    "path": "tests/unittest/test_gitea_provider.py",
    "content": "from io import BytesIO\nfrom unittest.mock import MagicMock, patch\n\n\nclass TestGiteaProvider:\n    @patch('pr_agent.git_providers.gitea_provider.get_settings')\n    @patch('pr_agent.git_providers.gitea_provider.giteapy.ApiClient')\n    def test_gitea_provider_auth_header(self, mock_api_client_cls, mock_get_settings):\n        # Setup settings\n        settings = MagicMock()\n        settings.get.side_effect = lambda k, d=None: {\n            'GITEA.URL': 'https://gitea.example.com',\n            'GITEA.PERSONAL_ACCESS_TOKEN': 'test-token',\n            'GITEA.REPO_SETTING': None,\n            'GITEA.SKIP_SSL_VERIFICATION': False,\n            'GITEA.SSL_CA_CERT': None\n        }.get(k, d)\n        mock_get_settings.return_value = settings\n\n        # Setup ApiClient mock\n        mock_api_client = mock_api_client_cls.return_value\n        # Mock configuration object on client\n        mock_api_client.configuration.api_key = {'Authorization': 'token test-token'}\n\n        # Mock responses for calls made during initialization\n        def call_api_side_effect(path, method, **kwargs):\n            mock_resp = MagicMock()\n            if 'files' in path: # get_change_file_pull_request\n                mock_resp.data = BytesIO(b'[]')\n                return mock_resp\n            if 'commits' in path:\n                mock_resp.data = BytesIO(b'[]')\n                return mock_resp\n\n            # Default fallback\n            mock_resp.data = BytesIO(b'{}')\n            return mock_resp\n\n        mock_api_client.call_api.side_effect = call_api_side_effect\n\n        from pr_agent.git_providers.gitea_provider import RepoApi\n\n        client = mock_api_client\n        repo_api = RepoApi(client)\n\n        # Now test methods independently\n\n        # 1. 
get_change_file_pull_request\n        mock_api_client.reset_mock()\n        mock_resp = MagicMock()\n        mock_resp.data = BytesIO(b'[]')\n        mock_api_client.call_api.return_value = mock_resp\n\n        repo_api.get_change_file_pull_request('owner', 'repo', 123)\n\n        args, kwargs = mock_api_client.call_api.call_args\n        assert '/repos/owner/repo/pulls/123/files' in args[0]\n        assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']\n        assert 'token=' not in args[0]\n\n        # 2. get_pull_request_diff\n        mock_api_client.reset_mock()\n        mock_resp = MagicMock()\n        mock_resp.data = BytesIO(b'diff content')\n        mock_api_client.call_api.return_value = mock_resp\n\n        repo_api.get_pull_request_diff('owner', 'repo', 123)\n\n        args, kwargs = mock_api_client.call_api.call_args\n        assert args[0] == '/repos/owner/repo/pulls/123.diff'\n        assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']\n\n        # 3. get_languages\n        mock_api_client.reset_mock()\n        mock_resp.data = BytesIO(b'{\"Python\": 100}')\n        mock_api_client.call_api.return_value = mock_resp\n\n        repo_api.get_languages('owner', 'repo')\n\n        args, kwargs = mock_api_client.call_api.call_args\n        assert args[0] == '/repos/owner/repo/languages'\n        assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']\n\n        # 4. get_file_content\n        mock_api_client.reset_mock()\n        mock_resp.data = BytesIO(b'content')\n        mock_api_client.call_api.return_value = mock_resp\n\n        repo_api.get_file_content('owner', 'repo', 'sha1', 'file.txt')\n\n        args, kwargs = mock_api_client.call_api.call_args\n        assert args[0] == '/repos/owner/repo/raw/file.txt'\n        assert kwargs.get('query_params') == [('ref', 'sha1')]\n        assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']\n\n        # 5. 
get_pr_commits\n        mock_api_client.reset_mock()\n        mock_resp.data = BytesIO(b'[]')\n        mock_api_client.call_api.return_value = mock_resp\n\n        repo_api.get_pr_commits('owner', 'repo', 123)\n\n        args, kwargs = mock_api_client.call_api.call_args\n        assert args[0] == '/repos/owner/repo/pulls/123/commits'\n        assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']\n"
  },
  {
    "path": "tests/unittest/test_github_action_output.py",
    "content": "import json\nimport os\n\nfrom pr_agent.algo.utils import get_settings, github_action_output\n\n\nclass TestGitHubOutput:\n    def test_github_action_output_enabled(self, monkeypatch, tmp_path):\n        get_settings().set('GITHUB_ACTION_CONFIG.ENABLE_OUTPUT', True)\n        monkeypatch.setenv('GITHUB_OUTPUT', str(tmp_path / 'output'))\n        output_data = {'key1': {'value1': 1, 'value2': 2}}\n        key_name = 'key1'\n\n        github_action_output(output_data, key_name)\n\n        with open(str(tmp_path / 'output'), 'r') as f:\n            env_value = f.read()\n\n        actual_key = env_value.split('=')[0]\n        actual_data = json.loads(env_value.split('=')[1])\n\n        assert actual_key == key_name\n        assert actual_data == output_data[key_name]\n\n    def test_github_action_output_disabled(self, monkeypatch, tmp_path):\n        get_settings().set('GITHUB_ACTION_CONFIG.ENABLE_OUTPUT', False)\n        monkeypatch.setenv('GITHUB_OUTPUT', str(tmp_path / 'output'))\n        output_data = {'key1': {'value1': 1, 'value2': 2}}\n        key_name = 'key1'\n\n        github_action_output(output_data, key_name)\n\n        assert not os.path.exists(str(tmp_path / 'output'))\n\n    def test_github_action_output_notset(self, monkeypatch, tmp_path):\n        # not set config\n        monkeypatch.setenv('GITHUB_OUTPUT', str(tmp_path / 'output'))\n        output_data = {'key1': {'value1': 1, 'value2': 2}}\n        key_name = 'key1'\n\n        github_action_output(output_data, key_name)\n\n        assert not os.path.exists(str(tmp_path / 'output'))\n\n    def test_github_action_output_error_case(self, monkeypatch, tmp_path):\n        monkeypatch.setenv('GITHUB_OUTPUT', str(tmp_path / 'output'))\n        output_data = None # invalid data\n        key_name = 'key1'\n\n        github_action_output(output_data, key_name)\n\n        assert not os.path.exists(str(tmp_path / 'output'))\n"
  },
  {
    "path": "tests/unittest/test_gitlab_provider.py",
    "content": "from unittest.mock import MagicMock, patch\n\nimport pytest\nfrom gitlab import Gitlab\nfrom gitlab.exceptions import GitlabGetError\nfrom gitlab.v4.objects import Project, ProjectFile\n\nfrom pr_agent.git_providers.gitlab_provider import GitLabProvider\n\n\nclass TestGitLabProvider:\n    \"\"\"Test suite for GitLab provider functionality.\"\"\"\n\n    @pytest.fixture\n    def mock_gitlab_client(self):\n        client = MagicMock()\n        return client\n\n    @pytest.fixture\n    def mock_project(self):\n        project = MagicMock()\n        return project\n\n    @pytest.fixture\n    def gitlab_provider(self, mock_gitlab_client, mock_project):\n        with patch('pr_agent.git_providers.gitlab_provider.gitlab.Gitlab', return_value=mock_gitlab_client), \\\n             patch('pr_agent.git_providers.gitlab_provider.get_settings') as mock_settings:\n\n            mock_settings.return_value.get.side_effect = lambda key, default=None: {\n                \"GITLAB.URL\": \"https://gitlab.com\",\n                \"GITLAB.PERSONAL_ACCESS_TOKEN\": \"fake_token\"\n            }.get(key, default)\n\n            mock_gitlab_client.projects.get.return_value = mock_project\n            provider = GitLabProvider(\"https://gitlab.com/test/repo/-/merge_requests/1\")\n            provider.gl = mock_gitlab_client\n            provider.id_project = \"test/repo\"\n            return provider\n\n    def test_get_pr_file_content_success(self, gitlab_provider, mock_project):\n        mock_file = MagicMock(ProjectFile)\n        mock_file.decode.return_value = \"# Changelog\\n\\n## v1.0.0\\n- Initial release\"\n        mock_project.files.get.return_value = mock_file\n\n        content = gitlab_provider.get_pr_file_content(\"CHANGELOG.md\", \"main\")\n\n        assert content == \"# Changelog\\n\\n## v1.0.0\\n- Initial release\"\n        mock_project.files.get.assert_called_once_with(\"CHANGELOG.md\", \"main\")\n        mock_file.decode.assert_called_once()\n\n    def 
test_get_pr_file_content_with_bytes(self, gitlab_provider, mock_project):\n        mock_file = MagicMock(ProjectFile)\n        mock_file.decode.return_value = b\"# Changelog\\n\\n## v1.0.0\\n- Initial release\"\n        mock_project.files.get.return_value = mock_file\n\n        content = gitlab_provider.get_pr_file_content(\"CHANGELOG.md\", \"main\")\n\n        assert content == \"# Changelog\\n\\n## v1.0.0\\n- Initial release\"\n        mock_project.files.get.assert_called_once_with(\"CHANGELOG.md\", \"main\")\n\n    def test_get_pr_file_content_file_not_found(self, gitlab_provider, mock_project):\n        mock_project.files.get.side_effect = GitlabGetError(\"404 Not Found\")\n\n        content = gitlab_provider.get_pr_file_content(\"CHANGELOG.md\", \"main\")\n\n        assert content == \"\"\n        mock_project.files.get.assert_called_once_with(\"CHANGELOG.md\", \"main\")\n\n    def test_get_pr_file_content_other_exception(self, gitlab_provider, mock_project):\n        mock_project.files.get.side_effect = Exception(\"Network error\")\n\n        content = gitlab_provider.get_pr_file_content(\"CHANGELOG.md\", \"main\")\n\n        assert content == \"\"\n\n    def test_create_or_update_pr_file_create_new(self, gitlab_provider, mock_project):\n        mock_project.files.get.side_effect = GitlabGetError(\"404 Not Found\")\n        mock_file = MagicMock()\n        mock_project.files.create.return_value = mock_file\n\n        new_content = \"# Changelog\\n\\n## v1.1.0\\n- New feature\"\n        commit_message = \"Add CHANGELOG.md\"\n\n        gitlab_provider.create_or_update_pr_file(\n            \"CHANGELOG.md\", \"feature-branch\", new_content, commit_message\n        )\n\n        mock_project.files.get.assert_called_once_with(\"CHANGELOG.md\", \"feature-branch\")\n        mock_project.files.create.assert_called_once_with({\n            'file_path': 'CHANGELOG.md',\n            'branch': 'feature-branch',\n            'content': new_content,\n            
'commit_message': commit_message,\n        })\n\n    def test_create_or_update_pr_file_update_existing(self, gitlab_provider, mock_project):\n        mock_file = MagicMock(ProjectFile)\n        mock_file.decode.return_value = \"# Old changelog content\"\n        mock_project.files.get.return_value = mock_file\n\n        new_content = \"# New changelog content\"\n        commit_message = \"Update CHANGELOG.md\"\n\n        gitlab_provider.create_or_update_pr_file(\n            \"CHANGELOG.md\", \"feature-branch\", new_content, commit_message\n        )\n\n        mock_project.files.get.assert_called_once_with(\"CHANGELOG.md\", \"feature-branch\")\n        mock_file.content = new_content\n        mock_file.save.assert_called_once_with(branch=\"feature-branch\", commit_message=commit_message)\n\n    def test_create_or_update_pr_file_update_exception(self, gitlab_provider, mock_project):\n        mock_project.files.get.side_effect = Exception(\"Network error\")\n\n        with pytest.raises(Exception):\n            gitlab_provider.create_or_update_pr_file(\n                \"CHANGELOG.md\", \"feature-branch\", \"content\", \"message\"\n            )\n\n    def test_has_create_or_update_pr_file_method(self, gitlab_provider):\n        assert hasattr(gitlab_provider, \"create_or_update_pr_file\")\n        assert callable(getattr(gitlab_provider, \"create_or_update_pr_file\"))\n\n    def test_method_signature_compatibility(self, gitlab_provider):\n        import inspect\n\n        sig = inspect.signature(gitlab_provider.create_or_update_pr_file)\n        params = list(sig.parameters.keys())\n\n        expected_params = ['file_path', 'branch', 'contents', 'message']\n        assert params == expected_params\n\n    @pytest.mark.parametrize(\"content,expected\", [\n        (\"simple text\", \"simple text\"),\n        (b\"bytes content\", \"bytes content\"),\n        (\"\", \"\"),\n        (b\"\", \"\"),\n        (\"unicode: café\", \"unicode: café\"),\n        (b\"unicode: 
caf\\xc3\\xa9\", \"unicode: café\"),\n    ])\n    def test_content_encoding_handling(self, gitlab_provider, mock_project, content, expected):\n        mock_file = MagicMock(ProjectFile)\n        mock_file.decode.return_value = content\n        mock_project.files.get.return_value = mock_file\n\n        result = gitlab_provider.get_pr_file_content(\"test.md\", \"main\")\n\n        assert result == expected\n\n    def test_get_gitmodules_map_parsing(self, gitlab_provider, mock_project):\n        gitlab_provider.id_project = \"1\"\n        gitlab_provider.mr = MagicMock()\n        gitlab_provider.mr.target_branch = \"main\"\n\n        file_obj = MagicMock(ProjectFile)\n        file_obj.decode.return_value = (\n            \"[submodule \\\"libs/a\\\"]\\n\"\n            \"    path = \\\"libs/a\\\"\\n\"\n            \"    url = \\\"https://gitlab.com/a.git\\\"\\n\"\n            \"[submodule \\\"libs/b\\\"]\\n\"\n            \"    path = libs/b\\n\"\n            \"    url = git@gitlab.com:b.git\\n\"\n        )\n        mock_project.files.get.return_value = file_obj\n        gitlab_provider.gl.projects.get.return_value = mock_project\n\n        result = gitlab_provider._get_gitmodules_map()\n        assert result == {\n            \"libs/a\": \"https://gitlab.com/a.git\",\n            \"libs/b\": \"git@gitlab.com:b.git\",\n        }\n\n    def test_project_by_path_requires_exact_match(self, gitlab_provider):\n        gitlab_provider.gl.projects.get.reset_mock()\n        gitlab_provider.gl.projects.get.side_effect = Exception(\"not found\")\n        fake = MagicMock()\n        fake.path_with_namespace = \"other/group/repo\"\n        gitlab_provider.gl.projects.list.return_value = [fake]\n\n        result = gitlab_provider._project_by_path(\"group/repo\")\n\n        assert result is None\n        assert gitlab_provider.gl.projects.get.call_count == 2\n\n    def test_compare_submodule_cached(self, gitlab_provider):\n        proj = MagicMock()\n        
proj.repository_compare.return_value = {\"diffs\": [{\"diff\": \"d\"}]}\n        with patch.object(gitlab_provider, \"_project_by_path\", return_value=proj) as m_pbp:\n            first = gitlab_provider._compare_submodule(\"grp/repo\", \"old\", \"new\")\n            second = gitlab_provider._compare_submodule(\"grp/repo\", \"old\", \"new\")\n\n        assert first == second == [{\"diff\": \"d\"}]\n        m_pbp.assert_called_once_with(\"grp/repo\")\n        proj.repository_compare.assert_called_once_with(\"old\", \"new\")\n"
  },
  {
    "path": "tests/unittest/test_gitlab_webhook_port.py",
    "content": "import os\nfrom unittest import mock\n\nos.environ.setdefault(\"GITLAB__URL\", \"https://gitlab.example.com\")\nimport pr_agent.servers.gitlab_webhook as gitlab_webhook\n\n\ndef test_start_uses_port_env(monkeypatch):\n    monkeypatch.setenv(\"PORT\", \"4567\")\n\n    with mock.patch.object(gitlab_webhook.uvicorn, \"run\") as mock_run:\n        gitlab_webhook.start()\n\n    _, kwargs = mock_run.call_args\n    assert kwargs[\"port\"] == 4567\n    assert kwargs[\"host\"] == \"0.0.0.0\"\n\n\ndef test_start_invalid_port_env(monkeypatch):\n    monkeypatch.setenv(\"PORT\", \"not-a-number\")\n\n    with mock.patch.object(gitlab_webhook.uvicorn, \"run\") as mock_run:\n        gitlab_webhook.start()\n\n    _, kwargs = mock_run.call_args\n    assert kwargs[\"port\"] == 3000\n\n\ndef test_start_default_port(monkeypatch):\n    monkeypatch.delenv(\"PORT\", raising=False)\n\n    with mock.patch.object(gitlab_webhook.uvicorn, \"run\") as mock_run:\n        gitlab_webhook.start()\n\n    _, kwargs = mock_run.call_args\n    assert kwargs[\"port\"] == 3000\n\n\ndef test_start_invalid_port_range(monkeypatch):\n    monkeypatch.setenv(\"PORT\", \"70000\")\n\n    with mock.patch.object(gitlab_webhook.uvicorn, \"run\") as mock_run:\n        gitlab_webhook.start()\n\n    _, kwargs = mock_run.call_args\n    assert kwargs[\"port\"] == 3000\n"
  },
  {
    "path": "tests/unittest/test_handle_patch_deletions.py",
    "content": "# Generated by CodiumAI\nimport logging\n\nfrom pr_agent.algo.git_patch_processing import handle_patch_deletions\nfrom pr_agent.config_loader import get_settings\n\n\"\"\"\nCode Analysis\n\nObjective:\nThe objective of the function is to handle entire file or deletion patches and return the patch after omitting the\ndeletion hunks.\n\nInputs:\n- patch: a string representing the patch to be handled\n- original_file_content_str: a string representing the original content of the file\n- new_file_content_str: a string representing the new content of the file\n- file_name: a string representing the name of the file\n\nFlow:\n- If new_file_content_str is empty, set patch to \"File was deleted\" and return it\n- Otherwise, split patch into lines and omit the deletion hunks using the omit_deletion_hunks function\n- If the resulting patch is different from the original patch, log a message and set patch to the new patch\n- Return the resulting patch\n\nOutputs:\n- A string representing the patch after omitting the deletion hunks\n\nAdditional aspects:\n- The function uses the settings from the configuration files to determine the verbosity level of the logging messages\n- The omit_deletion_hunks function is called to remove the deletion hunks from the patch\n- The function handles the case where the new_file_content_str is empty by setting the patch to \"File was deleted\"\n\"\"\"\n\n\nclass TestHandlePatchDeletions:\n    # Tests that handle_patch_deletions returns the original patch when new_file_content_str is not empty\n    def test_handle_patch_deletions_happy_path_new_file_content_exists(self):\n        patch = '--- a/file.py\\n+++ b/file.py\\n@@ -1,2 +1,2 @@\\n-foo\\n-bar\\n+baz\\n'\n        original_file_content_str = 'foo\\nbar\\n'\n        new_file_content_str = 'foo\\nbaz\\n'\n        file_name = 'file.py'\n        assert handle_patch_deletions(patch, original_file_content_str, new_file_content_str,\n                                      file_name) 
== patch.rstrip()\n\n    # Tests that handle_patch_deletions returns None when new_file_content_str is empty\n    def test_handle_patch_deletions_edge_case_new_file_content_empty(self):\n        patch = '--- a/file.py\\n+++ b/file.py\\n@@ -1,2 +1,2 @@\\n-foo\\n-bar\\n'\n        original_file_content_str = 'foo\\nbar\\n'\n        new_file_content_str = ''\n        file_name = 'file.py'\n        assert handle_patch_deletions(patch, original_file_content_str, new_file_content_str,\n                                      file_name) is None\n\n    # Tests that handle_patch_deletions returns the original patch when patch and patch_new are equal\n    def test_handle_patch_deletions_edge_case_patch_and_patch_new_are_equal(self):\n        patch = '--- a/file.py\\n+++ b/file.py\\n@@ -1,2 +1,2 @@\\n-foo\\n-bar\\n'\n        original_file_content_str = 'foo\\nbar\\n'\n        new_file_content_str = 'foo\\nbar\\n'\n        file_name = 'file.py'\n        assert handle_patch_deletions(patch, original_file_content_str, new_file_content_str,\n                                      file_name).rstrip() == patch.rstrip()\n\n    # Tests that handle_patch_deletions returns the modified patch when patch and patch_new are not equal\n    def test_handle_patch_deletions_edge_case_patch_and_patch_new_are_not_equal(self):\n        patch = '--- a/file.py\\n+++ b/file.py\\n@@ -1,2 +1,2 @@\\n-foo\\n-bar\\n'\n        original_file_content_str = 'foo\\nbar\\n'\n        new_file_content_str = 'foo\\nbaz\\n'\n        file_name = 'file.py'\n        expected_patch = '--- a/file.py\\n+++ b/file.py\\n@@ -1,2 +1,2 @@\\n-foo\\n-bar'\n        assert handle_patch_deletions(patch, original_file_content_str, new_file_content_str,\n                                      file_name) == expected_patch\n"
  },
  {
    "path": "tests/unittest/test_ignore_repositories.py",
    "content": "import pytest\n\nfrom pr_agent.config_loader import get_settings\nfrom pr_agent.servers.bitbucket_app import should_process_pr_logic as bitbucket_should_process_pr_logic\nfrom pr_agent.servers.github_app import should_process_pr_logic as github_should_process_pr_logic\nfrom pr_agent.servers.gitlab_webhook import should_process_pr_logic as gitlab_should_process_pr_logic\n\n\ndef make_bitbucket_payload(full_name):\n    return {\n        \"data\": {\n            \"pullrequest\": {\n                \"title\": \"Test PR\",\n                \"source\": {\"branch\": {\"name\": \"feature/test\"}},\n                \"destination\": {\n                    \"branch\": {\"name\": \"main\"},\n                    \"repository\": {\"full_name\": full_name}\n                }\n            },\n            \"actor\": {\"username\": \"user\", \"type\": \"user\"}\n        }\n    }\n\ndef make_github_body(full_name):\n    return {\n        \"pull_request\": {},\n        \"repository\": {\"full_name\": full_name},\n        \"sender\": {\"login\": \"user\"}\n    }\n\ndef make_gitlab_body(full_name):\n    return {\n        \"object_attributes\": {\"title\": \"Test MR\"},\n        \"project\": {\"path_with_namespace\": full_name}\n    }\n\nPROVIDERS = [\n    (\"github\", github_should_process_pr_logic, make_github_body),\n    (\"bitbucket\", bitbucket_should_process_pr_logic, make_bitbucket_payload),\n    (\"gitlab\", gitlab_should_process_pr_logic, make_gitlab_body),\n]\n\nclass TestIgnoreRepositories:\n    def setup_method(self):\n        get_settings().set(\"CONFIG.IGNORE_REPOSITORIES\", [])\n\n    @pytest.mark.parametrize(\"provider_name, provider_func, body_func\", PROVIDERS)\n    def test_should_ignore_matching_repository(self, provider_name, provider_func, body_func):\n        get_settings().set(\"CONFIG.IGNORE_REPOSITORIES\", [\"org/repo-to-ignore\"])\n        body = {\n            \"pull_request\": {},\n            \"repository\": {\"full_name\": 
\"org/repo-to-ignore\"},\n            \"sender\": {\"login\": \"user\"}\n        }\n        result = provider_func(body_func(body[\"repository\"][\"full_name\"]))\n        # print(f\"DEBUG: Provider={provider_name}, test_should_ignore_matching_repository, result={result}\")\n        assert result is False, f\"{provider_name}: PR from ignored repository should be ignored (return False)\"\n\n    @pytest.mark.parametrize(\"provider_name, provider_func, body_func\", PROVIDERS)\n    def test_should_not_ignore_non_matching_repository(self, provider_name, provider_func, body_func):\n        get_settings().set(\"CONFIG.IGNORE_REPOSITORIES\", [\"org/repo-to-ignore\"])\n        body = {\n            \"pull_request\": {},\n            \"repository\": {\"full_name\": \"org/other-repo\"},\n            \"sender\": {\"login\": \"user\"}\n        }\n        result = provider_func(body_func(body[\"repository\"][\"full_name\"]))\n        # print(f\"DEBUG: Provider={provider_name}, test_should_not_ignore_non_matching_repository, result={result}\")\n        assert result is True, f\"{provider_name}: PR from non-ignored repository should not be ignored (return True)\"\n\n    @pytest.mark.parametrize(\"provider_name, provider_func, body_func\", PROVIDERS)\n    def test_should_not_ignore_when_config_empty(self, provider_name, provider_func, body_func):\n        get_settings().set(\"CONFIG.IGNORE_REPOSITORIES\", [])\n        body = {\n            \"pull_request\": {},\n            \"repository\": {\"full_name\": \"org/repo-to-ignore\"},\n            \"sender\": {\"login\": \"user\"}\n        }\n        result = provider_func(body_func(body[\"repository\"][\"full_name\"]))\n        # print(f\"DEBUG: Provider={provider_name}, test_should_not_ignore_when_config_empty, result={result}\")\n        assert result is True, f\"{provider_name}: PR should not be ignored if ignore_repositories config is empty\" "
  },
  {
    "path": "tests/unittest/test_language_handler.py",
    "content": "\n# Generated by CodiumAI\n\nfrom pr_agent.algo.language_handler import sort_files_by_main_languages\n\n\"\"\"\nCode Analysis\n\nObjective:\nThe objective of the function is to sort a list of files by their main language, putting the files that are in the main\nlanguage first and the rest of the files after. It takes in a dictionary of languages and their sizes, and a list of\nfiles.\n\nInputs:\n- languages: a dictionary containing the languages and their sizes\n- files: a list of files\n\nFlow:\n1. Sort the languages by their size in descending order\n2. Get all extensions for the languages\n3. Filter out files with bad extensions\n4. Sort files by their extension, putting the files that are in the main extension first and the rest of the files after\n5. Map languages_sorted to their respective files\n6. Append the files to the files_sorted list\n7. Append the rest of the files to the files_sorted list under the \"Other\" language category\n8. Return the files_sorted list\n\nOutputs:\n- files_sorted: a list of dictionaries containing the language and its respective files\n\nAdditional aspects:\n- The function uses a language_extension_map dictionary to map the languages to their respective extensions\n- The function uses the filter_bad_extensions function to filter out files with bad extensions\n- The function uses a rest_files dictionary to store the files that do not belong to any of the main extensions\n\"\"\"\n\n\nclass TestSortFilesByMainLanguages:\n    # Tests that files are sorted by main language, with files in main language first and the rest after\n    def test_happy_path_sort_files_by_main_languages(self):\n        languages = {'Python': 10, 'Java': 5, 'C++': 3}\n        files = [\n            type('', (object,), {'filename': 'file1.py'})(),\n            type('', (object,), {'filename': 'file2.java'})(),\n            type('', (object,), {'filename': 'file3.cpp'})(),\n            type('', (object,), {'filename': 'file4.py'})(),\n          
  type('', (object,), {'filename': 'file5.py'})()\n        ]\n        expected_output = [\n            {'language': 'Python', 'files': [files[0], files[3], files[4]]},\n            {'language': 'Java', 'files': [files[1]]},\n            {'language': 'C++', 'files': [files[2]]},\n            {'language': 'Other', 'files': []}\n        ]\n        assert sort_files_by_main_languages(languages, files) == expected_output\n\n    # Tests that function handles empty languages dictionary\n    def test_edge_case_empty_languages(self):\n        languages = {}\n        files = [\n            type('', (object,), {'filename': 'file1.py'})(),\n            type('', (object,), {'filename': 'file2.java'})()\n        ]\n        expected_output = [{'language': 'Other', 'files': files}]\n        assert sort_files_by_main_languages(languages, files) == expected_output\n\n    # Tests that function handles empty files list\n    def test_edge_case_empty_files(self):\n        languages = {'Python': 10, 'Java': 5}\n        files = []\n        expected_output = [\n            {'language': 'Other', 'files': []}\n        ]\n        assert sort_files_by_main_languages(languages, files) == expected_output\n\n    # Tests that function handles languages with no extensions\n    def test_edge_case_languages_with_no_extensions(self):\n        languages = {'Python': 10, 'Java': 5, 'C++': 3}\n        files = [\n            type('', (object,), {'filename': 'file1.py'})(),\n            type('', (object,), {'filename': 'file2.java'})(),\n            type('', (object,), {'filename': 'file3.cpp'})(),\n            type('', (object,), {'filename': 'file3.test'})()\n        ]\n        expected_output = [\n            {'language': 'Python', 'files': [files[0]]},\n            {'language': 'Java', 'files': [files[1]]},\n            {'language': 'C++', 'files': [files[2]]},\n            {'language': 'Other', 'files': [files[3]]}\n        ]\n        assert sort_files_by_main_languages(languages, files) == 
expected_output\n\n    # Tests the behavior of the function when all files have bad extensions and only one new valid file is added.\n    def test_edge_case_files_with_bad_extensions_only(self):\n        languages = {'Python': 10, 'Java': 5, 'C++': 3}\n        files = [\n            type('', (object,), {'filename': 'file1.csv'})(),\n            type('', (object,), {'filename': 'file2.pdf'})(),\n            type('', (object,), {'filename': 'file3.py'})()  # new valid file\n        ]\n        expected_output = [{'language': 'Python', 'files': [files[2]]}, {'language': 'Other', 'files': []}]\n        assert sort_files_by_main_languages(languages, files) == expected_output\n\n    # Tests general behaviour of function\n    def test_general_behaviour_sort_files_by_main_languages(self):\n        languages = {'Python': 10, 'Java': 5, 'C++': 3}\n        files = [\n            type('', (object,), {'filename': 'file1.py'})(),\n            type('', (object,), {'filename': 'file2.java'})(),\n            type('', (object,), {'filename': 'file3.cpp'})(),\n            type('', (object,), {'filename': 'file4.py'})(),\n            type('', (object,), {'filename': 'file5.py'})(),\n            type('', (object,), {'filename': 'file6.py'})(),\n            type('', (object,), {'filename': 'file7.java'})(),\n            type('', (object,), {'filename': 'file8.cpp'})(),\n            type('', (object,), {'filename': 'file9.py'})()\n        ]\n        expected_output = [\n            {'language': 'Python', 'files': [files[0], files[3], files[4], files[5], files[8]]},\n            {'language': 'Java', 'files': [files[1], files[6]]},\n            {'language': 'C++', 'files': [files[2], files[7]]},\n            {'language': 'Other', 'files': []}\n        ]\n        assert sort_files_by_main_languages(languages, files) == expected_output\n"
  },
  {
    "path": "tests/unittest/test_litellm_reasoning_effort.py",
    "content": "from unittest.mock import AsyncMock, MagicMock, call, patch\n\nimport pytest\n\nimport pr_agent.algo.ai_handlers.litellm_ai_handler as litellm_handler\nfrom pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler\n\n\ndef create_mock_settings(reasoning_effort_value):\n    \"\"\"Create a fake settings object with configurable reasoning_effort.\"\"\"\n    return type('', (), {\n        'config': type('', (), {\n            'reasoning_effort': reasoning_effort_value,\n            'ai_timeout': 120,\n            'custom_reasoning_model': False,\n            'max_model_tokens': 32000,\n            'verbosity_level': 0,\n            'get': lambda self, key, default=None: default\n        })(),\n        'litellm': type('', (), {\n            'get': lambda self, key, default=None: default\n        })(),\n        'get': lambda self, key, default=None: default\n    })()\n\n\ndef create_mock_acompletion_response():\n    \"\"\"Create a properly structured mock response for acompletion.\"\"\"\n    mock_response = MagicMock()\n    mock_response.__getitem__ = lambda self, key: {\n        \"choices\": [{\"message\": {\"content\": \"test\"}, \"finish_reason\": \"stop\"}]\n    }[key]\n    mock_response.dict.return_value = {\"choices\": [{\"message\": {\"content\": \"test\"}, \"finish_reason\": \"stop\"}]}\n    return mock_response\n\n\n@pytest.fixture\ndef mock_logger():\n    \"\"\"Mock logger to capture info and warning calls.\"\"\"\n    with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.get_logger') as mock_log:\n        mock_log_instance = MagicMock()\n        mock_log.return_value = mock_log_instance\n        yield mock_log_instance\n\n\nclass TestLiteLLMReasoningEffort:\n    \"\"\"\n    Comprehensive test suite for GPT-5 reasoning_effort configuration handling.\n\n    Tests cover:\n    - Valid reasoning_effort values for GPT-5 models\n    - Invalid reasoning_effort values with warning logging\n    - Model detection (GPT-5 vs non-GPT-5)\n    - 
Model suffix handling (_thinking vs regular)\n    - Default fallback logic\n    - Logging behavior (info and warning messages)\n    - thinking_kwargs_gpt5 structure validation\n    \"\"\"\n\n    # ========== Group 1: Valid Configuration Tests ==========\n\n    @pytest.mark.asyncio\n    async def test_gpt5_valid_reasoning_effort_none(self, monkeypatch, mock_logger):\n        \"\"\"Test GPT-5 with valid reasoning_effort='none' from config.\"\"\"\n        fake_settings = create_mock_settings(\"none\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        # Mock acompletion to capture kwargs\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Verify the call was made with correct reasoning_effort\n            assert mock_completion.called\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"none\"\n            assert \"reasoning_effort\" in call_kwargs[\"allowed_openai_params\"]\n\n            # Verify info log\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='none' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_valid_reasoning_effort_low(self, monkeypatch, mock_logger):\n        \"\"\"Test GPT-5 with valid reasoning_effort='low' from config.\"\"\"\n        fake_settings = create_mock_settings(\"low\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            
mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"low\"\n            assert \"reasoning_effort\" in call_kwargs[\"allowed_openai_params\"]\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='low' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_valid_reasoning_effort_medium(self, monkeypatch, mock_logger):\n        \"\"\"Test GPT-5 with valid reasoning_effort='medium' from config.\"\"\"\n        fake_settings = create_mock_settings(\"medium\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n            assert \"reasoning_effort\" in call_kwargs[\"allowed_openai_params\"]\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='medium' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_valid_reasoning_effort_high(self, monkeypatch, mock_logger):\n        \"\"\"Test GPT-5 with valid reasoning_effort='high' from config.\"\"\"\n        fake_settings = create_mock_settings(\"high\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: 
fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"high\"\n            assert \"reasoning_effort\" in call_kwargs[\"allowed_openai_params\"]\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='high' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_valid_reasoning_effort_xhigh(self, monkeypatch, mock_logger):\n        \"\"\"Test GPT-5 with valid reasoning_effort='xhigh' from config.\"\"\"\n        fake_settings = create_mock_settings(\"xhigh\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5.2\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"xhigh\"\n            assert \"reasoning_effort\" in call_kwargs[\"allowed_openai_params\"]\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='xhigh' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_valid_reasoning_effort_minimal(self, monkeypatch, mock_logger):\n        \"\"\"Test GPT-5 with valid reasoning_effort='minimal' 
from config.\"\"\"\n        fake_settings = create_mock_settings(\"minimal\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"minimal\"\n            assert \"reasoning_effort\" in call_kwargs[\"allowed_openai_params\"]\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='minimal' for GPT-5 model\")\n\n    # ========== Group 2: Invalid Configuration Tests ==========\n\n    @pytest.mark.asyncio\n    async def test_gpt5_invalid_reasoning_effort_with_warning(self, monkeypatch, mock_logger):\n        \"\"\"Test GPT-5 with invalid reasoning_effort logs warning and uses default.\"\"\"\n        fake_settings = create_mock_settings(\"extreme\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Should default to 'medium'\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n\n            # Verify warning logged\n            
mock_logger.warning.assert_called_once()\n            warning_call = mock_logger.warning.call_args[0][0]\n            assert \"Invalid reasoning_effort 'extreme' in config\" in warning_call\n            assert \"Valid values:\" in warning_call\n\n            # Verify info log\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='medium' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_invalid_reasoning_effort_thinking_model(self, monkeypatch, mock_logger):\n        \"\"\"Test GPT-5 _thinking model with invalid reasoning_effort defaults to 'medium'.\"\"\"\n        fake_settings = create_mock_settings(\"invalid_value\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07_thinking\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Should default to 'medium' (no special handling for _thinking models)\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n\n            # Verify warning logged\n            mock_logger.warning.assert_called_once()\n\n            # Verify info log\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='medium' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_none_config_defaults_to_medium(self, monkeypatch, mock_logger):\n        \"\"\"Test GPT-5 with None config defaults to 'medium' without warning.\"\"\"\n        fake_settings = create_mock_settings(None)\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with 
patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Should default to 'medium'\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n\n            # No warning should be logged\n            mock_logger.warning.assert_not_called()\n\n            # Info log should show effort\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='medium' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_none_config_thinking_model_defaults_to_medium(self, monkeypatch, mock_logger):\n        \"\"\"Test GPT-5 _thinking model with None config defaults to 'medium' without warning.\"\"\"\n        fake_settings = create_mock_settings(None)\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07_thinking\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Should default to 'medium' (no special handling for _thinking models)\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n\n            # No warning should be logged\n            mock_logger.warning.assert_not_called()\n\n            # Info log\n            
mock_logger.info.assert_any_call(\"Using reasoning_effort='medium' for GPT-5 model\")\n\n    # ========== Group 3: Model Detection Tests ==========\n\n    @pytest.mark.asyncio\n    async def test_gpt5_model_detection_various_versions(self, monkeypatch, mock_logger):\n        \"\"\"Test various GPT-5 model version strings trigger the reasoning_effort logic.\"\"\"\n        fake_settings = create_mock_settings(\"medium\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        gpt5_models = [\n            \"gpt-5-2025-08-07\",\n            \"gpt-5.1\",\n            \"gpt-5.4\",\n            \"gpt-5.4-2026-03-05\",\n            \"gpt-5-turbo\",\n            \"gpt-5.1-codex\",\n            \"gpt-5.3-codex\",\n        ]\n\n        for model in gpt5_models:\n            with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n                mock_completion.return_value = create_mock_acompletion_response()\n\n                handler = LiteLLMAIHandler()\n                await handler.chat_completion(\n                    model=model,\n                    system=\"test system\",\n                    user=\"test user\"\n                )\n\n                # All should trigger GPT-5 logic\n                call_kwargs = mock_completion.call_args[1]\n                assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n                assert \"reasoning_effort\" in call_kwargs[\"allowed_openai_params\"]\n\n    @pytest.mark.asyncio\n    async def test_non_gpt5_model_no_thinking_kwargs(self, monkeypatch, mock_logger):\n        \"\"\"Test non-GPT-5 models do not trigger reasoning_effort logic.\"\"\"\n        fake_settings = create_mock_settings(\"high\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        non_gpt5_models = [\"gpt-4o\", \"gpt-4-turbo\", \"claude-3-5-sonnet\"]\n\n        for model in non_gpt5_models:\n            with 
patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n                mock_completion.return_value = create_mock_acompletion_response()\n\n                handler = LiteLLMAIHandler()\n                await handler.chat_completion(\n                    model=model,\n                    system=\"test system\",\n                    user=\"test user\"\n                )\n\n                # Should not have reasoning_effort in kwargs\n                call_kwargs = mock_completion.call_args[1]\n                assert \"reasoning_effort\" not in call_kwargs\n\n    @pytest.mark.asyncio\n    async def test_gpt5_suffix_removal(self, monkeypatch, mock_logger):\n        \"\"\"Test that _thinking suffix is properly removed from model name.\"\"\"\n        fake_settings = create_mock_settings(\"low\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5_thinking\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Model should be transformed to openai/gpt-5\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"model\"] == \"openai/gpt-5\"\n\n    # ========== Group 4: Model Suffix Handling Tests ==========\n\n    @pytest.mark.asyncio\n    async def test_gpt5_thinking_suffix_default_medium(self, monkeypatch, mock_logger):\n        \"\"\"Test _thinking suffix models default to 'medium' when config is None.\"\"\"\n        fake_settings = create_mock_settings(None)\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with 
patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07_thinking\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='medium' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_regular_suffix_default_medium(self, monkeypatch, mock_logger):\n        \"\"\"Test regular GPT-5 models default to 'medium' when config is None.\"\"\"\n        fake_settings = create_mock_settings(None)\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='medium' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_thinking_suffix_config_overrides_default(self, monkeypatch, mock_logger):\n        \"\"\"Test that config overrides the default for _thinking models.\"\"\"\n        fake_settings = create_mock_settings(\"high\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: 
fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07_thinking\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Should use 'high' from config, not 'medium' default\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"high\"\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='high' for GPT-5 model\")\n\n    # ========== Group 5: Logging Behavior Tests ==========\n\n    @pytest.mark.asyncio\n    async def test_gpt5_info_logging_configured_value(self, monkeypatch, mock_logger):\n        \"\"\"Test info log when using configured value.\"\"\"\n        fake_settings = create_mock_settings(\"low\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Verify log\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='low' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_info_logging_default_value(self, monkeypatch, mock_logger):\n        \"\"\"Test info log when using default value.\"\"\"\n        fake_settings = create_mock_settings(None)\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n   
     with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Verify log\n            mock_logger.info.assert_any_call(\"Using reasoning_effort='medium' for GPT-5 model\")\n\n    @pytest.mark.asyncio\n    async def test_gpt5_warning_only_for_invalid_non_none(self, monkeypatch, mock_logger):\n        \"\"\"Test warning logged only for invalid non-None values.\"\"\"\n        # Test None - should not warn\n        fake_settings = create_mock_settings(None)\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # No warning for None\n            mock_logger.warning.assert_not_called()\n\n        # Reset mock\n        mock_logger.reset_mock()\n\n        # Test invalid string - should warn\n        fake_settings = create_mock_settings(\"ultra\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n             
   model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Warning should be logged for invalid value\n            mock_logger.warning.assert_called_once()\n\n    # ========== Group 6: Structure Validation Tests ==========\n\n    @pytest.mark.asyncio\n    async def test_thinking_kwargs_gpt5_structure(self, monkeypatch, mock_logger):\n        \"\"\"Test that thinking_kwargs_gpt5 has correct structure.\"\"\"\n        fake_settings = create_mock_settings(\"medium\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            call_kwargs = mock_completion.call_args[1]\n\n            # Verify structure\n            assert \"reasoning_effort\" in call_kwargs\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n            assert \"allowed_openai_params\" in call_kwargs\n            assert isinstance(call_kwargs[\"allowed_openai_params\"], list)\n            assert \"reasoning_effort\" in call_kwargs[\"allowed_openai_params\"]\n\n    @pytest.mark.asyncio\n    async def test_thinking_kwargs_not_created_for_non_gpt5(self, monkeypatch, mock_logger):\n        \"\"\"Test that thinking_kwargs_gpt5 is not created for non-GPT-5 models.\"\"\"\n        fake_settings = create_mock_settings(\"high\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            
mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-4o\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            call_kwargs = mock_completion.call_args[1]\n\n            # Should not have reasoning_effort keys\n            assert \"reasoning_effort\" not in call_kwargs\n            assert call_kwargs.get(\"allowed_openai_params\") is None or \"reasoning_effort\" not in call_kwargs.get(\"allowed_openai_params\", [])\n\n    # ========== Group 7: Edge Cases ==========\n\n    @pytest.mark.asyncio\n    async def test_empty_string_reasoning_effort(self, monkeypatch, mock_logger):\n        \"\"\"Test empty string reasoning_effort is treated as invalid.\"\"\"\n        fake_settings = create_mock_settings(\"\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Should default to 'medium' and log warning\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n            mock_logger.warning.assert_called_once()\n\n    @pytest.mark.asyncio\n    async def test_case_sensitive_reasoning_effort(self, monkeypatch, mock_logger):\n        \"\"\"Test that reasoning_effort validation is case-sensitive.\"\"\"\n        fake_settings = create_mock_settings(\"LOW\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with 
patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Should treat uppercase as invalid and default to 'medium'\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n            mock_logger.warning.assert_called_once()\n\n    @pytest.mark.asyncio\n    async def test_whitespace_reasoning_effort(self, monkeypatch, mock_logger):\n        \"\"\"Test that reasoning_effort with whitespace is treated as invalid.\"\"\"\n        fake_settings = create_mock_settings(\" low \")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5-2025-08-07\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Should treat value with whitespace as invalid\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n            mock_logger.warning.assert_called_once()\n\n    @pytest.mark.asyncio\n    async def test_gpt5_prefix_match_only(self, monkeypatch, mock_logger):\n        \"\"\"Test that model.startswith('gpt-5') matching behavior.\n\n        Note: The current logic uses startswith('gpt-5'), which means\n        models like 'gpt-50' will also match (since 
'gpt-50'.startswith('gpt-5') is True).\n        This test documents the current behavior.\n        \"\"\"\n        fake_settings = create_mock_settings(\"medium\")\n        monkeypatch.setattr(litellm_handler, \"get_settings\", lambda: fake_settings)\n\n        # Test gpt-50 (will match due to startswith logic)\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-50\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Due to startswith('gpt-5'), gpt-50 will match and have reasoning_effort\n            call_kwargs = mock_completion.call_args[1]\n            assert \"reasoning_effort\" in call_kwargs\n\n        # Reset mock\n        mock_logger.reset_mock()\n\n        # Test gpt-5 (should match)\n        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:\n            mock_completion.return_value = create_mock_acompletion_response()\n\n            handler = LiteLLMAIHandler()\n            await handler.chat_completion(\n                model=\"gpt-5\",\n                system=\"test system\",\n                user=\"test user\"\n            )\n\n            # Should have reasoning_effort\n            call_kwargs = mock_completion.call_args[1]\n            assert call_kwargs[\"reasoning_effort\"] == \"medium\"\n"
  },
  {
    "path": "tests/unittest/test_load_yaml.py",
    "content": "\n# Generated by CodiumAI\n\nimport pytest\nimport yaml\nfrom yaml.scanner import ScannerError\n\nfrom pr_agent.algo.utils import load_yaml\n\n\nclass TestLoadYaml:\n    #  Tests that load_yaml loads a valid YAML string\n    def test_load_valid_yaml(self):\n        yaml_str = 'name: John Smith\\nage: 35'\n        expected_output = {'name': 'John Smith', 'age': 35}\n        assert load_yaml(yaml_str) == expected_output\n\n    def test_load_invalid_yaml1(self):\n        yaml_str = \\\n'''\\\nPR Analysis:\n  Main theme: Enhancing the `/describe` command prompt by adding title and description\n  Type of PR: Enhancement\n  Relevant tests: No\n  Focused PR: Yes, the PR is focused on enhancing the `/describe` command prompt.\n\nPR Feedback:\n  General suggestions: The PR seems to be well-structured and focused on a specific enhancement. However, it would be beneficial to add tests to ensure the new feature works as expected.\n  Code feedback:\n    - relevant file: pr_agent/settings/pr_description_prompts.toml\n      suggestion: Consider using a more descriptive variable name than 'user' for the command prompt. A more descriptive name would make the code more readable and maintainable. [medium]\n      relevant line: user=\"\"\"PR Info: aaa\n  Security concerns: No'''\n        with pytest.raises(ScannerError):\n            yaml.safe_load(yaml_str)\n\n        expected_output = {'PR Analysis': {'Main theme': 'Enhancing the `/describe` command prompt by adding title and description', 'Type of PR': 'Enhancement', 'Relevant tests': False, 'Focused PR': 'Yes, the PR is focused on enhancing the `/describe` command prompt.'}, 'PR Feedback': {'General suggestions': 'The PR seems to be well-structured and focused on a specific enhancement. 
However, it would be beneficial to add tests to ensure the new feature works as expected.', 'Code feedback': [{'relevant file': 'pr_agent/settings/pr_description_prompts.toml\\n', 'suggestion': \"Consider using a more descriptive variable name than 'user' for the command prompt. A more descriptive name would make the code more readable and maintainable. [medium]\", 'relevant line': 'user=\"\"\"PR Info: aaa\\n'}], 'Security concerns': False}}\n        assert load_yaml(yaml_str) == expected_output\n\n    def test_load_invalid_yaml2(self):\n        yaml_str = '''\\\n- relevant file: src/app.py:\n  suggestion content: The print statement is outside inside the if __name__ ==: \\\n'''\n        with pytest.raises(ScannerError):\n            yaml.safe_load(yaml_str)\n\n        expected_output = [{'relevant file': 'src/app.py:\\n', 'suggestion content': 'The print statement is outside inside the if __name__ ==:'}]\n        assert load_yaml(yaml_str) == expected_output\n"
  },
  {
    "path": "tests/unittest/test_parse_code_suggestion.py",
    "content": "\n# Generated by CodiumAI\nfrom pr_agent.algo.utils import parse_code_suggestion\n\n\"\"\"\nCode Analysis\n\nObjective:\nThe objective of the function is to convert a dictionary into a markdown format. The function takes in a dictionary as\ninput and recursively converts it into a markdown format. The function is specifically designed to handle dictionaries\nthat contain code suggestions.\n\nInputs:\n- output_data: a dictionary containing the data to be converted into markdown format\n\nFlow:\n- Initialize an empty string variable called markdown_text\n- Create a dictionary of emojis to be used in the markdown format\n- Iterate through the items in the input dictionary\n- If the value is empty, skip to the next item\n- If the value is a dictionary, recursively call the function with the value as input\n- If the value is a list, iterate through the list and add each item to the markdown format\n- If the value is not 'n/a', add it to the markdown format\n- If the key is 'code suggestions', call the parse_code_suggestion function to handle the list of code suggestions\n- Return the markdown format as a string\n\nOutputs:\n- markdown_text: a string containing the input dictionary converted into markdown format\n\nAdditional aspects:\n- The function uses the textwrap module to indent code examples in the markdown format\n- The parse_code_suggestion function is called to handle the 'code suggestions' key in the input dictionary\n- The function uses emojis to add visual cues to the markdown format\n\"\"\"\n\n\nclass TestParseCodeSuggestion:\n    # Tests that function returns empty string when input is an empty dictionary\n    def test_empty_dict(self):\n        input_data = {}\n        expected_output = \"\\n\"  # modified to expect a newline character\n        assert parse_code_suggestion(input_data) == expected_output\n\n\n    # Tests that function returns correct output when 'before' or 'after' key has a non-string value\n    def 
test_non_string_before_or_after(self):\n        input_data = {\n            \"Code example\": {\n                \"Before\": 123,\n                \"After\": [\"a\", \"b\", \"c\"]\n            }\n        }\n        expected_output = \"  - **Code example:**\\n    - **Before:**\\n        ```\\n        123\\n        ```\\n    - **After:**\\n        ```\\n        ['a', 'b', 'c']\\n        ```\\n\\n\"  # noqa: E501\n        assert parse_code_suggestion(input_data) == expected_output\n\n    # Tests that function returns correct output when input dictionary does not have 'code example' key\n    def test_no_code_example_key(self):\n        code_suggestions = {\n            'suggestion': 'Suggestion 1',\n            'description': 'Description 1',\n            'before': 'Before 1',\n            'after': 'After 1'\n        }\n        expected_output = '   **suggestion:** Suggestion 1     \\n   **description:** Description 1     \\n   **before:** Before 1     \\n   **after:** After 1     \\n\\n'  # noqa: E501\n        assert parse_code_suggestion(code_suggestions) == expected_output\n\n    # Tests that function returns correct output when input dictionary has 'code example' key\n    def test_with_code_example_key(self):\n        code_suggestions = {\n            'suggestion': 'Suggestion 2',\n            'description': 'Description 2',\n            'code example': {\n                'before': 'Before 2',\n                'after': 'After 2'\n            }\n        }\n        expected_output = '   **suggestion:** Suggestion 2     \\n   **description:** Description 2     \\n  - **code example:**\\n    - **before:**\\n        ```\\n        Before 2\\n        ```\\n    - **after:**\\n        ```\\n        After 2\\n        ```\\n\\n'  # noqa: E501\n        assert parse_code_suggestion(code_suggestions) == expected_output\n"
  },
  {
    "path": "tests/unittest/test_pr_update_changelog.py",
    "content": "from unittest.mock import AsyncMock, MagicMock, patch\n\nimport pytest\n\nfrom pr_agent.tools.pr_update_changelog import PRUpdateChangelog\n\n\nclass TestPRUpdateChangelog:\n    \"\"\"Test suite for the PR Update Changelog functionality.\"\"\"\n    \n    @pytest.fixture\n    def mock_git_provider(self):\n        \"\"\"Create a mock git provider.\"\"\"\n        provider = MagicMock()\n        provider.get_pr_branch.return_value = \"feature-branch\"\n        provider.get_pr_file_content.return_value = \"\"\n        provider.pr.title = \"Test PR\"\n        provider.get_pr_description.return_value = \"Test description\"\n        provider.get_commit_messages.return_value = \"fix: test commit\"\n        provider.get_languages.return_value = {\"Python\": 80, \"JavaScript\": 20}\n        provider.get_files.return_value = [\"test.py\", \"test.js\"]\n        return provider\n\n    @pytest.fixture\n    def mock_ai_handler(self):\n        \"\"\"Create a mock AI handler.\"\"\"\n        handler = MagicMock()\n        handler.chat_completion = AsyncMock(return_value=(\"Test changelog entry\", \"stop\"))\n        return handler\n\n    @pytest.fixture\n    def changelog_tool(self, mock_git_provider, mock_ai_handler):\n        \"\"\"Create a PRUpdateChangelog instance with mocked dependencies.\"\"\"\n        with patch('pr_agent.tools.pr_update_changelog.get_git_provider', return_value=lambda url: mock_git_provider), \\\n             patch('pr_agent.tools.pr_update_changelog.get_main_pr_language', return_value=\"Python\"), \\\n             patch('pr_agent.tools.pr_update_changelog.get_settings') as mock_settings:\n            \n            # Configure mock settings\n            mock_settings.return_value.pr_update_changelog.push_changelog_changes = False\n            mock_settings.return_value.pr_update_changelog.extra_instructions = \"\"\n            mock_settings.return_value.pr_update_changelog_prompt.system = \"System prompt\"\n            
mock_settings.return_value.pr_update_changelog_prompt.user = \"User prompt\"\n            mock_settings.return_value.config.temperature = 0.2\n            \n            tool = PRUpdateChangelog(\"https://gitlab.com/test/repo/-/merge_requests/1\", ai_handler=lambda: mock_ai_handler)\n            return tool\n\n    def test_get_changelog_file_with_existing_content(self, changelog_tool, mock_git_provider):\n        \"\"\"Test retrieving existing changelog content.\"\"\"\n        # Arrange\n        existing_content = \"# Changelog\\n\\n## v1.0.0\\n- Initial release\\n- Bug fixes\"\n        mock_git_provider.get_pr_file_content.return_value = existing_content\n        \n        # Act\n        changelog_tool._get_changelog_file()\n        \n        # Assert\n        assert changelog_tool.changelog_file == existing_content\n        assert \"# Changelog\" in changelog_tool.changelog_file_str\n\n    def test_get_changelog_file_with_no_existing_content(self, changelog_tool, mock_git_provider):\n        \"\"\"Test handling when no changelog file exists.\"\"\"\n        # Arrange\n        mock_git_provider.get_pr_file_content.return_value = \"\"\n        \n        # Act\n        changelog_tool._get_changelog_file()\n        \n        # Assert\n        assert changelog_tool.changelog_file == \"\"\n        assert \"Example:\" in changelog_tool.changelog_file_str  # Default template\n\n    def test_get_changelog_file_with_bytes_content(self, changelog_tool, mock_git_provider):\n        \"\"\"Test handling when git provider returns bytes instead of string.\"\"\"\n        # Arrange\n        content_bytes = b\"# Changelog\\n\\n## v1.0.0\\n- Initial release\"\n        mock_git_provider.get_pr_file_content.return_value = content_bytes\n        \n        # Act\n        changelog_tool._get_changelog_file()\n        \n        # Assert\n        assert isinstance(changelog_tool.changelog_file, str)\n        assert changelog_tool.changelog_file == \"# Changelog\\n\\n## v1.0.0\\n- Initial 
release\"\n\n    def test_get_changelog_file_with_exception(self, changelog_tool, mock_git_provider):\n        \"\"\"Test handling exceptions during file retrieval.\"\"\"\n        # Arrange\n        mock_git_provider.get_pr_file_content.side_effect = Exception(\"Network error\")\n        \n        # Act\n        changelog_tool._get_changelog_file()\n        \n        # Assert\n        assert changelog_tool.changelog_file == \"\"\n        assert changelog_tool.changelog_file_str == \"\"  # Exception should result in empty string, no default template\n\n    def test_prepare_changelog_update_with_existing_content(self, changelog_tool):\n        \"\"\"Test preparing changelog update when existing content exists.\"\"\"\n        # Arrange\n        changelog_tool.prediction = \"## v1.1.0\\n- New feature\\n- Bug fix\"\n        changelog_tool.changelog_file = \"# Changelog\\n\\n## v1.0.0\\n- Initial release\"\n        changelog_tool.commit_changelog = True\n        \n        # Act\n        new_content, answer = changelog_tool._prepare_changelog_update()\n        \n        # Assert\n        assert new_content.startswith(\"## v1.1.0\\n- New feature\\n- Bug fix\\n\\n\")\n        assert \"# Changelog\\n\\n## v1.0.0\\n- Initial release\" in new_content\n        assert answer == \"## v1.1.0\\n- New feature\\n- Bug fix\"\n\n    def test_prepare_changelog_update_without_existing_content(self, changelog_tool):\n        \"\"\"Test preparing changelog update when no existing content.\"\"\"\n        # Arrange\n        changelog_tool.prediction = \"## v1.0.0\\n- Initial release\"\n        changelog_tool.changelog_file = \"\"\n        changelog_tool.commit_changelog = True\n        \n        # Act\n        new_content, answer = changelog_tool._prepare_changelog_update()\n        \n        # Assert\n        assert new_content == \"## v1.0.0\\n- Initial release\"\n        assert answer == \"## v1.0.0\\n- Initial release\"\n\n    def test_prepare_changelog_update_no_commit(self, 
changelog_tool):\n        \"\"\"Test preparing changelog update when not committing.\"\"\"\n        # Arrange\n        changelog_tool.prediction = \"## v1.1.0\\n- New feature\"\n        changelog_tool.changelog_file = \"\"\n        changelog_tool.commit_changelog = False\n        \n        # Act\n        new_content, answer = changelog_tool._prepare_changelog_update()\n        \n        # Assert\n        assert new_content == \"## v1.1.0\\n- New feature\"\n        assert \"to commit the new content\" in answer\n\n    @pytest.mark.asyncio\n    async def test_run_without_push_support(self, changelog_tool, mock_git_provider):\n        \"\"\"Test running changelog update when git provider doesn't support pushing.\"\"\"\n        # Arrange\n        delattr(mock_git_provider, 'create_or_update_pr_file')  # Remove the method\n        changelog_tool.commit_changelog = True\n        \n        with patch('pr_agent.tools.pr_update_changelog.get_settings') as mock_settings:\n            mock_settings.return_value.pr_update_changelog.push_changelog_changes = True\n            mock_settings.return_value.config.publish_output = True\n            \n            # Act\n            await changelog_tool.run()\n            \n            # Assert\n            mock_git_provider.publish_comment.assert_called_once()\n            assert \"not currently supported\" in str(mock_git_provider.publish_comment.call_args)\n\n    @pytest.mark.asyncio\n    async def test_run_with_push_support(self, changelog_tool, mock_git_provider):\n        \"\"\"Test running changelog update when git provider supports pushing.\"\"\"\n        # Arrange\n        mock_git_provider.create_or_update_pr_file = MagicMock()\n        changelog_tool.commit_changelog = True\n        changelog_tool.prediction = \"## v1.1.0\\n- New feature\"\n        \n        with patch('pr_agent.tools.pr_update_changelog.get_settings') as mock_settings, \\\n             patch('pr_agent.tools.pr_update_changelog.retry_with_fallback_models') 
as mock_retry, \\\n             patch('pr_agent.tools.pr_update_changelog.sleep'):\n            \n            mock_settings.return_value.pr_update_changelog.push_changelog_changes = True\n            mock_settings.return_value.pr_update_changelog.get.return_value = True\n            mock_settings.return_value.config.publish_output = True\n            mock_settings.return_value.config.git_provider = \"gitlab\"\n            mock_retry.return_value = None\n            \n            # Act\n            await changelog_tool.run()\n            \n            # Assert\n            mock_git_provider.create_or_update_pr_file.assert_called_once()\n            call_args = mock_git_provider.create_or_update_pr_file.call_args\n            assert call_args[1]['file_path'] == 'CHANGELOG.md'\n            assert call_args[1]['branch'] == 'feature-branch'\n\n    def test_push_changelog_update(self, changelog_tool, mock_git_provider):\n        \"\"\"Test the push changelog update functionality.\"\"\"\n        # Arrange\n        mock_git_provider.create_or_update_pr_file = MagicMock()\n        mock_git_provider.get_pr_branch.return_value = \"feature-branch\"\n        new_content = \"# Updated changelog content\"\n        answer = \"Changes made\"\n        \n        with patch('pr_agent.tools.pr_update_changelog.get_settings') as mock_settings, \\\n             patch('pr_agent.tools.pr_update_changelog.sleep'):\n            \n            mock_settings.return_value.pr_update_changelog.get.return_value = True\n            \n            # Act\n            changelog_tool._push_changelog_update(new_content, answer)\n            \n            # Assert\n            mock_git_provider.create_or_update_pr_file.assert_called_once_with(\n                file_path=\"CHANGELOG.md\",\n                branch=\"feature-branch\",\n                contents=new_content,\n                message=\"[skip ci] Update CHANGELOG.md\"\n            )\n\n    def test_gitlab_provider_method_detection(self, 
changelog_tool, mock_git_provider):\n        \"\"\"Test that the tool correctly detects GitLab provider method availability.\"\"\"\n        # Arrange\n        mock_git_provider.create_or_update_pr_file = MagicMock()\n        \n        # Act & Assert\n        assert hasattr(mock_git_provider, \"create_or_update_pr_file\")\n\n    @pytest.mark.parametrize(\"existing_content,new_entry,expected_order\", [\n        (\n            \"# Changelog\\n\\n## v1.0.0\\n- Old feature\", \n            \"## v1.1.0\\n- New feature\",\n            [\"v1.1.0\", \"v1.0.0\"]\n        ),\n        (\n            \"\", \n            \"## v1.0.0\\n- Initial release\",\n            [\"v1.0.0\"]\n        ),\n        (\n            \"Some existing content\", \n            \"## v1.0.0\\n- New entry\",\n            [\"v1.0.0\", \"Some existing content\"]\n        ),\n    ])\n    def test_changelog_order_preservation(self, changelog_tool, existing_content, new_entry, expected_order):\n        \"\"\"Test that changelog entries are properly ordered (newest first).\"\"\"\n        # Arrange\n        changelog_tool.prediction = new_entry\n        changelog_tool.changelog_file = existing_content\n        changelog_tool.commit_changelog = True\n        \n        # Act\n        new_content, _ = changelog_tool._prepare_changelog_update()\n        \n        # Assert\n        for i, expected in enumerate(expected_order[:-1]):\n            current_pos = new_content.find(expected)\n            next_pos = new_content.find(expected_order[i + 1])\n            assert current_pos < next_pos, f\"Expected {expected} to come before {expected_order[i + 1]}\" "
  },
  {
    "path": "tests/unittest/test_secret_provider_factory.py",
    "content": "from unittest.mock import MagicMock, patch\n\nimport pytest\n\nfrom pr_agent.secret_providers import get_secret_provider\n\n\nclass TestSecretProviderFactory:\n\n    def test_get_secret_provider_none_when_not_configured(self):\n        with patch('pr_agent.secret_providers.get_settings') as mock_get_settings:\n            settings = MagicMock()\n            settings.get.return_value = None\n            mock_get_settings.return_value = settings\n\n            result = get_secret_provider()\n            assert result is None\n\n    def test_get_secret_provider_google_cloud_storage(self):\n        with patch('pr_agent.secret_providers.get_settings') as mock_get_settings:\n            settings = MagicMock()\n            settings.get.return_value = \"google_cloud_storage\"\n            settings.config.secret_provider = \"google_cloud_storage\"\n            mock_get_settings.return_value = settings\n\n            with patch('pr_agent.secret_providers.google_cloud_storage_secret_provider.GoogleCloudStorageSecretProvider') as MockProvider:\n                mock_instance = MagicMock()\n                MockProvider.return_value = mock_instance\n                \n                result = get_secret_provider()\n                assert result is mock_instance\n                MockProvider.assert_called_once()\n\n    def test_get_secret_provider_aws_secrets_manager(self):\n        with patch('pr_agent.secret_providers.get_settings') as mock_get_settings:\n            settings = MagicMock()\n            settings.get.return_value = \"aws_secrets_manager\"\n            settings.config.secret_provider = \"aws_secrets_manager\"\n            mock_get_settings.return_value = settings\n\n            with patch('pr_agent.secret_providers.aws_secrets_manager_provider.AWSSecretsManagerProvider') as MockProvider:\n                mock_instance = MagicMock()\n                MockProvider.return_value = mock_instance\n                \n                result = 
get_secret_provider()\n                assert result is mock_instance\n                MockProvider.assert_called_once()\n\n    def test_get_secret_provider_unknown_provider(self):\n        with patch('pr_agent.secret_providers.get_settings') as mock_get_settings:\n            settings = MagicMock()\n            settings.get.return_value = \"unknown_provider\"\n            settings.config.secret_provider = \"unknown_provider\"\n            mock_get_settings.return_value = settings\n\n            with pytest.raises(ValueError, match=\"Unknown SECRET_PROVIDER\"):\n                get_secret_provider()\n\n    def test_get_secret_provider_initialization_error(self):\n        with patch('pr_agent.secret_providers.get_settings') as mock_get_settings:\n            settings = MagicMock()\n            settings.get.return_value = \"aws_secrets_manager\"\n            settings.config.secret_provider = \"aws_secrets_manager\"\n            mock_get_settings.return_value = settings\n\n            with patch('pr_agent.secret_providers.aws_secrets_manager_provider.AWSSecretsManagerProvider') as MockProvider:\n                MockProvider.side_effect = Exception(\"Initialization failed\")\n                \n                with pytest.raises(ValueError, match=\"Failed to initialize aws_secrets_manager secret provider\"):\n                    get_secret_provider() \n"
  },
  {
    "path": "tests/unittest/test_similar_issue_non_github.py",
    "content": "import pytest\n\nfrom pr_agent.tools.pr_similar_issue import PRSimilarIssue\n\n\n@pytest.mark.asyncio\nasync def test_similar_issue_non_github_publishes_message(monkeypatch):\n    class FakeProvider:\n        def __init__(self):\n            self.comments = []\n\n        def publish_comment(self, body):\n            self.comments.append(body)\n\n    fake_provider = FakeProvider()\n\n    class FakeSettings:\n        class config:\n            git_provider = \"gitlab\"\n            publish_output = True\n\n    monkeypatch.setattr(\"pr_agent.tools.pr_similar_issue.get_settings\", lambda: FakeSettings)\n    monkeypatch.setattr(\n        \"pr_agent.git_providers.get_git_provider_with_context\",\n        lambda _: fake_provider,\n    )\n\n    tool = PRSimilarIssue(\"https://gitlab.example.com/group/repo/-/merge_requests/1\", None)\n    result = await tool.run()\n\n    assert result == \"\"\n    assert fake_provider.comments == [\n        \"The /similar_issue tool is currently supported only for GitHub.\"\n    ]\n\n\n@pytest.mark.asyncio\nasync def test_similar_issue_non_github_no_publish(monkeypatch):\n    class FakeSettings:\n        class config:\n            git_provider = \"gitlab\"\n            publish_output = False\n\n    monkeypatch.setattr(\"pr_agent.tools.pr_similar_issue.get_settings\", lambda: FakeSettings)\n\n    tool = PRSimilarIssue(\"https://gitlab.example.com/group/repo/-/merge_requests/1\", None)\n    result = await tool.run()\n\n    assert result == \"\"\n"
  },
  {
    "path": "tests/unittest/test_try_fix_yaml.py",
    "content": "# Generated by CodiumAI\n\nfrom pr_agent.algo.utils import try_fix_yaml\n\n\nclass TestTryFixYaml:\n\n    # The function successfully parses a valid YAML string.\n    def test_valid_yaml(self):\n        review_text = \"key: value\\n\"\n        expected_output = {\"key\": \"value\"}\n        assert try_fix_yaml(review_text) == expected_output\n\n    # The function adds '|-' to 'relevant line:' if it is not already present and successfully parses the YAML string.\n    def test_add_relevant_line(self):\n        review_text = \"relevant line: value: 3\\n\"\n        expected_output = {'relevant line': 'value: 3\\n'}\n        assert try_fix_yaml(review_text) == expected_output\n\n    # The function extracts YAML snippet\n    def test_extract_snippet(self):\n        review_text = '''\\\nHere is the answer in YAML format:\n\n```yaml\nname: John Smith\nage: 35\n```\n'''\n        expected_output = {'name': 'John Smith', 'age': 35}\n        assert try_fix_yaml(review_text) == expected_output\n\n\n    # The YAML string is empty.\n    def test_empty_yaml_fixed(self):\n        review_text = \"\"\n        assert try_fix_yaml(review_text) is None\n\n\n    # The function extracts YAML snippet\n    def test_no_initial_yaml(self):\n        review_text = '''\\\nI suggest the following:\n\ncode_suggestions:\n- relevant_file: |\n    src/index.ts\n  label: |\n    best practice\n\n- relevant_file: |\n    src/index2.ts\n  label: |\n    enhancement\n```\n\nWe can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file.\n'''\n        expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\\n', 'label': 'best practice\\n'}, {'relevant_file': 'src/index2.ts\\n', 'label': 'enhancement'}]}\n\n        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output\n\n    def test_with_initial_yaml(self):\n        review_text = '''\\\nI suggest the following:\n\n```\ncode_suggestions:\n- 
relevant_file: |\n    src/index.ts\n  label: |\n    best practice\n\n- relevant_file: |\n    src/index2.ts\n  label: |\n    enhancement\n```\n\nWe can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file.\n'''\n        expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\\n', 'label': 'best practice\\n'}, {'relevant_file': 'src/index2.ts\\n', 'label': 'enhancement'}]}\n        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output\n\n\n    def test_with_brackets_yaml_content(self):\n        review_text = '''\\\n{\ncode_suggestions:\n- relevant_file: |\n    src/index.ts\n  label: |\n    best practice\n\n- relevant_file: |\n    src/index2.ts\n  label: |\n    enhancement\n}\n'''\n        expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\\n', 'label': 'best practice\\n'}, {'relevant_file': 'src/index2.ts\\n', 'label': 'enhancement'}]}\n        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output\n\n    def test_tab_indent_yaml(self):\n        review_text = '''\\\ncode_suggestions:\n- relevant_file: |\n    src/index.ts\n  label: |\n\\tbest practice\n\n- relevant_file: |\n    src/index2.ts\n  label: |\n    enhancement\n'''\n        expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\\n', 'label': 'best practice\\n'}, {'relevant_file': 'src/index2.ts\\n', 'label': 'enhancement\\n'}]}\n        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output\n\n\n    def test_leading_plus_mark_code(self):\n        review_text = '''\\\ncode_suggestions:\n- relevant_file: |\n    src/index.ts\n  label: |\n    best practice\n  existing_code: |\n+   var router = createBrowserRouter([\n  improved_code: |\n+   const router = createBrowserRouter([\n'''\n        expected_output = {'code_suggestions': [{\n            'relevant_file': 'src/index.ts\\n',\n  
          'label': 'best practice\\n',\n            'existing_code': 'var router = createBrowserRouter([\\n',\n            'improved_code': 'const router = createBrowserRouter([\\n'\n        }]}\n        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='improved_code') == expected_output\n\n\n    def test_inconsistent_indentation_in_block_scalar_yaml(self):\n        \"\"\"\n            This test case represents a situation where the AI outputs the opening '{' with 5 spaces\n            (resulting in an inferred indent level of 5), while the closing '}' is output with only 4 spaces.\n            This inconsistency makes it impossible for the YAML parser to automatically determine the correct\n            indent level, causing a parsing failure.\n\n            The root cause may be the LLM miscounting spaces or misunderstanding the active block scalar context\n            while generating YAML output.\n        \"\"\"\n\n        review_text = '''\\\ncode_suggestions:\n- relevant_file: |\n    tsconfig.json\n  existing_code: |\n     {\n        \"key1\": \"value1\",\n        \"key2\": {\n          \"subkey\": \"value\"\n         }\n    }\n'''\n        expected_json = '''\\\n {\n    \"key1\": \"value1\",\n    \"key2\": {\n      \"subkey\": \"value\"\n     }\n}\n'''\n        expected_output = {\n            'code_suggestions': [{\n                'relevant_file': 'tsconfig.json\\n',\n                'existing_code': expected_json\n            }]\n        }\n        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output\n\n\n    def test_inconsistent_and_insufficient_indentation_in_block_scalar_yaml(self):\n        \"\"\"\n            This test case reproduces a YAML parsing failure where the block scalar content\n            generated by the AI includes inconsistent and insufficient indentation levels.\n\n            The root cause may be the LLM miscounting spaces or misunderstanding the active 
block scalar context\n            while generating YAML output.\n        \"\"\"\n\n        review_text = '''\\\ncode_suggestions:\n- relevant_file: |\n    tsconfig.json\n  existing_code: |\n    {\n      \"key1\": \"value1\",\n      \"key2\": {\n        \"subkey\": \"value\"\n      }\n  }\n'''\n        expected_json = '''\\\n{\n  \"key1\": \"value1\",\n  \"key2\": {\n    \"subkey\": \"value\"\n  }\n}\n'''\n        expected_output = {\n            'code_suggestions': [{\n                'relevant_file': 'tsconfig.json\\n',\n                'existing_code': expected_json\n            }]\n        }\n        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output\n\n\n    def test_wrong_indentation_code_block_scalar(self):\n        review_text = '''\\\ncode_suggestions:\n- relevant_file: |\n    a.c\n  existing_code: |\n  int sum(int a, int b) {\n    return a + b;\n  }\n\n  int sub(int a, int b) {\n    return a - b;\n  }\n'''\n        expected_code_block = '''\\\nint sum(int a, int b) {\n  return a + b;\n}\n\nint sub(int a, int b) {\n  return a - b;\n}\n'''\n        expected_output = {'code_suggestions': [{'relevant_file': 'a.c\\n', 'existing_code': '  int sum(int a, int b) {\\n    return a + b;\\n  }\\n\\n  int sub(int a, int b) {\\n    return a - b;\\n  }\\n'}]}\n        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output\n"
  }
]